# ruff: noqa: E402
import sys
import asyncio
import signal
import time
import os as os_module
import argparse
from contextlib import asynccontextmanager
from logging.handlers import RotatingFileHandler
from pathlib import Path
import uvicorn
import inspect
from typing import Annotated, Any, AsyncGenerator, Protocol, cast
from fastapi import FastAPI, APIRouter, Depends
from fastapi.staticfiles import StaticFiles
from kamiwaza.services.models.api import model_router
from kamiwaza.serving.api import (
    serving_router as serving_router,
    serving_exception_handlers,
    start_ray,
)
from kamiwaza.serving.api_config import config_router
from kamiwaza.serving.config import settings as serving_settings
from kamiwaza.cluster.config import settings as cluster_settings
from kamiwaza.services.vectordb.api import vectordb_router
from kamiwaza.services.catalog.api.utils import setup_datahub_urn_converters

setup_datahub_urn_converters()
from kamiwaza.services.catalog.router import catalog_router
from kamiwaza.services.prompts.api import promptd_api
from kamiwaza.services.embedding.api import embedding_router
from kamiwaza.services.activity.api import activity_router
from kamiwaza.services.ingestion.api import ingestion_router
from kamiwaza.services.retrieval import router as retrieval_router

# Integrated auth service routers
from kamiwaza.services.auth.api import auth_router, forward_auth_router
from kamiwaza.services.authz.routes import router as authz_router
from kamiwaza.services.auth.bootstrap import init_auth
from kamiwaza.cluster.api import cluster_router

from kamiwaza.services.news.api import news_router

from kamiwaza.services.logger.api import router as logger_router
from kamiwaza.services.security import security_router
from kamiwaza.serving.garden.apps.apps_api import (
    app_router as app_serving_router,
    template_router as app_template_router,
)
from kamiwaza.serving.garden.tool.tool_api import tool_router
from kamiwaza.serving.services import ServingService
from kamiwaza.cluster.services import ClusterService
from kamiwaza.serving.traefik import TraefikService
from kamiwaza.node.api import node_router
from kamiwaza.services.activity.middleware import activity_logger
from kamiwaza.scheduler.launch import task_schedule
from kamiwaza.util.netutil import node_hostip
from kamiwaza.lib.util import (
    get_kamiwaza_root,
    port_in_use,
    validate_license,
    is_community,
)
from kamiwaza.cluster.util.bootstrap import bootstrap_config
from kamiwaza.util.garden import initialize_garden_apps
from kamiwaza.dependencies.auth import get_user
from kamiwaza.lib.otel_config import otel_config
from kamiwaza.lib.metrics import metrics_config

import ray
from ray import serve
import logging
from fastapi.middleware.cors import CORSMiddleware
from starlette.middleware.base import BaseHTTPMiddleware
from starlette.requests import Request
from starlette.responses import Response
from kamiwaza.lib.logging import setup_logging
from kamiwaza.lib.logging.utils import get_debug_level
from kamiwaza.lib.logging.audit_context import AuditContextMiddleware


# Minimal protocol to satisfy type checking when invoking Ray Serve deployments.
class _BindableDeployment(Protocol):
    """Structural type for any object that exposes a ``bind`` method.

    Exists purely for static type checking; it declares no behavior of its own.
    """

    # Arguments and return value are intentionally left as ``Any``.
    def bind(self, *args: Any, **_kwargs: Any) -> Any: ...


class StaticFileCacheMiddleware(BaseHTTPMiddleware):
    """Attach a Cache-Control header to responses served under /app-garden-images."""

    async def dispatch(self, request: Request, call_next) -> Response:
        """Run the downstream handler, then tag matching responses as cacheable.

        Responses for any other path are returned untouched.
        """
        downstream_response = await call_next(request)
        if not request.url.path.startswith("/app-garden-images"):
            return downstream_response

        # 14400 seconds = 4 hours
        downstream_response.headers["Cache-Control"] = "public, max-age=14400"
        return downstream_response


# Exception handler setup: the installer module is optional, so remember
# whether it could be imported rather than failing startup when it is absent.
try:
    from kamiwaza.lib.logging.exception_handler import install_exception_handlers
except ImportError:
    exception_handler_available = False
else:
    exception_handler_available = True

# Initialize modern structured logging
# The logging system will automatically detect KAMIWAZA_DEBUG
logger = setup_logging(service_name="kamiwaza-core", component=__name__)

# Publish the resolved root through the environment as well
KAMIWAZA_ROOT = get_kamiwaza_root()
os_module.environ["KAMIWAZA_ROOT"] = KAMIWAZA_ROOT

# Install top-level exception handlers for main application (best effort:
# a failure here is logged as a warning and startup continues)
if exception_handler_available:
    try:
        install_exception_handlers(service_name="kamiwaza-core", component="main")
        _handler_install_details = {
            "event_type": "service_start",
            "service": "kamiwaza-core",
            "component": "main",
            "exception_handlers": True,
        }
        logger.debug(
            "Top-level exception handlers installed for main application",
            extra=_handler_install_details,
        )
    except Exception as e:
        logger.warning(f"Failed to install exception handlers in main: {e}")


def configure_ray_worker_logging():
    """Configure modern structured logging for Ray worker processes.

    This function sets up logging for Ray workers using the standard Kamiwaza
    logging system (setup_logging), which writes to:
    - application.log (text format)
    - application.jsonl (JSON format)
    - audit-events.jsonl (audit events with event_category="audit")
    - debug.log (verbose debugging)

    Additionally, for backward compatibility, logs are also written to
    kamiwazad-core.log which users may expect from previous versions.

    The function returns early when the root logger already has a file
    handler whose path contains "application". If anything in the setup
    raises, the error is printed and ``logging.basicConfig`` is used as a
    fallback so the worker still has logging.

    Note: VLLM logs are handled by ContainerLogCapture since VLLM runs in
    Docker containers. See docs-internal/logging/log-sources-and-sinks.md.
    """

    def _handler_signature(handler: logging.Handler) -> tuple:
        """Identify a handler by class, stream and target file for de-duplication."""
        return (
            handler.__class__,
            getattr(handler, "stream", None),
            getattr(handler, "baseFilename", None),
        )

    def _debug_env_enabled() -> bool:
        """Return True when KAMIWAZA_DEBUG is "true", "1" or "yes" (any case).

        A single parser is used for both the SQLAlchemy quieting and the
        fallback path so that e.g. ``KAMIWAZA_DEBUG=true`` is treated the same
        way everywhere in this function.
        """
        return os_module.getenv("KAMIWAZA_DEBUG", "false").lower() in (
            "true",
            "1",
            "yes",
        )

    try:
        # Honor environment-driven log level (DEBUG/INFO/etc.)
        debug_level = get_debug_level()

        root_logger = logging.getLogger()

        # Capture any pre-existing handlers (Ray installs its own) so we can keep them
        existing_handlers = list(root_logger.handlers)

        # Check if modern logging is already configured
        if any(
            "application" in str(getattr(handler, "baseFilename", ""))
            for handler in existing_handlers
        ):
            logger.debug("Modern logging already configured, skipping")
            return

        # Use the standard Kamiwaza logging system as primary
        # This configures application.log, application.jsonl, audit-events.jsonl, debug.log
        # with proper structured formatters, OTEL correlation, and audit filtering
        setup_logging(
            service_name="kamiwaza-ray-worker",
            component="ray-worker",
            force_reconfigure=True,
        )

        # Add backward-compatible handler for kamiwazad-core.log
        # Users may expect logs here from previous versions
        log_dir = os_module.environ.get("KAMIWAZA_LOG_DIR") or os_module.path.join(
            KAMIWAZA_ROOT, "logs"
        )
        os_module.makedirs(log_dir, exist_ok=True)

        core_log_file = os_module.path.join(log_dir, "kamiwazad-core.log")
        core_handler = RotatingFileHandler(
            core_log_file, maxBytes=50 * 1024 * 1024, backupCount=5  # 50MB
        )
        core_handler.setFormatter(
            logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
        )
        core_handler.setLevel(debug_level)

        # Add to root logger for backward compatibility
        root_logger.addHandler(core_handler)

        # Preserve any pre-existing handlers (e.g., Ray log-to-driver) to avoid breaking `ray logs`
        current_signatures = {_handler_signature(h) for h in root_logger.handlers}
        for handler in existing_handlers:
            if _handler_signature(handler) not in current_signatures:
                root_logger.addHandler(handler)

        root_logger.setLevel(debug_level)

        # Ensure specific module loggers propagate to root (where all handlers are)
        # This ensures modules not yet migrated to modern logging still get captured
        module_loggers = [
            "kamiwaza.serving",
            "kamiwaza.services",
            "kamiwaza.cluster",
            "kamiwaza.services.models",
            "kamiwaza.services.vectordb",
            "kamiwaza.services.catalog",
            "kamiwaza.services.prompts",
            "kamiwaza.services.embedding",
            "kamiwaza.services.activity",
            "kamiwaza.services.auth",
            "kamiwaza.services.retrieval",
            "kamiwaza.services.news",
            "kamiwaza.node",
            "kamiwaza.middleware",
            "kamiwaza.db",
            "kamiwaza.util",
            "kamiwaza.logger",
            "kamiwaza.scheduler",
            "ray",
            "ray.serve",
            "uvicorn",
            "fastapi",
            "httpx",
            "requests",
            "pydantic",
        ]

        for logger_name in module_loggers:
            module_logger = logging.getLogger(logger_name)
            # Clear any existing handlers installed by frameworks to prevent duplicate output
            module_logger.handlers.clear()
            module_logger.setLevel(debug_level)
            # Ensure propagation to root (where all handlers are configured)
            module_logger.propagate = True

        # Set SQLAlchemy loggers to WARNING level to reduce noise (unless in debug mode)
        if not _debug_env_enabled():
            for name in (
                "sqlalchemy",
                "sqlalchemy.engine",
                "sqlalchemy.orm",
                "sqlalchemy.pool",
            ):
                logging.getLogger(name).setLevel(logging.WARNING)

        logger.info("Ray worker logging configured successfully")
        logger.info(
            "Primary logs: application.log, application.jsonl, audit-events.jsonl"
        )
        logger.info("Backward-compatible logs: %s", core_log_file)

    except Exception as e:
        # Fallback to basic logging if setup fails
        print(f"Failed to configure Ray worker logging: {e}")
        basic_level = logging.DEBUG if _debug_env_enabled() else logging.INFO
        logging.basicConfig(
            level=basic_level,
            format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
        )


# We technically should have KAMIWAZA_ROOT from launch.py but we will allow
# this to work the original way. Main wraps this now so that imported libs can
# save config files in the correct spot


@ray.remote(num_cpus=0)
def test_env_vars():
    """Report whether KAMIWAZA_ROOT and KAMIWAZA_LIB_ROOT are visible to this task.

    Returns a human-readable string listing both values when both are set and
    non-empty, otherwise a fixed "not set" message.
    """
    root_value = os_module.getenv("KAMIWAZA_ROOT")
    lib_root_value = os_module.getenv("KAMIWAZA_LIB_ROOT")

    if not (root_value and lib_root_value):
        return "Environment variables are not set."

    return f"Environment variables are set:\nKAMIWAZA_ROOT: {root_value}\nKAMIWAZA_LIB_ROOT: {lib_root_value}"


# Create PongRouter for liveness tests - only emerges when full service is up.
# The /ping and /whoami endpoints in this module are registered on it.
pong_router = APIRouter()


@pong_router.get("/ping")
async def ping():
    """Liveness probe: reply with a fixed status and the current epoch time."""
    current_time = time.time()
    return {"status": "pong", "timestamp": current_time}


@pong_router.get("/whoami")
async def whoami(user_info: Annotated[dict, Depends(get_user)]):
    """Echo back the identity resolved by the ``get_user`` dependency.

    ``id``, ``email``, ``name`` and ``roles`` are required keys of
    ``user_info``; ``request_id`` is optional and reported as ``None`` when
    absent.
    """
    identity = {"user_id": user_info["id"]}
    for field in ("email", "name", "roles"):
        identity[field] = user_info[field]
    identity["request_id"] = user_info.get("request_id")
    return identity


def _flush_config_cache(cache_dir: str) -> None:
    """Delete every regular file directly inside ``cache_dir``.

    A missing directory is a no-op. Sub-directories are left alone. A file
    that disappears between being listed and being unlinked is ignored, since
    the goal (the file being gone) is already met.
    """
    try:
        entries = os_module.listdir(cache_dir)
    except FileNotFoundError:
        return
    for entry in entries:
        entry_path = os_module.path.join(cache_dir, entry)
        if os_module.path.isfile(entry_path):
            try:
                os_module.unlink(entry_path)
            except FileNotFoundError:
                # Already removed by someone else; nothing left to do.
                pass


# flush first cache
config_cache_dir = os_module.path.join(KAMIWAZA_ROOT, ".kamiwaza", "config_cache")
_flush_config_cache(config_cache_dir)

# Remove any runtime config left on disk; absence is fine.
_runtime_config_path = os_module.path.join(
    os_module.environ["KAMIWAZA_ROOT"], "runtime", "runtime_config.json"
)
try:
    os_module.unlink(_runtime_config_path)
except FileNotFoundError:
    pass

# Serve port: cluster setting when set, otherwise 7777. Published via the environment.
KAMIWAZA_SERVE_PORT = cluster_settings.ray_serve_port or 7777
os_module.environ["KAMIWAZA_SERVE_PORT"] = str(KAMIWAZA_SERVE_PORT)

# Serve host: KAMIWAZA_SERVE_HOST when set and non-empty, otherwise all interfaces.
ALL_INTERFACES_HOST = ".".join(["0"] * 4)
KAMIWAZA_SERVE_HOST = (
    os_module.environ.get("KAMIWAZA_SERVE_HOST") or ALL_INTERFACES_HOST
)
os_module.environ["KAMIWAZA_SERVE_HOST"] = KAMIWAZA_SERVE_HOST

# Node host defaults to the serve host when KAMIWAZA_NODE_HOST is not present.
NODE_SERVICE_HOST = os_module.environ.get("KAMIWAZA_NODE_HOST", KAMIWAZA_SERVE_HOST)
os_module.environ["KAMIWAZA_NODE_HOST"] = NODE_SERVICE_HOST

# Only reported, not fatal: the port may legitimately be held by our own Ray Serve.
if port_in_use(KAMIWAZA_SERVE_PORT):
    logger.error(
        f"Port {KAMIWAZA_SERVE_PORT} is already in use; if not our ray serve instance, startup will fail"
    )

# flush real cache (second pass, after the settings above have been read)
_flush_config_cache(config_cache_dir)

# Derive the library root as the parent of the directory holding ClusterService.
cluster_module_file = inspect.getfile(ClusterService)
cluster_dir = os_module.path.dirname(cluster_module_file)
KAMIWAZA_LIB_ROOT = os_module.path.abspath(os_module.path.join(cluster_dir, ".."))
logger.debug(f"lib root is {KAMIWAZA_LIB_ROOT}")
os_module.environ["KAMIWAZA_LIB_ROOT"] = KAMIWAZA_LIB_ROOT


# Command line interface: the parser is built from a small option table so
# each flag's settings sit on one entry. Flags are registered in table order.
parser = argparse.ArgumentParser(
    usage="Use '--standalone' to run without clustering. Specify '--ray-host' and '--ray-port' to connect to an existing Ray cluster. Without these flags, it runs in cluster mode by default. Ray should be run with 'ray start --head'",
    description="Launch the Kamiwaza service in either cluster or standalone mode.",
)
_CLI_OPTIONS = (
    (
        "--standalone",
        {"action": "store_true", "help": "Run in standalone mode without clustering"},
    ),
    (
        "--ray-host",
        {"type": str, "default": None, "help": "Specify the Ray head node host"},
    ),
    (
        "--ray-port",
        {"type": int, "default": None, "help": "Specify the Ray head node port"},
    ),
    (
        "--no-url-prefix",
        {"action": "store_true", "help": "Disable URL prefix for the FastAPI app"},
    ),
)
for _cli_flag, _cli_kwargs in _CLI_OPTIONS:
    parser.add_argument(_cli_flag, **_cli_kwargs)

# Parsed once at import time from the process arguments
args = parser.parse_args()

# Read environment variables from env.sh files if they exist.
# Candidates are collected in priority order; only the FIRST file that can be
# read completely is applied (see the loop at the end of this section).
env_files_to_check = []

# Enterprise edition: /etc/kamiwaza/env.sh
if os_module.path.exists("/etc/kamiwaza/env.sh"):
    env_files_to_check.append("/etc/kamiwaza/env.sh")

# Community edition: kamiwaza_root/env.sh
# KAMIWAZA_ROOT is already resolved at this point, so the path is built
# outside any try block and is always bound.
community_env_path = os_module.path.join(KAMIWAZA_ROOT, "env.sh")
if os_module.path.exists(community_env_path):
    env_files_to_check.append(community_env_path)


def _parse_env_line(raw_line: str) -> "tuple[str, str] | None":
    """Parse one ``env.sh`` line into ``(key, value)``.

    Returns ``None`` for blank lines, comment lines, lines without ``=`` and
    lines whose key is empty. Only a *leading* ``export `` is removed, so a
    value that happens to contain the text "export " is left intact.
    Surrounding double quotes, then single quotes, are stripped from the value.
    """
    stripped = raw_line.strip()
    if not stripped or stripped.startswith("#"):
        return None
    stripped = stripped.removeprefix("export ").lstrip()
    key, separator, value = stripped.partition("=")
    key = key.strip()
    if not separator or not key:
        return None
    # Remove quotes if present
    return key, value.strip('"').strip("'")


def _apply_env_file(env_file_path: str) -> None:
    """Apply the assignments found in ``env_file_path`` to ``os.environ``.

    ``OTEL_*`` variables always override the current environment to avoid
    stale shell values; every other variable is only set when not already
    present. Errors from opening or reading the file propagate to the caller.
    """
    with open(env_file_path, "r", encoding="utf-8") as env_file:
        for raw_line in env_file:
            parsed = _parse_env_line(raw_line)
            if parsed is None:
                continue
            key, value = parsed
            if key.startswith("OTEL_"):
                # Always override OTEL_* vars to ensure consistent telemetry config
                prev = os_module.environ.get(key)
                os_module.environ[key] = value
                if prev and prev != value:
                    logger.debug(
                        f"Overrode environment variable {key} from '{prev}' to '{value}' via {env_file_path}"
                    )
                else:
                    logger.debug(
                        f"Set environment variable {key} from {env_file_path}"
                    )
            elif key not in os_module.environ:
                # Set environment variable if not already set
                os_module.environ[key] = value
                logger.debug(f"Set environment variable {key} from {env_file_path}")


# Apply the first env file that can be read; on failure fall through to the next.
for env_file_path in env_files_to_check:
    try:
        _apply_env_file(env_file_path)
        break  # Use the first env file found
    except Exception as e:
        logger.debug(f"Could not read env file {env_file_path}: {e}")


# --standalone is still accepted but no longer changes anything; just warn.
if args.standalone:
    logger.warning(
        "WARNING: --standalone flag is deprecated, has no effect, and will be removed in a future version"
    )

# Publish the parsed arguments through the environment. Unset host/port are
# exported as empty strings; standalone is always reported as "false".
os_module.environ.update(
    {
        "KAMIWAZA_STANDALONE": "false",
        "KAMIWAZA_RAY_HOST": args.ray_host or "",
        "KAMIWAZA_RAY_PORT": "" if not args.ray_port else str(args.ray_port),
        "KAMIWAZA_NO_URL_PREFIX": str(args.no_url_prefix).lower(),
    }
)

# Cluster mode is unconditionally enabled (the --standalone flag has no effect)
clusterMode = True
os_module.environ["KAMIWAZA_CLUSTER_MODE"] = str(clusterMode).lower()

# Validate license early in startup process - non-gating
logger.info("Performing license validation...")
validate_license()

# Initialize OpenTelemetry for distributed tracing
otel_initialized = otel_config.initialize()

# Initialize OpenTelemetry Metrics and report the outcome
metrics_initialized = metrics_config.initialize()
logger.info(
    "Metrics initialized successfully"
    if metrics_initialized
    else "Metrics disabled or initialization failed"
)

# Global shutdown coordination flag
_shutdown_requested = False


async def _shutdown_ephemeral_deployments() -> int:
    """Purge all ephemeral deployments as part of shutdown.

    Never raises for ordinary errors: a missing cleanup module is logged at
    debug level and any other exception at warning level.

    Returns:
        The number of deployments purged, or 0 when cleanup was unavailable
        or failed.
    """
    logger.info("Shutdown: purging all ephemeral deployments")
    purged_total = 0
    try:
        # Imported lazily so the module being absent is handled below.
        from kamiwaza.serving.garden.apps.cleanup import purge_all_ephemeral_deployments

        summary = await purge_all_ephemeral_deployments(reason="shutdown")
        purged, failed = summary["purged_count"], summary["error_count"]
        logger.info(
            "Shutdown: ephemeral cleanup complete (purged=%d, errors=%d)",
            purged,
            failed,
        )
        purged_total = int(purged)
    except ImportError:
        logger.debug("App garden cleanup not available; skipping shutdown cleanup")
    except Exception:
        logger.warning("Shutdown: ephemeral cleanup failed", exc_info=True)
    return purged_total


@asynccontextmanager
async def lifespan(app: FastAPI) -> AsyncGenerator[None, None]:
    """FastAPI lifespan handler for startup/shutdown coordination.

    On startup, starts the background cleanup task for stuck ephemeral purges.
    On shutdown, ensures all ephemeral app deployments are purged.

    The shutdown steps run in a ``finally`` block so they also execute when an
    exception or cancellation is thrown into the context manager at the
    ``yield`` point, not only on a clean exit.

    Args:
        app: The FastAPI application this lifespan is attached to (unused here).
    """
    global _shutdown_requested

    # Startup - start background cleanup task for stuck purges
    # This must run in the lifespan context so the task runs in the main event loop
    try:
        from kamiwaza.serving.garden.apps.cleanup import start_ephemeral_cleanup

        await start_ephemeral_cleanup()
        logger.info("Lifespan: started ephemeral deployment background cleanup task")
    except ImportError:
        logger.debug("App garden cleanup not available")
    except Exception as e:
        # Best effort: the app still starts without the background task
        logger.warning(f"Lifespan: failed to start cleanup task: {e}")

    logger.info("Lifespan: startup complete")

    try:
        yield
    finally:
        # Shutdown tasks
        logger.info("Lifespan: shutdown initiated")
        _shutdown_requested = True

        # Stop background cleanup task if running
        try:
            from kamiwaza.serving.garden.apps.cleanup import stop_ephemeral_cleanup

            await stop_ephemeral_cleanup()
        except ImportError:
            pass
        except Exception:
            logger.warning("Failed to stop cleanup task", exc_info=True)

        # Purge remaining ephemeral deployments
        await _shutdown_ephemeral_deployments()

        logger.info("Lifespan: shutdown complete")


def _setup_signal_handlers() -> None:
    """Setup signal handlers for graceful shutdown.

    When running under Ray Serve, FastAPI's lifespan shutdown doesn't execute
    because Ray Serve manages the app lifecycle and kills replicas directly.
    This signal handler ensures ephemeral deployments are cleaned up by running
    the cleanup before the process terminates.

    Installs the same handler for SIGTERM and SIGINT. The handler waits at most
    5 seconds for cleanup, then restores the default disposition for the signal
    and re-sends it to this process.
    """

    def signal_handler(signum: int, frame: Any) -> None:
        """Run bounded cleanup, then re-deliver ``signum`` with default handling."""
        global _shutdown_requested
        import concurrent.futures

        sig_name = signal.Signals(signum).name
        logger.info(f"Received {sig_name}, initiating graceful shutdown")
        _shutdown_requested = True

        # Run cleanup in a separate thread with its own event loop.
        # This is necessary because:
        # 1. asyncio.run() fails if an event loop is already running (e.g., Ray Serve context)
        # 2. Signal handlers may interrupt an existing event loop
        def run_cleanup_in_thread():
            loop = asyncio.new_event_loop()
            asyncio.set_event_loop(loop)
            try:
                return loop.run_until_complete(_shutdown_ephemeral_deployments())
            finally:
                loop.close()

        # The executor is deliberately NOT used as a context manager: leaving a
        # ``with`` block calls shutdown(wait=True), which would block until the
        # cleanup thread finishes and make the timeout below meaningless.
        executor = concurrent.futures.ThreadPoolExecutor(max_workers=1)
        try:
            logger.info("Signal handler: running ephemeral deployment cleanup")
            future = executor.submit(run_cleanup_in_thread)
            # Wait up to 5 seconds for cleanup to complete
            # Keep this short - if cleanup takes longer, it will be retried on next startup
            result = future.result(timeout=5)
            logger.info(
                f"Signal handler: cleanup complete, purged {result} deployments"
            )
        except concurrent.futures.TimeoutError:
            logger.warning(
                "Signal handler: cleanup timed out after 5 seconds, will retry on startup"
            )
        except Exception as e:
            logger.error(f"Signal handler: cleanup failed: {e}", exc_info=True)
        finally:
            # Release the executor without waiting for a still-running cleanup.
            executor.shutdown(wait=False)

        # After cleanup, restore default signal behavior and re-raise to exit
        # This ensures the process actually terminates after cleanup
        signal.signal(signum, signal.SIG_DFL)
        os_module.kill(os_module.getpid(), signum)

    signal.signal(signal.SIGTERM, signal_handler)
    signal.signal(signal.SIGINT, signal_handler)


class _CleanupUvicornServer(uvicorn.Server):
    """Custom uvicorn Server that runs ephemeral cleanup before shutdown."""

    def handle_exit(self, sig: int, frame: Any) -> None:
        """Run ephemeral cleanup once, then delegate to uvicorn's ``handle_exit``.

        Cleanup is skipped when ``_shutdown_requested`` is already set, so
        repeated signals do not start it again. The wait for cleanup is bounded
        to 5 seconds; on timeout the cleanup thread is left running and
        uvicorn's shutdown proceeds.

        Args:
            sig: Signal number received.
            frame: Current stack frame, passed through to the parent handler.
        """
        global _shutdown_requested
        import concurrent.futures

        sig_name = signal.Signals(sig).name
        logger.info(f"Uvicorn received {sig_name}, running cleanup before shutdown")

        if not _shutdown_requested:
            _shutdown_requested = True

            # We can't use asyncio.run() because uvicorn's event loop is running.
            # Run the async cleanup in a separate thread with its own event loop.
            def run_cleanup_in_thread():
                loop = asyncio.new_event_loop()
                asyncio.set_event_loop(loop)
                try:
                    return loop.run_until_complete(_shutdown_ephemeral_deployments())
                finally:
                    loop.close()

            # Not used as a context manager on purpose: exiting a ``with`` block
            # calls shutdown(wait=True), which would wait for the cleanup thread
            # and defeat the timeout below.
            executor = concurrent.futures.ThreadPoolExecutor(max_workers=1)
            try:
                logger.info("Pre-shutdown: running ephemeral deployment cleanup")
                future = executor.submit(run_cleanup_in_thread)
                # Wait up to 5 seconds for cleanup to complete
                # Keep this short - if cleanup takes longer, it will be retried on next startup
                cleanup_result = future.result(timeout=5)
                logger.info(
                    f"Pre-shutdown: cleanup complete, purged {cleanup_result} deployments"
                )
            except concurrent.futures.TimeoutError:
                logger.warning(
                    "Pre-shutdown: cleanup timed out after 5 seconds, will retry on startup"
                )
            except Exception as e:
                logger.error(f"Pre-shutdown: cleanup failed: {e}", exc_info=True)
            finally:
                # Release the executor without waiting for a still-running cleanup.
                executor.shutdown(wait=False)

        # Now call parent's handle_exit to trigger uvicorn shutdown
        super().handle_exit(sig, frame)


def _run_uvicorn_with_cleanup(app_to_run, host: str, port: int) -> None:
    """Serve ``app_to_run`` through the cleanup-aware uvicorn server.

    The application is wrapped in a ``uvicorn.Config`` bound to ``host`` and
    ``port`` and handed to ``_CleanupUvicornServer``, whose ``handle_exit``
    override runs ephemeral deployment cleanup before delegating to uvicorn's
    own exit handling.

    Args:
        app_to_run: ASGI application to serve.
        host: Interface address to bind.
        port: TCP port to bind.
    """
    uvicorn_config = uvicorn.Config(app_to_run, host=host, port=port)
    _CleanupUvicornServer(uvicorn_config).run()


# Register the process signal handlers that drive graceful shutdown.
_setup_signal_handlers()

# Swagger UI display options, chosen so schemas are easier to browse.
swagger_ui_config = dict(
    defaultModelsExpandDepth=1,  # Expand schemas section
    defaultModelExpandDepth=1,  # Expand model properties one level
    defaultModelRendering="model",  # Show schema structure instead of example
    docExpansion="list",  # Keep operations collapsed but visible
)

# Build the FastAPI app. The "/api" root path is applied unless the
# --no-url-prefix flag was given; every other constructor argument is shared.
_fastapi_kwargs = {
    "lifespan": lifespan,
    "swagger_ui_parameters": swagger_ui_config,
}
if not args.no_url_prefix:
    _fastapi_kwargs["root_path"] = "/api"
app = FastAPI(**_fastapi_kwargs)

# Auth wiring is lightweight, so it is applied to the app unconditionally.
init_auth(app)

# App-garden preview images are served from a cache directory under the user's
# home; the directory is created up front so the mount always has a target.
preview_image_root = Path.home().joinpath(".kamiwaza", "app_cache")
preview_image_root.mkdir(parents=True, exist_ok=True)
_preview_static_files = StaticFiles(
    directory=preview_image_root, html=False, check_dir=False
)
app.mount("/app-garden-images", _preview_static_files, name="app-garden-images")


# CORS: the allowlist is read from AUTH_ALLOWED_ORIGINS (comma-separated); the
# fallback covers local development origins and kamiwaza.ai.
allowed_origins = os_module.getenv(
    "AUTH_ALLOWED_ORIGINS",
    "http://localhost:3000,http://localhost:5173,https://kamiwaza.ai,https://localhost",
)
# Strip whitespace around each entry and drop entries that end up empty.
origins = [entry for entry in map(str.strip, allowed_origins.split(",")) if entry]

_cors_options = {
    "allow_origins": origins,
    "allow_credentials": True,
    "allow_methods": ["GET", "POST", "PUT", "DELETE", "PATCH", "HEAD", "OPTIONS"],
    "allow_headers": ["*"],
}
app.add_middleware(CORSMiddleware, **_cors_options)
logger.info(f"CORS allow_origins={origins}")

# Audit context middleware: pulls user/session/IP info off each request and
# exposes it through context variables to @audit_log decorated service methods.
app.add_middleware(AuditContextMiddleware)
logger.debug("Audit context middleware enabled")

# Cache headers for the static files mounted under app-garden-images.
app.add_middleware(StaticFileCacheMiddleware)
logger.debug("Static file cache middleware enabled")

# Register every serving exception handler on the app.
for exc_class, handler in serving_exception_handlers.items():
    app.add_exception_handler(exc_class, handler)

# Routers are registered in exactly this order. Each entry pairs a router with
# the keyword arguments passed to app.include_router().
_router_registrations = (
    # Health checks: the pong router is always present.
    (pong_router, {"tags": ["health"]}),
    (auth_router, {"prefix": "/auth", "tags": ["auth"]}),
    (authz_router, {"tags": ["authz"]}),
    (forward_auth_router, {"tags": ["forward-auth"]}),
    (model_router, {"tags": ["models"]}),
    (config_router, {"prefix": "/config", "tags": ["config"]}),
    (serving_router, {"prefix": "/serving", "tags": ["serving"]}),
    (vectordb_router, {"prefix": "/vectordb", "tags": ["vectordb"]}),
    # The catalog prefix is baked in at the router itself.
    (catalog_router, {"tags": ["catalog"]}),
    (promptd_api, {"prefix": "/prompts", "tags": ["prompts"]}),
    (embedding_router, {"prefix": "/embedding", "tags": ["embedding"]}),
    (cluster_router, {"prefix": "/cluster", "tags": ["cluster"]}),
    (activity_router, {"prefix": "/activity", "tags": ["activity"]}),
    (ingestion_router, {"prefix": "/ingestion", "tags": ["ingestion"]}),
    (retrieval_router, {"prefix": "/retrieval", "tags": ["retrieval"]}),
    (news_router, {"prefix": "/news", "tags": ["news"]}),
    # TODO: add (dde_router, {"tags": ["dde"]}) here once DDE router ready
    (logger_router, {"prefix": "/logger", "tags": ["logger"]}),
    (app_serving_router, {"prefix": "/apps", "tags": ["apps"]}),
    (app_template_router, {"prefix": "/apps", "tags": ["app_templates"]}),
    (tool_router, {"tags": ["Tool Shed"]}),
    (security_router, {"prefix": "/security", "tags": ["security"]}),
)
for _router, _include_options in _router_registrations:
    app.include_router(_router, **_include_options)

# The activity logger is attached as HTTP middleware after the routers.
app.middleware("http")(activity_logger)


# The inference toggle is still honoured as a setting, but this version ships
# without the inference router, so enabling it only produces a warning.
if serving_settings.enable_inference:
    logger.warning(
        "Inference is set enabled, but the inference router is not enabled in this version"
    )
    # app.include_router(inference_router, prefix="/inference", tags=["inference"])

# The node router is mounted the same way in both modes; only the host app differs.
# Non-cluster mode is legacy: the codebase has normalized on always activating ray.
_node_router_options = {"prefix": "/node", "tags": ["node"]}
if not clusterMode:
    # Legacy single-endpoint mode: /node lives on the main app.
    app.include_router(node_router, **_node_router_options)
else:
    # Every node runs its own node service, outside ray, on 7788; a somewhat
    # placeholder-y home for node-to-node comms, bypass-ray checks, etc.
    nodeapp = FastAPI()
    nodeapp.include_router(node_router, **_node_router_options)

if clusterMode:
    # Ray launch; Kamiwaza "normal practice" is to launch with `start-ray.sh` and then
    # this, which preserves environment variables well. For endpoint/community there is
    # a decent chance that we preload enough things that we don't implode without that,
    # but it is best practice anyhow.
    if not ray.is_initialized():

        def _start_ray_and_wait(env):
            """Call ``start_ray`` with ``env`` and wait for Ray to report initialized.

            The address is built from ``args.ray_host`` / ``args.ray_port`` (``None``
            when no host was given). After starting, ``ray.is_initialized()`` is
            polled once per second; if Ray is still not up after 30 seconds an
            error is logged and the function returns without raising.

            Args:
                env: The ``runtime_env`` dict handed to ``start_ray``.
            """
            # log_to_driver is enabled only when KAMIWAZA_DEBUG_RAY is exactly "true".
            options = {
                "log_to_driver": os_module.getenv("KAMIWAZA_DEBUG_RAY", "false") == "true"
            }
            if not args.ray_host:
                address = None
            elif args.ray_port:
                address = f"{args.ray_host}:{args.ray_port}"
            else:
                address = f"{args.ray_host}"
            start_ray(address=address, runtime_env=env, options=options)

            # Up to 30 one-second sleeps, then one final check before giving up.
            for _ in range(30):
                if ray.is_initialized():
                    return
                time.sleep(1)
            if not ray.is_initialized():
                logger.error(
                    "Ray is not initialized after 30 seconds. Giving up on waiting for ray."
                )

        if not serving_settings.ray_init_address:
            logger.warning(
                "WARNING: Kamiwaza is not in standalone mode, and serving.settings.ray_init_address is not set; will try to detect ray via /tmp/ray/ray_current_cluster and start as head if not detected"
            )

        runtime_env = {
            "env_vars": {
                "KAMIWAZA_ROOT": KAMIWAZA_ROOT,
                "KAMIWAZA_LIB_ROOT": KAMIWAZA_LIB_ROOT,
                "PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION": "python",
                "KAMIWAZA_RUNNING": "true",  # Set before ray.init() so workers inherit it
            }
        }

        if "PYTHONPATH" in os_module.environ:
            runtime_env["env_vars"]["PYTHONPATH"] = os_module.environ["PYTHONPATH"]

        # Set in current process as well
        os_module.environ["KAMIWAZA_RUNNING"] = "true"

        # First start: only needed so node_hostip() below can run with Ray up.
        _start_ray_and_wait(runtime_env)

        logger.debug("Getting primary hostname from node_hostip()")
        primary_hostname = node_hostip()
        if primary_hostname:
            logger.debug(f"Setting KAMIWAZA_PRIMARY_NODE env var to {primary_hostname}")
            os_module.environ["KAMIWAZA_PRIMARY_NODE"] = primary_hostname

        if primary_node := os_module.environ.get("KAMIWAZA_PRIMARY_NODE"):
            logger.debug(
                f"Setting KAMIWAZA_PRIMARY_NODE in runtime_env to {primary_node}"
            )
            runtime_env["env_vars"]["KAMIWAZA_PRIMARY_NODE"] = primary_node

        # Push selected env vars into runtime_env for worker processes:
        #   - anything containing KAMIWAZA
        #   - AUTH_* (which includes AUTH_GATEWAY_*), KEYCLOAK_*, OTEL_*,
        #     FORWARDAUTH_* since workers need auth/telemetry settings
        #   - HF_* for downloads (this covers HF_TOKEN)
        #   - PYTHONDONTWRITEBYTECODE to preserve the no-bytecode flag
        # Keys already present in runtime_env (set explicitly above) are never
        # overwritten. KAMIWAZA_RUNNING was already placed there before the
        # first start.
        _forwarded_prefixes = ("AUTH_", "KEYCLOAK_", "OTEL_", "HF_", "FORWARDAUTH_")
        for key, value in os_module.environ.items():
            if (
                "KAMIWAZA" in key
                or key.startswith(_forwarded_prefixes)
                or key == "PYTHONDONTWRITEBYTECODE"
            ):
                runtime_env["env_vars"].setdefault(key, value)

        ## Here we restart ray to update the new runtime env since we "booted".
        ## Another way to go about this would be to put hostname discovery into
        ## its own module that does nothing but that, return it from a subprocess
        ## and start ray; on the other hand, it probably saves very little,
        ## because we then still have to start ray twice and we have a
        ## child process to manage.

        ## Note: this does not actually restart ray, it restarts ray IN THIS PROCESS.
        ## In kz ray is initialized by start-ray.sh
        ray.shutdown()
        _start_ray_and_wait(runtime_env)

        # Normal production path
        bootstrap_config()

    # Round-trip through a Ray task and log what it reports about its environment.
    result = test_env_vars.remote()
    logger.debug("Ray Remote vars: %s", ray.get(result))

    # Before we start the fastapi service back up, clean up any stale routes in
    # Traefik: only route prefixes belonging to known deployments are kept.
    ss = ServingService()
    deployments = ss.list_deployments()
    deployment_ids = [str(d.id) for d in deployments]
    ts = TraefikService()
    asyncio.run(ts.flush_routes(prefixes_to_keep=deployment_ids))

    # Clean up any stale pending deployment flags.
    from kamiwaza.services.models.services import ModelService

    ms = ModelService()
    cleanup_result = asyncio.run(ms.cleanup_stale_pending_deployments())
    if cleanup_result["cleaned_count"] > 0:
        logger.info(
            f"Cleaned up {cleanup_result['cleaned_count']} stale pending deployment flags"
        )
    # Reported independently of the count above, so an error that accompanies a
    # partial cleanup is not silently dropped.
    if cleanup_result["error"]:
        logger.warning(f"Error cleaning pending deployments: {cleanup_result['error']}")

    logger.debug("Starting Ray Serve")
    # Make it visible in the logs when Ray Serve ends up on the all-interfaces address.
    if KAMIWAZA_SERVE_HOST == ALL_INTERFACES_HOST:
        logger.warning(
            "KAMIWAZA_SERVE_HOST not set; binding Ray Serve to all interfaces"
        )
    _serve_http_settings = {
        "host": KAMIWAZA_SERVE_HOST,
        "port": KAMIWAZA_SERVE_PORT,
        "location": "EveryNode",
    }
    http_options = serve.config.HTTPOptions(**_serve_http_settings)
    serve.start(detached=True, http_options=http_options)

    def _api_env_number(var_name, fallback):
        """Return the environment variable ``var_name`` (or ``fallback``) as a float."""
        return float(os_module.getenv(var_name, fallback))

    # Autoscaling knobs for the API deployment; each can be overridden through
    # its KAMIWAZA_API_* environment variable. Values are parsed as floats
    # first and then truncated to int.
    _api_autoscaling = {
        "min_replicas": int(
            _api_env_number("KAMIWAZA_API_MIN_REPLICAS", 1 if is_community() else 2)
        ),
        "initial_replicas": int(
            _api_env_number(
                "KAMIWAZA_API_INITIAL_REPLICAS", 1 if is_community() else 2
            )
        ),
        "max_replicas": int(_api_env_number("KAMIWAZA_API_MAX_REPLICAS", 16)),
        "target_ongoing_requests": int(
            _api_env_number("KAMIWAZA_API_TARGET_ONGOING_REQUESTS", 256 * 0.6)
        ),
    }

    # Ray Serve wrapper around the FastAPI app. The original FastAPI endpoints
    # are served as-is through serve.ingress, so the class defines no request
    # methods of its own; HTTP settings follow the Ray 2.10 HTTPOptions API.
    @serve.deployment(
        name="kamiwaza_api",
        autoscaling_config=_api_autoscaling,
        max_ongoing_requests=int(
            _api_env_number("KAMIWAZA_API_MAX_ONGOING_REQUESTS", 400)
        ),
        max_replicas_per_node=int(
            _api_env_number("KAMIWAZA_API_MAX_REPLICAS_PER_NODE", 4)
        ),
        ray_actor_options={
            "num_cpus": _api_env_number("KAMIWAZA_API_NUM_CPUS_PER_REPLICA", 0.5),
        },
    )
    @serve.ingress(app)
    class FastAPIWrapper:
        def __init__(self):
            """Set up logging and OpenTelemetry inside the Ray Serve worker."""
            worker_log = logging.getLogger(__name__)
            # Logging must be configured per Ray worker process.
            configure_ray_worker_logging()

            # OpenTelemetry is initialized here so each Serve worker gets it.
            from kamiwaza.lib.otel_config import otel_config

            if otel_config.initialize():
                worker_log.info("OTEL initialized successfully in Ray Serve worker")
            else:
                worker_log.warning("OTEL initialization failed in Ray Serve worker")

            worker_log.info("FastAPIWrapper.__init__ called")

    # Deploy the wrapped API under /api; any failure here is fatal for startup.
    try:
        deployment_handle = cast(_BindableDeployment, FastAPIWrapper)
        _bound_api = deployment_handle.bind()
        serve.run(_bound_api, route_prefix="/api")
    except Exception as serve_error:
        logger.error(f"Failed to start Ray Serve: {serve_error!s}")
        sys.exit(1)

    logger.debug("Starting Cluster Service")
    try:
        cluster_service = ClusterService()
        cluster_service.cluster_init()

        # Refresh hardware information on API restart to ensure fresh GPU data.
        # A failure here is logged but does not abort startup.
        logger.debug("Refreshing hardware information on API startup...")
        try:
            refresh_success = cluster_service.refresh_hardware_info()
        except Exception as refresh_error:
            logger.warning(
                f"Hardware refresh error during API startup: {refresh_error}"
            )
        else:
            logger.debug(
                "Hardware refresh on API startup returned: %s", refresh_success
            )

    except Exception:
        # Cluster initialization is mandatory in cluster mode: log the full
        # traceback and terminate.
        import traceback

        logger.error(f"Error initializing cluster: {traceback.format_exc()}; exiting")
        # sys.exit rather than the site-provided exit(), which is meant for
        # interactive use and is not guaranteed to exist.
        sys.exit(1)

    # Load garden apps information.
    initialize_garden_apps()

    # Purge ephemeral app deployments orphaned by a previous crash. The
    # recurring background cleanup is started by the FastAPI lifespan handler
    # instead, so that it runs in the main event loop for the app's lifetime.
    try:
        from kamiwaza.serving.garden.apps.cleanup import (
            cleanup_orphaned_ephemeral_on_startup,
        )

        startup_cleanup_result = asyncio.run(cleanup_orphaned_ephemeral_on_startup())
        _purged = startup_cleanup_result["purged_count"]
        if _purged > 0:
            logger.info(
                f"Startup: cleaned up {_purged} orphaned ephemeral deployments from previous run"
            )
    except ImportError:
        logger.debug("App garden cleanup not available")
    except Exception as cleanup_error:
        logger.warning(f"Error during startup ephemeral cleanup: {cleanup_error}")

    # Import the model guide data; any failure is logged and startup continues.
    logger.info("Initializing model guide data...")
    try:
        from kamiwaza.services.models.services import ModelService

        model_service = ModelService()
        import_result = asyncio.run(model_service.import_model_guide())
        if "error" in import_result:
            logger.warning(
                f"Failed to initialize model guide data: {import_result.get('error')}"
            )
        else:
            logger.info("Model guide data initialized successfully")
    except Exception as guide_error:
        logger.warning(f"Failed to initialize model guide data: {guide_error}")

    logger.debug("Starting Scheduled Tasks")
    task_schedule.start_scheduled_tasks()

    # The /node endpoint comes up as a separate service. Running it here also
    # keeps this process alive, which stops ray from exiting once serve.run()
    # is "done" (i.e. initialized).
    if __name__ == "__main__":
        # Logging setup for the node service.
        configure_ray_worker_logging()
        if NODE_SERVICE_HOST == ALL_INTERFACES_HOST:
            logger.warning(
                "KAMIWAZA_NODE_HOST not set; binding node service to all interfaces"
            )
        # Serve through the cleanup-aware wrapper: its server runs ephemeral
        # deployment cleanup in handle_exit before handing off to uvicorn's
        # shutdown.
        _node_service_bind = {"host": NODE_SERVICE_HOST, "port": 7788}
        _run_uvicorn_with_cleanup(nodeapp, **_node_service_bind)

else:
    # "Cluster" is a bit of a misnomer here, because cluster_service also holds
    # kamiwaza metadata. We could disable things like the cluster page, since
    # placement groups are never useful in single-node mode, but we keep it for
    # consistency. This runs after the clusterMode check so that cluster_init()
    # can pick up the ray info generated by ray_init().

    # With no ray running, hostname resolution should be done locally.
    if primary_hostname := node_hostip():
        os_module.environ["KAMIWAZA_PRIMARY_NODE"] = primary_hostname

    try:
        cluster_service = ClusterService()
        cluster_service.cluster_init()
    except Exception:
        # TODO: again, probably this becomes fatal at some point
        import traceback

        _cluster_init_trace = traceback.format_exc()
        logger.error(
            f"Non-Fatal: Error initializing cluster: {_cluster_init_trace}; will continue but default resources or node may not be registered"
        )

    # Load garden apps information.
    initialize_garden_apps()

    # Import the model guide data; a failure is logged and startup continues.
    logger.info("Initializing model guide data...")
    try:
        from kamiwaza.services.models.services import ModelService

        model_service = ModelService()
        import_result = asyncio.run(model_service.import_model_guide())
        if "error" in import_result:
            _guide_error = import_result.get("error")
            logger.warning(f"Failed to initialize model guide data: {_guide_error}")
        else:
            logger.info("Model guide data initialized successfully")
    except Exception as guide_exc:
        logger.warning(f"Failed to initialize model guide data: {guide_exc}")

    # Start the scheduled background tasks.
    task_schedule.start_scheduled_tasks()

    if __name__ == "__main__":
        # Configure logging for standalone mode
        configure_ray_worker_logging()
        # Run all the APIs together, single-node mode
        logger.debug("Kamiwaza: Debug logging active (if you see this)")
        print("Starting uvicorn in non-clusterMode: unsupported, good luck!")
        if KAMIWAZA_SERVE_HOST == ALL_INTERFACES_HOST:
            logger.warning(
                "KAMIWAZA_SERVE_HOST not set; binding FastAPI app to all interfaces"
            )
        # No `workers` argument: uvicorn.run() only accepts workers > 1 when the
        # application is given as an import string. Passed an app object, it
        # logs a warning and exits with status 1, so the server would never
        # start. The app therefore runs in this single process.
        uvicorn.run(
            app,
            host=KAMIWAZA_SERVE_HOST,
            port=KAMIWAZA_SERVE_PORT,
            limit_concurrency=100,
        )
