#!/bin/bash
set -euo pipefail

# KNOWN ISSUE: If SELinux is in enforcing mode, it may block TLS handshakes 
# between host and Docker containers. If you see "Handshake read failed" errors
# when connecting to etcd, try: sudo setenforce 0
# Or check: sudo ausearch -m avc -ts recent | grep -i etcd

# Logging functions for clean output
# Print an informational message ($1) on stdout, tagged with [INFO].
log_info() {
    printf '[INFO] %s\n' "$1"
}

# Print a warning message ($1) on stdout, tagged with [WARN].
log_warn() {
    printf '[WARN] %s\n' "$1"
}

# Print an error message ($1), tagged with [ERROR].
# Note: like the other log helpers this writes to stdout, not stderr.
log_error() {
    printf '[ERROR] %s\n' "$1"
}

# Print a success message ($1) on stdout, tagged with [SUCCESS].
log_success() {
    printf '[SUCCESS] %s\n' "$1"
}

# Print a banner announcing a new step ($1): a blank line, a rule,
# "STEP: <name>", and a closing rule, all on stdout.
log_step() {
    local rule="========================================"
    printf '\n%s\nSTEP: %s\n%s\n' "$rule" "$1" "$rule"
}

# Decide whether a file is acceptable to `source`.
# Arguments: $1 - path of the file to check
# Outputs:   diagnostics on stderr
# Returns:   1 when the path is missing, is not a regular file, or is a
#            symlink; 0 otherwise. An unexpected owner only produces a
#            warning and does not change the result.
verify_safe_source() {
    local candidate="$1"
    local owner_uid
    local my_uid

    if ! [[ -e "$candidate" ]]; then
        printf '%s\n' "[ERROR] Required file $candidate not found" >&2
        return 1
    elif ! [[ -f "$candidate" ]]; then
        printf '%s\n' "[ERROR] $candidate is not a regular file" >&2
        return 1
    elif [[ -L "$candidate" ]]; then
        # -f follows symlinks, so a link to a regular file reaches this test.
        printf '%s\n' "[ERROR] Refusing to source symlinked file $candidate" >&2
        return 1
    fi

    # Try the `stat -c` form first and fall back to the `stat -f` form when
    # the first one is not accepted by the local stat implementation.
    owner_uid=$(stat -c '%u' "$candidate" 2>/dev/null) \
        || owner_uid=$(stat -f '%u' "$candidate")

    my_uid=$(id -u)
    case "$owner_uid" in
        "$my_uid" | 0)
            # Owned by the invoking user or by root: nothing to report.
            ;;
        *)
            printf '%s\n' "[WARN] $candidate is owned by UID $owner_uid (expected $my_uid or root)" >&2
            ;;
    esac

    return 0
}



## EXAMPLE of env vars that should be set before bringing up containers
# # On manager node:
# export KAMIWAZA_SWARM_HEAD=true

# # On worker nodes:
# export KAMIWAZA_SWARM_TARGET=10.1.0.4  # IP of any manager node

# NOTE: EXCLUDE_CONTAINERS (comma separated) is split into the excluded_containers array further below, once all exclusions have been added.

# Offline mode is requested either through OFFLINE_MODE=true in the
# environment or by a .kamiwaza-offline marker file in the current directory.
# Afterwards OFFLINE_MODE is always exactly "true" or "false".
if [[ "${OFFLINE_MODE:-}" != "true" && ! -f .kamiwaza-offline ]]; then
    OFFLINE_MODE=false
else
    OFFLINE_MODE=true
    log_info "OFFLINE_MODE enabled - will not pull Docker images"
fi

# KAMIWAZA_USE_NV_VLLM is accepted case-insensitively as true/1/yes/on;
# any other value (or no value) leaves use_nv_vllm at "false".
use_nv_vllm=false
case "${KAMIWAZA_USE_NV_VLLM:+${KAMIWAZA_USE_NV_VLLM,,}}" in
    true | 1 | yes | on)
        use_nv_vllm=true
        ;;
esac

# Resolve the directory that contains this script (following symlinks) so the
# helper files are loaded from next to it, independent of the caller's
# working directory.
SCRIPT_DIR=$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")

# set-kamiwaza-root.sh is mandatory: abort when it is missing or rejected by
# verify_safe_source. The failure message goes to stderr, matching the
# common.sh branch below.
if verify_safe_source "${SCRIPT_DIR}/set-kamiwaza-root.sh"; then
    # shellcheck disable=SC1091
    source "${SCRIPT_DIR}/set-kamiwaza-root.sh"
else
    echo "Error: set-kamiwaza-root.sh not found or unsafe in ${SCRIPT_DIR}" >&2
    exit 1
fi

# common.sh is mandatory as well and is subject to the same checks.
if verify_safe_source "${SCRIPT_DIR}/common.sh"; then
    # shellcheck disable=SC1091
    source "${SCRIPT_DIR}/common.sh"
else
    echo "Error: common.sh not found or unsafe" >&2
    exit 1
fi

# Enable command tracing when KAMIWAZA_DEBUG is truthy.
# NOTE(review): is_truthy is not defined in this file; presumably it comes
# from one of the helpers sourced above - confirm.
if is_truthy "${KAMIWAZA_DEBUG:-false}"; then
    set -x
fi

# Detect Docker Compose (v2 plugin preferred, fallback to v1).
# DOCKER_COMPOSE holds the command line to use; it is expanded unquoted by
# later code so that "docker compose" splits into command + subcommand.
# (This detection used to appear twice in a row; one copy is sufficient.)
DOCKER_COMPOSE="docker compose"
if ! docker compose version >/dev/null 2>&1; then
    if command -v docker-compose >/dev/null 2>&1; then
        DOCKER_COMPOSE="docker-compose"
    else
        echo "Error: Docker Compose not found. Install Docker Compose v2 (docker compose) or v1 (docker-compose)." >&2
        exit 1
    fi
fi

RESET_ETCD=0
RESET_HARD=0
UPGRADE_DATAHUB=false

# Scan the given arguments for the flags this script understands.
# Globals:   RESET_ETCD, RESET_HARD, UPGRADE_DATAHUB (written; the latter is
#            exported when set), _kamiwaza_remaining_args (written: every
#            argument that is not a recognised flag, in original order)
# Arguments: the script's command-line arguments
_kamiwaza_parse_flags() {
    local arg
    _kamiwaza_remaining_args=()
    for arg in "$@"; do
        case "$arg" in
            --reset-etcd)
                RESET_ETCD=1
                ;;
            --hard)
                RESET_HARD=1
                ;;
            --upgrade-datahub)
                UPGRADE_DATAHUB=true
                export UPGRADE_DATAHUB
                ;;
            *)
                _kamiwaza_remaining_args+=("$arg")
                ;;
        esac
    done
}

_kamiwaza_parse_flags "$@"

# Remove exactly the recognised flags from the positional parameters.
# The previous implementation called `shift` inside `for arg in "$@"`, which
# always drops $1 - not the flag that matched - so `etcd --reset-etcd` left
# "--reset-etcd" (instead of "etcd") as $1 for later code.
# The ${arr[@]+...} form keeps `set -u` happy on bash releases that treat an
# empty array as unset.
set -- ${_kamiwaza_remaining_args[@]+"${_kamiwaza_remaining_args[@]}"}


# Load environment overrides. The system-wide file takes precedence over the
# copy under KAMIWAZA_ROOT; at most one of the two is sourced.
if [[ -f /etc/kamiwaza/env.sh ]]; then
    . /etc/kamiwaza/env.sh
elif [[ -f "${KAMIWAZA_ROOT}/env.sh" ]]; then
    . "${KAMIWAZA_ROOT}/env.sh"
fi

# Ensure environment defaults are available before observability setup
export KAMIWAZA_ENV="${KAMIWAZA_ENV:-default}"

# Set up environment variables (ARCH, KAMIWAZA_LOG_DIR, OTEL/Loki/Jaeger defaults)
# Note: This function handles both OTEL enabled and disabled states properly
# and sets ARCH/KAMIWAZA_LOG_DIR which are needed even when OTEL is disabled
setup_common_environment

# Print every address configured on a local interface, one per line.
# Uses ifconfig when available and falls back to `ip` otherwise.
_kamiwaza_local_ips() {
    if command -v ifconfig >/dev/null 2>&1; then
        # Older net-tools prints "inet addr:1.2.3.4"; strip that prefix so the
        # bare address can be compared.
        ifconfig 2>/dev/null | awk '/inet/ { sub(/^addr:/, "", $2); print $2 }'
    elif command -v ip >/dev/null 2>&1; then
        # `ip -o addr show` prints "<idx>: <if> inet <addr>/<prefix> ...".
        ip -o addr show 2>/dev/null | awk '{ sub(/\/.*/, "", $4); print $4 }'
    fi
}

# Succeed when $1 is exactly one of this host's addresses.
# The address list is captured first and matched as a fixed, whole-line
# string: dots in the IP are not regex wildcards, and grep -q cannot break
# an upstream pipeline stage under `set -o pipefail`.
_kamiwaza_is_local_ip() {
    local wanted="$1"
    local addresses
    addresses=$(_kamiwaza_local_ips) || true
    grep -Fxq -- "$wanted" <<<"$addresses"
}

# if we are the non-head node exclude current single-host containers
# IS_WORKER: 0 = head (or single node), 1 = worker, 999 = not determined yet.
IS_WORKER=999
if [ -n "${KAMIWAZA_HEAD_IP:-}" ]; then
    if [ -n "${KAMIWAZAD_IS_WORKER:-}" ]; then
        # An explicit setting wins over auto-detection.
        IS_WORKER=$KAMIWAZAD_IS_WORKER
    elif _kamiwaza_is_local_ip "${KAMIWAZA_HEAD_IP}"; then
        # The head IP is one of our own addresses: this is the head node.
        IS_WORKER=0
    else
        IS_WORKER=1
    fi
    export KAMIWAZAD_IS_WORKER=$IS_WORKER
    if [ "$IS_WORKER" -eq 0 ]; then
        EXCLUDE_CONTAINERS=${KAMIWAZA_EXCLUDE_CONTAINERS:-qdrant,nonexistant}
        export KAMIWAZA_IS_HEAD=true
    else
        export KAMIWAZA_SWARM_TARGET=$KAMIWAZA_HEAD_IP
        EXCLUDE_CONTAINERS=${KAMIWAZA_EXCLUDE_CONTAINERS:-qdrant,nonexistant,cockroach,milvus,datahub}
    fi
else
    # No head IP configured: treat this host as a non-worker.
    export KAMIWAZAD_IS_WORKER=0
    EXCLUDE_CONTAINERS=${KAMIWAZA_EXCLUDE_CONTAINERS:-qdrant,nonexistant}
fi

# Append $1 to the comma-separated EXCLUDE_CONTAINERS list unless the list
# already mentions it. The check is a substring match on the whole list,
# which is the same test the individual stanzas used before.
_kamiwaza_exclude() {
    if [[ "$EXCLUDE_CONTAINERS" != *"$1"* ]]; then
        EXCLUDE_CONTAINERS="${EXCLUDE_CONTAINERS},$1"
    fi
}

# Split EXCLUDE_CONTAINERS on commas/whitespace into the excluded_containers
# array. Pathname expansion is switched off while splitting so an entry such
# as "*" stays literal instead of expanding to file names; the previous
# noglob state is restored afterwards. Empty entries are dropped.
_kamiwaza_split_exclusions() {
    local had_noglob=false
    if [[ $- == *f* ]]; then
        had_noglob=true
    fi
    set -f
    # shellcheck disable=SC2206 # word splitting is intended here
    excluded_containers=(${EXCLUDE_CONTAINERS//,/ })
    if [[ "$had_noglob" == false ]]; then
        set +f
    fi
}

# Add cockroach and datahub to exclusions for lite mode
if [[ "${KAMIWAZA_LITE:-}" == "true" ]]; then
    log_info "KAMIWAZA_LITE is enabled - excluding cockroachdb and datahub from startup"
    _kamiwaza_exclude cockroach
    _kamiwaza_exclude datahub
fi

# Add milvus to exclusions if disabled (default: enabled)
if [[ "${KAMIWAZA_MILVUS_ENABLED:-true}" != "true" ]]; then
    log_info "KAMIWAZA_MILVUS_ENABLED is not true - excluding milvus from startup"
    _kamiwaza_exclude milvus
fi

# Add otel to exclusions if not explicitly enabled
if [[ "${KAMIWAZA_OTEL_ENABLED:-false}" != "true" ]]; then
    log_info "KAMIWAZA_OTEL_ENABLED is not true - excluding otel-collector from startup"
    _kamiwaza_exclude otel
fi

# Add Loki/Grafana stack to exclusions when disabled
if [[ "${KAMIWAZA_LOKI_ENABLED:-false}" != "true" ]]; then
    log_info "KAMIWAZA_LOKI_ENABLED is not true - excluding loki stack from startup"
    _kamiwaza_exclude loki
fi

_kamiwaza_split_exclusions

# Set etcd cluster configuration based on node type
# Assign first, export second, so a failing `hostname` is not masked by the
# exit status of `export`.
LOCAL_HOSTNAME=$(hostname)
export LOCAL_HOSTNAME
export KAMIWAZA_ETCD_NODE_NAME="${LOCAL_HOSTNAME}"

# Common prefix of every etcd member name: "<env>_kamiwaza-etcd".
_kamiwaza_etcd_prefix="${KAMIWAZA_ENV:-default}_kamiwaza-etcd"

if [ "${KAMIWAZAD_IS_WORKER:-0}" -eq 1 ]; then
    # worker node: join the cluster that already exists on the head node.
    # The SSH call is tested directly in the `if`; under `set -e` a plain
    # assignment followed by a `$?` check would abort before the check ran
    # and the error message below would never be printed.
    if ! HEAD_HOSTNAME=$(ssh -i /etc/kamiwaza/ssl/cluster.key -o StrictHostKeyChecking=no "${KAMIWAZA_HEAD_IP}" hostname) \
        || [ -z "${HEAD_HOSTNAME}" ]; then
        echo "Error: Failed to resolve head node's hostname via SSH. Exiting." >&2
        exit 1
    fi
    export KAMIWAZA_ETCD_INITIAL_CLUSTER="${_kamiwaza_etcd_prefix}-${LOCAL_HOSTNAME}=https://${_kamiwaza_etcd_prefix}-${LOCAL_HOSTNAME}:2380,${_kamiwaza_etcd_prefix}-${HEAD_HOSTNAME}=https://${_kamiwaza_etcd_prefix}-${HEAD_HOSTNAME}:2380"
    export KAMIWAZA_ETCD_CLUSTER_STATE="existing"
else
    # head / single node: bootstrap a new one-member cluster.
    export KAMIWAZA_ETCD_INITIAL_CLUSTER="${_kamiwaza_etcd_prefix}-${LOCAL_HOSTNAME}=https://${_kamiwaza_etcd_prefix}-${LOCAL_HOSTNAME}:2380"
    export KAMIWAZA_ETCD_CLUSTER_STATE="new"
fi

# Set common etcd variables
export KAMIWAZA_ETCD_ADVERTISE_PEER_URLS="https://${_kamiwaza_etcd_prefix}-${LOCAL_HOSTNAME}:2380"
export KAMIWAZA_ETCD_ADVERTISE_CLIENT_URLS="https://${_kamiwaza_etcd_prefix}-${LOCAL_HOSTNAME}:2379"

# Ask the head node over SSH whether its startup marker file still exists.
# Returns: 0 - marker present (head node is still starting)
#          1 - marker absent (head node is ready)
#          2 - ssh itself failed (exit status 255), so the state is unknown
_kamiwaza_probe_head_node() {
    local rc=0
    ssh -i /etc/kamiwaza/ssl/cluster.key -o StrictHostKeyChecking=no "${KAMIWAZA_HEAD_IP}" \
        "[ -f /tmp/kamiwazad.starting ]" || rc=$?
    case "$rc" in
        0) return 0 ;;
        255) return 2 ;;
        *) return 1 ;;
    esac
}

# If we're a worker node, wait for head node to be ready
if [ "${KAMIWAZAD_IS_WORKER:-0}" -eq 1 ]; then
    echo "Waiting for head node to be ready..."
    max_attempts=2160 # 2160 attempts * 10s = 6 hours
    attempt=1

    while [ "$attempt" -le "$max_attempts" ]; do
        head_state=0
        _kamiwaza_probe_head_node || head_state=$?

        if [ "$head_state" -eq 1 ]; then
            echo "Head node is ready"
            break
        fi

        if [ "$attempt" -eq "$max_attempts" ]; then
            echo "Warning: Head node still not ready after $max_attempts attempts ($(( max_attempts * 10 )) seconds)"
            echo "Continuing startup, but services may not function correctly"
            break
        fi

        if [ "$head_state" -eq 2 ]; then
            # A connection failure used to be reported as "ready"; keep
            # waiting instead, because the head node's state is unknown.
            echo "Head node unreachable over SSH (attempt $attempt of $max_attempts)..."
        else
            echo "Head node not ready yet (attempt $attempt of $max_attempts)..."
        fi
        sleep 10
        attempt=$((attempt + 1))
    done
fi


# Bring up the swarm unless this is a community install. setup_swarm is
# retried every 10 seconds; the script aborts once the last attempt fails.
if [[ "${KAMIWAZA_COMMUNITY:-}" != "true" ]]; then
    source setup_swarm.sh
    max_attempts=12  # 12 attempts * 10s = 2 minutes
    attempt=1

    until
        echo "Setting up swarm (attempt $attempt of $max_attempts)..."
        setup_swarm
    do
        if (( attempt == max_attempts )); then
            echo "Error: Failed to setup swarm after $max_attempts attempts ($(( max_attempts * 10 )) seconds)"
            exit 1
        fi

        echo "Swarm setup attempt $attempt failed, retrying in 10 seconds..."
        sleep 10
        attempt=$(( attempt + 1 ))
    done
fi

# Create the Docker networks the services attach to.
log_step "Setting up essential Docker networks"
source setup_network.sh
setup_essential_networks

log_info "Verifying required networks exist"
# Verify required networks exist
# Globals:  KAMIWAZA_ENV (read; "default" when unset or empty)
# Outputs:  an error message on stderr for the first missing network
# Returns:  0 when every network can be inspected; otherwise the function
#           terminates the whole script with exit status 1.
verify_networks() {
    local env_prefix="${KAMIWAZA_ENV:-default}"
    local -a required_networks=(
        "${env_prefix}_kamiwaza-traefik"
        "${env_prefix}_kamiwaza-etcd"
        "${env_prefix}_kamiwaza-backend"
        "${env_prefix}_kamiwaza-apps"
        "${env_prefix}_kamiwaza-keycloak"
    )
    # Keep the loop variable local so it does not leak into the script.
    local network

    for network in "${required_networks[@]}"; do
        if ! docker network inspect "$network" >/dev/null 2>&1; then
            echo "Error: Required network $network does not exist" >&2
            # NOTE(review): this script sources "setup_network.sh" (singular);
            # confirm which file name the hint should mention.
            echo "Please run setup_networks.sh first" >&2
            exit 1
        fi
    done
}

# Verify networks before proceeding. verify_networks ends the script with
# exit status 1 as soon as one required network cannot be inspected.
verify_networks

# Reached only when every required network exists.
log_success "Essential Docker networks have been set up for Kamiwaza services"

# Print the absolute path of directory $1 when launch.py is found either
# directly inside it or inside its kamiwaza/ subdirectory.
# Returns non-zero (and prints nothing) when neither location has launch.py.
_kamiwaza_root_from() {
    local dir="$1"
    # Yes, these paths both lead to the same result; however, highlighting
    # that launch.py can be in ROOT or ROOT/kamiwaza, depending on the
    # install methods/env, but either way we want the upper folder
    if [ -f "$dir/launch.py" ] || [ -f "$dir/kamiwaza/launch.py" ]; then
        (cd "$dir" && pwd)
        return
    fi
    return 1
}

# Test for KAMIWAZA_ROOT in the environment and set/export it if not set.
# ${KAMIWAZA_ROOT:-} is required here: with `set -u` a bare ${KAMIWAZA_ROOT}
# aborts the script in exactly the case this block is meant to handle.
if [ -z "${KAMIWAZA_ROOT:-}" ]; then
    script_dir=$(dirname "$0")
    # Only export on success so a failed lookup leaves KAMIWAZA_ROOT untouched.
    if _kamiwaza_detected_root=$(_kamiwaza_root_from "$script_dir"); then
        export KAMIWAZA_ROOT="$_kamiwaza_detected_root"
    fi
fi

# Make sure an RS256 JWT keypair exists under ${KAMIWAZA_ROOT}/runtime.
# Nothing is done when both PEM files are already present. Otherwise the
# keys are generated with util/generate_jwt_keys.py (run through
# scripts/kw_py) and, if that fails, with the openssl command line tool.
# Globals:  KAMIWAZA_ROOT (read)
# Returns:  0 when both key files exist afterwards, 1 otherwise
ensure_jwt_keys() {
    local key_dir="${KAMIWAZA_ROOT}/runtime"
    local private_pem="${key_dir}/jwt_private_key.pem"
    local public_pem="${key_dir}/jwt_public_key.pem"

    if [[ -f "$private_pem" && -f "$public_pem" ]]; then
        return 0
    fi

    log_info "Generating RS256 JWT keypair in ${key_dir}"

    mkdir -p "$key_dir"

    # Preferred generator: the project's Python helper, run from KAMIWAZA_ROOT.
    generate_with_kw_py() {
        (cd -- "${KAMIWAZA_ROOT}" && scripts/kw_py util/generate_jwt_keys.py "${key_dir}") || return 1
    }

    # Fallback generator: a 2048-bit RSA key created with openssl.
    generate_with_openssl() {
        command -v openssl >/dev/null 2>&1 || return 1
        # The subshell confines the restrictive umask to key creation, so the
        # caller's umask is unchanged on every exit path.
        (
            umask 077
            openssl genpkey -algorithm RSA -pkeyopt rsa_keygen_bits:2048 -out "${private_pem}" >/dev/null 2>&1 \
                && openssl rsa -pubout -in "${private_pem}" -out "${public_pem}" >/dev/null 2>&1
        ) || return 1
        # Private key stays owner-only; the public key may be world-readable.
        chmod 600 "${private_pem}" >/dev/null 2>&1 || true
        chmod 644 "${public_pem}" >/dev/null 2>&1 || true
        return 0
    }

    if ! generate_with_kw_py; then
        log_warn "Python-based JWT key generation failed; attempting OpenSSL fallback"
        if ! generate_with_openssl; then
            log_error "Failed to generate JWT keys via util/generate_jwt_keys.py and OpenSSL fallback"
            return 1
        fi
    fi

    # A generator may report success without having produced both files.
    if [[ ! -f "$private_pem" || ! -f "$public_pem" ]]; then
        log_error "JWT keypair missing after generation attempt"
        return 1
    fi

    log_success "JWT keypair generated"
    return 0
}

# Print the JWT public key stored in ${KAMIWAZA_ROOT}/runtime/jwt_public_key.pem.
# The PEM text is printed as stored (multi-line), followed by one newline.
# Globals:  KAMIWAZA_ROOT (read)
# Outputs:  the key on stdout; an error message on stderr on failure
# Returns:  0 on success; 1 when the file is missing, unreadable, or empty
get_jwt_public_key() {
    local public_key_path="${KAMIWAZA_ROOT}/runtime/jwt_public_key.pem"
    # Local, so the key material does not leak into a global variable.
    local public_key

    if [ -f "$public_key_path" ] \
        && public_key=$(cat -- "$public_key_path") \
        && [ -n "$public_key" ]; then
        printf '%s\n' "$public_key"
        return 0
    fi

    echo "Error: Unable to retrieve JWT public key" >&2
    return 1
}

# Generate the JWT keypair if needed; the script cannot continue without it.
if ! ensure_jwt_keys; then
    echo "Failed to ensure JWT keypair. Exiting." >&2
    exit 1
fi

# Publish the public key to child processes through JWT_PUBLIC_KEY.
# The assignment is tested directly: under `set -e` a failing plain
# assignment would end the script before a separate `$?` check could print
# the message below.
unset JWT_PUBLIC_KEY
if ! JWT_PUBLIC_KEY=$(get_jwt_public_key); then
    echo "Failed to set JWT_PUBLIC_KEY. Exiting." >&2
    exit 1
fi
export JWT_PUBLIC_KEY

# Determine platform: "osx" on Darwin, "linux" for everything else.
case "$(uname)" in
    Darwin)
        platform='osx'
        ;;
    *)
        platform='linux'
        ;;
esac

# Determine architecture: map the machine name reported by `uname -m` to the
# two supported values, amd64 and arm64; anything else aborts the script.
arch_raw=$(uname -m)
if [[ "$arch_raw" == "x86_64" || "$arch_raw" == "amd64" ]]; then
    arch='amd64'
elif [[ "$arch_raw" =~ ^(aarch64|arm64|arm64e)$ || "$arch_raw" == armv8* || "$arch_raw" == armv9* ]]; then
    arch='arm64'
else
    echo "Unsupported architecture: $arch_raw"
    exit 1
fi

# Load common.sh from KAMIWAZA_ROOT and let detect_gpu_compute decide whether
# this host is treated as GPU or CPU; "cpu" unless it prints exactly "true".
. "${KAMIWAZA_ROOT}/common.sh"

cpugpu='cpu'
if [[ "$(detect_gpu_compute)" == "true" ]]; then
    cpugpu='gpu'
fi

# First remaining positional argument (empty when none was given).
component_arg="${1:-}"


# Summarise the etcd settings that are in effect for this node.
log_info "etcd cluster configuration:"
log_info "  Node Name: ${KAMIWAZA_ETCD_NODE_NAME:-Not Set}"
log_info "  Advertise Peer URLs: ${KAMIWAZA_ETCD_ADVERTISE_PEER_URLS:-Not Set}"
log_info "  Advertise Client URLs: ${KAMIWAZA_ETCD_ADVERTISE_CLIENT_URLS:-Not Set}"
log_info "  Initial Cluster: ${KAMIWAZA_ETCD_INITIAL_CLUSTER:-Not Set}"
log_info "  Cluster State: ${KAMIWAZA_ETCD_CLUSTER_STATE:-Not Set}"

# Pull retagged containers
# The version comes from kamiwaza.version.json when that file exists (it then
# overrides any values from the environment); otherwise the three
# KAMIWAZA_VERSION_* variables must already be set.
version_file="${KAMIWAZA_ROOT}/kamiwaza.version.json"
if [[ -f "$version_file" ]]; then
    # Using sed instead of grep -P for better compatibility
    for version_part in MAJOR MINOR PATCH; do
        version_key="KAMIWAZA_VERSION_${version_part}"
        version_value=$(sed -n "s/.*\"${version_key}\": *\\([^,}]*\\).*/\\1/p" "$version_file" | tr -d '"')
        printf -v "$version_key" '%s' "$version_value"
    done

    # Every component must have been found in the file.
    if [[ -z "${KAMIWAZA_VERSION_MAJOR}" || -z "${KAMIWAZA_VERSION_MINOR}" || -z "${KAMIWAZA_VERSION_PATCH}" ]]; then
        echo "Error: Failed to read version components from kamiwaza.version.json properly"
        exit 1
    fi
elif [[ -z "${KAMIWAZA_VERSION_MAJOR+x}" || -z "${KAMIWAZA_VERSION_MINOR+x}" || -z "${KAMIWAZA_VERSION_PATCH+x}" ]]; then
    # No file: the variables only need to be set (the +x test accepts empty).
    echo "Error: Version variables not set in environment and kamiwaza.version.json not found"
    exit 1
fi

kamiwaza_prefix="kamiwazaai/kamiwaza"
kamiwaza_version="${KAMIWAZA_VERSION_MAJOR}.${KAMIWAZA_VERSION_MINOR}.${KAMIWAZA_VERSION_PATCH}"

# Tag image $1 as $2 unless $2 already exists locally.
# Returns non-zero (after logging an error) when `docker tag` fails.
_kamiwaza_tag_if_missing() {
    local source_image="$1"
    local target_tag="$2"

    if docker image inspect "${target_tag}" >/dev/null 2>&1; then
        log_info "vLLM image ${target_tag} already present; skipping retag"
        return 0
    fi
    if ! docker tag "${source_image}" "${target_tag}"; then
        log_error "Failed to tag ${source_image} as ${target_tag}"
        return 1
    fi
    log_success "Tagged ${source_image} as ${target_tag}"
    return 0
}

# Obtain the upstream vLLM image for this architecture and tag it with the
# Kamiwaza image name.
# Globals:   arch, kamiwaza_prefix, kamiwaza_version (read);
#            KAMIWAZA_VLLM_VERSION (read, default 0.13.0)
# Arguments: $1 - base name used in the Kamiwaza tag
# Returns:   0 when an image was found or pulled and every tag is in place;
#            1 when no candidate could be obtained or tagging failed
pull_and_tag_vllm_image() {
    local base_name="$1"
    local vllm_version_tag="v${KAMIWAZA_VLLM_VERSION:-0.13.0}"
    local source_repo="vllm/vllm-openai"
    local pulled_image=""
    local candidates=()
    local candidate
    local image_ref

    # Architecture-suffixed tags are tried before the plain version tag.
    if [[ "$arch" == "amd64" ]]; then
        candidates=("${vllm_version_tag}-amd64" "${vllm_version_tag}-x86_64" "${vllm_version_tag}")
    else
        candidates=("${vllm_version_tag}-aarch64" "${vllm_version_tag}-arm64" "${vllm_version_tag}")
    fi

    for candidate in "${candidates[@]}"; do
        image_ref="${source_repo}:${candidate}"
        # A locally cached image is used without contacting the registry.
        if docker image inspect "${image_ref}" >/dev/null 2>&1; then
            pulled_image="${image_ref}"
            log_info "Using cached vLLM image ${image_ref}"
            break
        fi
        if docker pull "${image_ref}"; then
            pulled_image="${image_ref}"
            log_success "Pulled ${image_ref} for architecture ${arch}"
            break
        else
            log_warn "Failed to pull ${image_ref}; trying next candidate"
        fi
    done

    if [[ -z "${pulled_image}" ]]; then
        log_error "Unable to pull a vLLM image for architecture ${arch} (candidates: ${candidates[*]})"
        return 1
    fi

    # Tag failures are reported to the caller. This function is called from
    # an `if` condition, where `set -e` is inactive, so an unchecked
    # `docker tag` failure would otherwise be logged as a success.
    _kamiwaza_tag_if_missing "${pulled_image}" \
        "${kamiwaza_prefix}-${base_name}:${kamiwaza_version}-${arch}" || return 1

    # On arm64 the image is additionally tagged with the "aarch64" suffix.
    if [[ "$arch" == "arm64" ]]; then
        _kamiwaza_tag_if_missing "${pulled_image}" \
            "${kamiwaza_prefix}-${base_name}:${kamiwaza_version}-aarch64" || return 1
    fi

    return 0
}

# Make the NVIDIA vLLM image available when KAMIWAZA_USE_NV_VLLM asked for it.
# OFFLINE_MODE promises that no Docker images are pulled, so in that mode the
# image is only looked up locally, like the other images handled below.
if [[ "$use_nv_vllm" == true ]]; then
    nv_vllm_version="${KAMIWAZA_NV_VLLM_VERSION:-25.10-py3}"
    if [[ "${OFFLINE_MODE:-false}" == "true" ]]; then
        log_step "Checking NVIDIA vLLM image nvcr.io/nvidia/vllm:${nv_vllm_version} (OFFLINE_MODE)"
        if docker image inspect "nvcr.io/nvidia/vllm:${nv_vllm_version}" >/dev/null 2>&1; then
            log_success "Image nvcr.io/nvidia/vllm:${nv_vllm_version} found locally"
        else
            log_warn "Image nvcr.io/nvidia/vllm:${nv_vllm_version} not found locally (skipping pull in OFFLINE_MODE)"
        fi
    else
        log_step "Pre-pulling NVIDIA vLLM image nvcr.io/nvidia/vllm:${nv_vllm_version}"
        # A failed pull is only a warning; startup continues.
        if docker pull "nvcr.io/nvidia/vllm:${nv_vllm_version}"; then
            log_success "Successfully pulled nvcr.io/nvidia/vllm:${nv_vllm_version}"
        else
            log_warn "Failed to pull nvcr.io/nvidia/vllm:${nv_vllm_version}; ensure NVIDIA registry credentials are available"
        fi
    fi
fi
# Define platform-specific container lists
# The list stays empty on hosts that are neither Ampere (per lscpu) nor Linux.
vanilla_containers=()
_kamiwaza_vllm_container="vllm-openai-v${KAMIWAZA_VLLM_VERSION:-0.13.0}:${kamiwaza_version}"

if command -v lscpu &> /dev/null && lscpu | grep -qi 'ampere'; then
    # Ampere Linux containers: vLLM plus Ampere's llama.cpp build
    vanilla_containers+=("${_kamiwaza_vllm_container}")
    vanilla_containers+=("amperecomputingai/llama.cpp:latest")
elif [[ "$(uname)" == "Linux" ]]; then
    # Non-Ampere Linux containers: vLLM only
    vanilla_containers+=("${_kamiwaza_vllm_container}")
fi

# Print the base name used in Kamiwaza image tags for container reference $1:
# the last path segment with any ":tag" removed. "llama.cpp" is special-cased
# to "ampere-llamacpp".
_kamiwaza_container_base_name() {
    local name="${1##*/}"
    name="${name%%:*}"
    if [[ "$name" == "llama.cpp" ]]; then
        name="ampere-llamacpp"
    fi
    printf '%s\n' "$name"
}

# Print the lower-cased Kamiwaza image reference for base name $1, built from
# the globals kamiwaza_prefix, kamiwaza_version and arch.
_kamiwaza_image_name() {
    local image="${kamiwaza_prefix}-$1:${kamiwaza_version}-${arch}"
    printf '%s\n' "${image,,}"
}

# Nothing to do when the platform has no additional containers. Otherwise
# look the images up locally (OFFLINE_MODE) or pull them.
if [ "${#vanilla_containers[@]}" -eq 0 ]; then
    log_info "No additional containers to pull for current platform configuration"
elif [[ "$OFFLINE_MODE" == "true" ]]; then
    log_step "Checking platform-specific container images (OFFLINE_MODE)"
    # In offline mode, check if images exist locally
    for container in "${vanilla_containers[@]}"; do
        base_name=$(_kamiwaza_container_base_name "$container")
        new_image_name=$(_kamiwaza_image_name "$base_name")

        # Check if image exists locally
        if docker image inspect "$new_image_name" >/dev/null 2>&1; then
            log_success "Image $new_image_name found locally"
        else
            log_warn "Image $new_image_name not found locally (skipping pull in OFFLINE_MODE)"
        fi
    done
else
    log_step "Pulling platform-specific container images"
    for container in "${vanilla_containers[@]}"; do
        base_name=$(_kamiwaza_container_base_name "$container")
        new_image_name=$(_kamiwaza_image_name "$base_name")

        # The vLLM image is retagged from the upstream repository instead of
        # being pulled under its Kamiwaza name.
        if [[ "$base_name" == "vllm-openai-v${KAMIWAZA_VLLM_VERSION:-0.13.0}" ]]; then
            # NOTE(review): this tests KAMIWAZA_OFFLINE (default "False"),
            # not OFFLINE_MODE, and only the exact value "true" matches -
            # confirm that a separate switch is intended here.
            if [[ "${KAMIWAZA_OFFLINE:-False}" == "true" ]]; then
                if docker image inspect "${new_image_name}" >/dev/null 2>&1; then
                    log_info "Offline mode: vLLM image ${new_image_name} already present"
                else
                    log_warn "Offline mode: vLLM image ${new_image_name} not present locally; deployment may fail"
                fi
            else
                if ! pull_and_tag_vllm_image "${base_name}"; then
                    log_warn "Falling back to pulling ${new_image_name} directly"
                    docker pull --platform "linux/$arch" "${new_image_name}" || log_error "Failed to pull fallback image ${new_image_name}"
                fi
            fi
            continue
        fi

        # A failed pull is logged but does not stop the script.
        log_info "Pulling $new_image_name for architecture $arch..."
        if ! docker pull --platform "linux/$arch" "$new_image_name"; then
            log_error "Failed to pull image $new_image_name for architecture $arch"
        else
            log_success "Successfully pulled $new_image_name"
        fi
    done
fi

# Frontend image: pull it from the registry, or - in OFFLINE_MODE - only
# check whether a copy is already available locally. Neither outcome stops
# the script; a missing image only produces warnings.
if [[ "$OFFLINE_MODE" != "true" ]]; then
    log_step "Pulling frontend container image"
    # Frontend is a multi-arch image (no arch suffix in versioned tags)
    # Try versioned tag first, then fall back to latest
    frontend_tags=("latest-${arch}" "latest")
    if [[ -n "${kamiwaza_version:-}" && "${kamiwaza_version}" != "latest" ]]; then
        frontend_tags=("${kamiwaza_version}" "${frontend_tags[@]}")
    fi
    frontend_pulled=false

    for tag in "${frontend_tags[@]}"; do
        log_info "Attempting to pull kamiwazaai/frontend:${tag}..."
        if ! docker pull "kamiwazaai/frontend:${tag}" 2>/dev/null; then
            log_warn "Failed to pull kamiwazaai/frontend:${tag}"
            continue
        fi
        # The first tag that pulls successfully ends the search.
        log_success "Successfully pulled kamiwazaai/frontend:${tag}"
        frontend_pulled=true
        break
    done

    if [[ "$frontend_pulled" == false ]]; then
        log_warn "No frontend image could be pulled from registry"
        log_warn "Frontend may need to be built locally with: ./build-scripting/build-frontend.sh"
    fi
else
    log_step "Checking frontend container image (OFFLINE_MODE)"
    frontend_tags=("latest-${arch}" "latest")
    frontend_found=false

    for tag in "${frontend_tags[@]}"; do
        if ! docker image inspect "kamiwazaai/frontend:${tag}" >/dev/null 2>&1; then
            continue
        fi
        log_success "Frontend image kamiwazaai/frontend:${tag} found locally"
        frontend_found=true
        break
    done

    if [[ "$frontend_found" == false ]]; then
        log_warn "No frontend image found locally (skipping pull in OFFLINE_MODE)"
        log_warn "Frontend may need to be built locally with: ./build-scripting/build-frontend.sh"
    fi
fi

# Define the ordered requirements - components that need to start in sequence
# Each entry is used below as a grep pattern against the directory names
# found directly under kamiwaza/deployment.
ordered_requirements=("etcd")  # Add more as needed

# "${ordered_requirements[*]}" joins all entries into one string for the banner.
log_step "Starting ordered requirements: ${ordered_requirements[*]}"

# Process ordered requirements first
for requirement in "${ordered_requirements[@]}"; do
    log_info "Processing ordered requirement: $requirement"
    for component_path in $(find kamiwaza/deployment -mindepth 1 -maxdepth 1 -type d | grep -v '/envs$' | grep "$requirement"); do
        component=$(basename $component_path)
        
        log_info "Found component: $component"
        
        # Skip if component is excluded
        exclude=false
        for excluded in "${excluded_containers[@]}"; do
            if [[ "$component" == *"$excluded"* ]]; then
                exclude=true
                break
            fi
        done
        if [[ "$exclude" == true ]]; then
            log_info "Skipping $component because it is excluded"
            continue
        fi

        # Determine the appropriate architecture suffixes to create directories for
        arch_suffixes=() # Initialize an empty array to hold potential architecture suffixes
        if [[ -d "kamiwaza/deployment/${component}/${arch}" ]]; then
            arch_suffixes+=("${arch}")
        fi
        if [[ -d "kamiwaza/deployment/${component}/${arch}-cpu" ]]; then
            arch_suffixes+=("${arch}-cpu")
        fi
        if [[ -d "kamiwaza/deployment/${component}/${arch}-gpu" ]]; then
            arch_suffixes+=("${arch}-gpu")
        fi

        # Create environment specific directories based on the architecture suffixes found
        for suffix in "${arch_suffixes[@]}"; do
            mkdir -p "kamiwaza/deployment/envs/${KAMIWAZA_ENV}/${component}/${suffix}"
        done

        # Copy files from both architecture and architecture with CPU/GPU suffix if they exist
        for suffix in "${arch_suffixes[@]}"; do
            for file in docker-compose.yml prelaunch.sh postlaunch.sh launch.sh; do
                if [[ -f "kamiwaza/deployment/${component}/${suffix}/${file}" ]]; then
                    cp "kamiwaza/deployment/${component}/${suffix}/${file}" "kamiwaza/deployment/envs/${KAMIWAZA_ENV}/${component}/${suffix}/"
                fi
            done
            # Copy any other *.yml files in the source to the target
            for yml_file in kamiwaza/deployment/${component}/${suffix}/*.yml; do
                if [[ -f "$yml_file" ]]; then
                    cp "$yml_file" "kamiwaza/deployment/envs/${KAMIWAZA_ENV}/${component}/${suffix}/"
                fi
            done
            # Copy any *.py files in the source to the target
            for py_file in kamiwaza/deployment/${component}/${suffix}/*.py; do
                if [[ -f "$py_file" ]]; then
                    cp "$py_file" "kamiwaza/deployment/envs/${KAMIWAZA_ENV}/${component}/${suffix}/"
                fi
            done
            # Copy traefik plugins
            if [ -d "kamiwaza/deployment/${component}/${suffix}/.plugins-local" ]; then
                cp -R "kamiwaza/deployment/${component}/${suffix}/.plugins-local" "kamiwaza/deployment/envs/${KAMIWAZA_ENV}/${component}/${suffix}/"
            fi
            # Copy Grafana provisioning files (dashboards, datasources) if present
            if [ -d "kamiwaza/deployment/${component}/${suffix}/grafana-provisioning" ]; then
                cp -R "kamiwaza/deployment/${component}/${suffix}/grafana-provisioning" "kamiwaza/deployment/envs/${KAMIWAZA_ENV}/${component}/${suffix}/"
            fi
        done

        # Determine the appropriate architecture folder considering CPU/GPU suffix for further operations
        if [[ "$cpugpu" == "gpu" && -d "kamiwaza/deployment/envs/${KAMIWAZA_ENV}/${component}/${arch}-gpu" ]]; then
            arch_folder="${arch}-gpu"
        elif [[ -d "kamiwaza/deployment/envs/${KAMIWAZA_ENV}/${component}/${arch}" ]]; then
            arch_folder="${arch}"
        elif [[ "$cpugpu" == "cpu" && -d "kamiwaza/deployment/envs/${KAMIWAZA_ENV}/${component}/${arch}-cpu" ]]; then
            arch_folder="${arch}-cpu"
        else
            echo "No suitable architecture folder found for component: ${component}"
            continue
        fi

        # Navigate to the component directory
        cd "kamiwaza/deployment/envs/${KAMIWAZA_ENV}/${component}/${arch_folder}"

        # Execute lifecycle scripts if they exist
        if [[ "$component" == *"etcd"* && "$RESET_ETCD" -eq 1 ]]; then
            # Get the project name and container name properly evaluated
            project_name="${KAMIWAZA_ENV:-default}-kamiwaza-etcd"
            container_name="${KAMIWAZA_ENV:-default}_kamiwaza-etcd-${LOCAL_HOSTNAME}"

            # Check if etcd is already running by inspecting the compose project
            if ! $DOCKER_COMPOSE -p "$project_name" ps --format json | grep -q '"State":"running"'; then
                echo "Resetting etcd state before startup since etcd is not running"

                #docker service rm dummy-network-propagation || true

                docker rm ${KAMIWAZA_ENV:-default}_kamiwaza-etcd-delay-1 || true

                # Test if we can sudo without password by running a harmless command
                if sudo -n true 2>/dev/null; then
                    sudo rm -rf "${DOCKER_VOLUME_DIRECTORY:-.}/volumes/etcd"/*.etcd
                else
                    # Try without sudo
                    if ! rm -rf "${DOCKER_VOLUME_DIRECTORY:-.}/volumes/etcd"/*.etcd 2>/dev/null; then
                        echo "ERROR: We need sudo access to delete ${DOCKER_VOLUME_DIRECTORY:-.}/volumes/etcd/*.etcd but you're not in sudoers"
                        echo "Please run this command as a user with sudo access:"
                        echo "sudo rm -rf ${DOCKER_VOLUME_DIRECTORY:-.}/volumes/etcd/*.etcd"
                        echo "If this is unexpected, please report it to the Kamiwaza team via Discord or support@kamiwaza.ai"
                        exit 1
                    fi
                fi
            else
                if [[ "$RESET_HARD" -eq 1 ]]; then
                    echo "Hard reset requested - stopping etcd"
                    $DOCKER_COMPOSE -p "$project_name" down -v
                    # Wait for etcd to fully stop
                    while $DOCKER_COMPOSE -p "$project_name" ps --format json | grep -q '"State":"running"'; do
                        echo "Waiting for etcd to stop..."
                        sleep 2
                    done
                    echo "Resetting etcd state"
                    
                    # Remove the dummy service and network
                    #docker service rm dummy-network-propagation || true
                    docker rm ${KAMIWAZA_ENV:-default}_kamiwaza-etcd-delay-1 || true

                    # Test if we can sudo without password by running a harmless command
                    if sudo -n true 2>/dev/null; then
                        sudo rm -rf "${DOCKER_VOLUME_DIRECTORY:-.}/volumes/etcd"/*.etcd
                    else
                        # Try without sudo
                        if ! rm -rf "${DOCKER_VOLUME_DIRECTORY:-.}/volumes/etcd"/*.etcd 2>/dev/null; then
                            echo "ERROR: We need sudo access to delete ${DOCKER_VOLUME_DIRECTORY:-.}/volumes/etcd/*.etcd but you're not in sudoers"
                            echo "Please run this command as a user with sudo access:"
                            echo "sudo rm -rf ${DOCKER_VOLUME_DIRECTORY:-.}/volumes/etcd/*.etcd"
                            echo "If this is unexpected, please report it to the Kamiwaza team via Discord or support@kamiwaza.ai"
                            exit 1
                        fi
                    fi
                    
                    echo "Bringing etcd back up"
                    $DOCKER_COMPOSE -p "$project_name" up -d
                    
                    # Restart Traefik to ensure it reconnects to etcd with fresh state
                    echo "Restarting Traefik to ensure proper etcd connection..."
                    traefik_project="${KAMIWAZA_ENV:-default}-kamiwaza-traefik"
                    if $DOCKER_COMPOSE -p "$traefik_project" ps --format json 2>/dev/null | grep -q '"State":"running"'; then
                        $DOCKER_COMPOSE -p "$traefik_project" restart
                        echo "Traefik restarted successfully"
                    else
                        echo "Traefik not running, will be started normally later"
                    fi
                else
                    echo "Etcd is already running, skipping reset (use --hard for forced reset)"
                fi

            fi

        fi

        # Execute prelaunch scripts with proper error handling and logging
        if [[ -f "prelaunch.sh" ]]; then
            log_info "Running prelaunch script for $component..."
            if [[ "$component" == *"traefik"* || "$component" == *"etcd"* || "$component" == *"keycloak"* ]]; then
                # Run in the *current* shell so exports from prelaunch.sh persist.
                # DO NOT put `source` on the left of a pipe or the env changes are lost.
                if ! { source "prelaunch.sh"; } > >(tee prelaunch.log) 2>&1; then
                    log_error "prelaunch.sh failed for $component (sourced)"
                    log_error "Check prelaunch.log in $(pwd) for details"
                    exit 1
                fi
            else
                if ! { bash "prelaunch.sh"; } > >(tee prelaunch.log) 2>&1; then
                    log_error "prelaunch.sh failed for $component (bash)"
                    log_error "Check prelaunch.log in $(pwd) for details"
                    exit 1
                fi
            fi
            log_success "Prelaunch script completed successfully for $component"
            fi
            # Check for skip_once file
            if [[ -f ".skip_once" ]]; then
                log_info "Found .skip_once file - skipping startup for $component"
                rm -f ".skip_once"
            elif [[ -f "launch.sh" ]]; then
                # Execute launch script instead of docker compose (launch.sh handles container startup)
                log_info "Running launch script for $component (handles container startup)..."
                
                # Source launch.sh in isolated subshell with safety options
                if (
                    set -Eeuo pipefail
                    source launch.sh
                ) 2>&1 | tee launch.log; then
                    log_success "Launch script completed successfully for $component"
                else
                    rc=$?
                    log_error "launch.sh failed for $component (exit code $rc)"
                    log_error "Check launch.log in $(pwd) for details"
                    exit 1
                fi
            else
                # Start the containers with docker compose
                log_info "Starting containers for $component..."
                if ! $DOCKER_COMPOSE -f "docker-compose.yml" -p "${KAMIWAZA_ENV}-${component}" up -d; then
                    log_error "Failed to start containers for $component"
                    exit 1
                fi
                log_success "Containers started successfully for $component"
            fi
        
        # Execute postlaunch script if it exists
        if [[ -f "postlaunch.sh" ]]; then
            log_info "Running postlaunch script for $component..."
            sleep 5
            if ! bash "postlaunch.sh" 2>&1 | tee postlaunch.log; then
                log_warn "postlaunch.sh failed for $component (non-critical)"
                log_warn "Check postlaunch.log in $(pwd) for details"
            else
                log_success "Postlaunch script completed successfully for $component"
            fi
        fi

        # Return to the original directory
        cd - > /dev/null
    done
    
    # Optional: Add delay or health check after each requirement type
    log_info "Waiting for $requirement services to be healthy..."
    sleep 5
done

# Print the unique PIDs found in a port-listener listing, space-separated.
# Arguments: $1 - tool that produced the listing ("lsof" or "ss")
#            $2 - the listing text
# Outputs:   the PIDs on a single line; nothing when no PID can be extracted.
# Only lsof output carries a PID column (field 2, after the COMMAND column).
# `ss -ltn` has Recv-Q in that position, so for ss nothing is printed rather
# than risk suggesting something like "kill 0" to the operator.
_traefik_listener_pids() {
    local tool=$1 listing=$2
    [[ "$tool" == "lsof" ]] || return 0
    printf '%s\n' "$listing" | awk '
        $2 ~ /^[0-9]+$/ && !seen[$2]++ { out = out (out == "" ? "" : " ") $2 }
        END { if (out != "") print out }
    '
}

log_step "Starting remaining components"

# Then process remaining components: every directory directly under
# kamiwaza/deployment except envs/. The find output is word-split by the
# shell, so component directory names must not contain whitespace.
for component_path in $(find kamiwaza/deployment -mindepth 1 -maxdepth 1 -type d | grep -v '/envs$'); do
    component=$(basename "$component_path")

    # Skip if this component was already processed in requirements.
    # The ${arr[@]+"..."} form keeps `set -u` from aborting on an empty array (bash < 4.4).
    skip=false
    for requirement in ${ordered_requirements[@]+"${ordered_requirements[@]}"}; do
        if [[ "$component" == *"$requirement"* ]]; then
            skip=true
            break
        fi
    done
    if [[ "$skip" == true ]]; then
        continue
    fi

    # If a component argument is provided, skip components that don't match
    if [[ -n "$component_arg" && "$component" != *"$component_arg"* ]]; then
        continue
    fi

    log_info "Processing component: $component"

    # Check if the component is excluded (substring match against each excluded name)
    exclude=false
    for excluded in ${excluded_containers[@]+"${excluded_containers[@]}"}; do
        if [[ "$component" == *"$excluded"* ]]; then
            exclude=true
            break
        fi
    done
    # Skip Keycloak deployment when authentication is disabled
    if [[ "${KAMIWAZA_USE_AUTH:-false}" != "true" && "$component" == *"kamiwaza-keycloak"* ]]; then
        log_info "Skipping $component because authentication is disabled"
        continue
    fi
    if [[ "$exclude" == true ]]; then
        log_info "Skipping $component because it is excluded"
        continue
    fi

    component_src="kamiwaza/deployment/${component}"
    component_env="kamiwaza/deployment/envs/${KAMIWAZA_ENV}/${component}"

    # For every architecture variant present in the source tree, create the
    # matching environment-specific directory and copy the deployment files into it.
    for suffix in "${arch}" "${arch}-cpu" "${arch}-gpu"; do
        src_dir="${component_src}/${suffix}"
        dst_dir="${component_env}/${suffix}"
        [[ -d "$src_dir" ]] || continue

        mkdir -p "$dst_dir"

        # Well-known compose file and lifecycle scripts
        for file in docker-compose.yml prelaunch.sh postlaunch.sh launch.sh; do
            if [[ -f "${src_dir}/${file}" ]]; then
                cp "${src_dir}/${file}" "${dst_dir}/"
            fi
        done
        # Copy any other *.yml files in the source to the target.
        # The -f test also filters out the literal pattern when nothing matches.
        for yml_file in "${src_dir}"/*.yml; do
            if [[ -f "$yml_file" ]]; then
                cp "$yml_file" "${dst_dir}/"
            fi
        done
        # Copy any *.py files in the source to the target
        for py_file in "${src_dir}"/*.py; do
            if [[ -f "$py_file" ]]; then
                cp "$py_file" "${dst_dir}/"
            fi
        done
        # Copy traefik plugins
        if [ -d "${src_dir}/.plugins-local" ]; then
            cp -R "${src_dir}/.plugins-local" "${dst_dir}/"
        fi
        # Copy Keycloak realm-config (realm import JSON) if present
        if [ -d "${src_dir}/realm-config" ]; then
            echo "COPY:::: " "${src_dir}/realm-config" "${dst_dir}/"
            cp -R "${src_dir}/realm-config" "${dst_dir}/"
        fi
        # Copy Grafana provisioning files (dashboards, datasources) if present
        if [ -d "${src_dir}/grafana-provisioning" ]; then
            cp -R "${src_dir}/grafana-provisioning" "${dst_dir}/"
        fi
    done

    # Determine the appropriate architecture folder considering CPU/GPU suffix.
    # Reset first: without this, a component that has no matching folder would
    # silently inherit the folder chosen for the previous component.
    arch_folder=""
    case "$cpugpu" in
        gpu)
            if [[ -d "${component_env}/${arch}-gpu" ]]; then
                arch_folder="${arch}-gpu"
            elif [[ -d "${component_env}/${arch}" ]]; then
                arch_folder="${arch}"
            fi
            ;;
        cpu)
            if [[ -d "${component_env}/${arch}-cpu" ]]; then
                arch_folder="${arch}-cpu"
            elif [[ -d "${component_env}/${arch}" ]]; then
                arch_folder="${arch}"
            fi
            ;;
        *)
            if [[ -d "${component_env}/${arch}" ]]; then
                arch_folder="${arch}"
            fi
            ;;
    esac

    if [[ -z "${arch_folder:-}" ]]; then
        echo "No suitable architecture folder found for component: ${component}"
        continue
    fi

    # shellcheck disable=SC2031  # KAMIWAZA_LITE/KAMIWAZA_USE_AUTH exported earlier
    if [[ "$component" == *"kamiwaza-traefik"* ]]; then
        # Select the dynamic config variant (lite without auth, otherwise full)
        # and install it as traefik-dynamic.yml when that variant exists.
        traefik_dir="${component_env}/${arch_folder}"
        if [[ "${KAMIWAZA_LITE:-}" == "true" && "${KAMIWAZA_USE_AUTH:-false}" != "true" ]]; then
            if [[ -f "${traefik_dir}/traefik-dynamic.lite.yml" ]]; then
                cp "${traefik_dir}/traefik-dynamic.lite.yml" \
                   "${traefik_dir}/traefik-dynamic.yml"
            fi
        else
            if [[ -f "${traefik_dir}/traefik-dynamic.full.yml" ]]; then
                cp "${traefik_dir}/traefik-dynamic.full.yml" \
                   "${traefik_dir}/traefik-dynamic.yml"
            fi
        fi
    fi

    # Navigate to the component directory
    cd "${component_env}/${arch_folder}"

    # Phase 1: Execute prelaunch scripts
    if [[ -f "prelaunch.sh" ]]; then
        log_info "Running prelaunch script for $component..."
        if [[ "$component" == *"traefik"* || "$component" == *"etcd"* || "$component" == *"keycloak"* ]]; then
            # shellcheck disable=SC1091
            # Run in the *current* shell so exports from prelaunch.sh persist.
            # DO NOT put `source` on the left of a pipe or the env changes are lost.
            if ! { source "prelaunch.sh"; } > >(tee prelaunch.log) 2>&1; then
                log_error "prelaunch.sh failed for $component (sourced)"
                log_error "Check prelaunch.log in $(pwd) for details"
                exit 1
            fi
        else
            if ! { bash "prelaunch.sh"; } > >(tee prelaunch.log) 2>&1; then
                log_error "prelaunch.sh failed for $component (bash)"
                log_error "Check prelaunch.log in $(pwd) for details"
                exit 1
            fi
        fi
        log_success "Prelaunch script completed successfully for $component"
    fi

    # Phase 2: Container startup (skip, launch, or docker compose)
    if [[ -f ".skip_once" ]]; then
        log_info "Found .skip_once file - skipping startup for $component"
        rm -f ".skip_once"
    elif [[ -f "launch.sh" ]]; then
        # Execute launch script instead of docker compose (launch.sh handles container startup)
        log_info "Running launch script for $component (handles container startup)..."
        # NOTE(review): because this subshell is the condition of an `if`, bash
        # ignores errexit inside it even though `set -e` is requested; only the
        # final status of launch.sh (and tee, via pipefail) decides the branch.
        if (
            set -Eeuo pipefail
            source launch.sh
        ) 2>&1 | tee launch.log; then
            log_success "Launch script completed successfully for $component"
        else
            rc=$?
            log_error "launch.sh failed for $component (exit code $rc)"
            log_error "Check launch.log in $(pwd) for details"
            exit 1
        fi
    else
        # Start the containers with docker compose
        log_info "Starting containers for $component..."
        if [[ "$component" == *"kamiwaza-traefik"* ]]; then
            # Best-effort check that nothing else is listening on 61100-61200
            listener_tool=""
            if command -v lsof >/dev/null 2>&1; then
                listener_tool="lsof"
                listener_output=$(lsof -nP -iTCP:61100-61200 -sTCP:LISTEN 2>/dev/null || true)
            elif command -v ss >/dev/null 2>&1; then
                listener_tool="ss"
                # `|| true` keeps a failing ss from aborting the script under pipefail
                listener_output=$(ss -ltn 2>/dev/null | awk 'NR > 1 {split($4,addr,":"); port=addr[length(addr)]; if (port >= 61100 && port <= 61200) print $0}' || true)
            else
                listener_output=""
                log_warn "Unable to verify Traefik port availability (missing lsof/ss); continuing"
            fi

            if [[ -n "${listener_output:-}" ]]; then
                project_name="${KAMIWAZA_ENV:-default}-${component}"
                existing_ids=$($DOCKER_COMPOSE -f "docker-compose.yml" -p "$project_name" ps --status running --format '{{.ID}}' 2>/dev/null || true)
                if [[ -n "${existing_ids//[$'\t\r\n ']/}" ]]; then
                    # The listeners belong to our own, already running Traefik
                    log_info "Traefik is already running for project ${project_name}; skipping port warning."
                else
                    log_error "Ports 61100-61200 are already in use; Traefik requires exclusive access to this range."
                    printf '%s\n' "$listener_output" | head -n 10
                    pids=$(_traefik_listener_pids "$listener_tool" "$listener_output")
                    if [[ -n "$pids" ]]; then
                        log_error "Processes holding the port range: ${pids}"
                        log_error "Stop the offending processes and rerun containers-up.sh (for example: sudo kill ${pids})."
                    else
                        log_error "Stop the listed processes above and rerun containers-up.sh."
                    fi
                    exit 1
                fi
            fi
            unset listener_output listener_tool
        fi
        if ! $DOCKER_COMPOSE -f "docker-compose.yml" -p "${KAMIWAZA_ENV}-${component}" up -d; then
            log_error "Failed to start containers for $component"
            exit 1
        fi
        log_success "Containers started successfully for $component"
    fi

    # Phase 3: Execute postlaunch scripts (failures are reported but not fatal)
    if [[ -f "postlaunch.sh" ]]; then
        log_info "Running postlaunch script for $component..."
        sleep 5
        if ! bash "postlaunch.sh" 2>&1 | tee postlaunch.log; then
            log_warn "postlaunch.sh failed for $component (non-critical)"
            log_warn "Check postlaunch.log in $(pwd) for details"
        else
            log_success "Postlaunch script completed successfully for $component"
        fi
    fi

    # Return to the original directory
    cd - > /dev/null
done

# Connect Traefik to networks after all components are up.
#
# Attaches the "${KAMIWAZA_ENV:-default}_kamiwaza-traefik" container to the
# given Docker network unless a container with that name is already attached.
# Arguments: $1 - Docker network name
# Returns:   0 always; a missing network is skipped silently and a failed
#            `docker network connect` is tolerated (best effort).
connect_traefik_to_network() {
    local network_name=$1
    local traefik_container="${KAMIWAZA_ENV:-default}_kamiwaza-traefik"
    local attached

    # One inspect call serves as both the existence check and the lookup.
    # Only the names of attached containers are requested, so the network's
    # own name or labels can never be mistaken for the Traefik container.
    if ! attached=$(docker network inspect \
            --format '{{range .Containers}}{{println .Name}}{{end}}' \
            "$network_name" 2>/dev/null); then
        return 0
    fi

    # Literal substring match done in the shell: no regex interpretation of the
    # container name and no `grep -q` pipeline that could trip pipefail.
    if [[ "$attached" == *"$traefik_container"* ]]; then
        return 0
    fi

    log_info "Connecting Traefik to network: $network_name"
    docker network connect "$network_name" "$traefik_container" || true
}

log_step "Configuring Traefik network connections"

# Build the list of networks Traefik has to join, in connection order.
# The etcd network is intentionally absent: the compose configs already
# attach Traefik to it by default.
#connect_traefik_to_network "kamiwaza-etcd-${KAMIWAZA_ETCD_NODE_NAME:-default}"
traefik_env_prefix="${KAMIWAZA_ENV:-default}"
traefik_networks=("${traefik_env_prefix}_kamiwaza-backend")
# The milvus network is only relevant when milvus is enabled
if [[ "${KAMIWAZA_MILVUS_ENABLED:-true}" == "true" ]]; then
    traefik_networks+=("${traefik_env_prefix}_kamiwaza-milvus")
fi
traefik_networks+=(
    "${traefik_env_prefix}_datahub_network"
    "${traefik_env_prefix}_kamiwaza-keycloak"
)

for traefik_network in "${traefik_networks[@]}"; do
    connect_traefik_to_network "$traefik_network"
done

# TODO: Update docker-compose files to use these networks instead of exposing ports directly
# This will involve modifying the network configurations in each docker-compose.yml file
# to use the appropriate networks created above, and removing direct port mappings
# except for Traefik's public ports.

# TODO: Set up Traefik rules to route traffic to the appropriate backend services
# This will involve creating Traefik configuration to handle routing based on
# hostnames, paths, or other criteria to the correct backend services on their
# respective networks.

log_success "Traefik has been connected to necessary networks"


# One-shot database initialization: runs only when the .kamiwaza-db-reset flag
# file exists and IS_WORKER is 0; the flag file is removed after success.
# shellcheck disable=SC2031 # KAMIWAZA_ROOT is static for this script; subshell assignments don't apply here
if [ -f "${KAMIWAZA_ROOT}/.kamiwaza-db-reset" ] && [ "${IS_WORKER:-999}" -eq 0 ]; then
    log_step "Initializing Kamiwaza databases"
    log_info "DB Init flag set: initializing Kamiwaza databases..."

    # Without the helper script there is nothing to retry - bail out early
    if [ ! -f "${KAMIWAZA_ROOT}/util/admin_db_reset.py" ]; then
        log_error "Database initialization script not found - contact support@kamiwaza.ai"
        exit 1
    fi

    # Up to max_attempts tries, pausing 5 seconds between failed attempts
    max_attempts=3
    attempt=1
    until "${KAMIWAZA_ROOT}/.venv/bin/python3" "${KAMIWAZA_ROOT}/util/admin_db_reset.py" --reset=false; do
        if [ "$attempt" -ge "$max_attempts" ]; then
            log_error "Database initialization failed after $max_attempts attempts"
            exit 1
        fi
        log_warn "Database initialization attempt $attempt failed, retrying in 5 seconds..."
        sleep 5
        attempt=$((attempt + 1))
    done

    rm -f "${KAMIWAZA_ROOT}/.kamiwaza-db-reset"
    log_success "Database initialization completed"
fi

# IS_WORKER unset (defaults to 999) or non-zero: treat this node as a worker
if [ "${IS_WORKER:-999}" -ne 0 ]; then
    log_info "Worker node detected, restarting dummy container"
    bash restart-dummy-container.sh
fi

log_step "Container startup completed successfully"
log_success "All Kamiwaza containers have been started"

exit 0
