# shellcheck shell=bash

# Print the group name to pair with the current user when assigning ownership:
# "staff" when uname reports Darwin, otherwise the value of $USER.
get_user_group() {
    if [[ "$(uname)" == "Darwin" ]]; then
        echo "staff"
        return
    fi
    echo "${USER}"
}

# Common logging functions for main scripts
# Can set LOG_PREFIX for component-specific logging
# Print "$1" to stdout tagged with LOG_PREFIX (tag defaults to INFO).
log_info() {
    printf '[%s] %s\n' "${LOG_PREFIX:-INFO}" "$1"
}

# Print "$1" to stdout tagged with LOG_PREFIX (tag defaults to WARN).
log_warn() {
    printf '[%s] %s\n' "${LOG_PREFIX:-WARN}" "$1"
}

# Print "$1" to stdout tagged with LOG_PREFIX (tag defaults to ERROR).
log_error() {
    printf '[%s] %s\n' "${LOG_PREFIX:-ERROR}" "$1"
}

# Print "$1" to stdout tagged with LOG_PREFIX (tag defaults to SUCCESS).
log_success() {
    printf '[%s] %s\n' "${LOG_PREFIX:-SUCCESS}" "$1"
}

# Print a step banner to stdout: a blank line, a rule, "<tag>: $1", and a
# closing rule. The tag is LOG_PREFIX when set, otherwise STEP.
log_step() {
    local rule="=========================================="
    printf '\n%s\n%s: %s\n%s\n' "$rule" "${LOG_PREFIX:-STEP}" "$1" "$rule"
}

# Fill in defaults for the fernet key location and its owning user/group.
# Each global is only assigned when it is currently unset or empty.
# Globals (written):
#   KAMIWAZA_SECURITY_DIR  default /etc/kamiwaza/security
#   FERNET_KEY_PATH        default ${KAMIWAZA_SECURITY_DIR}/fernet.key
#   FERNET_KEY_OWNER       first non-empty of KAMIWAZA_ENV_OWNER, SUDO_USER,
#                          USER, then `id -un`, finally "root"
#   FERNET_KEY_GROUP       KAMIWAZA_ENV_GROUP, else the owner's primary group
#                          from `id -gn`, else the owner name (or "root")
kw_resolve_fernet_identity() {
    : "${KAMIWAZA_SECURITY_DIR:=/etc/kamiwaza/security}"
    : "${FERNET_KEY_PATH:=${KAMIWAZA_SECURITY_DIR}/fernet.key}"

    if [[ -z "${FERNET_KEY_OWNER:-}" ]]; then
        local candidate
        local picked=""
        # Environment-provided identities win, in this order of preference.
        for candidate in "${KAMIWAZA_ENV_OWNER:-}" "${SUDO_USER:-}" "${USER:-}"; do
            if [[ -n "$candidate" ]]; then
                picked="$candidate"
                break
            fi
        done
        if [[ -n "$picked" ]]; then
            FERNET_KEY_OWNER="$picked"
        elif command -v id >/dev/null 2>&1; then
            FERNET_KEY_OWNER="$(id -un 2>/dev/null || echo root)"
        else
            FERNET_KEY_OWNER="root"
        fi
    fi

    # A group that is already set is left untouched.
    [[ -z "${FERNET_KEY_GROUP:-}" ]] || return 0

    if [[ -n "${KAMIWAZA_ENV_GROUP:-}" ]]; then
        FERNET_KEY_GROUP="${KAMIWAZA_ENV_GROUP}"
    elif [[ -n "${FERNET_KEY_OWNER:-}" ]] && command -v id >/dev/null 2>&1; then
        # Owner's primary group, then the caller's, then fall back to the owner name.
        FERNET_KEY_GROUP="$(id -gn "${FERNET_KEY_OWNER}" 2>/dev/null || id -gn 2>/dev/null || echo "${FERNET_KEY_OWNER}")"
    else
        FERNET_KEY_GROUP="${FERNET_KEY_OWNER:-root}"
    fi
}

# Enforce ownership and mode 600 on the fernet key file, using sudo.
# Arguments:
#   $1 - key file path (optional; defaults to FERNET_KEY_PATH)
# Returns:
#   1 when no path can be determined; 0 otherwise, including when the file
#   does not exist or when chown/chmod fail (those only produce a warning).
kw_ensure_fernet_permissions() {
    kw_resolve_fernet_identity
    local key_file="${1:-${FERNET_KEY_PATH:-}}"
    if [[ -z "$key_file" ]]; then
        log_warn "Fernet key path is undefined; skipping permission enforcement"
        return 1
    fi

    # Nothing to enforce until the key file exists.
    sudo test -f "$key_file" || return 0

    local key_owner="${FERNET_KEY_OWNER:-}"
    local key_group="${FERNET_KEY_GROUP:-${key_owner}}"
    if [[ -z "$key_owner" ]]; then
        log_warn "FERNET_KEY_OWNER is empty; cannot set ownership on $key_file"
    else
        local ownership="${key_owner}:${key_group:-$key_owner}"
        sudo chown "$ownership" "$key_file" \
            || log_warn "Unable to set ${ownership} on $key_file"
    fi

    sudo chmod 600 "$key_file" \
        || log_warn "Unable to enforce permissions on $key_file"
}

# Check that a given user can read the fernet key file.
# Arguments:
#   $1 - user to test as (optional; defaults to FERNET_KEY_OWNER)
#   $2 - key file path   (optional; defaults to FERNET_KEY_PATH)
# Returns:
#   0 when the file is absent or the user can read its first byte via
#   `sudo -u`; 1 (after a warning) when owner/path are missing, the user is
#   unknown to `id`, or the read fails.
kw_verify_fernet_key_readable() {
    kw_resolve_fernet_identity
    local reader="${1:-${FERNET_KEY_OWNER:-}}"
    local key_file="${2:-${FERNET_KEY_PATH:-}}"

    if [[ -z "$reader" ]] || [[ -z "$key_file" ]]; then
        log_warn "Unable to verify fernet key readability; owner/path missing"
        return 1
    fi

    # A key that has not been created yet is not an error.
    sudo test -f "$key_file" || return 0

    if id "$reader" >/dev/null 2>&1; then
        if sudo -u "$reader" head -c 1 "$key_file" >/dev/null 2>&1; then
            return 0
        fi
        log_warn "$reader cannot read $key_file; catalog/retrieval may fail"
        return 1
    fi

    log_warn "Unable to verify fernet key readability; user $reader not present"
    return 1
}

# Announce a command through log_info, then run it.
# Arguments: the command and its arguments, executed exactly as passed.
# Returns:   the exit status of the executed command.
log_cmd() {
    local announcement="Executing: $*"
    log_info "$announcement"
    "$@"
}

# Check if a value represents a truthy flag (true/yes/on/1), case-insensitively.
# Arguments: $1 - value to test (optional)
# Returns:   0 when truthy; 1 for anything else, including empty or missing input.
is_truthy() {
    local raw="${1:-}"
    [[ -n "$raw" ]] || return 1

    local lowered
    lowered="$(printf '%s' "$raw" | tr '[:upper:]' '[:lower:]')"
    if [[ "$lowered" == "1" || "$lowered" == "true" || "$lowered" == "yes" || "$lowered" == "on" ]]; then
        return 0
    fi
    return 1
}

# Abort the shell unless a host name or IP resolves.
# Arguments:
#   $1 - host name or IP to check
#   $2 - label used in error messages (default: "hostname")
#   $3 - "true" to accept localhost / 127.0.0.1 without a lookup (default: false)
# Returns 0 when the host is acceptable; otherwise logs via log_error and
# calls `exit 1`. Resolution is delegated to python3's socket.getaddrinfo.
require_resolvable_host() {
    local target_host="$1"
    local description="${2:-hostname}"
    local loopback_ok="${3:-false}"

    if [[ -z "$target_host" ]]; then
        log_error "$description must be provided"
        exit 1
    fi

    if [[ "$loopback_ok" == "true" ]] \
        && [[ "$target_host" == "localhost" || "$target_host" == "127.0.0.1" ]]; then
        return 0
    fi

    if python3 - "$target_host" <<'PY'
import socket
import sys

try:
    socket.getaddrinfo(sys.argv[1], None)
except OSError:
    sys.exit(1)
PY
    then
        return 0
    fi

    log_error "$description ('$target_host') does not resolve. Configure a routable hostname or IP that every node can reach."
    exit 1
}


# Drop the Linux page cache when running on a DGX Spark/UMA system.
# On unified memory systems, page cache can be counted as "used" memory by
# PyTorch/Ray, causing false OOM errors during model launches; dropping it
# gives an accurate free-memory reading.
# Returns 0 when the host is not a DGX Spark or the drop succeeded, 1 when the
# drop failed (callers are expected to treat that as non-fatal).
drop_page_cache_if_dgx_spark() {
    is_dgx_spark || return 0

    log_info "DGX Spark detected: dropping page cache to ensure accurate free memory reporting"

    # Flush pending writes first; a failed sync is only worth a warning.
    sync || log_warn "sync failed before dropping page cache"

    # Writing 3 drops pagecache, dentries and inodes; this needs root.
    local -a drop_cmd=(sh -c 'echo 3 > /proc/sys/vm/drop_caches')
    local failure_msg="Failed to drop page cache (requires root privileges)"
    if command -v sudo >/dev/null 2>&1; then
        drop_cmd=(sudo "${drop_cmd[@]}")
        failure_msg="Failed to drop page cache (sudo may require password or permissions)"
    fi
    # Without sudo on PATH the write is attempted directly (we may already be root).

    if "${drop_cmd[@]}" 2>/dev/null; then
        log_info "Page cache dropped successfully"
        return 0
    fi

    log_warn "$failure_msg"
    return 1
}

# Detect DGX Spark/Grace Hopper class systems.
# KAMIWAZA_FORCE_DGX_SPARK, when non-empty, decides the answer outright
# (truthy => detected). Otherwise nvidia-smi is consulted: first for a
# "Memory Type: UMA" line, then for a GPU name containing spark, gh200, gb200
# or "grace hopper" (case-insensitive).
# Returns 0 when detected, 1 otherwise.
is_dgx_spark() {
    local forced="${KAMIWAZA_FORCE_DGX_SPARK:-}"
    if [[ -n "$forced" ]]; then
        is_truthy "$forced" && return 0
        return 1
    fi

    command -v nvidia-smi >/dev/null 2>&1 || return 1

    # Check for UMA memory type first.
    if nvidia-smi -q 2>/dev/null | grep -qi 'Memory Type *: *UMA'; then
        return 0
    fi

    # Fallback to GPU name matching, one reported GPU per line.
    local gpu_names
    gpu_names=$(nvidia-smi --query-gpu=name --format=csv,noheader 2>/dev/null) || return 1

    local gpu_name
    while IFS= read -r gpu_name; do
        gpu_name=$(printf '%s' "${gpu_name}" | tr '[:upper:]' '[:lower:]')
        case "$gpu_name" in
            *spark*|*gh200*|*gb200*|*"grace hopper"*)
                return 0
                ;;
        esac
    done <<< "${gpu_names}"

    return 1
}

# Track origin of generated or loaded secrets for better observability.
# Arguments:
#   $1 - secret name; every character outside [A-Za-z0-9_] is replaced by "_"
#   $2 - status label to remember for that secret
# Globals:
#   KAMIWAZA_SECRET_SOURCE_<sanitized name> (written)
record_secret_source() {
    local key="$1"
    local status="$2"
    local sanitized="${key//[^A-Za-z0-9_]/_}"
    # printf -v stores the status verbatim. The previous eval-based assignment
    # re-parsed the status as shell code, so quotes, "$" or "$(...)" inside it
    # were expanded or executed.
    printf -v "KAMIWAZA_SECRET_SOURCE_${sanitized}" '%s' "$status"
}

# Print the status stored for a secret in KAMIWAZA_SECRET_SOURCE_<name>, or
# "unknown" when nothing (or an empty value) is stored. The name is sanitized
# by replacing every character outside [A-Za-z0-9_] with "_". No trailing
# newline is printed.
get_secret_source() {
    local lookup_name="KAMIWAZA_SECRET_SOURCE_${1//[^A-Za-z0-9_]/_}"
    printf '%s' "${!lookup_name:-unknown}"
}

# Log one line per tracked auth secret describing where its value came from.
# Arguments:
#   $1 - path of the env file that was loaded (may be empty)
# Reads the status of each secret via get_secret_source. The persisted-to
# location reported is KAMIWAZA_ENV_FILE_PATH, else $1, else ${KAMIWAZA_ROOT}/env.sh.
log_secret_source_summary() {
    local env_origin="$1"
    local fallback_target="${env_origin:-${KAMIWAZA_ROOT:-}/env.sh}"
    local persisted_target="${KAMIWAZA_ENV_FILE_PATH:-${fallback_target}}"
    local origin_desc="${env_origin:-environment variables}"
    local secret_name
    local secret_status

    for secret_name in \
        "AUTH_FORWARD_HEADER_SECRET" \
        "AUTH_GATEWAY_EDGE_SHARED_SECRET" \
        "KAMIWAZA_KEYCLOAK_ADMIN_PASSWORD"; do
        secret_status=$(get_secret_source "$secret_name")
        if [[ "$secret_status" == "existing" ]]; then
            log_info "$secret_name loaded from ${origin_desc}"
        elif [[ "$secret_status" == "generated" ]]; then
            log_info "$secret_name generated and persisted to ${persisted_target}"
        elif [[ "$secret_status" == "rotated" ]]; then
            log_info "$secret_name rotated and persisted to ${persisted_target}"
        elif [[ "$secret_status" == "skipped" ]]; then
            # Skipping is harmless only if the secret already has a value.
            if [[ -n "${!secret_name:-}" ]]; then
                log_info "$secret_name bootstrap skipped (retaining existing value)"
            else
                log_warn "$secret_name bootstrap skipped and no value present; set it manually before enabling auth."
            fi
        elif [[ "$secret_status" == "missing" ]]; then
            log_warn "$secret_name remains unset after bootstrap attempt."
        else
            log_warn "Unable to determine source for $secret_name (status: ${secret_status})"
        fi
    done
}

# Warn (or abort outside dev) when an auth secret is unset or still at its default.
# Arguments:
#   $1 - secret name (used in messages only)
#   $2 - current value
#   $3 - well-known default that must not be used (empty disables that check)
# Globals:
#   KAMIWAZA_ENV (read, case-insensitive): dev, development, default, community
#   and local are treated as dev-like; anything else is enforced as production.
#   Unset counts as dev.
# Returns 0 when the value is acceptable and 1 after a warning in dev-like
# environments. In production-like environments a violation is logged via
# log_error and the shell exits with status 1.
check_default_secret() {
    local secret_name="$1"
    local secret_value="$2"
    local forbidden_default="$3"

    local env_lower
    env_lower="$(printf '%s' "${KAMIWAZA_ENV:-dev}" | tr '[:upper:]' '[:lower:]')"

    # Default to production enforcement unless explicitly marked as dev-like.
    local strict="true"
    if [[ "$env_lower" == "dev" || "$env_lower" == "development" || "$env_lower" == "default" \
        || "$env_lower" == "community" || "$env_lower" == "local" ]]; then
        strict="false"
    fi

    local fatal_msg
    local warn_msg
    if [[ -z "$secret_value" ]]; then
        fatal_msg="### KAMIWAZA_AUTH_ERROR: $secret_name must be set. Refusing to start."
        warn_msg="### KAMIWAZA_AUTH_WARNING: $secret_name is unset. Set it in env.sh"
    elif [[ -n "$forbidden_default" && "$secret_value" == "$forbidden_default" ]]; then
        fatal_msg="### KAMIWAZA_AUTH_ERROR: $secret_name cannot remain $forbidden_default outside dev. Refusing to start."
        warn_msg="### KAMIWAZA_AUTH_WARNING: $secret_name is still set to the default ($forbidden_default)"
    else
        return 0
    fi

    if [[ "$strict" == "true" ]]; then
        log_error "$fatal_msg"
        exit 1
    fi

    log_warn "$warn_msg"
    return 1
}

# Run check_default_secret over every auth secret this script knows about.
# All four checks always run so each problem is reported, and the function
# returns 1 if ANY of them failed. Previously only the status of the last
# check was returned, so an earlier failure was invisible to callers testing
# the result.
# Returns: 0 when every secret passes, 1 otherwise. check_default_secret may
# terminate the shell itself in production-like environments.
enforce_auth_secret_defaults() {
    local status=0

    check_default_secret "KAMIWAZA_KEYCLOAK_ADMIN_PASSWORD" \
        "${KAMIWAZA_KEYCLOAK_ADMIN_PASSWORD:-}" "admin" || status=1
    check_default_secret "KEYCLOAK_ADMIN_PASSWORD" \
        "${KEYCLOAK_ADMIN_PASSWORD:-}" "admin" || status=1
    # The header/edge secrets have no well-known default; only emptiness is checked.
    check_default_secret "AUTH_FORWARD_HEADER_SECRET" \
        "${AUTH_FORWARD_HEADER_SECRET:-}" "" || status=1
    check_default_secret "AUTH_GATEWAY_EDGE_SHARED_SECRET" \
        "${AUTH_GATEWAY_EDGE_SHARED_SECRET:-}" "" || status=1

    return "$status"
}

# Leave the active Python virtual environment, if any, without ever failing.
# Handles both standard venv (a `deactivate` shell function) and
# pyenv-virtualenv (a `deactivate` script that must be sourced).
# Does nothing when VIRTUAL_ENV is empty. Always returns 0.
safe_deactivate() {
    [[ -n "${VIRTUAL_ENV:-}" ]] || return 0

    # pyenv-virtualenv is assumed when its init flag is set or its init command is on PATH.
    if [[ "${PYENV_VIRTUALENV_INIT:-}" == "1" ]] || command -v pyenv-virtualenv-init &>/dev/null; then
        # shellcheck disable=SC1091
        source deactivate 2>/dev/null || true
        return 0
    fi

    # Standard venv: call deactivate only if it is defined.
    if type deactivate &>/dev/null; then
        deactivate || true
    fi
    return 0
}

# Source set-kamiwaza-root.sh if it exists in the script's directory
# COMMON_DIR is the directory of this file as reported by BASH_SOURCE[0].
COMMON_DIR="$(dirname "${BASH_SOURCE[0]}")"

# set-kamiwaza-root.sh is mandatory: when it is missing the shell exits with
# status 1.
# NOTE(review): the message says "Warning" although this branch is fatal, and
# if this file is sourced the `exit` terminates the sourcing shell as well —
# confirm that is intended.
if [[ -f "${COMMON_DIR}/set-kamiwaza-root.sh" ]]; then
    source "${COMMON_DIR}/set-kamiwaza-root.sh"
else
    echo "Warning: set-kamiwaza-root.sh not found"
    exit 1
fi

# common-python.sh is optional; its absence only produces a warning on stderr.
if [[ -f "${COMMON_DIR}/common-python.sh" ]]; then
    # Provides kw_py for uv-backed Python execution.
    # shellcheck disable=SC1090
    source "${COMMON_DIR}/common-python.sh"
else
    echo "Warning: common-python.sh not found; kw_py unavailable" >&2
fi

# The marker file .kamiwaza_install_community under KAMIWAZA_ROOT switches the
# helpers in this file to community behavior via KAMIWAZA_COMMUNITY=true.
# KAMIWAZA_ROOT is presumably set by set-kamiwaza-root.sh — verify.
if [ -f "${KAMIWAZA_ROOT}/.kamiwaza_install_community" ]; then
    export KAMIWAZA_COMMUNITY=true
fi

# Fail fast unless this node is configured as either the swarm head
# (KAMIWAZA_SWARM_HEAD=true) or a worker (KAMIWAZA_HEAD_IP set).
# Prints an error to stdout and exits the shell with status 1 when neither is
# present; returns 0 otherwise.
# A second "worker nodes require KAMIWAZA_HEAD_IP" check used to follow, but
# its condition was identical to this one, so it could never be reached.
verify_environment() {
    if [[ "${KAMIWAZA_SWARM_HEAD:-}" != "true" && -z "${KAMIWAZA_HEAD_IP:-}" ]]; then
        echo "Error: Must specify either KAMIWAZA_SWARM_HEAD=true or KAMIWAZA_HEAD_IP"
        exit 1
    fi
}

# Load env.sh, bootstrap auth secrets, validate the node role, run the
# head/worker setup, enforce secret guard rails, and finally re-source the
# persisted env file.
# Globals read: KAMIWAZA_ROOT, KAMIWAZA_COMMUNITY, KAMIWAZA_SKIP_SECRET_BOOTSTRAP,
#   KAMIWAZA_SWARM_HEAD, KAMIWAZA_ORIG_NODE_TYPE, KAMIWAZA_HEAD_IP, KAMIWAZA_ENV,
#   KAMIWAZA_ENV_FILE_PATH.
# Returns 1 when enforce_auth_secret_defaults fails outside a dev-like
# KAMIWAZA_ENV; verify_environment and check_default_secret may exit the shell.
setup_environment() {
    # Try sourcing env.sh based on installation type
    # macOS/community installs read ${KAMIWAZA_ROOT}/env.sh; everything else
    # prefers /etc/kamiwaza/env.sh and falls back to the KAMIWAZA_ROOT copy.
    local env_file_loaded=""
    if [[ "$(uname)" == "Darwin" ]] || [[ "${KAMIWAZA_COMMUNITY:-}" == "true" ]]; then
        local candidate="${KAMIWAZA_ROOT}/env.sh"
        if [[ -f "${candidate}" ]]; then
            # shellcheck disable=SC1090
            source "${candidate}"
            env_file_loaded="${candidate}"
        fi
    else
        if [[ -f /etc/kamiwaza/env.sh ]]; then
            # shellcheck disable=SC1090,SC1091
            source "/etc/kamiwaza/env.sh"
            env_file_loaded="/etc/kamiwaza/env.sh"
        elif [[ -f "${KAMIWAZA_ROOT}/env.sh" ]]; then
            # shellcheck disable=SC1090
            source "${KAMIWAZA_ROOT}/env.sh"
            env_file_loaded="${KAMIWAZA_ROOT}/env.sh"
        fi
    fi


    # Start from a clean slate so only secrets generated during this call are reported.
    unset KAMIWAZA_GENERATED_SECRETS

    if [[ "${KAMIWAZA_SKIP_SECRET_BOOTSTRAP:-}" == "true" ]]; then
        log_info "Skipping auth secret bootstrap (KAMIWAZA_SKIP_SECRET_BOOTSTRAP=true)"
        # Still record where each secret stands so the summary below is accurate.
        local -a skipped_keys=("AUTH_FORWARD_HEADER_SECRET" "AUTH_GATEWAY_EDGE_SHARED_SECRET")
        local key
        for key in "${skipped_keys[@]}"; do
            if [[ -n "${!key:-}" ]]; then
                record_secret_source "$key" "existing"
            else
                record_secret_source "$key" "skipped"
            fi
        done
    else
        # Secrets are generated on head-like nodes: explicit head, community or
        # macOS installs, nodes with no head IP configured, or nodes whose
        # configured head IP equals the IP best_ip_for_hostname reports.
        local should_generate=false
        if [[ "${KAMIWAZA_SWARM_HEAD:-}" == "true" || "${KAMIWAZA_ORIG_NODE_TYPE:-}" == "head" || "${KAMIWAZA_COMMUNITY:-}" == "true" || "$(uname)" == "Darwin" ]]; then
            should_generate=true
        elif [[ -z "${KAMIWAZA_HEAD_IP:-}" ]]; then
            should_generate=true
        else
            local best_head_ip
            best_head_ip=$(best_ip_for_hostname)
            if [[ -n "$best_head_ip" && "${KAMIWAZA_HEAD_IP}" == "$best_head_ip" ]]; then
                should_generate=true
            fi
        fi

        # Ensure new auth secrets exist before enforcing guard rails so first boot
        # can proceed without manual edits.
        if $should_generate; then
            ensure_random_secret "AUTH_FORWARD_HEADER_SECRET"
            ensure_random_secret "AUTH_GATEWAY_EDGE_SHARED_SECRET"

            # ensure_random_secret appends to KAMIWAZA_GENERATED_SECRETS whenever
            # it creates a new value; refresh the compose overlays in that case.
            if [[ -n "${KAMIWAZA_GENERATED_SECRETS:-}" ]]; then
                log_info "Generated auth secret(s): ${KAMIWAZA_GENERATED_SECRETS}. Refreshing compose overlays."
                if [[ -x "${KAMIWAZA_ROOT}/copy-compose.sh" ]]; then
                    # Subshell keeps the cd from leaking into the caller's working directory.
                    if ! (cd "${KAMIWAZA_ROOT}" && bash copy-compose.sh); then
                        log_warn "copy-compose.sh failed while updating for new secrets"
                    fi
                fi
                unset KAMIWAZA_GENERATED_SECRETS
            fi
        else
            # Nodes that do not generate secrets only record what they already have.
            local -a existing_keys=("AUTH_FORWARD_HEADER_SECRET" "AUTH_GATEWAY_EDGE_SHARED_SECRET")
            local key
            for key in "${existing_keys[@]}"; do
                if [[ -n "${!key:-}" ]]; then
                    record_secret_source "$key" "existing"
                else
                    record_secret_source "$key" "skipped"
                fi
            done
        fi
    fi

    # Exits the shell when neither head nor worker configuration is present.
    verify_environment

    # Set up environment if not already configured. This also fills in
    # missing secrets (Keycloak admin, etc.) so that auth enforcement below
    # sees consistent values.
    # Note: This condition matches the first head-detection test of the secret
    # generation logic above to ensure community/macOS installs always run
    # setup_head_env even if KAMIWAZA_SWARM_HEAD was not persisted (e.g.,
    # developer_install.sh restores original env.sh).
    # setup_head_env / setup_worker_env are defined outside this section.
    if [[ "${KAMIWAZA_SWARM_HEAD:-}" == "true" || "${KAMIWAZA_ORIG_NODE_TYPE:-}" == "head" || "${KAMIWAZA_COMMUNITY:-}" == "true" || "$(uname)" == "Darwin" ]]; then
        setup_head_env
    elif [[ -n "${KAMIWAZA_HEAD_IP:-}" ]]; then
        setup_worker_env
    fi

    # A failed check is tolerated in dev-like environments and turned into a
    # non-zero return everywhere else.
    if ! enforce_auth_secret_defaults; then
        local env_name="${KAMIWAZA_ENV:-dev}"
        env_name="$(printf '%s' "$env_name" | tr '[:upper:]' '[:lower:]')"
        case "$env_name" in
            dev|development|default|community|local)
                true
                ;;
            *)
                return 1
                ;;
        esac
    fi
    log_secret_source_summary "${env_file_loaded}"

    # Re-source env.sh to ensure all persisted values are available in current shell
    # set -a exports every variable assigned while the file is being sourced.
    local final_env_file="${KAMIWAZA_ENV_FILE_PATH:-}"
    if [[ -n "$final_env_file" && -f "$final_env_file" ]]; then
        set -a
        # shellcheck disable=SC1090
        source "$final_env_file"
        set +a
    fi
}

# Helpers to upsert export lines into env.sh safely.
# __KW_ENV_FILE_TARGET caches the resolved env.sh path within this shell:
# set_env_value stores the canonical path here after resolving it, so repeated
# set_env_value calls don't toggle between /etc and repo-local copies.
__KW_ENV_FILE_TARGET=""

# Insert or update an `export KEY=value` line in an env file.
# Arguments:
#   $1 - env file path (created if missing)
#   $2 - variable name
#   $3 - value, written verbatim after "export KEY="
#   $4 - "true" to run the writer under sudo (default: false)
# Returns the writer's exit status; 1 when the file path or key is empty.
#
# Every existing line starting with "export KEY=" is rewritten. Previously
# only the first match was updated, so a later duplicate kept the stale value
# and, being sourced last, silently overrode the new one.
#
# NOTE(review): the value is written unquoted and is passed on the python3
# command line; values containing shell metacharacters would not survive
# `source`, and secrets are briefly visible in the process list. Confirm that
# callers only pass shell-safe tokens.
__kw_write_env_export() {
    local env_file="$1"
    local key="$2"
    local value="$3"
    local use_sudo="${4:-false}"

    if [[ -z "$env_file" || -z "$key" ]]; then
        log_error "Invalid arguments while writing environment export (file: '$env_file', key: '$key')"
        return 1
    fi

    local -a writer_cmd=(python3 - "$env_file" "$key" "$value")
    if [[ "$use_sudo" == "true" ]]; then
        writer_cmd=(sudo "${writer_cmd[@]}")
    fi

    # Quoted heredoc delimiter: the Python source is passed through literally.
    "${writer_cmd[@]}" <<'PY'
import sys
from pathlib import Path

env_path = Path(sys.argv[1])
key = sys.argv[2]
value = sys.argv[3]
prefix = f"export {key}="

lines = []
if env_path.exists():
    with env_path.open("r", encoding="utf-8") as fh:
        lines = fh.read().splitlines()

# Rewrite every definition of the key so no stale duplicate can win later.
updated = False
for idx, line in enumerate(lines):
    if line.startswith(prefix):
        lines[idx] = prefix + value
        updated = True
if not updated:
    lines.append(prefix + value)

output = "\n".join(lines)
if output and not output.endswith("\n"):
    output += "\n"

with env_path.open("w", encoding="utf-8") as fh:
    fh.write(output)
PY
}

# Print the canonical form of a path: "~" expanded, symlinks and ".."
# components resolved (python3 os.path.realpath). The path need not exist.
# Arguments: $1 - path to canonicalize
# Returns:   python3's exit status (1 when the resolution raises).
__kw_canonical_path() {
    local raw_path="$1"
    python3 - "$raw_path" <<'PY'
import sys
from os.path import expanduser, realpath

try:
    print(realpath(expanduser(sys.argv[1])))
except Exception:
    sys.exit(1)
PY
}

# Persist `export KEY=value` into the installation's env.sh, creating the file
# (with an edition marker comment and mode 640) when it does not exist yet.
# Arguments:
#   $1 - variable name
#   $2 - value
# Globals:
#   __KW_ENV_FILE_TARGET   (read/written) cached canonical env.sh path
#   KAMIWAZA_ENV_FILE_PATH (exported)     canonical env.sh path on success
#   KAMIWAZA_ROOT, KAMIWAZA_COMMUNITY, KAMIWAZA_ENV_OWNER, KAMIWAZA_ENV_GROUP,
#   USER (read)
# File location: ${KAMIWAZA_ROOT:-$PWD}/env.sh on macOS/community installs,
# /etc/kamiwaza/env.sh (written through sudo) otherwise.
# Returns 0 on success, 1 when the path cannot be resolved, escapes its base
# directory, or the write fails.
set_env_value() {
    local key="$1"
    local value="$2"
    local env_file=""
    local base_dir=""
    local needs_sudo="false"

    if [[ -n "${__KW_ENV_FILE_TARGET:-}" ]]; then
        # Reuse the path resolved by an earlier call in this shell.
        env_file="${__KW_ENV_FILE_TARGET}"
        case "$env_file" in
            /etc/kamiwaza/*)
                base_dir="/etc/kamiwaza"
                needs_sudo="true"
                ;;
            *)
                base_dir="$(dirname "$env_file")"
                needs_sudo="false"
                ;;
        esac
    else
        # Determine env file location based on installation type and OS
        if [[ "$(uname)" == "Darwin" ]] || [[ "${KAMIWAZA_COMMUNITY:-}" == "true" ]]; then
            base_dir="${KAMIWAZA_ROOT:-$(pwd)}"
            env_file="${base_dir}/env.sh"
        else
            base_dir="/etc/kamiwaza"
            env_file="/etc/kamiwaza/env.sh"
            needs_sudo="true"
        fi
    fi

    local canonical_base=""
    local canonical_env=""

    if ! canonical_base=$(__kw_canonical_path "$base_dir"); then
        canonical_base=""
    fi
    if ! canonical_env=$(__kw_canonical_path "$env_file"); then
        canonical_env=""
    fi

    if [[ -z "$canonical_env" || -z "$canonical_base" ]]; then
        log_error "Unable to resolve env.sh location (base: $base_dir, target: $env_file)"
        return 1
    fi

    # Compare with a trailing slash so /base-other does not pass as inside /base.
    case "$canonical_base" in
        */) ;;
        *) canonical_base="${canonical_base}/" ;;
    esac

    if [[ "$canonical_env" != "$canonical_base"* ]]; then
        log_error "env.sh path outside allowed directory: $canonical_env"
        return 1
    fi

    env_file="$canonical_env"
    __KW_ENV_FILE_TARGET="$env_file"

    # BSD (macOS) and GNU stat take different format flags.
    local __kw_uname
    __kw_uname="$(uname)"
    local -a __kw_stat_owner_cmd
    local -a __kw_stat_group_cmd
    if [[ "${__kw_uname}" == "Darwin" ]]; then
        __kw_stat_owner_cmd=(stat -f '%Su')
        __kw_stat_group_cmd=(stat -f '%Sg')
    else
        __kw_stat_owner_cmd=(stat -c '%U')
        __kw_stat_group_cmd=(stat -c '%G')
    fi

    local env_owner="${USER:-}"
    # Declared separately from the assignment so `local` does not mask the
    # helper's exit status (ShellCheck SC2155).
    local env_group
    env_group="$(get_user_group)" || env_group=""
    if [[ "$needs_sudo" == "true" ]]; then
        env_owner="${KAMIWAZA_ENV_OWNER:-root}"
        env_group="${KAMIWAZA_ENV_GROUP:-root}"
    fi

    local install_owner=""
    if [[ -n "${KAMIWAZA_ROOT:-}" && -d "${KAMIWAZA_ROOT}" ]]; then
        install_owner=$("${__kw_stat_owner_cmd[@]}" "${KAMIWAZA_ROOT}" 2>/dev/null || true)
        if [[ -z "${install_owner}" && "$needs_sudo" == "true" ]]; then
            install_owner=$(sudo "${__kw_stat_owner_cmd[@]}" "${KAMIWAZA_ROOT}" 2>/dev/null || true)
        fi
    fi

    # Ensure file exists with correct permissions. When the env file lives
    # under /etc/kamiwaza the kamiwaza user typically lacks execute access to
    # the directory, so a plain [[ -f ]] check will appear false and would
    # clobber the file on every write. Use sudo-aware existence checks.
    local env_exists="false"
    if [[ "$needs_sudo" == "true" ]]; then
        if sudo test -f "$env_file"; then
            env_exists="true"
        fi
    else
        if [[ -f "$env_file" ]]; then
            env_exists="true"
        fi
    fi

    local env_current_owner=""
    local env_current_group=""
    if [[ "$env_exists" == "true" ]]; then
        if [[ "$needs_sudo" == "true" ]]; then
            env_current_owner=$(sudo "${__kw_stat_owner_cmd[@]}" "$env_file" 2>/dev/null || true)
            env_current_group=$(sudo "${__kw_stat_group_cmd[@]}" "$env_file" 2>/dev/null || true)
        else
            env_current_owner=$("${__kw_stat_owner_cmd[@]}" "$env_file" 2>/dev/null || true)
            env_current_group=$("${__kw_stat_group_cmd[@]}" "$env_file" 2>/dev/null || true)
        fi
    fi

    # If env.sh is already owned by whoever owns the installation directory,
    # keep that ownership instead of resetting it.
    local preserve_owner="false"
    if [[ -n "$install_owner" && -n "$env_current_owner" && "$install_owner" == "$env_current_owner" ]]; then
        preserve_owner="true"
        env_owner="$env_current_owner"
        env_group="${env_current_group:-$env_current_owner}"
    fi

    if [[ "$env_exists" == "false" ]]; then
        # Installation type marker written as the first line of a new file.
        local edition_marker="# Kamiwaza Enterprise Edition Environment"
        if [[ "${KAMIWAZA_COMMUNITY:-}" == "true" ]]; then
            edition_marker="# Kamiwaza Community Edition Environment"
        fi

        # The marker is written the same way the file is created. Previously
        # the writer was chosen from KAMIWAZA_COMMUNITY alone, so a macOS
        # non-community install created the file as the user and then invoked
        # sudo just to write this comment.
        if [[ "${__kw_uname}" == "Darwin" ]] || [[ "${KAMIWAZA_COMMUNITY:-}" == "true" ]]; then
            # For OSX/community, create in KAMIWAZA_ROOT without sudo
            touch "$env_file"
            chmod 640 "$env_file"
            echo "$edition_marker" > "$env_file"
        else
            sudo touch "$env_file"
            sudo chown "${env_owner}:${env_group}" "$env_file"
            sudo chmod 640 "$env_file"
            echo "$edition_marker" | sudo tee "$env_file" >/dev/null
        fi
    elif [[ "$needs_sudo" == "true" && "$preserve_owner" == "false" ]]; then
        # Best effort: a failed chown must not block the write below.
        sudo chown "${env_owner}:${env_group}" "$env_file" >/dev/null 2>&1 || true
    fi

    if ! __kw_write_env_export "$env_file" "$key" "$value" "$needs_sudo"; then
        log_error "Failed to update ${key} in ${env_file}"
        return 1
    fi

    export KAMIWAZA_ENV_FILE_PATH="$env_file"
}

# Persist `export KEY=value` only when env.sh does not define KEY yet.
# Arguments:
#   $1 - variable name
#   $2 - value to write when the key is absent
# The env file is located the same way set_env_value does it (cached
# __KW_ENV_FILE_TARGET, else per-OS/edition default). Only the file is
# consulted — the current shell environment is intentionally ignored so that
# env.sh stays the source of truth and stale shell variables cannot interfere
# with fresh installs after cleanup.
# Returns 0 (exporting KAMIWAZA_ENV_FILE_PATH) when the key already exists,
# otherwise the status of set_env_value.
set_env_value_if_not_exists() {
    local key="$1"
    local value="$2"
    local target_file="/etc/kamiwaza/env.sh"
    local via_sudo="true"

    if [[ -n "${__KW_ENV_FILE_TARGET:-}" ]]; then
        target_file="${__KW_ENV_FILE_TARGET}"
        [[ "$target_file" == /etc/kamiwaza/* ]] || via_sudo="false"
    elif [[ "$(uname)" == "Darwin" || "${KAMIWAZA_COMMUNITY:-}" == "true" ]]; then
        target_file="${KAMIWAZA_ROOT:-$(pwd)}/env.sh"
        via_sudo="false"
    fi

    local resolved_file=""
    resolved_file=$(__kw_canonical_path "$target_file") || resolved_file=""

    # Look for an existing definition (sudo-aware for /etc/kamiwaza/).
    local definition="^export ${key}="
    local already_set="false"
    if [[ -n "$resolved_file" ]]; then
        if [[ "$via_sudo" == "true" ]]; then
            if sudo test -f "$resolved_file" 2>/dev/null \
                && sudo grep -q "$definition" "$resolved_file" 2>/dev/null; then
                already_set="true"
            fi
        elif [[ -f "$resolved_file" ]] && grep -q "$definition" "$resolved_file" 2>/dev/null; then
            already_set="true"
        fi
    fi

    if [[ "$already_set" == "true" ]]; then
        export KAMIWAZA_ENV_FILE_PATH="$resolved_file"
        return 0
    fi

    set_env_value "$key" "$value"
}

# Wait (up to a bounded number of one-second polls) for the kernel entropy
# estimate to reach a minimum.
# Arguments:
#   $1 - minimum value of entropy_avail to wait for (default: 256)
#   $2 - number of polls before giving up (default: 5)
# Returns 0 when the threshold is met or /proc/sys/kernel/random/entropy_avail
# is not readable; 1 (after a warning) when every poll stayed below it.
__kw_wait_for_entropy() {
    local minimum="${1:-256}"
    local attempts="${2:-5}"
    local entropy_file="/proc/sys/kernel/random/entropy_avail"

    # Systems without the proc entry are not made to wait.
    [[ -r "$entropy_file" ]] || return 0

    local available=0
    local polls
    for (( polls = 0; polls < attempts; polls++ )); do
        if read -r available < "$entropy_file" 2>/dev/null && (( available >= minimum )); then
            return 0
        fi
        sleep 1
    done

    log_warn "Entropy pool remains below ${minimum} bits (current: ${available}); continuing with best effort"
    return 1
}

# Generate a cryptographically strong secret and persist it to env.sh.
# The helper avoids regenerating once a value is present so secrets remain stable
# across restarts. Both new and existing values are written to env.sh to ensure
# persistence (e.g., when a secret exists in environment but not in the file).
# Arguments:
#   $1 - name of the variable holding the secret
#   $2 - "true" to rotate an existing value (default: false)
# Globals:
#   <$1>                        (exported) the new secret when one is generated
#   KAMIWAZA_GENERATED_SECRETS  (appended) comma-separated names generated here
# Returns 0 when the secret exists or was generated; 1 when no generator
# produced a value (the source is then recorded as "missing").
#
# Generators are tried in order — python3, python, openssl, /dev/urandom — and
# a generator that is installed but fails (for example a `python` that is
# Python 2, which has no `secrets` module) now falls through to the next one
# instead of leaving the secret unset.
ensure_random_secret() {
    local key="$1"
    local force="${2:-false}"
    local current_value="${!key:-}"
    local status="generated"

    if [[ "$force" == "true" && -n "$current_value" ]]; then
        log_info "Forcing regeneration of ${key}"
        status="rotated"
    elif [[ -n "$current_value" ]]; then
        # Value exists in environment - persist it to env.sh file and return
        set_env_value "$key" "$current_value"
        record_secret_source "$key" "existing"
        return 0
    fi

    local generated=""
    # Low entropy only delays generation; it never blocks it.
    __kw_wait_for_entropy 256 5 || true

    if command -v python3 >/dev/null 2>&1; then
        # SECURITY: single-quoted heredoc prevents shell interpolation in Python snippet
        generated=$(python3 - <<'PY'
import secrets
print(secrets.token_hex(32))
PY
) || generated=""
    fi

    if [[ -z "$generated" ]] && command -v python >/dev/null 2>&1; then
        generated=$(python - <<'PY'
import secrets
print(secrets.token_hex(32))
PY
) || generated=""
    fi

    if [[ -z "$generated" ]] && command -v openssl >/dev/null 2>&1; then
        generated=$(openssl rand -hex 32) || generated=""
    fi

    if [[ -z "$generated" ]]; then
        # Last resort: 32 bytes from the kernel, hex-encoded.
        generated=$(dd if=/dev/urandom bs=32 count=1 2>/dev/null | hexdump -ve '1/1 "%02x"') || generated=""
    fi

    if [[ -z "$generated" ]]; then
        log_warn "Unable to generate secret for $key; leaving unset"
        record_secret_source "$key" "missing"
        return 1
    fi

    set_env_value "$key" "$generated"
    export "$key=$generated"

    record_secret_source "$key" "$status"

    if [[ -z "${KAMIWAZA_GENERATED_SECRETS:-}" ]]; then
        KAMIWAZA_GENERATED_SECRETS="$key"
    else
        KAMIWAZA_GENERATED_SECRETS="${KAMIWAZA_GENERATED_SECRETS},$key"
    fi
}


# Pick the IPv4 address this host should advertise and print it on stdout.
#
# Resolution order:
#   1. KAMIWAZA_USE_LOCALHOST is true/1/yes (case-insensitive)   -> 127.0.0.1
#   2. macOS or WSL, unless KAMIWAZA_USE_LOCALHOST is false/0/no -> 127.0.0.1
#      (endpoint systems where DHCP can change IPs and break services)
#   3. macOS with detection forced: first non-loopback "inet" address from
#      ifconfig, else 127.0.0.1
#   4. Linux: among the IPv4 addresses reported by `hostname -I`, prefer
#        a. the source address of the route to 8.8.8.8
#        b. the address of the interface carrying that route
#        c. the address of the first UP interface that is not a
#           loopback/docker/bridge/vxlan interface
#        d. the first address that is neither 127.x nor 169.254.x
#   5. Otherwise 127.0.0.1
#
# Always returns 0.
best_ip_for_hostname() {
    local loopback_ip="127.0.0.1"

    # Check for explicit localhost override (set by --use-localhost flag)
    local use_localhost_lower
    use_localhost_lower="$(printf '%s' "${KAMIWAZA_USE_LOCALHOST:-}" | tr '[:upper:]' '[:lower:]')"
    if [[ "$use_localhost_lower" =~ ^(true|1|yes)$ ]]; then
        echo "$loopback_ip"
        return 0
    fi

    # Explicit "detect real IP" override (--use-localhost=false) skips the
    # Darwin/WSL localhost defaults below.
    local force_detect_ip=false
    if [[ "$use_localhost_lower" =~ ^(false|0|no)$ ]]; then
        force_detect_ip=true
    fi

    local os_name
    os_name="$(uname 2>/dev/null)" || os_name=""

    if [[ "$force_detect_ip" != "true" ]]; then
        if [[ "$os_name" == "Darwin" ]]; then
            echo "$loopback_ip"
            return 0
        fi
        if [[ "$os_name" == "Linux" ]] && grep -qEi "(Microsoft|WSL)" /proc/version &>/dev/null; then
            echo "$loopback_ip"
            return 0
        fi
    fi

    # Detect real IP - for Linux servers or when --use-localhost=false
    if [[ "$os_name" == "Darwin" ]]; then
        # macOS: first non-loopback IPv4 address listed by ifconfig. awk reads
        # the whole stream (no early exit) so ifconfig never sees a closed pipe.
        local mac_ip
        mac_ip=$(ifconfig 2>/dev/null | awk '/inet / && !/127\.0\.0\.1/ && !seen { print $2; seen = 1 }') || mac_ip=""
        echo "${mac_ip:-$loopback_ip}"
        return 0
    fi

    # For Linux:

    # Loop variable shared by the scans below; declared local so a caller's
    # own variables are never overwritten when this function is called directly.
    local candidate

    # 1. Addresses from hostname -I (may be empty where -I is unsupported)
    local all_hostname_ips
    all_hostname_ips=$(hostname -I 2>/dev/null) || all_hostname_ips=""

    # 2. Keep only IPv4 addresses (anything containing ':' is IPv6)
    local ipv4_ips=""
    for candidate in $all_hostname_ips; do
        if [[ "$candidate" != *:* ]]; then
            ipv4_ips="${ipv4_ips:+$ipv4_ips }$candidate"
        fi
    done

    # 3. Try to get IP from the default route interface (most reliable)
    if command -v ip &>/dev/null; then
        local route_info
        route_info=$(ip route get 8.8.8.8 2>/dev/null) || route_info=""

        # Source IP used for internet connectivity
        local default_ip
        default_ip=$(awk '/src/{for(i=1;i<=NF;i++) if ($i=="src") print $(i+1)}' <<< "$route_info")
        if [[ -n "$default_ip" ]]; then
            for candidate in $ipv4_ips; do
                if [[ "$candidate" == "$default_ip" ]]; then
                    echo "$default_ip"
                    return 0
                fi
            done
        fi

        # Address of the interface that carries the default route
        local default_iface
        default_iface=$(awk '/dev/{for(i=1;i<=NF;i++) if ($i=="dev") print $(i+1)}' <<< "$route_info")
        if [[ -n "$default_iface" ]]; then
            local iface_ip
            iface_ip=$(ip -br -4 addr show "$default_iface" 2>/dev/null | awk '{ split($3, parts, "/"); print parts[1] }') || iface_ip=""
            if [[ -n "$iface_ip" ]]; then
                for candidate in $ipv4_ips; do
                    if [[ "$candidate" == "$iface_ip" ]]; then
                        echo "$iface_ip"
                        return 0
                    fi
                done
            fi
        fi

        # Scan every interface: "<name> <state> <addr/prefix> ..."
        local iface_data
        iface_data=$(ip -br -4 addr show 2>/dev/null) || iface_data=""
        if [[ -n "$iface_data" ]]; then
            local iface state addr_cidr rest iface_addr
            while read -r iface state addr_cidr rest; do
                # Skip loopback, docker, bridge interfaces
                case "$iface" in
                    lo|docker*|br-*|virbr*|*_gwbridge|vxlan*) continue ;;
                esac

                # Skip interfaces that aren't UP
                [[ "$state" == "UP" ]] || continue

                iface_addr="${addr_cidr%%/*}"
                [[ -n "$iface_addr" ]] || continue

                for candidate in $ipv4_ips; do
                    if [[ "$candidate" == "$iface_addr" ]]; then
                        echo "$iface_addr"
                        return 0
                    fi
                done
            done <<< "$iface_data"
        fi
    fi

    # 4. Last resort - first non-loopback, non-link-local IP
    for candidate in $ipv4_ips; do
        if [[ ! "$candidate" =~ ^127\. && ! "$candidate" =~ ^169\.254\. ]]; then
            echo "$candidate"
            return 0
        fi
    done

    # Fallback to loopback if nothing else
    echo "$loopback_ip"
}

# Seed TRAEFIK_LOG_LEVEL (only if not already present) from the debug flag.
# KAMIWAZA_DEBUG takes precedence over KAMIWAZA_DEBUG_MODE; a value of
# true/1/yes in any letter case selects DEBUG, anything else selects INFO.
sync_traefik_log_level() {
    local flag_lc
    flag_lc="$(printf '%s' "${KAMIWAZA_DEBUG:-${KAMIWAZA_DEBUG_MODE:-}}" | tr '[:upper:]' '[:lower:]')"

    local level
    case "$flag_lc" in
        true|1|yes) level="DEBUG" ;;
        *)          level="INFO" ;;
    esac

    set_env_value_if_not_exists "TRAEFIK_LOG_LEVEL" "$level"
}

# Report whether this install should be treated as a development profile.
#
# Returns 0 when any of the following holds, otherwise 1:
#   - KAMIWAZA_ENV (case-insensitive) is dev/development/default/community/local
#   - KAMIWAZA_COMMUNITY is exactly "true"
#   - the host is macOS
#   - KAMIWAZA_DEBUG (or, if that is empty, KAMIWAZA_DEBUG_MODE) is true/1/yes
#     in any letter case, matching how sync_traefik_log_level reads the flag
is_dev_profile() {
    local env_name
    env_name="$(printf '%s' "${KAMIWAZA_ENV:-}" | tr '[:upper:]' '[:lower:]')"

    case "$env_name" in
        dev|development|default|community|local)
            return 0
            ;;
    esac

    if [[ "${KAMIWAZA_COMMUNITY:-}" == "true" ]]; then
        return 0
    fi

    if [[ "$(uname)" == "Darwin" ]]; then
        return 0
    fi

    # Normalise case so TRUE / Yes / YES are honoured like true / yes.
    local debug_flag
    debug_flag="$(printf '%s' "${KAMIWAZA_DEBUG:-${KAMIWAZA_DEBUG_MODE:-}}" | tr '[:upper:]' '[:lower:]')"
    if [[ "$debug_flag" =~ ^(true|1|yes)$ ]]; then
        return 0
    fi

    return 1
}

# Probe the well-known cloud metadata endpoints to decide whether this host is
# a cloud VM. Used for automatic URL configuration.
# Returns 0 as soon as AWS, GCP or Azure answers, 1 when none of them do.
is_cloud_instance() {
    local imds="http://169.254.169.254"

    # AWS EC2: ask for an IMDSv2 session token first (more secure).
    local imds_token
    imds_token=$(curl -sf --max-time 1 -X PUT "${imds}/latest/api/token" \
        -H 'X-aws-ec2-metadata-token-ttl-seconds: 21600' 2>/dev/null || true)

    if [[ -z "$imds_token" ]]; then
        # No token: older instances may still answer plain IMDSv1 requests.
        if curl -sf --max-time 2 \
            "${imds}/latest/meta-data/instance-id" >/dev/null 2>&1; then
            return 0
        fi
    else
        if curl -sf --max-time 2 \
            -H "X-aws-ec2-metadata-token: $imds_token" \
            "${imds}/latest/meta-data/instance-id" >/dev/null 2>&1; then
            return 0
        fi
    fi

    # GCP
    if curl -sf --max-time 2 -H "Metadata-Flavor: Google" \
        "http://metadata.google.internal/computeMetadata/v1/instance/id" >/dev/null 2>&1; then
        return 0
    fi

    # Azure
    if curl -sf --max-time 2 -H "Metadata: true" \
        "${imds}/metadata/instance?api-version=2021-02-01" >/dev/null 2>&1; then
        return 0
    fi

    return 1
}

# Print this instance's public IP or hostname as reported by cloud metadata.
# Providers are tried in order: AWS (public IP, then public hostname; via
# IMDSv2 when a token can be obtained, IMDSv1 otherwise), GCP external IP,
# Azure public IP. Returns 0 with the first non-empty answer, 1 when none.
get_cloud_public_address() {
    local imds="http://169.254.169.254"

    local imds_token
    imds_token=$(curl -sf --max-time 1 -X PUT "${imds}/latest/api/token" \
        -H 'X-aws-ec2-metadata-token-ttl-seconds: 21600' 2>/dev/null || true)

    # AWS: public IP is preferred (more reliable for direct access), the
    # public hostname is the fallback.
    local aws_field
    local aws_value
    for aws_field in public-ipv4 public-hostname; do
        if [[ -n "$imds_token" ]]; then
            aws_value=$(curl -sf --max-time 2 \
                -H "X-aws-ec2-metadata-token: $imds_token" \
                "${imds}/latest/meta-data/${aws_field}" 2>/dev/null || true)
        else
            aws_value=$(curl -sf --max-time 2 \
                "${imds}/latest/meta-data/${aws_field}" 2>/dev/null || true)
        fi

        if [[ -z "$aws_value" ]]; then
            continue
        fi
        # Only the hostname lookup treats the literal "unavailable" as no answer.
        if [[ "$aws_field" == "public-hostname" && "$aws_value" == "unavailable" ]]; then
            continue
        fi

        echo "$aws_value"
        return 0
    done

    # GCP external IP
    local gcp_value
    gcp_value=$(curl -sf --max-time 2 -H "Metadata-Flavor: Google" \
        "http://metadata.google.internal/computeMetadata/v1/instance/network-interfaces/0/access-configs/0/external-ip" 2>/dev/null || true)
    if [[ -n "$gcp_value" ]]; then
        echo "$gcp_value"
        return 0
    fi

    # Azure public IP (text format for simplicity)
    local azure_value
    azure_value=$(curl -sf --max-time 2 -H "Metadata: true" \
        "${imds}/metadata/instance/network/interface/0/ipv4/ipAddress/0/publicIpAddress?api-version=2021-02-01&format=text" 2>/dev/null || true)
    if [[ -n "$azure_value" ]]; then
        echo "$azure_value"
        return 0
    fi

    return 1
}

# Print the default external URL/host for this install. Always returns 0.
# Priority: explicit env vars > cloud metadata > FQDN > hostname > KAMIWAZA_HEAD_IP > localhost
get_default_external_url() {
    # Priorities 1 and 2: KAMIWAZA_EXTERNAL_URL_DEFAULT (explicit override),
    # then KAMIWAZA_EXTERNAL_HOSTNAME (legacy override).
    local override
    for override in "${KAMIWAZA_EXTERNAL_URL_DEFAULT:-}" "${KAMIWAZA_EXTERNAL_HOSTNAME:-}"; do
        if [[ -n "$override" ]]; then
            echo "$override"
            return 0
        fi
    done

    # Priority 3: whatever the cloud provider reports as the public address.
    local detected
    if detected=$(get_cloud_public_address); then
        echo "$detected"
        return 0
    fi

    # Priority 4: FQDN, unless it is empty, "localhost" or a *.localdomain* name.
    detected=$(hostname -f 2>/dev/null || true)
    case "$detected" in
        ""|localhost|*.localdomain*)
            ;;
        *)
            echo "$detected"
            return 0
            ;;
    esac

    # Priority 5: short hostname, unless it is empty or "localhost".
    detected=$(hostname 2>/dev/null || true)
    case "$detected" in
        ""|localhost)
            ;;
        *)
            echo "$detected"
            return 0
            ;;
    esac

    # Priority 6: KAMIWAZA_HEAD_IP when set and not a loopback/localhost value.
    local head_addr="${KAMIWAZA_HEAD_IP:-}"
    case "$head_addr" in
        ""|127.*|localhost*)
            ;;
        *)
            echo "$head_addr"
            return 0
            ;;
    esac

    # Final fallback
    echo "localhost"
}

# Detect installation context (local vs remote) for smart URL configuration.
#
# Prints exactly one word on stdout, "local" or "remote", so callers can
# capture it with $(...). Diagnostics go to stderr to keep that output clean.
# Always returns 0.
#
# KAMIWAZA_USE_LOCALHOST is read case-insensitively: true/1/yes request
# localhost, false/0/no reject it (the same spellings best_ip_for_hostname
# accepts).
#
# Decision order:
#   0. KAMIWAZA_EXTERNAL_URL_DEFAULT set            -> remote (warn if the host
#      would otherwise have been classified local)
#   1. localhost explicitly requested               -> local
#   2. macOS or WSL (endpoint systems)              -> local
#   3. Community edition, not on a cloud instance,
#      localhost not explicitly rejected            -> local
#   4. KAMIWAZA_HEAD_IP set to a non-loopback value -> remote
#   5. Cloud instance detected                      -> remote
#   otherwise                                       -> local
detect_installation_context() {
    local context="local"  # default

    local use_localhost
    use_localhost="$(printf '%s' "${KAMIWAZA_USE_LOCALHOST:-}" | tr '[:upper:]' '[:lower:]')"
    local wants_localhost="false"
    local rejects_localhost="false"
    if [[ "$use_localhost" =~ ^(true|1|yes)$ ]]; then
        wants_localhost="true"
    elif [[ "$use_localhost" =~ ^(false|0|no)$ ]]; then
        rejects_localhost="true"
    fi

    local os_name
    os_name="$(uname 2>/dev/null)" || os_name=""
    local endpoint_system="false"
    if [[ "$os_name" == "Darwin" ]]; then
        endpoint_system="true"
    elif [[ "$os_name" == "Linux" ]] && grep -qEi "(Microsoft|WSL)" /proc/version 2>/dev/null; then
        endpoint_system="true"
    fi

    # Check 0: Explicit --external-url flag signals remote installation intent.
    # This check must come first, before any local detection logic.
    if [[ -n "${KAMIWAZA_EXTERNAL_URL_DEFAULT:-}" ]]; then
        # Work out whether we would have detected local without this override,
        # purely to tell the user their flag changed the outcome.
        local would_be_local="false"
        if [[ "$wants_localhost" == "true" || "$endpoint_system" == "true" ]]; then
            would_be_local="true"
        elif [[ "${KAMIWAZA_COMMUNITY:-}" == "true" && "$rejects_localhost" != "true" ]] && ! is_cloud_instance; then
            would_be_local="true"
        fi

        if [[ "$would_be_local" == "true" ]]; then
            # stderr: log_warn prints to stdout, which callers capture.
            log_warn "Detected local environment, but --external-url was explicitly provided" >&2
            log_warn "Overriding to remote installation mode to honor --external-url=${KAMIWAZA_EXTERNAL_URL_DEFAULT}" >&2
        fi

        echo "remote"
        return 0
    fi

    # Check 1: Explicit localhost override
    # Check 2: macOS/WSL always local (endpoint systems)
    if [[ "$wants_localhost" == "true" || "$endpoint_system" == "true" ]]; then
        echo "local"
        return 0
    fi

    # Memoised is_cloud_instance result ("" = not probed yet). The probe makes
    # several network requests with timeouts, so run it at most once per call.
    local on_cloud=""

    # Check 3: Community edition defaults to local unless cloud is detected
    # (Developer installs on remote cloud instances should still detect as remote)
    if [[ "${KAMIWAZA_COMMUNITY:-}" == "true" ]]; then
        if is_cloud_instance; then
            on_cloud="true"
        else
            on_cloud="false"
        fi
        if [[ "$on_cloud" == "false" && "$rejects_localhost" != "true" ]]; then
            echo "local"
            return 0
        fi
    fi

    # Check 4: Explicit KAMIWAZA_HEAD_IP set to non-localhost
    if [[ -n "${KAMIWAZA_HEAD_IP:-}" ]]; then
        if [[ ! "${KAMIWAZA_HEAD_IP}" =~ ^(127\.|localhost) ]]; then
            context="remote"
        fi
    fi

    # Check 5: Cloud provider detection
    if [[ -z "$on_cloud" ]]; then
        if is_cloud_instance; then
            on_cloud="true"
        else
            on_cloud="false"
        fi
    fi
    if [[ "$on_cloud" == "true" ]]; then
        context="remote"
    fi

    echo "$context"
}

# Configure the environment for a head node.
#
# Writes settings through set_env_value / set_env_value_if_not_exists and
# generates secrets through ensure_random_secret, covering in order:
#   - cluster identity (root paths, swarm head flag, head IP, etcd node name)
#   - Traefik log level, KAMIWAZA_LITE and KAMIWAZA_USE_AUTH
#   - external URL, origin and CORS origins (local vs remote install context)
#   - database / Redis / Keycloak admin secrets
#   - Keycloak host plus the auth gateway URL, issuer and JWKS defaults
#   - OAuth callback URL, Keycloak admin URL, OIDC client credentials
#   - ReBAC session settings
#
# Exports KAMIWAZA_HEAD_IP, KAMIWAZA_USE_AUTH, KAMIWAZA_EXTERNAL_URL,
# KAMIWAZA_ORIGIN and KAMIWAZA_CORS_ORIGINS; may also export
# KAMIWAZA_KEYCLOAK_ADMIN_PASSWORD and KEYCLOAK_ADMIN_PASSWORD.
#
# Calls `exit 1` (terminating the shell, not just this function) when the
# Keycloak host ends up empty, or is localhost/127.0.0.1 in a configuration
# that does not allow it.
setup_head_env() {
    local dev_profile="false"
    if is_dev_profile; then
        dev_profile="true"
    fi

    set_env_value "KAMIWAZA_CLUSTER_MEMBER" "true"
    set_env_value "KAMIWAZA_INSTALL_ROOT" "${KAMIWAZA_ROOT}"
    set_env_value "KAMIWAZA_ROOT" "${KAMIWAZA_ROOT}"
    set_env_value "KAMIWAZA_SWARM_HEAD" "true"
    set_env_value "KAMIWAZA_ORIG_NODE_TYPE" "head"

    # Respect caller-provided head IP, otherwise derive a local IP
    local head_ip="${KAMIWAZA_HEAD_IP:-}"
    if [[ -z "$head_ip" ]]; then
        head_ip=$(best_ip_for_hostname)
    fi
    set_env_value "KAMIWAZA_HEAD_IP" "$head_ip"
    export KAMIWAZA_HEAD_IP="$head_ip"  # Export for detect_installation_context Check 4

    # Set etcd node name to hostname
    set_env_value "KAMIWAZA_ETCD_NODE_NAME" "$(hostname)"


    sync_traefik_log_level

    # Set KAMIWAZA_LITE based on community edition
    if [[ "${KAMIWAZA_COMMUNITY:-}" == "true" ]]; then
        set_env_value "KAMIWAZA_COMMUNITY" "true"
        set_env_value "KAMIWAZA_LITE" "${KAMIWAZA_LITE:-true}"
    else
        set_env_value "KAMIWAZA_LITE" "false"
    fi

    # Auth mode: a caller-provided KAMIWAZA_USE_AUTH (lowercased) wins;
    # otherwise auth is off in Lite mode and on everywhere else.
    local desired_use_auth="${KAMIWAZA_USE_AUTH:-}"
    if [[ -n "$desired_use_auth" ]]; then
        desired_use_auth="$(printf '%s' "$desired_use_auth" | tr '[:upper:]' '[:lower:]')"
    fi
    if [[ -z "$desired_use_auth" ]]; then
        # NOTE(review): this tests the KAMIWAZA_LITE shell variable, not the
        # value passed to set_env_value above; whether the two agree depends on
        # set_env_value (defined elsewhere) - confirm.
        if [[ "${KAMIWAZA_LITE:-}" == "true" ]]; then
            desired_use_auth="false"
        else
            desired_use_auth="true"
        fi
    fi
    set_env_value "KAMIWAZA_USE_AUTH" "$desired_use_auth"
    export KAMIWAZA_USE_AUTH="$desired_use_auth"

    # Persist KAMIWAZA_USE_LOCALHOST so future runs of setup_environment() preserve
    # the user's choice. This is critical for detect_installation_context() Check 3.
    if [[ -n "${KAMIWAZA_USE_LOCALHOST:-}" ]]; then
        set_env_value "KAMIWAZA_USE_LOCALHOST" "${KAMIWAZA_USE_LOCALHOST}"
    fi

    # Smart URL configuration based on installation context (local vs remote)
    local install_context
    install_context=$(detect_installation_context)

    # NOTE: These URL values use set_env_value (not set_env_value_if_not_exists) because
    # they must be updated when the user changes their localhost/hostname preference during
    # reinstall. Using _if_not_exists would preserve stale values from previous installs.
    # We also export immediately after setting so derived values use the updated values.
    if [[ "$install_context" == "local" ]]; then
        # Local/development: Use localhost
        log_info "Detected local installation context - using localhost URLs"
        set_env_value "KAMIWAZA_EXTERNAL_URL" "localhost"
        export KAMIWAZA_EXTERNAL_URL="localhost"
        set_env_value "KAMIWAZA_ORIGIN" "https://localhost"
        export KAMIWAZA_ORIGIN="https://localhost"
        set_env_value "KAMIWAZA_CORS_ORIGINS" '"https://localhost"'
        export KAMIWAZA_CORS_ORIGINS='"https://localhost"'
    else
        # Remote: Use public-facing URL
        log_info "Detected remote installation context - auto-configuring public URLs"
        local default_external_url
        default_external_url=$(get_default_external_url)

        log_info "Auto-detected external URL: ${default_external_url}"
        set_env_value "KAMIWAZA_EXTERNAL_URL" "$default_external_url"
        export KAMIWAZA_EXTERNAL_URL="$default_external_url"

        # Derive KAMIWAZA_ORIGIN from the effective KAMIWAZA_EXTERNAL_URL
        # Strip any existing scheme to avoid double-scheme issues (e.g., https://https://...)
        local effective_host="${default_external_url#https://}"
        effective_host="${effective_host#http://}"
        local derived_origin="https://${effective_host}"
        set_env_value "KAMIWAZA_ORIGIN" "$derived_origin"
        export KAMIWAZA_ORIGIN="$derived_origin"

        # Set CORS origins for remote access
        local derived_cors="\"https://${effective_host}\""
        set_env_value "KAMIWAZA_CORS_ORIGINS" "$derived_cors"
        export KAMIWAZA_CORS_ORIGINS="$derived_cors"
        log_info "Auto-configured CORS origins for remote access"
    fi

    set_env_value "PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION" "python"

    # Disable Hugging Face XET download mechanism (causes hangs/timeouts)
    set_env_value "HF_HUB_DISABLE_XET" "1"

    # Dev note: we had a retrieval config here but that should come from `config.py` in retrieval service

    # Authentication Gateway Configuration
    # Only set if not already present
    # JWT secrets now managed by Keycloak - admin password handled during Keycloak bootstrap
    ensure_random_secret "KC_DB_PASSWORD"
    ensure_random_secret "AUTH_GATEWAY_REDIS_PASSWORD"

    # Keycloak admin credentials must exist before prelaunch runs so the
    # service doesn't invent mismatched defaults. Respect any caller provided
    # values, otherwise generate one and mirror it to KEYCLOAK_ADMIN_PASSWORD.
    if [[ -z "${KAMIWAZA_KEYCLOAK_ADMIN_PASSWORD:-}" && -n "${KEYCLOAK_ADMIN_PASSWORD:-}" ]]; then
        KAMIWAZA_KEYCLOAK_ADMIN_PASSWORD="${KEYCLOAK_ADMIN_PASSWORD}"
        set_env_value "KAMIWAZA_KEYCLOAK_ADMIN_PASSWORD" "${KAMIWAZA_KEYCLOAK_ADMIN_PASSWORD}"
        export KAMIWAZA_KEYCLOAK_ADMIN_PASSWORD
    fi

    ensure_random_secret "KAMIWAZA_KEYCLOAK_ADMIN_PASSWORD"

    if [[ -z "${KEYCLOAK_ADMIN_PASSWORD:-}" ]]; then
        KEYCLOAK_ADMIN_PASSWORD="${KAMIWAZA_KEYCLOAK_ADMIN_PASSWORD}"
        set_env_value "KEYCLOAK_ADMIN_PASSWORD" "${KEYCLOAK_ADMIN_PASSWORD}"
        export KEYCLOAK_ADMIN_PASSWORD
    else
        # Keep the mirrored copy in sync if callers only set KEYCLOAK_ADMIN_PASSWORD.
        if [[ "${KEYCLOAK_ADMIN_PASSWORD}" != "${KAMIWAZA_KEYCLOAK_ADMIN_PASSWORD}" ]]; then
            KAMIWAZA_KEYCLOAK_ADMIN_PASSWORD="${KEYCLOAK_ADMIN_PASSWORD}"
            set_env_value "KAMIWAZA_KEYCLOAK_ADMIN_PASSWORD" "${KAMIWAZA_KEYCLOAK_ADMIN_PASSWORD}"
            export KAMIWAZA_KEYCLOAK_ADMIN_PASSWORD
        fi
    fi

    # "Local auth defaults" apply only when auth is not enabled AND this is a
    # dev profile or a Lite install.
    local use_local_auth_defaults="false"
    if [[ "${desired_use_auth:-}" != "true" ]]; then
        if [[ "$dev_profile" == "true" || "${KAMIWAZA_LITE:-}" == "true" ]]; then
            use_local_auth_defaults="true"
        fi
    fi

    # Keycloak / OIDC defaults
    # Set defaults even in Lite mode so operators can flip auth on later without reinstalling
    local keycloak_host="${KAMIWAZA_KEYCLOAK_HOST:-}"
    if [[ -z "$keycloak_host" ]]; then
        if [[ "$use_local_auth_defaults" == "true" ]]; then
            # Local auth mode: Keycloak not used, but set localhost for forward compatibility
            keycloak_host="localhost"
        elif [[ "$install_context" == "local" ]]; then
            # Local development (macOS/Linux laptop): use localhost even with full auth
            keycloak_host="localhost"
        else
            # Remote/cloud installations: use HEAD_IP if available, otherwise localhost (community only)
            keycloak_host="${KAMIWAZA_HEAD_IP:-localhost}"
            if [[ "$keycloak_host" != "localhost" ]]; then
                log_info "Using KAMIWAZA_HEAD_IP ($keycloak_host) for Keycloak host on remote instance"
            else
                log_info "Using localhost for Keycloak host (set KAMIWAZA_HEAD_IP for enterprise remote installs)"
            fi
        fi
    fi
    # Defensive: every branch above leaves keycloak_host non-empty, so the two
    # checks below are not expected to fire.
    if [[ -z "$keycloak_host" ]]; then
        # Fallback to KAMIWAZA_HEAD_IP if available
        keycloak_host="${KAMIWAZA_HEAD_IP:-}"
    fi
    if [[ -z "$keycloak_host" ]]; then
        log_error "KAMIWAZA_KEYCLOAK_HOST must be set to a resolvable hostname or IP before enabling auth."
        log_error "Set it in env.sh (or export before running install.sh) and rerun the installer."
        exit 1
    fi
    if [[ "$use_local_auth_defaults" != "true" ]]; then
        if [[ "$keycloak_host" == "localhost" || "$keycloak_host" == "127.0.0.1" ]]; then
            # Community edition with auth enabled is allowed to use localhost for Keycloak
            if [[ "${KAMIWAZA_COMMUNITY:-}" != "true" || "${desired_use_auth:-}" != "true" ]]; then
                log_error "KAMIWAZA_KEYCLOAK_HOST cannot be localhost when KAMIWAZA_USE_AUTH=true."
                exit 1
            fi
        fi
        require_resolvable_host "$keycloak_host" "KAMIWAZA_KEYCLOAK_HOST"
    else
        # NOTE(review): the third argument presumably relaxes the check for
        # local auth mode; require_resolvable_host is defined elsewhere - confirm.
        require_resolvable_host "$keycloak_host" "KAMIWAZA_KEYCLOAK_HOST" "true"
    fi
    set_env_value "KAMIWAZA_KEYCLOAK_HOST" "$keycloak_host"

    local keycloak_base="https://${keycloak_host}"
    local default_keycloak_url
    local default_keycloak_public_url
    local default_jwt_issuer
    local default_jwks

    if [[ "$use_local_auth_defaults" == "true" ]]; then
        # Local auth mode: Kamiwaza issues JWTs directly (not Keycloak)
        # JWKS endpoint is always the local auth service, regardless of installation context
        if [[ "$install_context" == "local" ]]; then
            # True local development: everything uses localhost
            default_keycloak_url="https://localhost"
            default_keycloak_public_url="https://localhost"
            default_jwt_issuer="https://localhost"
            default_jwks="https://localhost/api/auth/jwks"
        else
            # Remote cloud instance with auth disabled: use public URLs
            # CRITICAL: JWKS endpoint must point to Kamiwaza's auth service, not Keycloak
            default_keycloak_url="http://localhost:8080"  # For future Keycloak enablement
            default_keycloak_public_url="https://${keycloak_host}"  # Forward-compatible
            default_jwt_issuer="https://${keycloak_host}"  # Local auth uses base URL as issuer
            default_jwks="https://${keycloak_host}/api/auth/jwks"  # Local auth JWKS endpoint
            log_info "Using public URLs for remote instance (local auth mode)"
        fi
    else
        # Full auth mode: Keycloak handles authentication
        #
        # NOTE (local/dev): Keycloak may be configured to require HTTPS for token requests.
        # On local installs we therefore route all Keycloak calls through Traefik at https://localhost.
        #
        # Remote/cloud installs may still use direct container access for internal calls.
        if [[ "$install_context" == "local" ]]; then
            default_keycloak_url="https://localhost"
            default_keycloak_public_url="https://localhost"
            default_jwt_issuer="https://localhost/realms/kamiwaza"
            default_jwks="https://localhost/realms/kamiwaza/protocol/openid-connect/certs"
            log_info "Using Keycloak via Traefik HTTPS for local authentication"
        else
            # All Keycloak access via Traefik HTTPS for consistency (full-auth remote installs)
            default_keycloak_url="https://localhost"  # Internal: via Traefik for consistent issuer
            default_keycloak_public_url="$keycloak_base"  # Public: browser access
            default_jwt_issuer="https://localhost/realms/kamiwaza"  # Matches KC_HOSTNAME_STRICT token issuer
            default_jwks="https://localhost/realms/kamiwaza/protocol/openid-connect/certs"  # Fetch via Traefik
            log_info "Using Keycloak for authentication (public URL: ${keycloak_base})"
        fi
    fi

    # NOTE: Auth gateway URLs use set_env_value (not _if_not_exists) because they must
    # be updated when the user changes their localhost/hostname preference during reinstall.
    # Each URL is (re)written when it is unset, or when full auth is in use and
    # the current value is a bare http(s)://localhost URL.
    if [[ -z "${AUTH_GATEWAY_KEYCLOAK_URL:-}" || ( "$use_local_auth_defaults" != "true" && ${AUTH_GATEWAY_KEYCLOAK_URL:-} =~ ^https?://localhost/?$ ) ]]; then
        set_env_value "AUTH_GATEWAY_KEYCLOAK_URL" "$default_keycloak_url"
    fi
    if [[ -z "${AUTH_GATEWAY_KEYCLOAK_PUBLIC_URL:-}" || ( "$use_local_auth_defaults" != "true" && ${AUTH_GATEWAY_KEYCLOAK_PUBLIC_URL:-} =~ ^https?://localhost/?$ ) ]]; then
        set_env_value "AUTH_GATEWAY_KEYCLOAK_PUBLIC_URL" "$default_keycloak_public_url"
        set_env_value "AUTH_GATEWAY_PUBLIC_URL" "$default_keycloak_public_url"
    elif [[ -z "${AUTH_GATEWAY_PUBLIC_URL:-}" ]]; then
        # Mirror the explicit Keycloak public URL if caller provided it
        set_env_value "AUTH_GATEWAY_PUBLIC_URL" "${AUTH_GATEWAY_KEYCLOAK_PUBLIC_URL}"
    fi
    set_env_value_if_not_exists "AUTH_GATEWAY_KEYCLOAK_REALM" "kamiwaza"

    # JWT/JWKS/Audience - these derive from installation context, so use set_env_value
    set_env_value "AUTH_GATEWAY_JWT_ISSUER" "$default_jwt_issuer"
    set_env_value "AUTH_GATEWAY_JWKS_URL" "$default_jwks"
    set_env_value_if_not_exists "AUTH_GATEWAY_JWT_AUDIENCE" "kamiwaza-platform"

    # TLS/State defaults: Community/macOS allow self-signed and unsigned state; Enterprise is strict
    local default_tls_insecure="false"
    local default_allow_unsigned="false"
    if [[ "${KAMIWAZA_COMMUNITY:-}" == "true" || "$(uname)" == "Darwin" ]]; then
        default_tls_insecure="true"
        default_allow_unsigned="true"
    fi
    set_env_value_if_not_exists "AUTH_GATEWAY_TLS_INSECURE" "$default_tls_insecure"
    set_env_value_if_not_exists "AUTH_ALLOW_UNSIGNED_STATE" "$default_allow_unsigned"

    # OAuth callback URL - use the public-facing origin
    # Derive from KAMIWAZA_EXTERNAL_URL to ensure consistency with remote installs
    local callback_origin
    if [[ "$install_context" == "local" ]]; then
        callback_origin="https://localhost"
    else
        # Remote: Use the effective external URL (strip any existing scheme)
        # KAMIWAZA_EXTERNAL_URL was exported in the remote branch above, so the
        # get_default_external_url fallback only runs if that value is empty.
        local callback_external_url="${KAMIWAZA_EXTERNAL_URL:-$(get_default_external_url)}"
        local callback_host="${callback_external_url#https://}"
        callback_host="${callback_host#http://}"
        callback_origin="https://${callback_host}"
    fi
    # Callback URL derives from installation context - use set_env_value to update on reinstall
    set_env_value "AUTH_CALLBACK_URL" "${callback_origin}/api/auth/callback"

    # Keycloak admin URL - internal access for user provisioning and client setup
    if [[ "$use_local_auth_defaults" != "true" ]]; then
        # When Keycloak is enabled (full-auth), admin REST is exposed via Traefik rewrite
        set_env_value "AUTH_GATEWAY_KEYCLOAK_ADMIN_URL" "https://localhost/_kc_admin"
    else
        set_env_value "AUTH_GATEWAY_KEYCLOAK_ADMIN_URL" "http://localhost:8080/admin"
    fi

    # OIDC client credentials for ForwardAuth/service tokens
    set_env_value_if_not_exists "AUTH_GATEWAY_KEYCLOAK_CLIENT_ID" "kamiwaza-platform"
    ensure_random_secret "AUTH_GATEWAY_KEYCLOAK_CLIENT_SECRET"

    # ReBAC session management (required for ephemeral apps like App Garden)
    set_env_value_if_not_exists "AUTH_REBAC_SESSION_ENABLED" "true"

    # Redis URL for session storage - uses head IP for internal connectivity
    # Skipped when no Redis password is available (for example if secret
    # generation above failed), so no password-less URL is written.
    local redis_password="${AUTH_GATEWAY_REDIS_PASSWORD:-}"
    if [[ -n "$redis_password" ]]; then
        set_env_value_if_not_exists "AUTH_REBAC_SESSION_REDIS_URL" "redis://:${redis_password}@${head_ip}:6380/0"
    fi

    # Session security: Community/macOS allow insecure Redis; Enterprise is strict
    local default_session_allow_insecure="false"
    if [[ "${KAMIWAZA_COMMUNITY:-}" == "true" || "$(uname)" == "Darwin" ]]; then
        default_session_allow_insecure="true"
    fi
    set_env_value_if_not_exists "AUTH_REBAC_SESSION_ALLOW_INSECURE" "$default_session_allow_insecure"
}

setup_worker_env() {
    local dev_profile="false"
    if is_dev_profile; then
        dev_profile="true"
    fi

    set_env_value "KAMIWAZA_CLUSTER_MEMBER" "true"
    set_env_value "KAMIWAZA_INSTALL_ROOT" "${KAMIWAZA_ROOT}"
    set_env_value "KAMIWAZA_ROOT" "${KAMIWAZA_ROOT}"
    set_env_value "KAMIWAZA_HEAD_IP" "${KAMIWAZA_HEAD_IP}"
    set_env_value "KAMIWAZA_SWARM_TARGET" "${KAMIWAZA_HEAD_IP}"

    # Set etcd node name to hostname
    set_env_value "KAMIWAZA_ETCD_NODE_NAME" "$(hostname)"


    sync_traefik_log_level

    # Set KAMIWAZA_LITE based on community edition
    if [[ "${KAMIWAZA_COMMUNITY:-}" == "true" ]]; then
        set_env_value "KAMIWAZA_LITE" "${KAMIWAZA_LITE:-true}"
    else
        set_env_value "KAMIWAZA_LITE" "false"
    fi

    local desired_use_auth_worker="${KAMIWAZA_USE_AUTH:-}"
    if [[ -n "$desired_use_auth_worker" ]]; then
        desired_use_auth_worker="${desired_use_auth_worker,,}"
    else
        log_error "KAMIWAZA_USE_AUTH must match the head node configuration. Please copy env.sh from the head before bootstrapping this worker."
        return 1
    fi
    set_env_value "KAMIWAZA_USE_AUTH" "$desired_use_auth_worker"
    export KAMIWAZA_USE_AUTH="$desired_use_auth_worker"

    set_env_value "PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION" "python"

    # Disable Hugging Face XET download mechanism (causes hangs/timeouts)
    set_env_value "HF_HUB_DISABLE_XET" "1"

    set_env_value "KAMIWAZA_ORIG_NODE_TYPE" "worker"

    # Lite mode admin password (minimum 12 characters required)
    if [[ -z "${KAMIWAZA_ADMIN_PASSWORD:-}" ]]; then
        log_error "KAMIWAZA_ADMIN_PASSWORD is unset for worker bootstrap; ensure the head node env.sh is replicated."
        return 1
    fi


    # Auth defaults on workers as well (Traefik terminates TLS locally)
    # Always set defaults so auth can be enabled post-install without re-running setup
    local worker_keycloak_host="${KAMIWAZA_KEYCLOAK_HOST:-}"
    if [[ -z "$worker_keycloak_host" && "${desired_use_auth_worker}" != "true" ]]; then
        worker_keycloak_host="localhost"
    fi
    if [[ -z "$worker_keycloak_host" ]]; then
        log_error "KAMIWAZA_KEYCLOAK_HOST is required on workers when KAMIWAZA_USE_AUTH=true. Copy env.sh from the head node."
        exit 1
    fi
    if [[ "${desired_use_auth_worker}" == "true" ]]; then
        if [[ "$worker_keycloak_host" == "localhost" || "$worker_keycloak_host" == "127.0.0.1" ]]; then
            log_error "Workers cannot use localhost for KAMIWAZA_KEYCLOAK_HOST when auth is enabled."
            exit 1
        fi
        require_resolvable_host "$worker_keycloak_host" "KAMIWAZA_KEYCLOAK_HOST"
    else
        require_resolvable_host "$worker_keycloak_host" "KAMIWAZA_KEYCLOAK_HOST" "true"
    fi
    set_env_value "KAMIWAZA_KEYCLOAK_HOST" "$worker_keycloak_host"

    local worker_keycloak_base="https://${worker_keycloak_host}"
    local worker_default_keycloak_url="$worker_keycloak_base"
    local worker_default_keycloak_public_url="$worker_keycloak_base"
    local worker_default_issuer="${worker_keycloak_base}/realms/kamiwaza"
    local worker_default_jwks="${worker_keycloak_base}/realms/kamiwaza/protocol/openid-connect/certs"
    if [[ "${desired_use_auth_worker}" != "true" ]]; then
        worker_default_keycloak_url="https://localhost"
        worker_default_keycloak_public_url="https://localhost"
        worker_default_issuer="https://localhost/realms/kamiwaza"  # FIXED: must match head node's KC_HOSTNAME
        worker_default_jwks="https://localhost/api/auth/jwks"
    fi

    if [[ -z "${AUTH_GATEWAY_KEYCLOAK_URL:-}" || ( "${desired_use_auth_worker}" == "true" && ${AUTH_GATEWAY_KEYCLOAK_URL:-} =~ ^https?://localhost/?$ ) ]]; then
        set_env_value_if_not_exists "AUTH_GATEWAY_KEYCLOAK_URL" "$worker_default_keycloak_url"
    fi
    if [[ -z "${AUTH_GATEWAY_KEYCLOAK_PUBLIC_URL:-}" || ( "${desired_use_auth_worker}" == "true" && ${AUTH_GATEWAY_KEYCLOAK_PUBLIC_URL:-} =~ ^https?://localhost/?$ ) ]]; then
        set_env_value_if_not_exists "AUTH_GATEWAY_KEYCLOAK_PUBLIC_URL" "$worker_default_keycloak_public_url"
        set_env_value_if_not_exists "AUTH_GATEWAY_PUBLIC_URL" "$worker_default_keycloak_public_url"
    elif [[ -z "${AUTH_GATEWAY_PUBLIC_URL:-}" ]]; then
        set_env_value_if_not_exists "AUTH_GATEWAY_PUBLIC_URL" "${AUTH_GATEWAY_KEYCLOAK_PUBLIC_URL}"
    fi
    set_env_value_if_not_exists "AUTH_GATEWAY_KEYCLOAK_REALM" "kamiwaza"

    # JWT/JWKS/Audience - persist defaults to env.sh if not already present
    set_env_value_if_not_exists "AUTH_GATEWAY_JWT_ISSUER" "$worker_default_issuer"
    set_env_value_if_not_exists "AUTH_GATEWAY_JWKS_URL" "$worker_default_jwks"
    set_env_value_if_not_exists "AUTH_GATEWAY_JWT_AUDIENCE" "kamiwaza-platform"

    # TLS/State defaults: Community/macOS allow self-signed and unsigned state; Enterprise is strict
    local default_tls_insecure="false"
    local default_allow_unsigned="false"
    if [[ "${KAMIWAZA_COMMUNITY:-}" == "true" || "$(uname)" == "Darwin" ]]; then
        default_tls_insecure="true"
        default_allow_unsigned="true"
    fi
    set_env_value_if_not_exists "AUTH_GATEWAY_TLS_INSECURE" "$default_tls_insecure"
    set_env_value_if_not_exists "AUTH_ALLOW_UNSIGNED_STATE" "$default_allow_unsigned"
}

# Raise the soft open-file limit of the current shell to at least a minimum.
#
# Arguments: $1 - desired minimum soft limit (default 65536)
# Outputs:   a status line on stdout when the soft limit is already unlimited;
#            warnings on stderr when ulimit cannot be read or changed
# Returns:   0 in every case (best effort)
ensure_nofile_min() {
  local wanted=${1:-65536}
  local soft_limit hard_limit goal

  if ! soft_limit=$(ulimit -Sn 2>/dev/null); then
    echo "warn: ulimit -Sn failed" >&2
    return 0
  fi
  if ! hard_limit=$(ulimit -Hn 2>/dev/null); then
    echo "warn: ulimit -Hn failed" >&2
    return 0
  fi

  # An unlimited soft limit cannot be improved on.
  if [[ "$soft_limit" == "unlimited" ]]; then
    echo "[nofile] soft=unlimited hard=$hard_limit"
    return 0
  fi

  # Anything that is not a plain non-negative integer counts as 0.
  [[ "$soft_limit" =~ ^[0-9]+$ ]] || soft_limit=0

  if [[ "$hard_limit" == "unlimited" ]]; then
    # No ceiling: aim straight for the requested minimum.
    goal=$wanted
  else
    # An unparsable hard limit is assumed to equal the requested minimum.
    [[ "$hard_limit" =~ ^[0-9]+$ ]] || hard_limit=$wanted
    # The soft limit may never exceed the hard limit.
    goal=$(( hard_limit < wanted ? hard_limit : wanted ))
  fi

  if (( soft_limit < goal )); then
    ulimit -Sn "$goal" || echo "warn: could not set soft nofile to $goal" >&2
  fi
}

# Configure the Docker daemon to keep its data under a dedicated data-root.
#
# No-op (returns 0) on community installs and on macOS. Otherwise it creates
# the directory skeleton under the target, copies data over from the data-root
# currently named in /etc/docker/daemon.json (if any), rewrites daemon.json
# and starts or restarts Docker.
#
# Arguments: $1 - path of the desired Docker data-root
# Returns:   0 on success or when nothing needs doing;
#            1 when the target is empty or "/" or the config cannot be generated
#
# NOTE: /etc/docker/daemon.json is replaced wholesale with the settings
# generated below; other keys in an existing file are not carried over.
setup_docker_root() {
    if [[ "${KAMIWAZA_COMMUNITY:-}" == "true" ]] || [[ "$(uname)" == "Darwin" ]]; then
        return 0
    fi

    local target_root="${1:-}"
    local current_root=""
    local docker_config=""
    local spec backup_dir

    # Every path below is built as "${target_root}/<subdir>". An empty or
    # slash-only target would therefore chown/chmod system directories such
    # as /tmp, so refuse it before touching anything.
    if [[ -z "${target_root//\//}" ]]; then
        log_error "setup_docker_root: a non-root target data-root path is required"
        return 1
    fi

    # Check if Docker daemon.json exists and get current data-root
    if [ -f "/etc/docker/daemon.json" ]; then
        current_root=$(grep -o '"data-root":[[:space:]]*"[^"]*"' /etc/docker/daemon.json | cut -d'"' -f4)
    fi

    # If current_root matches target_root, nothing to do
    if [ "$current_root" = "$target_root" ]; then
        print_in_color green "Docker data-root already correctly configured at $target_root"
        return 0
    fi

    # Build the new configuration before changing anything on disk, so a
    # missing or failing python3 cannot lead to an empty daemon.json.
    docker_config=$(python3 - "$target_root" <<'PY'
import json, sys

target_root = sys.argv[1]
config = {
    "data-root": target_root,
    "features": {"buildkit": True},
    "log-driver": "json-file",
    "log-opts": {
        "max-size": "100m",
        "max-file": "3"
    },
}

sys.stdout.write(json.dumps(config))
PY
) || docker_config=""
    if [[ -z "$docker_config" ]]; then
        log_error "setup_docker_root: failed to generate Docker daemon configuration"
        return 1
    fi

    # Create target directories with correct permissions
    sudo mkdir -p "${target_root}"/{buildkit,image,network,plugins,swarm,tmp,volumes,overlay2,runtimes}
    sudo mkdir -p "${target_root}/network/files"

    # <relative path>:<mode> for each directory under the data-root.
    for spec in \
        buildkit:711 image:700 network:750 network/files:750 plugins:700 \
        swarm:700 tmp:700 runtimes:700 volumes:751 overlay2:710; do
        sudo chown root:root "${target_root}/${spec%%:*}"
        sudo chmod "${spec##*:}" "${target_root}/${spec%%:*}"
    done

    # Base directory
    sudo chown root:root "$target_root"
    sudo chmod 710 "${target_root}"

    # If we have a current root and it's different, we need to migrate
    if [ -n "$current_root" ] && [ -d "$current_root" ]; then
        print_in_color yellow "Migrating Docker root from $current_root to $target_root"

        # Stop Docker first
        if systemctl is-active --quiet docker; then
            sudo systemctl stop docker
        fi

        # Only migrate if the source exists and actually has contents
        # (ls -A exits 0 for an empty directory, so test its output).
        if sudo test -d "$current_root" && [ -n "$(sudo ls -A "$current_root" 2>/dev/null)" ]; then
            print_in_color yellow "Copying existing Docker data..."
            if command -v rsync >/dev/null 2>&1; then
                sudo rsync -av --ignore-existing "$current_root/" "$target_root/"
            else
                print_in_color yellow "rsync not available; falling back to cp -a"
                sudo cp -a "$current_root/." "$target_root/"
            fi

            # Keep the old root around under a timestamped name
            backup_dir="${current_root}_backup_$(date +%Y%m%d_%H%M%S)"
            sudo mv "$current_root" "$backup_dir"
            print_in_color yellow "Old Docker root backed up to $backup_dir"
        fi
    fi

    # Update Docker configuration
    sudo mkdir -p /etc/docker
    echo "$docker_config" | sudo tee /etc/docker/daemon.json > /dev/null

    # Start Docker if it was stopped, otherwise restart it
    if ! systemctl is-active --quiet docker; then
        sudo systemctl start docker
    else
        sudo systemctl restart docker
    fi

    print_in_color green "Docker data-root configuration completed"
}

# Prepare kernel modules and sysctl settings needed for container networking.
#
# No-op (returns 0) on community installs and on macOS. Otherwise:
#   - writes /etc/modules-load.d/kamiwaza.conf and /etc/sysctl.d/kamiwaza.conf
#     when they do not exist yet,
#   - loads overlay and br_netfilter if either is not listed by lsmod,
#   - runs "sysctl --system" if any of the three required keys is not 1,
#   - restarts Docker when it is currently active.
#
# NOTE(review): the Docker restart is unconditional whenever Docker is active,
# even if nothing was changed in this call - confirm that this is intended.
setup_network_prereqs() {
    if [[ "${KAMIWAZA_COMMUNITY:-}" == "true" ]] || [[ "$(uname)" == "Darwin" ]]; then
        return 0
    fi

    local setting loaded_modules
    local need_sysctl=0

    # Set up required kernel modules if not already configured
    if [[ ! -f /etc/modules-load.d/kamiwaza.conf ]]; then
        print_in_color green "Configuring kernel modules..."
        # tee's copy on stdout is discarded so callers do not see file contents
        sudo tee /etc/modules-load.d/kamiwaza.conf > /dev/null <<'EOF'
overlay
br_netfilter
EOF
    fi

    # Set up sysctl if not already configured
    if [[ ! -f /etc/sysctl.d/kamiwaza.conf ]]; then
        print_in_color green "Configuring sysctl..."
        sudo tee /etc/sysctl.d/kamiwaza.conf > /dev/null <<'EOF'
net.bridge.bridge-nf-call-iptables  = 1
net.bridge.bridge-nf-call-ip6tables = 1
net.ipv4.ip_forward                 = 1
EOF
    fi

    # Verify/load modules. lsmod is captured once: piping it into "grep -q"
    # can report a false failure under "set -o pipefail" when grep exits early.
    loaded_modules=$(lsmod 2>/dev/null) || loaded_modules=""
    if ! grep -q br_netfilter <<< "$loaded_modules" || ! grep -q overlay <<< "$loaded_modules"; then
        print_in_color yellow "Loading required kernel modules..."
        sudo modprobe overlay
        sudo modprobe br_netfilter
    fi

    # Verify/apply sysctl settings; an unreadable key counts as 0
    for setting in net.bridge.bridge-nf-call-iptables net.bridge.bridge-nf-call-ip6tables net.ipv4.ip_forward; do
        if [[ $(sysctl -n "$setting" 2>/dev/null || echo 0) != 1 ]]; then
            need_sysctl=1
            break
        fi
    done

    if [[ $need_sysctl -eq 1 ]]; then
        print_in_color yellow "Applying sysctl settings..."
        sudo sysctl --system
    fi

    # If Docker is running, restart it to pick up new settings
    if systemctl is-active --quiet docker; then
        print_in_color yellow "Restarting Docker to apply network settings..."
        sudo systemctl restart docker
    fi
}

# Move the NVM entry in PATH ahead of the first Homebrew entry.
#
# Sets and exports NVM_DIR (/opt/kamiwaza/nvm unless KAMIWAZA_COMMUNITY=true,
# in which case $HOME/.nvm). If a PATH entry containing NVM_DIR appears after
# the first entry containing /opt/homebrew, that entry is moved to sit directly
# in front of the Homebrew one and PATH is re-exported. When several entries
# contain NVM_DIR, the last one is the one considered.
#
# Outputs: one status line on stdout
# Returns: 0 normally; 1 if NVM_DIR ends up empty
promote_nvm_node() {
    # All working variables are local so a caller's loop counter "i" or
    # similarly named variables are not overwritten.
    local -a path_array=()
    local nvm_index=-1
    local homebrew_index=-1
    local i nvm_path

    # Ensure NVM_DIR is set based on installation type
    if [[ "${KAMIWAZA_COMMUNITY:-}" != "true" ]]; then
        export NVM_DIR="/opt/kamiwaza/nvm"
    else
        export NVM_DIR="$HOME/.nvm"
    fi

    if [ -z "$NVM_DIR" ]; then
        echo "NVM_DIR is not set. Please set it and try again."
        return 1
    fi

    # Split PATH into an array
    IFS=':' read -r -a path_array <<< "$PATH"

    # Find the last NVM_DIR entry and the first homebrew entry
    for i in "${!path_array[@]}"; do
        if [[ "${path_array[i]}" == *"${NVM_DIR}"* ]]; then
            nvm_index=$i
        fi
        if [[ "${path_array[i]}" == *"/opt/homebrew"* && $homebrew_index -eq -1 ]]; then
            homebrew_index=$i
        fi
    done

    # Act only when both were found and the NVM entry comes AFTER homebrew
    if (( nvm_index >= 0 && homebrew_index >= 0 && nvm_index > homebrew_index )); then
        # Re-insert the NVM entry directly in front of the homebrew entry.
        # The removed slot lies after homebrew_index, so both slices are intact.
        nvm_path="${path_array[nvm_index]}"
        unset 'path_array[nvm_index]'
        path_array=("${path_array[@]:0:$homebrew_index}" "$nvm_path" "${path_array[@]:$homebrew_index}")

        # Join with ':'; a function-local IFS is restored on return, including
        # the case where IFS was unset beforehand.
        local IFS=':'
        export PATH="${path_array[*]}"
        echo "Promoted NVM path in PATH."
    else
        echo "No changes needed to PATH."
    fi
}

# Check that a "node" command is available and reports major version 22.
#
# Returns: 0 when "node --version" succeeds and its output starts with "v22.";
#          1 when node is missing, fails to run, or reports another version
verify_node_version() {
    local node_version

    if ! command -v node &> /dev/null; then
        return 1
    fi

    # A node binary that cannot report its version is treated as unusable.
    node_version=$(node --version) || return 1

    # Require the dot so that e.g. v220.x is not accepted as v22.
    if [[ "$node_version" =~ ^v22[.] ]]; then
        return 0
    fi
    return 1
}

# Make sure Node.js 22 is the active "node", installing it through nvm if needed.
#
# Sets and exports NVM_DIR (/opt/kamiwaza/nvm unless KAMIWAZA_COMMUNITY=true,
# in which case $HOME/.nvm). Makes up to three attempts: each checks the
# active node version and, if it is wrong, runs "nvm install 22" and
# "nvm use 22". On the first attempt the NVM entry in PATH is promoted first.
#
# Returns: 0 once verify_node_version succeeds; 1 if it still fails after the
#          last attempt or if nvm is not available in this shell
ensure_correct_node() {
    local max_attempts=3
    local attempt

    # Set NVM_DIR based on installation type
    if [[ "${KAMIWAZA_COMMUNITY:-}" != "true" ]]; then
        export NVM_DIR="/opt/kamiwaza/nvm"
    else
        export NVM_DIR="$HOME/.nvm"
    fi

    for (( attempt = 1; attempt <= max_attempts; attempt++ )); do
        if verify_node_version; then
            return 0
        fi

        if (( attempt == 1 )); then
            promote_nvm_node
            hash -r  # Clear command path cache
            # Reordering PATH may already have fixed it; skip the install then
            if verify_node_version; then
                return 0
            fi
        fi

        # Without nvm every attempt would fail the same way; stop early.
        if ! command -v nvm >/dev/null 2>&1; then
            log_error "nvm is not available in this shell; cannot install Node.js 22"
            return 1
        fi

        nvm install 22
        nvm use 22
    done

    # The loop only verifies at the start of each attempt, so check the
    # result of the final install as well.
    verify_node_version
}
# Probe for GPU compute hardware through vendor tools.
#
# Probes are tried in order: NVIDIA (nvidia-smi -L), Habana Gaudi (hl-smi),
# then OpenCL (clinfo reporting a GPU device type). The first probe whose
# tool exists and succeeds wins.
#
# Arguments: $1 - "true" to also print a one-line description on stderr
# Outputs:   "true" or "false" on stdout
# Returns:   always 0
perform_gpu_detection() {
    local chatty="${1:-false}"
    local finding=""

    if command -v nvidia-smi &>/dev/null && nvidia-smi -L &>/dev/null; then
        # NVIDIA (CUDA)
        finding="NVIDIA GPU detected"
    elif command -v hl-smi &>/dev/null && hl-smi &>/dev/null; then
        # Habana Gaudi
        finding="Habana Gaudi GPU detected"
    elif command -v clinfo &>/dev/null && clinfo &>/dev/null && clinfo | grep -q "Device Type.*GPU"; then
        # Vendor-agnostic OpenCL fallback
        finding="OpenCL GPU detected"
    fi

    if [ -z "$finding" ]; then
        if [ "$chatty" = "true" ]; then
            echo "No GPU detected" >&2
        fi
        echo false
        return 0
    fi

    if [ "$chatty" = "true" ]; then
        echo "$finding" >&2
    fi
    echo true
    return 0
}

# Report whether GPU compute is available, probing at most once per shell.
# Usage:   detect_gpu_compute
# Outputs: the cached value of KAMIWAZA_GPU_DETECTED if that variable is set
#          (even to an empty string); otherwise the output of
#          perform_gpu_detection, which is then stored in KAMIWAZA_GPU_DETECTED
# Returns: always 0
# Note:    when called inside $(...), the assignment happens in a subshell and
#          does not reach the calling shell.

detect_gpu_compute() {
    if [ -z "${KAMIWAZA_GPU_DETECTED+x}" ]; then
        # Not cached yet: run the real probe and remember its answer
        local probe_output
        probe_output=$(perform_gpu_detection)
        KAMIWAZA_GPU_DETECTED="$probe_output"
    fi
    echo "$KAMIWAZA_GPU_DETECTED"
    return 0
}

# Common Environment Setup Functions
# Usage in startup scripts for platform initialization

# Succeeds (status 0) when the marker file kamiwaza-shibboleth exists as a
# regular file directly under KAMIWAZA_ROOT, which marks a development checkout.
is_dev_env() {
    local marker_file="${KAMIWAZA_ROOT}/kamiwaza-shibboleth"
    [[ -f "$marker_file" ]]
}

# Set up common environment variables with intelligent defaults.
#
# Exports: ARCH, KAMIWAZA_LOG_DIR, OTEL_SERVICE_NAME, OTEL_EXPORTER_INSECURE,
#          OTEL_EXPORTER_INSECURE_JAEGER, KAMIWAZA_JAEGER_ENABLED,
#          KAMIWAZA_JAEGER_RESTART, KAMIWAZA_LOKI_ENABLED, CUSTOMER_OTLP_ENDPOINT,
#          LOKI_ENDPOINT, KAMIWAZA_VERSION, KAMIWAZA_LICENSE_TYPE; in development
#          environments also KAMIWAZA_OTEL_ENABLED and (when OTEL is enabled)
#          GRAFANA_ADMIN_PASSWORD.
# Values already present in the environment are kept, except ARCH and
# KAMIWAZA_JAEGER_RESTART, which are always recomputed.
# Returns: 0 on success; 1 on an unsupported CPU architecture or when neither
#          KAMIWAZA_LOG_DIR nor KAMIWAZA_ROOT is set
setup_common_environment() {
    # Kept local so the raw machine name does not leak into the caller's scope
    local arch_raw

    # Architecture detection for deployment paths
    arch_raw=$(uname -m)
    case "$arch_raw" in
        x86_64)
            export ARCH='amd64'
            ;;
        aarch64|arm64)
            export ARCH='arm64'
            ;;
        *)
            log_error "Unsupported architecture for OTEL: $arch_raw"
            return 1
            ;;
    esac

    # Log directory configuration (matches Python logging utils)
    # Respect existing KAMIWAZA_LOG_DIR if already set
    # Default to $KAMIWAZA_ROOT/logs (consistent with kamiwaza/lib/logging/utils.py)
    # KAMIWAZA_ROOT is required - no fallbacks
    # CRITICAL: This export is required for OTEL collector volume mount alignment.
    # See deployment/kamiwaza-otel-collector/*/docker-compose.yml for the mount config.
    if [[ -z "${KAMIWAZA_LOG_DIR:-}" ]]; then
        if [[ -z "${KAMIWAZA_ROOT:-}" ]]; then
            log_error "KAMIWAZA_ROOT environment variable is not set"
            log_error "KAMIWAZA_ROOT must be set for OTEL environment setup"
            return 1
        fi
        export KAMIWAZA_LOG_DIR="${KAMIWAZA_ROOT}/logs"
    fi
    log_info "OTEL log directory: $KAMIWAZA_LOG_DIR"

    # Set default OTEL service name if not provided
    export OTEL_SERVICE_NAME="${OTEL_SERVICE_NAME:-kamiwaza}"

    if is_dev_env; then
        # Development: OTEL stays off unless explicitly enabled
        export KAMIWAZA_OTEL_ENABLED="${KAMIWAZA_OTEL_ENABLED:-false}"
        # Development: Use insecure connections for local testing
        export OTEL_EXPORTER_INSECURE="${OTEL_EXPORTER_INSECURE:-true}"
        export OTEL_EXPORTER_INSECURE_JAEGER="${OTEL_EXPORTER_INSECURE_JAEGER:-true}"
        # Jaeger disabled by default in dev environments unless explicitly enabled
        export KAMIWAZA_JAEGER_ENABLED="${KAMIWAZA_JAEGER_ENABLED:-false}"
        # Loki/Grafana default to on only when OTEL has been enabled;
        # otherwise Loki defaults to off
        if [[ "${KAMIWAZA_OTEL_ENABLED:-false}" == "true" ]]; then
            export KAMIWAZA_LOKI_ENABLED="${KAMIWAZA_LOKI_ENABLED:-true}"
            export GRAFANA_ADMIN_PASSWORD="${GRAFANA_ADMIN_PASSWORD:-kamiwaza}"
        elif [[ -z "${KAMIWAZA_LOKI_ENABLED:-}" ]]; then
            export KAMIWAZA_LOKI_ENABLED="false"
        fi
        log_info "Development environment detected - OTEL configured for local development"
    else
        # Production: Use secure connections by default
        export OTEL_EXPORTER_INSECURE="${OTEL_EXPORTER_INSECURE:-false}"
        export OTEL_EXPORTER_INSECURE_JAEGER="${OTEL_EXPORTER_INSECURE_JAEGER:-false}"
        # Jaeger disabled by default in production unless explicitly enabled
        export KAMIWAZA_JAEGER_ENABLED="${KAMIWAZA_JAEGER_ENABLED:-false}"
        # Loki disabled by default in production unless explicitly enabled
        export KAMIWAZA_LOKI_ENABLED="${KAMIWAZA_LOKI_ENABLED:-false}"
        # Production: GRAFANA_ADMIN_PASSWORD has no default - validation enforced in validate_loki_config()
        log_info "Production environment - OTEL configured for secure operation"
    fi

    # Set Docker Compose restart policy for Jaeger based on enabled flag
    if [[ "${KAMIWAZA_JAEGER_ENABLED:-false}" == "true" ]]; then
        export KAMIWAZA_JAEGER_RESTART="unless-stopped"
    else
        export KAMIWAZA_JAEGER_RESTART="no"
    fi

    # Default customer endpoint to local Jaeger for development
    export CUSTOMER_OTLP_ENDPOINT="${CUSTOMER_OTLP_ENDPOINT:-http://jaeger:14250}"

    # Loki endpoint configuration
    export LOKI_ENDPOINT="${LOKI_ENDPOINT:-http://loki:3100/loki/api/v1/push}"

    # Kamiwaza context variables
    export KAMIWAZA_VERSION="${KAMIWAZA_VERSION:-NA}"
    export KAMIWAZA_LICENSE_TYPE="${KAMIWAZA_LICENSE_TYPE:-Community}"

    log_info "Common environment configured: KAMIWAZA_OTEL_ENABLED=${KAMIWAZA_OTEL_ENABLED:-false}, KAMIWAZA_JAEGER_ENABLED=${KAMIWAZA_JAEGER_ENABLED:-false}"
}

# Pre-flight check for the OTEL collector deployment.
# Reads KAMIWAZA_OTEL_ENABLED, KAMIWAZA_ROOT and ARCH.
# Returns: 0 when OTEL is disabled, or when both docker-compose.yml and
#          config.yml exist in the architecture-specific collector directory;
#          1 when either file is missing
validate_otel_config() {
    if [[ "${KAMIWAZA_OTEL_ENABLED:-false}" != "true" ]]; then
        log_info "OTEL is disabled (KAMIWAZA_OTEL_ENABLED=${KAMIWAZA_OTEL_ENABLED:-false})"
        return 0
    fi

    local collector_dir="${KAMIWAZA_ROOT}/kamiwaza/deployment/kamiwaza-otel-collector/${ARCH}"
    local compose_file="${collector_dir}/docker-compose.yml"
    local collector_config="${collector_dir}/config.yml"

    # The compose file is checked first; its absence usually means the
    # deployment files were never copied into place.
    [[ -f "$compose_file" ]] || {
        log_error "OTEL docker-compose file not found: $compose_file"
        log_error "Run 'bash copy-compose.sh' to set up deployment files"
        return 1
    }

    [[ -f "$collector_config" ]] || {
        log_error "OTEL config file not found: $collector_config"
        return 1
    }

    log_info "OTEL configuration validated successfully"
    return 0
}

# Print the OTEL collector health on stdout as one word:
#   disabled  - KAMIWAZA_OTEL_ENABLED is not "true"
#   healthy   - GET http://localhost:13133/health succeeded
#   unhealthy - the request failed
#   unknown   - curl is not installed, so no probe was possible
get_otel_status() {
    if [[ "${KAMIWAZA_OTEL_ENABLED:-false}" != "true" ]]; then
        echo "disabled"
        return 0
    fi

    local verdict="unknown"
    if command -v curl >/dev/null 2>&1; then
        if curl -sf "http://localhost:13133/health" >/dev/null 2>&1; then
            verdict="healthy"
        else
            verdict="unhealthy"
        fi
    fi
    echo "$verdict"
}

# Pre-flight check for the Loki/Grafana stack.
# Reads KAMIWAZA_LOKI_ENABLED, GRAFANA_ADMIN_PASSWORD, KAMIWAZA_ROOT and ARCH.
# Returns: 0 when Loki is disabled, or when an admin password is set and both
#          docker-compose.yml and loki-config.yml exist in the
#          architecture-specific Loki directory; 1 otherwise
validate_loki_config() {
    if [[ "${KAMIWAZA_LOKI_ENABLED:-false}" != "true" ]]; then
        log_info "Loki is disabled (KAMIWAZA_LOKI_ENABLED=${KAMIWAZA_LOKI_ENABLED:-false})"
        return 0
    fi

    # Security: an explicit Grafana admin password is mandatory
    if [[ -z "${GRAFANA_ADMIN_PASSWORD:-}" ]]; then
        local hint
        for hint in \
            "GRAFANA_ADMIN_PASSWORD must be set when KAMIWAZA_LOKI_ENABLED=true" \
            "Grafana observability stack requires an explicit admin password for security." \
            "Set GRAFANA_ADMIN_PASSWORD in env.sh or export it before starting Loki." \
            'Example: export GRAFANA_ADMIN_PASSWORD=$(openssl rand -base64 32)'; do
            log_error "$hint"
        done
        return 1
    fi

    local loki_dir="${KAMIWAZA_ROOT}/kamiwaza/deployment/kamiwaza-loki/${ARCH}"
    local compose_file="${loki_dir}/docker-compose.yml"
    local loki_settings="${loki_dir}/loki-config.yml"

    # Architecture-specific compose file must be in place
    if [[ ! -f "$compose_file" ]]; then
        log_error "Loki docker-compose file not found: $compose_file"
        log_error "Run 'bash copy-compose.sh' to set up deployment files"
        return 1
    fi

    # ...and so must the Loki server configuration next to it
    if [[ ! -f "$loki_settings" ]]; then
        log_error "Loki config file not found: $loki_settings"
        return 1
    fi

    log_info "Loki configuration validated successfully"
    return 0
}

# Print the Loki health on stdout as one word:
#   disabled  - KAMIWAZA_LOKI_ENABLED is not "true"
#   unknown   - curl is not installed, so no probe was possible
#   healthy   - GET http://localhost:3100/ready succeeded
#   unhealthy - the request failed
get_loki_status() {
    if [[ "${KAMIWAZA_LOKI_ENABLED:-false}" != "true" ]]; then
        echo "disabled"
        return 0
    fi

    # Without curl there is no way to probe the readiness endpoint
    if ! command -v curl >/dev/null 2>&1; then
        echo "unknown"
        return 0
    fi

    if curl -sf "http://localhost:3100/ready" >/dev/null 2>&1; then
        echo "healthy"
    else
        echo "unhealthy"
    fi
}

# Print the Grafana health on stdout as one word. Grafana is part of the Loki
# stack, so it is reported as "disabled" whenever KAMIWAZA_LOKI_ENABLED is not
# "true". Otherwise: "healthy" or "unhealthy" depending on whether
# GET http://localhost:3030/api/health succeeds, or "unknown" without curl.
get_grafana_status() {
    [[ "${KAMIWAZA_LOKI_ENABLED:-false}" == "true" ]] || {
        echo "disabled"
        return 0
    }

    local outcome
    if ! command -v curl >/dev/null 2>&1; then
        outcome="unknown"
    elif curl -sf "http://localhost:3030/api/health" >/dev/null 2>&1; then
        # Grafana is probed on port 3030
        outcome="healthy"
    else
        outcome="unhealthy"
    fi
    echo "$outcome"
}

# Bring up the OTEL Collector with docker compose, plus Jaeger when enabled.
#
# Reads KAMIWAZA_OTEL_ENABLED, KAMIWAZA_JAEGER_ENABLED, KAMIWAZA_ROOT and ARCH.
# With Jaeger enabled, "docker compose up -d" runs with COMPOSE_PROFILES=jaeger;
# otherwise only the otel-collector service is started. The working directory
# is switched to the deployment directory for the compose call and switched
# back afterwards.
#
# Returns: 0 when OTEL is disabled or the stack started; 1 on validation
#          failure, a missing deployment directory, or a docker compose failure
start_otel_stack() {
    if [[ "${KAMIWAZA_OTEL_ENABLED:-false}" != "true" ]]; then
        log_info "OTEL is disabled, skipping startup"
        return 0
    fi

    log_info "Starting OTEL Collector stack..."

    # Validate configuration first
    if ! validate_otel_config; then
        log_error "OTEL configuration validation failed"
        return 1
    fi

    # Architecture-specific deployment directory
    local collector_dir="${KAMIWAZA_ROOT}/kamiwaza/deployment/kamiwaza-otel-collector/${ARCH}"
    if [[ ! -d "$collector_dir" ]]; then
        log_error "OTEL deployment directory not found: $collector_dir"
        return 1
    fi

    if ! cd "$collector_dir"; then
        log_error "Failed to change to OTEL deployment directory"
        return 1
    fi

    local outcome=0
    if [[ "${KAMIWAZA_JAEGER_ENABLED:-false}" == "true" ]]; then
        log_info "Starting OTEL Collector with Jaeger..."
        # The jaeger profile pulls Jaeger in alongside the collector
        if COMPOSE_PROFILES="jaeger" docker compose up -d; then
            log_info "OTEL stack started successfully with Jaeger"
            log_info "Jaeger UI available at: http://localhost:16686"
            log_info "OTLP endpoints: gRPC=localhost:4317, HTTP=localhost:4318"
        else
            log_error "Failed to start OTEL stack with Jaeger"
            outcome=1
        fi
    else
        log_info "Starting OTEL Collector without Jaeger..."
        if docker compose up -d otel-collector; then
            log_info "OTEL Collector started successfully"
            log_info "OTLP endpoints: gRPC=localhost:4317, HTTP=localhost:4318"
        else
            log_error "Failed to start OTEL Collector"
            outcome=1
        fi
    fi

    # Return to the directory the caller was in
    cd - > /dev/null || true
    return "$outcome"
}

# Tear down the OTEL Collector stack with "docker compose down".
#
# Reads KAMIWAZA_OTEL_ENABLED, KAMIWAZA_ROOT and ARCH. The jaeger profile is
# always activated for the call so that Jaeger is stopped too if it was
# started. The working directory is switched to the deployment directory for
# the compose call and switched back afterwards.
#
# Returns: 0 when OTEL is disabled, the deployment directory does not exist,
#          or the stack was stopped; 1 when cd or docker compose fails
stop_otel_stack() {
    if [[ "${KAMIWAZA_OTEL_ENABLED:-false}" != "true" ]]; then
        log_info "OTEL is disabled, nothing to stop"
        return 0
    fi

    log_info "Stopping OTEL Collector stack..."

    local collector_dir="${KAMIWAZA_ROOT}/kamiwaza/deployment/kamiwaza-otel-collector/${ARCH}"
    if [[ ! -d "$collector_dir" ]]; then
        log_warn "OTEL deployment directory not found, nothing to stop"
        return 0
    fi

    if ! cd "$collector_dir"; then
        log_error "Failed to change to OTEL deployment directory"
        return 1
    fi

    # Stop all services (including Jaeger if it was started)
    local outcome=0
    if COMPOSE_PROFILES=jaeger docker compose down; then
        log_info "OTEL stack stopped successfully"
    else
        log_error "Failed to stop OTEL stack"
        outcome=1
    fi

    cd - > /dev/null || true
    return "$outcome"
}

# Bring up the Loki/Grafana logging stack with docker compose.
#
# Reads KAMIWAZA_LOKI_ENABLED, ARCH (detected from "uname -m" when unset),
# KAMIWAZA_ENV (default "default"), KAMIWAZA_ROOT and GRAFANA_ADMIN_PASSWORD.
# The stack runs from
#   ${KAMIWAZA_ROOT}/kamiwaza/deployment/envs/<env>/kamiwaza-loki/<arch>
# under the compose project name "<env>-kamiwaza-loki". The working directory
# is switched there for the compose call and switched back afterwards.
#
# Returns: 0 when Loki is disabled or the stack started; 1 on validation
#          failure, unsupported architecture, missing deployment directory,
#          missing Grafana password outside dev, or docker compose failure
start_loki_stack() {
    if [[ "${KAMIWAZA_LOKI_ENABLED:-false}" != "true" ]]; then
        log_info "Loki is disabled, skipping startup"
        return 0
    fi

    log_info "Starting Loki logging stack..."

    # Validate configuration first
    if ! validate_loki_config; then
        log_error "Loki configuration validation failed"
        return 1
    fi

    # Detect ARCH if not set
    local arch="${ARCH:-}"
    local arch_raw
    if [[ -z "$arch" ]]; then
        arch_raw=$(uname -m)
        case "$arch_raw" in
            x86_64)
                arch='amd64'
                ;;
            aarch64|arm64)
                arch='arm64'
                ;;
            *)
                # An empty arch would make the path below resolve to the parent
                # kamiwaza-loki/ directory and run compose in the wrong place.
                log_error "Unsupported architecture for Loki: $arch_raw"
                return 1
                ;;
        esac
    fi

    # Detect KAMIWAZA_ENV if not set
    local env="${KAMIWAZA_ENV:-default}"

    # Use environment-based deployment directory (consistent with containers-up.sh)
    local loki_deploy_dir="${KAMIWAZA_ROOT}/kamiwaza/deployment/envs/${env}/kamiwaza-loki/${arch}"
    if [[ ! -d "$loki_deploy_dir" ]]; then
        log_error "Loki deployment directory not found: $loki_deploy_dir"
        log_info "Run containers-up.sh to initialize environment-specific deployments"
        return 1
    fi

    # Ensure Grafana password is present for docker compose env substitution
    local grafana_password="${GRAFANA_ADMIN_PASSWORD:-}"
    if [[ -z "$grafana_password" ]]; then
        if is_dev_env; then
            grafana_password="kamiwaza"
        else
            log_error "GRAFANA_ADMIN_PASSWORD must be set to start Loki stack"
            return 1
        fi
    fi

    cd "$loki_deploy_dir" || {
        log_error "Failed to change to Loki deployment directory"
        return 1
    }

    local project_name="${env}-kamiwaza-loki"

    # Start Loki stack with docker compose
    log_info "Starting Loki and Grafana containers..."
    if GRAFANA_ADMIN_PASSWORD="$grafana_password" docker compose -p "$project_name" up -d; then
        log_info "Loki stack started successfully"
        log_info "Grafana UI available at: http://localhost:3030"
        log_info "Login with: admin / <your GRAFANA_ADMIN_PASSWORD>"

        # Return to original directory
        cd - > /dev/null || true
        return 0
    else
        log_error "Failed to start Loki stack"
        cd - > /dev/null || true
        return 1
    fi
}

# Tear down the Loki/Grafana logging stack with docker compose.
#
# Reads KAMIWAZA_LOKI_ENABLED, GRAFANA_ADMIN_PASSWORD, ARCH (detected from
# "uname -m" when unset), KAMIWAZA_ENV (default "default") and KAMIWAZA_ROOT.
# Runs "docker compose -p <env>-kamiwaza-loki down --remove-orphans" from the
# environment-specific deployment directory, then switches back to the
# previous working directory.
#
# Returns: 0 when Loki is disabled, the architecture is unsupported, the
#          deployment directory does not exist, or the stack was stopped;
#          1 when cd or docker compose fails
stop_loki_stack() {
    if [[ "${KAMIWAZA_LOKI_ENABLED:-false}" != "true" ]]; then
        log_info "Loki is disabled, nothing to stop"
        return 0
    fi

    log_info "Stopping Loki logging stack..."

    # The value is only exported for the compose call below; a missing
    # password must not prevent stopping the stack.
    local grafana_password="${GRAFANA_ADMIN_PASSWORD:-}"
    if [[ -z "$grafana_password" ]]; then
        if is_dev_env; then
            grafana_password="kamiwaza"
        else
            log_warn "GRAFANA_ADMIN_PASSWORD not set, using placeholder for stop operation"
            grafana_password="placeholder"
        fi
    fi

    # Detect ARCH if not set
    local arch="${ARCH:-}"
    local arch_raw
    if [[ -z "$arch" ]]; then
        arch_raw=$(uname -m)
        case "$arch_raw" in
            x86_64)
                arch='amd64'
                ;;
            aarch64|arm64)
                arch='arm64'
                ;;
            *)
                # An empty arch would make the path below resolve to the parent
                # kamiwaza-loki/ directory; there is no deployment to stop.
                log_warn "Unsupported architecture for Loki: $arch_raw, nothing to stop"
                return 0
                ;;
        esac
    fi

    local env="${KAMIWAZA_ENV:-default}"

    local loki_deploy_dir="${KAMIWAZA_ROOT}/kamiwaza/deployment/envs/${env}/kamiwaza-loki/${arch}"
    if [[ ! -d "$loki_deploy_dir" ]]; then
        log_warn "Loki deployment directory not found: $loki_deploy_dir"
        return 0
    fi

    cd "$loki_deploy_dir" || {
        log_error "Failed to change to Loki deployment directory"
        return 1
    }

    local project_name="${env}-kamiwaza-loki"

    if GRAFANA_ADMIN_PASSWORD="$grafana_password" docker compose -p "$project_name" down --remove-orphans; then
        log_info "Loki stack stopped successfully"
        cd - > /dev/null || true
        return 0
    else
        log_error "Failed to stop Loki stack"
        cd - > /dev/null || true
        return 1
    fi
}
