#!/usr/bin/env bash
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

set -e

# Color codes for output formatting
GREEN='\033[0;32m'
YELLOW='\033[0;33m'
RED='\033[0;31m'
NC='\033[0m' # No Color
BOLD='\033[1m'

# Default values
PORT=5001
INFERENCE_MODEL="meta-llama/Llama-3.2-3B-Instruct"
OLLAMA_MODEL_ALIAS="llama3.2:3b-instruct-fp16"
OLLAMA_URL="http://localhost:11434"
CONTAINER_ENGINE=""

# Functions
print_banner() {
    echo -e "${BOLD}==================================================${NC}"
    echo -e "${BOLD}      Llama Stack Ollama Distribution Setup       ${NC}"
    echo -e "${BOLD}==================================================${NC}"
}

check_command() {
    command -v "$1" &> /dev/null
}

# Function to check prerequisites
check_prerequisites() {
    echo -e "\n${BOLD}Checking prerequisites...${NC}"

    # Check for container engine (Docker or Podman)
    if check_command docker; then
        echo -e "${GREEN}✓${NC} Docker is installed"
        CONTAINER_ENGINE="docker"
    elif check_command podman; then
        echo -e "${GREEN}✓${NC} Podman is installed"
        CONTAINER_ENGINE="podman"
    else
        echo -e "${RED}Error: Neither Docker nor Podman is installed. Please install one of them first.${NC}"
        echo "Visit https://docs.docker.com/get-docker/ or https://podman.io/getting-started/installation for installation instructions."
        exit 1
    fi

    # Check Python and pip
    if check_command python3; then
        PYTHON_VERSION=$(python3 -c 'import sys; print(".".join(map(str, sys.version_info[:2])))')
        # Compare the version tuple in Python itself; a bc string comparison
        # misreads versions such as 3.10 (bc treats "3.10" as 3.1).
        if python3 -c 'import sys; sys.exit(0 if sys.version_info >= (3, 10) else 1)'; then
            echo -e "${GREEN}✓${NC} Python $PYTHON_VERSION is installed"
            HAS_PYTHON=true
        else
            echo -e "${YELLOW}Warning: Python $PYTHON_VERSION detected. Python 3.10+ recommended.${NC}"
            HAS_PYTHON=false
        fi
    else
        echo -e "${YELLOW}Warning: Python 3 is not found. Will use container for operations.${NC}"
        HAS_PYTHON=false
    fi

    # Check pip
    if [ "$HAS_PYTHON" = true ]; then
        if check_command pip || check_command pip3; then
            echo -e "${GREEN}✓${NC} pip is installed"
            HAS_PIP=true
        else
            echo -e "${YELLOW}Warning: pip is not found. Will use container for operations.${NC}"
            HAS_PIP=false
            HAS_PYTHON=false
        fi
    fi
}

# Function to install Ollama
install_ollama() {
    echo -e "\n${BOLD}Installing Ollama...${NC}"

    if check_command ollama; then
        echo -e "${GREEN}✓${NC} Ollama is already installed"
    else
        echo "Installing Ollama..."
        # Run the installer inside the condition so the error branch stays
        # reachable even with `set -e` in effect.
        if curl -fsSL https://ollama.com/install.sh | sh; then
            echo -e "${GREEN}✓${NC} Ollama installed successfully"
        else
            echo -e "${RED}Error: Failed to install Ollama.${NC}"
            exit 1
        fi
    fi
}

# Function to start Ollama server
start_ollama() {
    echo -e "\n${BOLD}Starting Ollama server...${NC}"

    # Check if Ollama is already running
    if curl -s "$OLLAMA_URL" &> /dev/null; then
        echo -e "${GREEN}✓${NC} Ollama server is already running"
    else
        echo "Starting Ollama server..."
        ollama serve &

        # Wait for Ollama server to start
        MAX_RETRIES=30
        RETRY_COUNT=0
        while ! curl -s "$OLLAMA_URL" &> /dev/null; do
            sleep 1
            RETRY_COUNT=$((RETRY_COUNT + 1))
            if [ $RETRY_COUNT -ge $MAX_RETRIES ]; then
                echo -e "${RED}Error: Ollama server failed to start after $MAX_RETRIES seconds.${NC}"
                exit 1
            fi
        done
        echo -e "${GREEN}✓${NC} Ollama server started successfully"
    fi
}
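# Illustrative manual check (not run by this script): the readiness probe used
# in start_ollama can also be issued by hand to confirm the Ollama server is
# reachable and to see which models it has pulled locally, e.g.
#
#   curl -s http://localhost:11434/api/tags
#
# A non-empty JSON response means the server is up; the URL assumes the
# default OLLAMA_URL defined above.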
# Function to pull models
pull_models() {
    echo -e "\n${BOLD}Pulling and running Llama model in Ollama...${NC}"

    # Pull model; run it inside the condition so the error message stays
    # reachable even with `set -e` in effect.
    echo "Pulling $INFERENCE_MODEL model as $OLLAMA_MODEL_ALIAS..."
    if ! ollama pull "$OLLAMA_MODEL_ALIAS"; then
        echo -e "${RED}Error: Failed to pull $OLLAMA_MODEL_ALIAS model.${NC}"
        exit 1
    fi

    # Kill any existing model processes
    pkill -f "ollama run $OLLAMA_MODEL_ALIAS" || true

    # Start model in background
    echo "Starting inference model..."
    nohup ollama run "$OLLAMA_MODEL_ALIAS" --keepalive 60m > /dev/null 2>&1 &

    # Verify model is running by checking the Ollama API
    echo "Waiting for model to start (this may take a minute)..."
    MAX_RETRIES=30
    RETRY_DELAY=2

    # Wait for model to appear in the Ollama API
    for i in $(seq 1 $MAX_RETRIES); do
        echo -n "."
        MODELS_RUNNING=$(curl -s "$OLLAMA_URL/api/ps" | grep -E "$OLLAMA_MODEL_ALIAS" | wc -l)
        if [ "$MODELS_RUNNING" -ge 1 ]; then
            echo -e "\n${GREEN}✓${NC} Model is running successfully"
            break
        fi
        if [ "$i" -eq $MAX_RETRIES ]; then
            echo -e "\n${RED}Error: Model failed to start within the expected time.${NC}"
            exit 1
        fi
        sleep $RETRY_DELAY
    done
}

# Function to set up Python environment and install llama-stack-client
setup_llama_stack_cli() {
    echo -e "\n${BOLD}Setting up llama-stack environment...${NC}"

    # Create virtual environment
    echo "Creating Python virtual environment..."
    VENV_DIR="$HOME/.venv/llama-stack"
    if [ -d "$VENV_DIR" ]; then
        echo "Virtual environment already exists at $VENV_DIR"
    else
        if python3 -m venv "$VENV_DIR"; then
            echo -e "${GREEN}✓${NC} Virtual environment created successfully"
        else
            echo -e "${RED}Error: Failed to create virtual environment.${NC}"
            exit 1
        fi
    fi

    # Activate virtual environment and install packages
    source "$VENV_DIR/bin/activate"

    echo "Installing llama-stack-client..."
    pip install --upgrade pip
    if pip install llama-stack-client; then
        echo -e "${GREEN}✓${NC} llama-stack-client installed successfully"

        # Configure the client to point to the correct server
        echo "Configuring llama-stack-client..."
        if llama-stack-client configure --endpoint "http://localhost:$PORT"; then
            echo -e "${GREEN}✓${NC} llama-stack-client configured to use http://localhost:$PORT"

            # Set environment variable for CLI use
            export LLAMA_STACK_BASE_URL="http://localhost:$PORT"

            # Add to shell config if it exists
            if [ -f "$HOME/.bashrc" ]; then
                grep -q "LLAMA_STACK_BASE_URL" "$HOME/.bashrc" || echo "export LLAMA_STACK_BASE_URL=\"http://localhost:$PORT\"" >> "$HOME/.bashrc"
            elif [ -f "$HOME/.zshrc" ]; then
                grep -q "LLAMA_STACK_BASE_URL" "$HOME/.zshrc" || echo "export LLAMA_STACK_BASE_URL=\"http://localhost:$PORT\"" >> "$HOME/.zshrc"
            fi
        else
            echo -e "${YELLOW}Warning: Failed to configure llama-stack-client. You may need to run 'llama-stack-client configure --endpoint http://localhost:$PORT' manually.${NC}"
        fi
    else
        echo -e "${RED}Error: Failed to install llama-stack-client.${NC}"
        exit 1
    fi
}
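# Illustrative usage (not executed here): with the virtual environment from
# setup_llama_stack_cli activated, the client can also be pointed at the
# server per-invocation instead of relying on the saved configuration, e.g.
#
#   source "$HOME/.venv/llama-stack/bin/activate"
#   LLAMA_STACK_BASE_URL="http://localhost:5001" llama-stack-client models list
#
# The `models list` subcommand is assumed to be available in the installed
# llama-stack-client version; the port matches the PORT default above.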
# Function to run a test inference
run_test_inference() {
    # Run a test inference to verify everything is working
    echo -e "\n${BOLD}Running test inference...${NC}"

    # Show the query being sent
    TEST_QUERY="hello, what model are you?"
    echo -e "${BOLD}Query:${NC} \"$TEST_QUERY\""

    # Send the query and capture the result. Record the exit status
    # immediately (and tolerate failure despite `set -e`) so the check below
    # reflects the client call rather than the echo commands.
    echo -e "${BOLD}Sending request...${NC}"
    TEST_STATUS=0
    TEST_RESULT=$(llama-stack-client inference chat-completion --message "$TEST_QUERY" 2>&1) || TEST_STATUS=$?

    # Display the full result
    echo -e "\n${BOLD}Response:${NC}"
    echo "$TEST_RESULT"

    if [[ $TEST_STATUS -eq 0 && "$TEST_RESULT" == *"content"* ]]; then
        echo -e "\n${GREEN}✓${NC} Test inference successful! Response received from the model."
        echo -e "${BOLD}Everything is working correctly!${NC}"
    else
        echo -e "\n${YELLOW}Warning: Test inference might have failed.${NC}"
        echo -e "You can try running a test manually after activation:"
        echo -e "${YELLOW}source $VENV_DIR/bin/activate${NC}"
        echo -e "${YELLOW}llama-stack-client inference chat-completion --message \"hello, what model are you?\"${NC}"
    fi
}
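# Illustrative readiness check (not run by this script): after run_llama_stack
# below starts the container, the server can also be probed by hand, e.g.
#
#   curl -s http://localhost:5001/v1/health
#
# The /v1/health path is an assumption about the Llama Stack HTTP API and may
# differ between versions; if it is unavailable, the container logs
# (docker/podman logs llama-stack-ollama) show whether startup completed.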
# Function to run the llama-stack server
run_llama_stack() {
    echo -e "\n${BOLD}Starting Llama Stack server...${NC}"

    mkdir -p "$HOME/.llama"

    # Check if container already exists
    CONTAINER_NAME="llama-stack-ollama"
    CONTAINER_EXISTS=false
    CONTAINER_RUNNING=false

    if [ "$CONTAINER_ENGINE" = "docker" ]; then
        if docker ps -a --format '{{.Names}}' | grep -q "^$CONTAINER_NAME$"; then
            CONTAINER_EXISTS=true
            if docker ps --format '{{.Names}}' | grep -q "^$CONTAINER_NAME$"; then
                CONTAINER_RUNNING=true
            fi
        fi
    elif [ "$CONTAINER_ENGINE" = "podman" ]; then
        if podman ps -a --format '{{.Names}}' | grep -q "^$CONTAINER_NAME$"; then
            CONTAINER_EXISTS=true
            if podman ps --format '{{.Names}}' | grep -q "^$CONTAINER_NAME$"; then
                CONTAINER_RUNNING=true
            fi
        fi
    fi

    # Handle existing container
    if [ "$CONTAINER_EXISTS" = true ]; then
        if [ "$CONTAINER_RUNNING" = true ]; then
            echo -e "${YELLOW}Container $CONTAINER_NAME is already running${NC}"
            echo -e "${GREEN}✓${NC} Llama Stack server is already running"
            echo -e "\n${BOLD}Access Information:${NC}"
            echo -e "  • API URL: ${GREEN}http://localhost:$PORT${NC}"
            echo -e "  • Inference Model: ${GREEN}$INFERENCE_MODEL${NC}"
            echo -e "  • Ollama URL: ${GREEN}$OLLAMA_URL${NC}"
            echo -e "\n${BOLD}Management Commands:${NC}"
            echo -e "  • Stop Llama Stack: ${YELLOW}${CONTAINER_ENGINE} stop $CONTAINER_NAME${NC}"
            echo -e "  • Start Llama Stack: ${YELLOW}${CONTAINER_ENGINE} start $CONTAINER_NAME${NC}"
            echo -e "  • View Logs: ${YELLOW}${CONTAINER_ENGINE} logs $CONTAINER_NAME${NC}"
            echo -e "  • Stop Ollama: ${YELLOW}pkill ollama${NC}"

            # Run a test inference
            run_test_inference
            return 0
        else
            echo -e "${YELLOW}Container $CONTAINER_NAME exists but is not running${NC}"
            echo "Removing existing container..."
            if [ "$CONTAINER_ENGINE" = "docker" ]; then
                docker rm "$CONTAINER_NAME"
            elif [ "$CONTAINER_ENGINE" = "podman" ]; then
                podman rm "$CONTAINER_NAME"
            fi
        fi
    fi

    # Set the correct host value based on container engine
    if [ "$CONTAINER_ENGINE" = "docker" ]; then
        if [[ "$OSTYPE" == "linux-gnu"* ]]; then
            # Linux with Docker should use host network
            echo "Running Llama Stack server on Linux with Docker..."
            docker run -d \
                --name "$CONTAINER_NAME" \
                -p "$PORT:$PORT" \
                -v "$HOME/.llama:/root/.llama" \
                --network=host \
                llamastack/distribution-ollama \
                --port "$PORT" \
                --env INFERENCE_MODEL="$INFERENCE_MODEL" \
                --env OLLAMA_URL=http://localhost:11434
        else
            # macOS/Windows with Docker should use host.docker.internal
            echo "Running Llama Stack server with Docker..."
            docker run -d \
                --name "$CONTAINER_NAME" \
                -p "$PORT:$PORT" \
                -v "$HOME/.llama:/root/.llama" \
                llamastack/distribution-ollama \
                --port "$PORT" \
                --env INFERENCE_MODEL="$INFERENCE_MODEL" \
                --env OLLAMA_URL=http://host.docker.internal:11434
        fi
    elif [ "$CONTAINER_ENGINE" = "podman" ]; then
        # Check podman version for proper host naming. Use a version-aware
        # sort rather than bc, which cannot compare dotted version strings.
        PODMAN_VERSION=$(podman --version | awk '{print $3}')
        if [ "$(printf '%s\n' "4.7.0" "$PODMAN_VERSION" | sort -V | head -n1)" = "4.7.0" ]; then
            HOST_NAME="host.docker.internal"
        else
            HOST_NAME="host.containers.internal"
        fi

        echo "Running Llama Stack server with Podman..."
        podman run -d \
            --name "$CONTAINER_NAME" \
            -p "$PORT:$PORT" \
            -v "$HOME/.llama:/root/.llama:Z" \
            llamastack/distribution-ollama \
            --port "$PORT" \
            --env INFERENCE_MODEL="$INFERENCE_MODEL" \
            --env OLLAMA_URL="http://$HOST_NAME:11434"
    fi

    if [ $? -eq 0 ]; then
        echo -e "${GREEN}✓${NC} Llama Stack server started successfully"
        echo -e "\n${BOLD}Setup Complete!${NC}"
        echo -e "\n${BOLD}Access Information:${NC}"
        echo -e "  • API URL: ${GREEN}http://localhost:$PORT${NC}"
        echo -e "  • Inference Model: ${GREEN}$INFERENCE_MODEL${NC}"
        echo -e "  • Ollama URL: ${GREEN}$OLLAMA_URL${NC}"
        echo -e "\n${BOLD}Management Commands:${NC}"
        echo -e "  • Stop Llama Stack: ${YELLOW}${CONTAINER_ENGINE} stop $CONTAINER_NAME${NC}"
        echo -e "  • Start Llama Stack: ${YELLOW}${CONTAINER_ENGINE} start $CONTAINER_NAME${NC}"
        echo -e "  • View Logs: ${YELLOW}${CONTAINER_ENGINE} logs $CONTAINER_NAME${NC}"
        echo -e "  • Stop Ollama: ${YELLOW}pkill ollama${NC}"
        echo -e "\n${BOLD}Using Llama Stack Client:${NC}"
        echo -e "1. Activate the virtual environment: ${YELLOW}source $VENV_DIR/bin/activate${NC}"
        echo -e "2. Set the server URL: ${YELLOW}export LLAMA_STACK_BASE_URL=http://localhost:$PORT${NC}"
        echo -e "3. Run client commands: ${YELLOW}llama-stack-client --help${NC}"

        # Run a test inference
        run_test_inference
    else
        echo -e "${RED}Error: Failed to start Llama Stack server.${NC}"
        exit 1
    fi
}

# Main installation flow
main() {
    print_banner
    check_prerequisites
    install_ollama
    start_ollama
    pull_models
    setup_llama_stack_cli
    run_llama_stack
}

# Run main function
main
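# Example session after this script completes (illustrative; it mirrors what
# run_test_inference and the printed instructions above already do):
#
#   source "$HOME/.venv/llama-stack/bin/activate"
#   export LLAMA_STACK_BASE_URL="http://localhost:5001"
#   llama-stack-client inference chat-completion --message "hello, what model are you?"
#
# The port and message match the defaults used earlier in this file.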