From e70c4e67de24d7d2e1af6af5edfe573adf019f95 Mon Sep 17 00:00:00 2001
From: reluctantfuturist
Date: Wed, 16 Apr 2025 12:05:58 -0700
Subject: [PATCH] =?UTF-8?q?refactor(install):=20simplify=20demo=20to=20two?=
 =?UTF-8?q?=E2=80=91container=20flow,=20drop=20host=E2=80=91level=20instal?=
 =?UTF-8?q?ls?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 install.sh | 434 +++++++----------------------------------------------
 1 file changed, 51 insertions(+), 383 deletions(-)

diff --git a/install.sh b/install.sh
index bd4ea65ba..aa63f9cd9 100644
--- a/install.sh
+++ b/install.sh
@@ -1,401 +1,69 @@
 #!/usr/bin/env bash
-
 # Copyright (c) Meta Platforms, Inc. and affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-set -e
+set -Eeuo pipefail
 
-# Color codes for output formatting
-GREEN='\033[0;32m'
-YELLOW='\033[0;33m'
-RED='\033[0;31m'
-NC='\033[0m' # No Color
-BOLD='\033[1m'
+PORT=8321
+OLLAMA_PORT=11434
+MODEL_ALIAS="llama3.2:1b"
+SERVER_IMAGE="llamastack/distribution-ollama:0.2.2"
+WAIT_TIMEOUT=300
 
-# Default values
-PORT=5001
-INFERENCE_MODEL="meta-llama/Llama-3.2-3B-Instruct"
-OLLAMA_MODEL_ALIAS="llama3.2:3b-instruct-fp16"
-OLLAMA_URL="http://localhost:11434"
-CONTAINER_ENGINE=""
+log(){ printf "\e[1;32m%s\e[0m\n" "$*"; }
+die(){ printf "\e[1;31m❌ %s\e[0m\n" "$*" >&2; exit 1; }
 
-# Functions
+command -v docker >/dev/null || die "Docker is required but not found."
 
-print_banner() {
-    echo -e "${BOLD}==================================================${NC}"
-    echo -e "${BOLD}      Llama Stack Ollama Distribution Setup      ${NC}"
-    echo -e "${BOLD}==================================================${NC}"
-}
+# clean up any leftovers from earlier runs
+for name in ollama-server llama-stack; do
+  docker ps -aq --filter "name=^${name}$" | xargs -r docker rm -f
+done
 
-check_command() {
-    command -v "$1" &> /dev/null
-}
+###############################################################################
+# 1. Ollama
+###############################################################################
+log "🦙 Starting Ollama…"
+docker run -d --name ollama-server -p "$OLLAMA_PORT:11434" \
+  -v ollama-models:/root/.ollama \
+  ollama/ollama >/dev/null
 
-# Function to check prerequisites
-check_prerequisites() {
-    echo -e "\n${BOLD}Checking prerequisites...${NC}"
+log "⏳ Waiting for Ollama daemon…"
+timeout "$WAIT_TIMEOUT" bash -c \
+  "until curl -fsS http://localhost:${OLLAMA_PORT}/ 2>/dev/null | grep -q 'Ollama'; do sleep 1; done" \
+  || die "Ollama did not become ready in ${WAIT_TIMEOUT}s"
 
-    # Check for container engine (Docker or Podman)
-    if check_command docker; then
-        echo -e "${GREEN}✓${NC} Docker is installed"
-        CONTAINER_ENGINE="docker"
-    elif check_command podman; then
-        echo -e "${GREEN}✓${NC} Podman is installed"
-        CONTAINER_ENGINE="podman"
-    else
-        echo -e "${RED}Error: Neither Docker nor Podman is installed. Please install one of them first.${NC}"
-        echo "Visit https://docs.docker.com/get-docker/ or https://podman.io/getting-started/installation for installation instructions."
-        exit 1
-    fi
+if ! docker exec ollama-server ollama list | grep -q "$MODEL_ALIAS"; then
+  log "📦 Pulling model $MODEL_ALIAS…"
+  docker exec ollama-server ollama pull "$MODEL_ALIAS"
+fi
 
-    # Check Python and pip
-    if check_command python3; then
-        PYTHON_VERSION=$(python3 -c 'import sys; print(".".join(map(str, sys.version_info[:2])))')
-        if [[ $(echo "$PYTHON_VERSION >= 3.10" | bc -l) -eq 1 ]]; then
-            echo -e "${GREEN}✓${NC} Python $PYTHON_VERSION is installed"
-            HAS_PYTHON=true
-        else
-            echo -e "${YELLOW}Warning: Python $PYTHON_VERSION detected. Python 3.10+ recommended.${NC}"
-            HAS_PYTHON=false
-        fi
-    else
-        echo -e "${YELLOW}Warning: Python 3 is not found. Will use container for operations.${NC}"
-        HAS_PYTHON=false
-    fi
+log "🚀 Launching model runtime…"
+docker exec -d ollama-server ollama run "$MODEL_ALIAS" --keepalive 60m
 
-    # Check pip
-    if [ "$HAS_PYTHON" = true ]; then
-        if check_command pip || check_command pip3; then
-            echo -e "${GREEN}✓${NC} pip is installed"
-            HAS_PIP=true
-        else
-            echo -e "${YELLOW}Warning: pip is not found. Will use container for operations.${NC}"
-            HAS_PIP=false
-            HAS_PYTHON=false
-        fi
-    fi
-}
+###############################################################################
+# 2. Llama‑Stack
+###############################################################################
+log "🦙📦 Starting Llama‑Stack…"
+docker run -d --name llama-stack \
+  -p "$PORT:$PORT" \
+  --add-host=host.docker.internal:host-gateway \
+  "$SERVER_IMAGE" \
+  --port "$PORT" \
+  --env INFERENCE_MODEL="$MODEL_ALIAS" \
+  --env OLLAMA_URL="http://host.docker.internal:${OLLAMA_PORT}" >/dev/null
 
-# Function to install Ollama
-install_ollama() {
-    echo -e "\n${BOLD}Installing Ollama...${NC}"
+log "⏳ Waiting for Llama‑Stack API…"
+timeout "$WAIT_TIMEOUT" bash -c \
+  "until curl -fsS http://localhost:${PORT}/v1/health 2>/dev/null | grep -q 'OK'; do sleep 1; done" \
+  || die "Llama‑Stack did not become ready in ${WAIT_TIMEOUT}s"
 
-    if check_command ollama; then
-        echo -e "${GREEN}✓${NC} Ollama is already installed"
-    else
-        echo "Installing Ollama..."
-        curl -fsSL https://ollama.com/install.sh | sh
-
-        if [ $? -eq 0 ]; then
-            echo -e "${GREEN}✓${NC} Ollama installed successfully"
-        else
-            echo -e "${RED}Error: Failed to install Ollama.${NC}"
-            exit 1
-        fi
-    fi
-}
-
-# Function to start Ollama server
-start_ollama() {
-    echo -e "\n${BOLD}Starting Ollama server...${NC}"
-
-    # Check if Ollama is already running
-    if curl -s "$OLLAMA_URL" &> /dev/null; then
-        echo -e "${GREEN}✓${NC} Ollama server is already running"
-    else
-        echo "Starting Ollama server..."
-        ollama serve &
-
-        # Wait for Ollama server to start
-        MAX_RETRIES=30
-        RETRY_COUNT=0
-
-        while ! curl -s "$OLLAMA_URL" &> /dev/null; do
-            sleep 1
-            RETRY_COUNT=$((RETRY_COUNT + 1))
-
-            if [ $RETRY_COUNT -ge $MAX_RETRIES ]; then
-                echo -e "${RED}Error: Ollama server failed to start after $MAX_RETRIES seconds.${NC}"
-                exit 1
-            fi
-        done
-
-        echo -e "${GREEN}✓${NC} Ollama server started successfully"
-    fi
-}
-
-# Function to pull models
-pull_models() {
-    echo -e "\n${BOLD}Pulling and running Llama model in Ollama...${NC}"
-
-    # Pull model
-    echo "Pulling $INFERENCE_MODEL model as $OLLAMA_MODEL_ALIAS..."
-    ollama pull $OLLAMA_MODEL_ALIAS
-    if [ $? -ne 0 ]; then
-        echo -e "${RED}Error: Failed to pull $OLLAMA_MODEL_ALIAS model.${NC}"
-        exit 1
-    fi
-
-    # Kill any existing model processes
-    pkill -f "ollama run $OLLAMA_MODEL_ALIAS" || true
-
-    # Start model in background
-    echo "Starting inference model..."
-    nohup ollama run $OLLAMA_MODEL_ALIAS --keepalive 60m > /dev/null 2>&1 &
-
-    # Verify model is running by checking the Ollama API
-    echo "Waiting for model to start (this may take a minute)..."
-
-    MAX_RETRIES=30
-    RETRY_DELAY=2
-
-    # Wait for model to appear in the Ollama API
-    for i in $(seq 1 $MAX_RETRIES); do
-        echo -n "."
-        MODELS_RUNNING=$(curl -s "$OLLAMA_URL/api/ps" | grep -E "$OLLAMA_MODEL_ALIAS" | wc -l)
-
-        if [ "$MODELS_RUNNING" -ge 1 ]; then
-            echo -e "\n${GREEN}✓${NC} Model is running successfully"
-            break
-        fi
-
-        if [ $i -eq $MAX_RETRIES ]; then
-            echo -e "\n${RED}Error: Model failed to start within the expected time.${NC}"
-            exit 1
-        fi
-
-        sleep $RETRY_DELAY
-    done
-}
-
-# Function to set up Python environment and install llama-stack-client
-setup_llama_stack_cli() {
-    echo -e "\n${BOLD}Setting up llama-stack environment...${NC}"
-
-    # Create virtual environment
-    echo "Creating Python virtual environment..."
-    VENV_DIR="$HOME/.venv/llama-stack"
-
-    if [ -d "$VENV_DIR" ]; then
-        echo "Virtual environment already exists at $VENV_DIR"
-    else
-        python3 -m venv "$VENV_DIR"
-        if [ $? -ne 0 ]; then
-            echo -e "${RED}Error: Failed to create virtual environment.${NC}"
-            exit 1
-        else
-            echo -e "${GREEN}✓${NC} Virtual environment created successfully"
-        fi
-    fi
-
-    # Activate virtual environment and install packages
-    source "$VENV_DIR/bin/activate"
-
-    echo "Installing llama-stack-client..."
-    pip install --upgrade pip
-    pip install llama-stack-client
-
-    if [ $? -eq 0 ]; then
-        echo -e "${GREEN}✓${NC} llama-stack-client installed successfully"
-
-        # Configure the client to point to the correct server
-        echo "Configuring llama-stack-client..."
-        llama-stack-client configure --endpoint "http://localhost:$PORT"
-
-        if [ $? -eq 0 ]; then
-            echo -e "${GREEN}✓${NC} llama-stack-client configured to use http://localhost:$PORT"
-            # Set environment variable for CLI use
-            export LLAMA_STACK_BASE_URL="http://localhost:$PORT"
-            # Add to shell config if it exists
-            if [ -f "$HOME/.bashrc" ]; then
-                grep -q "LLAMA_STACK_BASE_URL" "$HOME/.bashrc" || echo "export LLAMA_STACK_BASE_URL=\"http://localhost:$PORT\"" >> "$HOME/.bashrc"
-            elif [ -f "$HOME/.zshrc" ]; then
-                grep -q "LLAMA_STACK_BASE_URL" "$HOME/.zshrc" || echo "export LLAMA_STACK_BASE_URL=\"http://localhost:$PORT\"" >> "$HOME/.zshrc"
-            fi
-        else
-            echo -e "${YELLOW}Warning: Failed to configure llama-stack-client. You may need to run 'llama-stack-client configure --endpoint http://localhost:$PORT' manually.${NC}"
-        fi
-    else
-        echo -e "${RED}Error: Failed to install llama-stack-client.${NC}"
-        exit 1
-    fi
-}
-
-# Function to run a test inference
-run_test_inference() {
-    # Run a test inference to verify everything is working
-    echo -e "\n${BOLD}Running test inference...${NC}"
-
-    # Show the query being sent
-    TEST_QUERY="hello, what model are you?"
-    echo -e "${BOLD}Query:${NC} \"$TEST_QUERY\""
-
-    # Send the query and capture the result
-    echo -e "${BOLD}Sending request...${NC}"
-    TEST_RESULT=$(llama-stack-client inference chat-completion --message "$TEST_QUERY" 2>&1)
-
-    # Display the full result
-    echo -e "\n${BOLD}Response:${NC}"
-    echo "$TEST_RESULT"
-
-    if [[ $? -eq 0 && "$TEST_RESULT" == *"content"* ]]; then
-        echo -e "\n${GREEN}✓${NC} Test inference successful! Response received from the model."
-        echo -e "${BOLD}Everything is working correctly!${NC}"
-    else
-        echo -e "\n${YELLOW}Warning: Test inference might have failed.${NC}"
-        echo -e "You can try running a test manually after activation:"
-        echo -e "${YELLOW}source $VENV_DIR/bin/activate${NC}"
-        echo -e "${YELLOW}llama-stack-client inference chat-completion --message \"hello, what model are you?\"${NC}"
-    fi
-}
-
-# Function to run the llama-stack server
-run_llama_stack() {
-    echo -e "\n${BOLD}Starting Llama Stack server...${NC}"
-
-    mkdir -p "$HOME/.llama"
-
-    # Check if container already exists
-    CONTAINER_NAME="llama-stack-ollama"
-    CONTAINER_EXISTS=false
-    CONTAINER_RUNNING=false
-
-    if [ "$CONTAINER_ENGINE" = "docker" ]; then
-        if docker ps -a --format '{{.Names}}' | grep -q "^$CONTAINER_NAME$"; then
-            CONTAINER_EXISTS=true
-            if docker ps --format '{{.Names}}' | grep -q "^$CONTAINER_NAME$"; then
-                CONTAINER_RUNNING=true
-            fi
-        fi
-    elif [ "$CONTAINER_ENGINE" = "podman" ]; then
-        if podman ps -a --format '{{.Names}}' | grep -q "^$CONTAINER_NAME$"; then
-            CONTAINER_EXISTS=true
-            if podman ps --format '{{.Names}}' | grep -q "^$CONTAINER_NAME$"; then
-                CONTAINER_RUNNING=true
-            fi
-        fi
-    fi
-
-    # Handle existing container
-    if [ "$CONTAINER_EXISTS" = true ]; then
-        if [ "$CONTAINER_RUNNING" = true ]; then
-            echo -e "${YELLOW}Container $CONTAINER_NAME is already running${NC}"
-            echo -e "${GREEN}✓${NC} Llama Stack server is already running"
-
-            echo -e "\n${BOLD}Access Information:${NC}"
-            echo -e "  • API URL: ${GREEN}http://localhost:$PORT${NC}"
-            echo -e "  • Inference Model: ${GREEN}$INFERENCE_MODEL${NC}"
-            echo -e "  • Ollama URL: ${GREEN}$OLLAMA_URL${NC}"
-
-            echo -e "\n${BOLD}Management Commands:${NC}"
-            echo -e "  • Stop Llama Stack: ${YELLOW}${CONTAINER_ENGINE} stop $CONTAINER_NAME${NC}"
-            echo -e "  • Start Llama Stack: ${YELLOW}${CONTAINER_ENGINE} start $CONTAINER_NAME${NC}"
-            echo -e "  • View Logs: ${YELLOW}${CONTAINER_ENGINE} logs $CONTAINER_NAME${NC}"
-            echo -e "  • Stop Ollama: ${YELLOW}pkill ollama${NC}"
-
-            # Run a test inference
-            run_test_inference
-
-            return 0
-        else
-            echo -e "${YELLOW}Container $CONTAINER_NAME exists but is not running${NC}"
-            if [ "$CONTAINER_ENGINE" = "docker" ]; then
-                echo "Removing existing container..."
-                docker rm $CONTAINER_NAME
-            elif [ "$CONTAINER_ENGINE" = "podman" ]; then
-                echo "Removing existing container..."
-                podman rm $CONTAINER_NAME
-            fi
-        fi
-    fi
-
-    # Set the correct host value based on container engine
-    if [ "$CONTAINER_ENGINE" = "docker" ]; then
-        if [[ "$OSTYPE" == "linux-gnu"* ]]; then
-            # Linux with Docker should use host network
-            echo "Running Llama Stack server on Linux with Docker..."
-            docker run -d \
-                --name $CONTAINER_NAME \
-                -p $PORT:$PORT \
-                -v "$HOME/.llama:/root/.llama" \
-                --network=host \
-                llamastack/distribution-ollama \
-                --port $PORT \
-                --env INFERENCE_MODEL=$INFERENCE_MODEL \
-                --env OLLAMA_URL=http://localhost:11434
-        else
-            # macOS/Windows with Docker should use host.docker.internal
-            echo "Running Llama Stack server with Docker..."
-            docker run -d \
-                --name $CONTAINER_NAME \
-                -p $PORT:$PORT \
-                -v "$HOME/.llama:/root/.llama" \
-                llamastack/distribution-ollama \
-                --port $PORT \
-                --env INFERENCE_MODEL=$INFERENCE_MODEL \
-                --env OLLAMA_URL=http://host.docker.internal:11434
-        fi
-    elif [ "$CONTAINER_ENGINE" = "podman" ]; then
-        # Check podman version for proper host naming
-        PODMAN_VERSION=$(podman --version | awk '{print $3}')
-        if [[ $(echo "$PODMAN_VERSION >= 4.7.0" | bc -l) -eq 1 ]]; then
-            HOST_NAME="host.docker.internal"
-        else
-            HOST_NAME="host.containers.internal"
-        fi
-
-        echo "Running Llama Stack server with Podman..."
-        podman run -d \
-            --name $CONTAINER_NAME \
-            -p $PORT:$PORT \
-            -v "$HOME/.llama:/root/.llama:Z" \
-            llamastack/distribution-ollama \
-            --port $PORT \
-            --env INFERENCE_MODEL=$INFERENCE_MODEL \
-            --env OLLAMA_URL=http://$HOST_NAME:11434
-    fi
-
-    if [ $? -eq 0 ]; then
-        echo -e "${GREEN}✓${NC} Llama Stack server started successfully"
-
-        echo -e "\n${BOLD}Setup Complete!${NC}"
-        echo -e "\n${BOLD}Access Information:${NC}"
-        echo -e "  • API URL: ${GREEN}http://localhost:$PORT${NC}"
-        echo -e "  • Inference Model: ${GREEN}$INFERENCE_MODEL${NC}"
-        echo -e "  • Ollama URL: ${GREEN}$OLLAMA_URL${NC}"
-
-        echo -e "\n${BOLD}Management Commands:${NC}"
-        echo -e "  • Stop Llama Stack: ${YELLOW}${CONTAINER_ENGINE} stop $CONTAINER_NAME${NC}"
-        echo -e "  • Start Llama Stack: ${YELLOW}${CONTAINER_ENGINE} start $CONTAINER_NAME${NC}"
-        echo -e "  • View Logs: ${YELLOW}${CONTAINER_ENGINE} logs $CONTAINER_NAME${NC}"
-        echo -e "  • Stop Ollama: ${YELLOW}pkill ollama${NC}"

-        echo -e "\n${BOLD}Using Llama Stack Client:${NC}"
-        echo -e "1. Activate the virtual environment: ${YELLOW}source $VENV_DIR/bin/activate${NC}"
-        echo -e "2. Set the server URL: ${YELLOW}export LLAMA_STACK_BASE_URL=http://localhost:$PORT${NC}"
-        echo -e "3. Run client commands: ${YELLOW}llama-stack-client --help${NC}"
-
-        # Run a test inference
-        run_test_inference
-    else
-        echo -e "${RED}Error: Failed to start Llama Stack server.${NC}"
-        exit 1
-    fi
-}
-
-# Main installation flow
-main() {
-    print_banner
-    check_prerequisites
-    install_ollama
-    start_ollama
-    pull_models
-    setup_llama_stack_cli
-    run_llama_stack
-}
-
-# Run main function
-main
+###############################################################################
+# Done
+###############################################################################
+log ""
+log "🎉 Llama‑Stack is ready!"
+log "👉 API endpoint: http://localhost:${PORT}"
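
A minimal smoke test for the new two‑container flow — a sketch, assuming the defaults above (PORT=8321, OLLAMA_PORT=11434, MODEL_ALIAS=llama3.2:1b); it reuses the same endpoints, container names, and volume the script itself creates and polls:

  bash install.sh
  # the health endpoint the script waits on
  curl -fsS http://localhost:8321/v1/health
  # confirm the model is present inside the Ollama container
  docker exec ollama-server ollama list
  # tear everything down again
  docker rm -f llama-stack ollama-server
  docker volume rm ollama-models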