From e70c4e67de24d7d2e1af6af5edfe573adf019f95 Mon Sep 17 00:00:00 2001
From: reluctantfuturist
Date: Wed, 16 Apr 2025 12:05:58 -0700
Subject: [PATCH] =?UTF-8?q?refactor(install):=20simplify=20demo=20to=20two?=
 =?UTF-8?q?=E2=80=91container=20flow,=20drop=20host=E2=80=91level=20instal?=
 =?UTF-8?q?ls?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 install.sh | 434 +++++++----------------------------------------------
 1 file changed, 51 insertions(+), 383 deletions(-)

diff --git a/install.sh b/install.sh
index bd4ea65ba..aa63f9cd9 100644
--- a/install.sh
+++ b/install.sh
@@ -1,401 +1,69 @@
 #!/usr/bin/env bash
-
 # Copyright (c) Meta Platforms, Inc. and affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-set -e
+set -Eeuo pipefail
 
-# Color codes for output formatting
-GREEN='\033[0;32m'
-YELLOW='\033[0;33m'
-RED='\033[0;31m'
-NC='\033[0m' # No Color
-BOLD='\033[1m'
+PORT=8321
+OLLAMA_PORT=11434
+MODEL_ALIAS="llama3.2:1b"
+SERVER_IMAGE="llamastack/distribution-ollama:0.2.2"
+WAIT_TIMEOUT=300
 
-# Default values
-PORT=5001
-INFERENCE_MODEL="meta-llama/Llama-3.2-3B-Instruct"
-OLLAMA_MODEL_ALIAS="llama3.2:3b-instruct-fp16"
-OLLAMA_URL="http://localhost:11434"
-CONTAINER_ENGINE=""
+log(){ printf "\e[1;32m%s\e[0m\n" "$*"; }
+die(){ printf "\e[1;31m❌ %s\e[0m\n" "$*" >&2; exit 1; }
 
-# Functions
+command -v docker >/dev/null || die "Docker is required but not found."
 
-print_banner() {
-    echo -e "${BOLD}==================================================${NC}"
-    echo -e "${BOLD}      Llama Stack Ollama Distribution Setup      ${NC}"
-    echo -e "${BOLD}==================================================${NC}"
-}
+# clean up any leftovers from earlier runs
+for name in ollama-server llama-stack; do
+  docker ps -aq --filter "name=^${name}$" | xargs -r docker rm -f
+done
 
-check_command() {
-    command -v "$1" &> /dev/null
-}
+###############################################################################
+# 1. Ollama
+###############################################################################
+log "🦙 Starting Ollama…"
+docker run -d --name ollama-server -p "$OLLAMA_PORT:11434" \
+  -v ollama-models:/root/.ollama \
+  ollama/ollama >/dev/null
 
-# Function to check prerequisites
-check_prerequisites() {
-    echo -e "\n${BOLD}Checking prerequisites...${NC}"
+log "⏳ Waiting for Ollama daemon…"
+timeout "$WAIT_TIMEOUT" bash -c \
+  "until curl -fsS http://localhost:${OLLAMA_PORT}/ 2>/dev/null | grep -q 'Ollama'; do sleep 1; done" \
+  || die "Ollama did not become ready in ${WAIT_TIMEOUT}s"
 
-    # Check for container engine (Docker or Podman)
-    if check_command docker; then
-        echo -e "${GREEN}✓${NC} Docker is installed"
-        CONTAINER_ENGINE="docker"
-    elif check_command podman; then
-        echo -e "${GREEN}✓${NC} Podman is installed"
-        CONTAINER_ENGINE="podman"
-    else
-        echo -e "${RED}Error: Neither Docker nor Podman is installed. Please install one of them first.${NC}"
-        echo "Visit https://docs.docker.com/get-docker/ or https://podman.io/getting-started/installation for installation instructions."
-        exit 1
-    fi
+if ! docker exec ollama-server ollama list | grep -q "$MODEL_ALIAS"; then
+  log "📦 Pulling model $MODEL_ALIAS…"
+  docker exec ollama-server ollama pull "$MODEL_ALIAS"
+fi
 
-    # Check Python and pip
-    if check_command python3; then
-        PYTHON_VERSION=$(python3 -c 'import sys; print(".".join(map(str, sys.version_info[:2])))')
-        if [[ $(echo "$PYTHON_VERSION >= 3.10" | bc -l) -eq 1 ]]; then
-            echo -e "${GREEN}✓${NC} Python $PYTHON_VERSION is installed"
-            HAS_PYTHON=true
-        else
-            echo -e "${YELLOW}Warning: Python $PYTHON_VERSION detected. Python 3.10+ recommended.${NC}"
-            HAS_PYTHON=false
-        fi
-    else
-        echo -e "${YELLOW}Warning: Python 3 is not found. Will use container for operations.${NC}"
-        HAS_PYTHON=false
-    fi
+log "🚀 Launching model runtime…"
+docker exec -d ollama-server ollama run "$MODEL_ALIAS" --keepalive 60m
 
-    # Check pip
-    if [ "$HAS_PYTHON" = true ]; then
-        if check_command pip || check_command pip3; then
-            echo -e "${GREEN}✓${NC} pip is installed"
-            HAS_PIP=true
-        else
-            echo -e "${YELLOW}Warning: pip is not found. Will use container for operations.${NC}"
-            HAS_PIP=false
-            HAS_PYTHON=false
-        fi
-    fi
-}
+###############################################################################
+# 2. Llama‑Stack
+###############################################################################
+log "🦙📦 Starting Llama‑Stack…"
+docker run -d --name llama-stack \
+  -p "$PORT:$PORT" \
+  --add-host=host.docker.internal:host-gateway \
+  "$SERVER_IMAGE" \
+  --port "$PORT" \
+  --env INFERENCE_MODEL="$MODEL_ALIAS" \
+  --env OLLAMA_URL="http://host.docker.internal:${OLLAMA_PORT}" >/dev/null
 
-# Function to install Ollama
-install_ollama() {
-    echo -e "\n${BOLD}Installing Ollama...${NC}"
+log "⏳ Waiting for Llama‑Stack API…"
+timeout "$WAIT_TIMEOUT" bash -c \
+  "until curl -fsS http://localhost:${PORT}/v1/health 2>/dev/null | grep -q 'OK'; do sleep 1; done" \
+  || die "Llama‑Stack did not become ready in ${WAIT_TIMEOUT}s"
 
-    if check_command ollama; then
-        echo -e "${GREEN}✓${NC} Ollama is already installed"
-    else
-        echo "Installing Ollama..."
-        curl -fsSL https://ollama.com/install.sh | sh
-
-        if [ $? -eq 0 ]; then
-            echo -e "${GREEN}✓${NC} Ollama installed successfully"
-        else
-            echo -e "${RED}Error: Failed to install Ollama.${NC}"
-            exit 1
-        fi
-    fi
-}
-
-# Function to start Ollama server
-start_ollama() {
-    echo -e "\n${BOLD}Starting Ollama server...${NC}"
-
-    # Check if Ollama is already running
-    if curl -s "$OLLAMA_URL" &> /dev/null; then
-        echo -e "${GREEN}✓${NC} Ollama server is already running"
-    else
-        echo "Starting Ollama server..."
-        ollama serve &
-
-        # Wait for Ollama server to start
-        MAX_RETRIES=30
-        RETRY_COUNT=0
-
-        while ! curl -s "$OLLAMA_URL" &> /dev/null; do
-            sleep 1
-            RETRY_COUNT=$((RETRY_COUNT + 1))
-
-            if [ $RETRY_COUNT -ge $MAX_RETRIES ]; then
-                echo -e "${RED}Error: Ollama server failed to start after $MAX_RETRIES seconds.${NC}"
-                exit 1
-            fi
-        done
-
-        echo -e "${GREEN}✓${NC} Ollama server started successfully"
-    fi
-}
-
-# Function to pull models
-pull_models() {
-    echo -e "\n${BOLD}Pulling and running Llama model in Ollama...${NC}"
-
-    # Pull model
-    echo "Pulling $INFERENCE_MODEL model as $OLLAMA_MODEL_ALIAS..."
-    ollama pull $OLLAMA_MODEL_ALIAS
-    if [ $? -ne 0 ]; then
-        echo -e "${RED}Error: Failed to pull $OLLAMA_MODEL_ALIAS model.${NC}"
-        exit 1
-    fi
-
-    # Kill any existing model processes
-    pkill -f "ollama run $OLLAMA_MODEL_ALIAS" || true
-
-    # Start model in background
-    echo "Starting inference model..."
-    nohup ollama run $OLLAMA_MODEL_ALIAS --keepalive 60m > /dev/null 2>&1 &
-
-    # Verify model is running by checking the Ollama API
-    echo "Waiting for model to start (this may take a minute)..."
-
-    MAX_RETRIES=30
-    RETRY_DELAY=2
-
-    # Wait for model to appear in the Ollama API
-    for i in $(seq 1 $MAX_RETRIES); do
-        echo -n "."
-        MODELS_RUNNING=$(curl -s "$OLLAMA_URL/api/ps" | grep -E "$OLLAMA_MODEL_ALIAS" | wc -l)
-
-        if [ "$MODELS_RUNNING" -ge 1 ]; then
-            echo -e "\n${GREEN}✓${NC} Model is running successfully"
-            break
-        fi
-
-        if [ $i -eq $MAX_RETRIES ]; then
-            echo -e "\n${RED}Error: Model failed to start within the expected time.${NC}"
-            exit 1
-        fi
-
-        sleep $RETRY_DELAY
-    done
-}
-
-# Function to set up Python environment and install llama-stack-client
-setup_llama_stack_cli() {
-    echo -e "\n${BOLD}Setting up llama-stack environment...${NC}"
-
-    # Create virtual environment
-    echo "Creating Python virtual environment..."
-    VENV_DIR="$HOME/.venv/llama-stack"
-
-    if [ -d "$VENV_DIR" ]; then
-        echo "Virtual environment already exists at $VENV_DIR"
-    else
-        python3 -m venv "$VENV_DIR"
-        if [ $? -ne 0 ]; then
-            echo -e "${RED}Error: Failed to create virtual environment.${NC}"
-            exit 1
-        else
-            echo -e "${GREEN}✓${NC} Virtual environment created successfully"
-        fi
-    fi
-
-    # Activate virtual environment and install packages
-    source "$VENV_DIR/bin/activate"
-
-    echo "Installing llama-stack-client..."
-    pip install --upgrade pip
-    pip install llama-stack-client
-
-    if [ $? -eq 0 ]; then
-        echo -e "${GREEN}✓${NC} llama-stack-client installed successfully"
-
-        # Configure the client to point to the correct server
-        echo "Configuring llama-stack-client..."
-        llama-stack-client configure --endpoint "http://localhost:$PORT"
-
-        if [ $? -eq 0 ]; then
-            echo -e "${GREEN}✓${NC} llama-stack-client configured to use http://localhost:$PORT"
-            # Set environment variable for CLI use
-            export LLAMA_STACK_BASE_URL="http://localhost:$PORT"
-            # Add to shell config if it exists
-            if [ -f "$HOME/.bashrc" ]; then
-                grep -q "LLAMA_STACK_BASE_URL" "$HOME/.bashrc" || echo "export LLAMA_STACK_BASE_URL=\"http://localhost:$PORT\"" >> "$HOME/.bashrc"
-            elif [ -f "$HOME/.zshrc" ]; then
-                grep -q "LLAMA_STACK_BASE_URL" "$HOME/.zshrc" || echo "export LLAMA_STACK_BASE_URL=\"http://localhost:$PORT\"" >> "$HOME/.zshrc"
-            fi
-        else
-            echo -e "${YELLOW}Warning: Failed to configure llama-stack-client. You may need to run 'llama-stack-client configure --endpoint http://localhost:$PORT' manually.${NC}"
-        fi
-    else
-        echo -e "${RED}Error: Failed to install llama-stack-client.${NC}"
-        exit 1
-    fi
-}
-
-# Function to run a test inference
-run_test_inference() {
-    # Run a test inference to verify everything is working
-    echo -e "\n${BOLD}Running test inference...${NC}"
-
-    # Show the query being sent
-    TEST_QUERY="hello, what model are you?"
-    echo -e "${BOLD}Query:${NC} \"$TEST_QUERY\""
-
-    # Send the query and capture the result
-    echo -e "${BOLD}Sending request...${NC}"
-    TEST_RESULT=$(llama-stack-client inference chat-completion --message "$TEST_QUERY" 2>&1)
-
-    # Display the full result
-    echo -e "\n${BOLD}Response:${NC}"
-    echo "$TEST_RESULT"
-
-    if [[ $? -eq 0 && "$TEST_RESULT" == *"content"* ]]; then
-        echo -e "\n${GREEN}✓${NC} Test inference successful! Response received from the model."
-        echo -e "${BOLD}Everything is working correctly!${NC}"
-    else
-        echo -e "\n${YELLOW}Warning: Test inference might have failed.${NC}"
-        echo -e "You can try running a test manually after activation:"
-        echo -e "${YELLOW}source $VENV_DIR/bin/activate${NC}"
-        echo -e "${YELLOW}llama-stack-client inference chat-completion --message \"hello, what model are you?\"${NC}"
-    fi
-}
-
-# Function to run the llama-stack server
-run_llama_stack() {
-    echo -e "\n${BOLD}Starting Llama Stack server...${NC}"
-
-    mkdir -p "$HOME/.llama"
-
-    # Check if container already exists
-    CONTAINER_NAME="llama-stack-ollama"
-    CONTAINER_EXISTS=false
-    CONTAINER_RUNNING=false
-
-    if [ "$CONTAINER_ENGINE" = "docker" ]; then
-        if docker ps -a --format '{{.Names}}' | grep -q "^$CONTAINER_NAME$"; then
-            CONTAINER_EXISTS=true
-            if docker ps --format '{{.Names}}' | grep -q "^$CONTAINER_NAME$"; then
-                CONTAINER_RUNNING=true
-            fi
-        fi
-    elif [ "$CONTAINER_ENGINE" = "podman" ]; then
-        if podman ps -a --format '{{.Names}}' | grep -q "^$CONTAINER_NAME$"; then
-            CONTAINER_EXISTS=true
-            if podman ps --format '{{.Names}}' | grep -q "^$CONTAINER_NAME$"; then
-                CONTAINER_RUNNING=true
-            fi
-        fi
-    fi
-
-    # Handle existing container
-    if [ "$CONTAINER_EXISTS" = true ]; then
-        if [ "$CONTAINER_RUNNING" = true ]; then
-            echo -e "${YELLOW}Container $CONTAINER_NAME is already running${NC}"
-            echo -e "${GREEN}✓${NC} Llama Stack server is already running"
-
-            echo -e "\n${BOLD}Access Information:${NC}"
-            echo -e "  • API URL: ${GREEN}http://localhost:$PORT${NC}"
-            echo -e "  • Inference Model: ${GREEN}$INFERENCE_MODEL${NC}"
-            echo -e "  • Ollama URL: ${GREEN}$OLLAMA_URL${NC}"
-
-            echo -e "\n${BOLD}Management Commands:${NC}"
-            echo -e "  • Stop Llama Stack: ${YELLOW}${CONTAINER_ENGINE} stop $CONTAINER_NAME${NC}"
-            echo -e "  • Start Llama Stack: ${YELLOW}${CONTAINER_ENGINE} start $CONTAINER_NAME${NC}"
-            echo -e "  • View Logs: ${YELLOW}${CONTAINER_ENGINE} logs $CONTAINER_NAME${NC}"
-            echo -e "  • Stop Ollama: ${YELLOW}pkill ollama${NC}"
-
-            # Run a test inference
-            run_test_inference
-
-            return 0
-        else
-            echo -e "${YELLOW}Container $CONTAINER_NAME exists but is not running${NC}"
-            if [ "$CONTAINER_ENGINE" = "docker" ]; then
-                echo "Removing existing container..."
-                docker rm $CONTAINER_NAME
-            elif [ "$CONTAINER_ENGINE" = "podman" ]; then
-                echo "Removing existing container..."
-                podman rm $CONTAINER_NAME
-            fi
-        fi
-    fi
-
-    # Set the correct host value based on container engine
-    if [ "$CONTAINER_ENGINE" = "docker" ]; then
-        if [[ "$OSTYPE" == "linux-gnu"* ]]; then
-            # Linux with Docker should use host network
-            echo "Running Llama Stack server on Linux with Docker..."
-            docker run -d \
-                --name $CONTAINER_NAME \
-                -p $PORT:$PORT \
-                -v "$HOME/.llama:/root/.llama" \
-                --network=host \
-                llamastack/distribution-ollama \
-                --port $PORT \
-                --env INFERENCE_MODEL=$INFERENCE_MODEL \
-                --env OLLAMA_URL=http://localhost:11434
-        else
-            # macOS/Windows with Docker should use host.docker.internal
-            echo "Running Llama Stack server with Docker..."
-            docker run -d \
-                --name $CONTAINER_NAME \
-                -p $PORT:$PORT \
-                -v "$HOME/.llama:/root/.llama" \
-                llamastack/distribution-ollama \
-                --port $PORT \
-                --env INFERENCE_MODEL=$INFERENCE_MODEL \
-                --env OLLAMA_URL=http://host.docker.internal:11434
-        fi
-    elif [ "$CONTAINER_ENGINE" = "podman" ]; then
-        # Check podman version for proper host naming
-        PODMAN_VERSION=$(podman --version | awk '{print $3}')
-        if [[ $(echo "$PODMAN_VERSION >= 4.7.0" | bc -l) -eq 1 ]]; then
-            HOST_NAME="host.docker.internal"
-        else
-            HOST_NAME="host.containers.internal"
-        fi
-
-        echo "Running Llama Stack server with Podman..."
-        podman run -d \
-            --name $CONTAINER_NAME \
-            -p $PORT:$PORT \
-            -v "$HOME/.llama:/root/.llama:Z" \
-            llamastack/distribution-ollama \
-            --port $PORT \
-            --env INFERENCE_MODEL=$INFERENCE_MODEL \
-            --env OLLAMA_URL=http://$HOST_NAME:11434
-    fi
-
-    if [ $? -eq 0 ]; then
-        echo -e "${GREEN}✓${NC} Llama Stack server started successfully"
-
-        echo -e "\n${BOLD}Setup Complete!${NC}"
-        echo -e "\n${BOLD}Access Information:${NC}"
-        echo -e "  • API URL: ${GREEN}http://localhost:$PORT${NC}"
-        echo -e "  • Inference Model: ${GREEN}$INFERENCE_MODEL${NC}"
-        echo -e "  • Ollama URL: ${GREEN}$OLLAMA_URL${NC}"
-
-        echo -e "\n${BOLD}Management Commands:${NC}"
-        echo -e "  • Stop Llama Stack: ${YELLOW}${CONTAINER_ENGINE} stop $CONTAINER_NAME${NC}"
-        echo -e "  • Start Llama Stack: ${YELLOW}${CONTAINER_ENGINE} start $CONTAINER_NAME${NC}"
-        echo -e "  • View Logs: ${YELLOW}${CONTAINER_ENGINE} logs $CONTAINER_NAME${NC}"
-        echo -e "  • Stop Ollama: ${YELLOW}pkill ollama${NC}"

-        echo -e "\n${BOLD}Using Llama Stack Client:${NC}"
-        echo -e "1. Activate the virtual environment: ${YELLOW}source $VENV_DIR/bin/activate${NC}"
-        echo -e "2. Set the server URL: ${YELLOW}export LLAMA_STACK_BASE_URL=http://localhost:$PORT${NC}"
-        echo -e "3. Run client commands: ${YELLOW}llama-stack-client --help${NC}"
-
-        # Run a test inference
-        run_test_inference
-    else
-        echo -e "${RED}Error: Failed to start Llama Stack server.${NC}"
-        exit 1
-    fi
-}
-
-# Main installation flow
-main() {
-    print_banner
-    check_prerequisites
-    install_ollama
-    start_ollama
-    pull_models
-    setup_llama_stack_cli
-    run_llama_stack
-}
-
-# Run main function
-main
+###############################################################################
+# Done
+###############################################################################
+log ""
+log "🎉 Llama‑Stack is ready!"
+log "👉 API endpoint: http://localhost:${PORT}"
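
A minimal smoke test for the new two‑container flow — a sketch, assuming the defaults above (PORT=8321, OLLAMA_PORT=11434, MODEL_ALIAS=llama3.2:1b); it reuses the same endpoints, container names, and volume the script itself creates and polls:

  bash install.sh
  # the health endpoint the script waits on
  curl -fsS http://localhost:8321/v1/health
  # confirm the model is present inside the Ollama container
  docker exec ollama-server ollama list
  # tear everything down again
  docker rm -f llama-stack ollama-server
  docker volume rm ollama-models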