diff --git a/install.sh b/install.sh
index 38fc4af90..bd4ea65ba 100644
--- a/install.sh
+++ b/install.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
 
 # Copyright (c) Meta Platforms, Inc. and affiliates.
 # All rights reserved.
@@ -18,172 +18,383 @@ BOLD='\033[1m'
 # Default values
 PORT=5001
 INFERENCE_MODEL="meta-llama/Llama-3.2-3B-Instruct"
-SAFETY_MODEL="meta-llama/Llama-Guard-3-1B"
-# PROMPT_GUARD_MODEL="meta-llama/Prompt-Guard-86M" # Commented out as it may be deprecated
+OLLAMA_MODEL_ALIAS="llama3.2:3b-instruct-fp16"
+OLLAMA_URL="http://localhost:11434"
+CONTAINER_ENGINE=""
 
-# Banner
-echo -e "${BOLD}==================================================${NC}"
-echo -e "${BOLD}    Llama Stack Meta Reference Installation     ${NC}"
-echo -e "${BOLD}==================================================${NC}"
+# Functions
+
+print_banner() {
+    echo -e "${BOLD}==================================================${NC}"
+    echo -e "${BOLD}      Llama Stack Ollama Distribution Setup       ${NC}"
+    echo -e "${BOLD}==================================================${NC}"
+}
+
+check_command() {
+    command -v "$1" &> /dev/null
+}
 
 # Function to check prerequisites
 check_prerequisites() {
     echo -e "\n${BOLD}Checking prerequisites...${NC}"
 
-    # Check Docker
-    if ! command -v docker &> /dev/null; then
-        echo -e "${RED}Error: Docker is not installed. Please install Docker first.${NC}"
-        echo "Visit https://docs.docker.com/get-docker/ for installation instructions."
+    # Check for container engine (Docker or Podman)
+    if check_command docker; then
+        echo -e "${GREEN}✓${NC} Docker is installed"
+        CONTAINER_ENGINE="docker"
+    elif check_command podman; then
+        echo -e "${GREEN}✓${NC} Podman is installed"
+        CONTAINER_ENGINE="podman"
+    else
+        echo -e "${RED}Error: Neither Docker nor Podman is installed. Please install one of them first.${NC}"
+        echo "Visit https://docs.docker.com/get-docker/ or https://podman.io/getting-started/installation for installation instructions."
         exit 1
     fi
-    echo -e "${GREEN}✓${NC} Docker is installed"
 
-    # Check Python
-    if ! command -v python3 &> /dev/null; then
-        echo -e "${YELLOW}Warning: Python 3 is not found. Will use Docker for all operations.${NC}"
-        HAS_PYTHON=false
-    else
+    # Check Python and pip
+    if check_command python3; then
         PYTHON_VERSION=$(python3 -c 'import sys; print(".".join(map(str, sys.version_info[:2])))')
-        if [[ $(echo "$PYTHON_VERSION >= 3.10" | bc) -eq 1 ]]; then
+        # Compare version tuples in Python itself; piping "3.10" through bc treats it as 3.1
+        if python3 -c 'import sys; sys.exit(0 if sys.version_info >= (3, 10) else 1)'; then
             echo -e "${GREEN}✓${NC} Python $PYTHON_VERSION is installed"
             HAS_PYTHON=true
         else
             echo -e "${YELLOW}Warning: Python $PYTHON_VERSION detected. Python 3.10+ recommended.${NC}"
             HAS_PYTHON=false
         fi
+    else
+        echo -e "${YELLOW}Warning: Python 3 is not found. Will use container for operations.${NC}"
+        HAS_PYTHON=false
     fi
 
-    # Check NVIDIA GPU
-    if ! command -v nvidia-smi &> /dev/null; then
-        echo -e "${RED}Warning: NVIDIA GPU drivers not detected.${NC}"
-        echo -e "${YELLOW}This distribution is designed to run on NVIDIA GPUs and may not work on your system.${NC}"
-        echo -e "It may still be useful for testing the installation process, but model loading will likely fail."
-        echo -e "For production use, please install on a system with NVIDIA GPUs and proper drivers."
+    # Check pip
+    if [ "$HAS_PYTHON" = true ]; then
+        if check_command pip || check_command pip3; then
+            echo -e "${GREEN}✓${NC} pip is installed"
+            HAS_PIP=true
+        else
+            echo -e "${YELLOW}Warning: pip is not found. Will use container for operations.${NC}"
+            HAS_PIP=false
+            HAS_PYTHON=false
+        fi
+    fi
+}
-
-    read -p "Do you want to continue anyway? This may not work! (y/N): " CONTINUE
-    if [[ ! "$CONTINUE" =~ ^[Yy]$ ]]; then
-        echo "Installation aborted."
+
+# Function to install Ollama
+install_ollama() {
+    echo -e "\n${BOLD}Installing Ollama...${NC}"
+
+    if check_command ollama; then
+        echo -e "${GREEN}✓${NC} Ollama is already installed"
+    else
+        echo "Installing Ollama..."
+        curl -fsSL https://ollama.com/install.sh | sh
+
+        if [ $? -eq 0 ]; then
+            echo -e "${GREEN}✓${NC} Ollama installed successfully"
+        else
+            echo -e "${RED}Error: Failed to install Ollama.${NC}"
             exit 1
         fi
-        echo -e "${YELLOW}Continuing without NVIDIA GPU. Expect issues.${NC}"
-    else
-        echo -e "${GREEN}✓${NC} NVIDIA GPU detected"
     fi
 }
 
-# Function to set up Python environment and install llama-stack
-setup_llama_stack_cli() {
-    echo -e "\n${BOLD}Setting up llama-stack CLI...${NC}"
+# Function to start Ollama server
+start_ollama() {
+    echo -e "\n${BOLD}Starting Ollama server...${NC}"
 
-    if [ "$HAS_PYTHON" = true ]; then
-        # Create virtual environment
-        echo "Creating Python virtual environment..."
-        VENV_DIR="$HOME/.venv/llama-stack"
-        python3 -m venv "$VENV_DIR"
-        source "$VENV_DIR/bin/activate"
-
-        # Install pip and llama-stack
-        echo "Installing llama-stack package..."
-        pip install --upgrade pip
-        pip install llama-stack
-
-        echo -e "${GREEN}✓${NC} llama-stack CLI installed in virtual environment"
-        LLAMA_CMD="$VENV_DIR/bin/llama"
+    # Check if Ollama is already running
+    if curl -s "$OLLAMA_URL" &> /dev/null; then
+        echo -e "${GREEN}✓${NC} Ollama server is already running"
     else
-        echo -e "${YELLOW}Using Docker for llama-stack CLI operations${NC}"
-        LLAMA_CMD="docker run --rm -v $HOME/.llama:/root/.llama llamastack/distribution-meta-reference-gpu llama"
+        echo "Starting Ollama server..."
+        ollama serve &
+
+        # Wait for Ollama server to start
+        MAX_RETRIES=30
+        RETRY_COUNT=0
+
+        while ! curl -s "$OLLAMA_URL" &> /dev/null; do
+            sleep 1
+            RETRY_COUNT=$((RETRY_COUNT + 1))
+
+            if [ $RETRY_COUNT -ge $MAX_RETRIES ]; then
+                echo -e "${RED}Error: Ollama server failed to start after $MAX_RETRIES seconds.${NC}"
+                exit 1
+            fi
+        done
+
+        echo -e "${GREEN}✓${NC} Ollama server started successfully"
     fi
 }
 
-# Function to download models
-download_models() {
-    echo -e "\n${BOLD}Downloading Llama models...${NC}"
+# Function to pull models
+pull_models() {
+    echo -e "\n${BOLD}Pulling and running Llama model in Ollama...${NC}"
 
-    # Prompt for META_URL if not provided
-    echo -e "Please enter your META_URL for model downloads."
-    echo -e "${YELLOW}Note: You can get this URL from Meta's website when you're approved for model access.${NC}"
-    read -p "META_URL: " META_URL
-
-    if [ -z "$META_URL" ]; then
-        echo -e "${RED}No META_URL provided. Cannot download models.${NC}"
+    # Pull model
+    echo "Pulling $INFERENCE_MODEL model as $OLLAMA_MODEL_ALIAS..."
+    ollama pull $OLLAMA_MODEL_ALIAS
+    if [ $? -ne 0 ]; then
+        echo -e "${RED}Error: Failed to pull $OLLAMA_MODEL_ALIAS model.${NC}"
         exit 1
     fi
 
-    echo "Downloading $INFERENCE_MODEL..."
-    $LLAMA_CMD model download --source meta --model-id "$INFERENCE_MODEL" --meta-url "$META_URL"
+    # Kill any existing model processes
+    pkill -f "ollama run $OLLAMA_MODEL_ALIAS" || true
 
-    echo "Downloading $SAFETY_MODEL..."
-    $LLAMA_CMD model download --source meta --model-id "$SAFETY_MODEL" --meta-url "$META_URL"
+    # Start model in background
+    echo "Starting inference model..."
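+    # Keep the model loaded in the background so the Llama Stack server does not
+    # hit a cold start on its first request; --keepalive 60m asks Ollama to hold
+    # the model in memory through 60 minutes of inactivity.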
+    nohup ollama run $OLLAMA_MODEL_ALIAS --keepalive 60m > /dev/null 2>&1 &
 
-    # Prompt Guard model may be deprecated
-    # echo "Downloading $PROMPT_GUARD_MODEL..."
-    # $LLAMA_CMD model download --source meta --model-id "$PROMPT_GUARD_MODEL" --meta-url "$META_URL"
+    # Verify model is running by checking the Ollama API
+    echo "Waiting for model to start (this may take a minute)..."
 
-    echo -e "${GREEN}✓${NC} Models downloaded successfully"
+    MAX_RETRIES=30
+    RETRY_DELAY=2
+
+    # Wait for model to appear in the Ollama API
+    for i in $(seq 1 $MAX_RETRIES); do
+        echo -n "."
+        MODELS_RUNNING=$(curl -s "$OLLAMA_URL/api/ps" | grep -E "$OLLAMA_MODEL_ALIAS" | wc -l)
+
+        if [ "$MODELS_RUNNING" -ge 1 ]; then
+            echo -e "\n${GREEN}✓${NC} Model is running successfully"
+            break
+        fi
+
+        if [ $i -eq $MAX_RETRIES ]; then
+            echo -e "\n${RED}Error: Model failed to start within the expected time.${NC}"
+            exit 1
+        fi
+
+        sleep $RETRY_DELAY
+    done
 }
 
-# Function to run the Docker container
-run_docker_container() {
-    echo -e "\n${BOLD}Setting up Docker container...${NC}"
+# Function to set up Python environment and install llama-stack-client
+setup_llama_stack_cli() {
+    echo -e "\n${BOLD}Setting up llama-stack environment...${NC}"
 
-    # Pull the latest image
-    echo "Pulling llamastack/distribution-meta-reference-gpu image..."
-    docker pull llamastack/distribution-meta-reference-gpu
+    # Create virtual environment
+    echo "Creating Python virtual environment..."
+    VENV_DIR="$HOME/.venv/llama-stack"
 
-    # Run the container
-    echo "Starting container on port $PORT..."
-
-    # Check if NVIDIA GPU is available
-    if command -v nvidia-smi &> /dev/null; then
-        # With GPU
-        echo "Using NVIDIA GPU for Docker container..."
-        docker run \
-            -d \
-            --name llama-stack-meta \
-            -p $PORT:$PORT \
-            -v $HOME/.llama:/root/.llama \
-            --gpus all \
-            llamastack/distribution-meta-reference-gpu \
-            --port $PORT \
-            --env INFERENCE_MODEL=$INFERENCE_MODEL \
-            --env SAFETY_MODEL=$SAFETY_MODEL
+    if [ -d "$VENV_DIR" ]; then
+        echo "Virtual environment already exists at $VENV_DIR"
     else
-        # Without GPU (may not work)
-        echo -e "${YELLOW}Warning: Running without GPU support. This will likely fail for model loading!${NC}"
-        docker run \
-            -d \
-            --name llama-stack-meta \
-            -p $PORT:$PORT \
-            -v $HOME/.llama:/root/.llama \
-            llamastack/distribution-meta-reference-gpu \
-            --port $PORT \
-            --env INFERENCE_MODEL=$INFERENCE_MODEL \
-            --env SAFETY_MODEL=$SAFETY_MODEL
+        python3 -m venv "$VENV_DIR"
+        if [ $? -ne 0 ]; then
+            echo -e "${RED}Error: Failed to create virtual environment.${NC}"
+            exit 1
+        else
+            echo -e "${GREEN}✓${NC} Virtual environment created successfully"
+        fi
     fi
 
-    # Check if container started successfully
+    # Activate virtual environment and install packages
+    source "$VENV_DIR/bin/activate"
+
+    echo "Installing llama-stack-client..."
+    pip install --upgrade pip
+    pip install llama-stack-client
+
     if [ $? -eq 0 ]; then
-        echo -e "${GREEN}✓${NC} Llama Stack Meta Reference is now running!"
+        echo -e "${GREEN}✓${NC} llama-stack-client installed successfully"
+
+        # Configure the client to point to the correct server
+        echo "Configuring llama-stack-client..."
+        llama-stack-client configure --endpoint "http://localhost:$PORT"
+
+        if [ $? -eq 0 ]; then
+            echo -e "${GREEN}✓${NC} llama-stack-client configured to use http://localhost:$PORT"
+            # Set environment variable for CLI use
+            export LLAMA_STACK_BASE_URL="http://localhost:$PORT"
+            # Add to shell config if it exists
+            if [ -f "$HOME/.bashrc" ]; then
+                grep -q "LLAMA_STACK_BASE_URL" "$HOME/.bashrc" || echo "export LLAMA_STACK_BASE_URL=\"http://localhost:$PORT\"" >> "$HOME/.bashrc"
+            elif [ -f "$HOME/.zshrc" ]; then
+                grep -q "LLAMA_STACK_BASE_URL" "$HOME/.zshrc" || echo "export LLAMA_STACK_BASE_URL=\"http://localhost:$PORT\"" >> "$HOME/.zshrc"
+            fi
+        else
+            echo -e "${YELLOW}Warning: Failed to configure llama-stack-client. You may need to run 'llama-stack-client configure --endpoint http://localhost:$PORT' manually.${NC}"
+        fi
+    else
+        echo -e "${RED}Error: Failed to install llama-stack-client.${NC}"
+        exit 1
+    fi
+}
+
+# Function to run a test inference
+run_test_inference() {
+    # Run a test inference to verify everything is working
+    echo -e "\n${BOLD}Running test inference...${NC}"
+
+    # Show the query being sent
+    TEST_QUERY="hello, what model are you?"
+    echo -e "${BOLD}Query:${NC} \"$TEST_QUERY\""
+
+    # Send the query and capture the result
+    echo -e "${BOLD}Sending request...${NC}"
+    TEST_RESULT=$(llama-stack-client inference chat-completion --message "$TEST_QUERY" 2>&1)
+    # Capture the exit status now; the echo calls below would overwrite $?
+    TEST_STATUS=$?
+
+    # Display the full result
+    echo -e "\n${BOLD}Response:${NC}"
+    echo "$TEST_RESULT"
+
+    if [[ $TEST_STATUS -eq 0 && "$TEST_RESULT" == *"content"* ]]; then
+        echo -e "\n${GREEN}✓${NC} Test inference successful! Response received from the model."
+        echo -e "${BOLD}Everything is working correctly!${NC}"
+    else
+        echo -e "\n${YELLOW}Warning: Test inference might have failed.${NC}"
+        echo -e "You can try running a test manually after activation:"
+        echo -e "${YELLOW}source $VENV_DIR/bin/activate${NC}"
+        echo -e "${YELLOW}llama-stack-client inference chat-completion --message \"hello, what model are you?\"${NC}"
+    fi
+}
+
+# Function to run the llama-stack server
+run_llama_stack() {
+    echo -e "\n${BOLD}Starting Llama Stack server...${NC}"
+
+    mkdir -p "$HOME/.llama"
+
+    # Check if container already exists
+    CONTAINER_NAME="llama-stack-ollama"
+    CONTAINER_EXISTS=false
+    CONTAINER_RUNNING=false
+
+    if [ "$CONTAINER_ENGINE" = "docker" ]; then
+        if docker ps -a --format '{{.Names}}' | grep -q "^$CONTAINER_NAME$"; then
+            CONTAINER_EXISTS=true
+            if docker ps --format '{{.Names}}' | grep -q "^$CONTAINER_NAME$"; then
+                CONTAINER_RUNNING=true
+            fi
+        fi
+    elif [ "$CONTAINER_ENGINE" = "podman" ]; then
+        if podman ps -a --format '{{.Names}}' | grep -q "^$CONTAINER_NAME$"; then
+            CONTAINER_EXISTS=true
+            if podman ps --format '{{.Names}}' | grep -q "^$CONTAINER_NAME$"; then
+                CONTAINER_RUNNING=true
+            fi
+        fi
+    fi
+
+    # Handle existing container
+    if [ "$CONTAINER_EXISTS" = true ]; then
+        if [ "$CONTAINER_RUNNING" = true ]; then
+            echo -e "${YELLOW}Container $CONTAINER_NAME is already running${NC}"
+            echo -e "${GREEN}✓${NC} Llama Stack server is already running"
+
+            echo -e "\n${BOLD}Access Information:${NC}"
+            echo -e "  • API URL: ${GREEN}http://localhost:$PORT${NC}"
+            echo -e "  • Inference Model: ${GREEN}$INFERENCE_MODEL${NC}"
+            echo -e "  • Ollama URL: ${GREEN}$OLLAMA_URL${NC}"
+
+            echo -e "\n${BOLD}Management Commands:${NC}"
+            echo -e "  • Stop Llama Stack: ${YELLOW}${CONTAINER_ENGINE} stop $CONTAINER_NAME${NC}"
+            echo -e "  • Start Llama Stack: ${YELLOW}${CONTAINER_ENGINE} start $CONTAINER_NAME${NC}"
+            echo -e "  • View Logs: ${YELLOW}${CONTAINER_ENGINE} logs $CONTAINER_NAME${NC}"
+            echo -e "  • Stop Ollama: ${YELLOW}pkill ollama${NC}"
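+            # Note: 'pkill ollama' stops an Ollama server launched in the background by
+            # this script; if Ollama is managed as a systemd service, 'systemctl stop ollama'
+            # is cleaner, since the service manager may otherwise restart it.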
ollama${NC}" + + # Run a test inference + run_test_inference + + return 0 + else + echo -e "${YELLOW}Container $CONTAINER_NAME exists but is not running${NC}" + if [ "$CONTAINER_ENGINE" = "docker" ]; then + echo "Removing existing container..." + docker rm $CONTAINER_NAME + elif [ "$CONTAINER_ENGINE" = "podman" ]; then + echo "Removing existing container..." + podman rm $CONTAINER_NAME + fi + fi + fi + + # Set the correct host value based on container engine + if [ "$CONTAINER_ENGINE" = "docker" ]; then + if [[ "$OSTYPE" == "linux-gnu"* ]]; then + # Linux with Docker should use host network + echo "Running Llama Stack server on Linux with Docker..." + docker run -d \ + --name $CONTAINER_NAME \ + -p $PORT:$PORT \ + -v "$HOME/.llama:/root/.llama" \ + --network=host \ + llamastack/distribution-ollama \ + --port $PORT \ + --env INFERENCE_MODEL=$INFERENCE_MODEL \ + --env OLLAMA_URL=http://localhost:11434 + else + # macOS/Windows with Docker should use host.docker.internal + echo "Running Llama Stack server with Docker..." + docker run -d \ + --name $CONTAINER_NAME \ + -p $PORT:$PORT \ + -v "$HOME/.llama:/root/.llama" \ + llamastack/distribution-ollama \ + --port $PORT \ + --env INFERENCE_MODEL=$INFERENCE_MODEL \ + --env OLLAMA_URL=http://host.docker.internal:11434 + fi + elif [ "$CONTAINER_ENGINE" = "podman" ]; then + # Check podman version for proper host naming + PODMAN_VERSION=$(podman --version | awk '{print $3}') + if [[ $(echo "$PODMAN_VERSION >= 4.7.0" | bc -l) -eq 1 ]]; then + HOST_NAME="host.docker.internal" + else + HOST_NAME="host.containers.internal" + fi + + echo "Running Llama Stack server with Podman..." + podman run -d \ + --name $CONTAINER_NAME \ + -p $PORT:$PORT \ + -v "$HOME/.llama:/root/.llama:Z" \ + llamastack/distribution-ollama \ + --port $PORT \ + --env INFERENCE_MODEL=$INFERENCE_MODEL \ + --env OLLAMA_URL=http://$HOST_NAME:11434 + fi + + if [ $? -eq 0 ]; then + echo -e "${GREEN}✓${NC} Llama Stack server started successfully" + + echo -e "\n${BOLD}Setup Complete!${NC}" echo -e "\n${BOLD}Access Information:${NC}" echo -e " • API URL: ${GREEN}http://localhost:$PORT${NC}" echo -e " • Inference Model: ${GREEN}$INFERENCE_MODEL${NC}" - echo -e " • Safety Model: ${GREEN}$SAFETY_MODEL${NC}" + echo -e " • Ollama URL: ${GREEN}$OLLAMA_URL${NC}" + echo -e "\n${BOLD}Management Commands:${NC}" - echo -e " • Stop server: ${YELLOW}docker stop llama-stack-meta${NC}" - echo -e " • Start server: ${YELLOW}docker start llama-stack-meta${NC}" - echo -e " • View logs: ${YELLOW}docker logs llama-stack-meta${NC}" + echo -e " • Stop Llama Stack: ${YELLOW}${CONTAINER_ENGINE} stop $CONTAINER_NAME${NC}" + echo -e " • Start Llama Stack: ${YELLOW}${CONTAINER_ENGINE} start $CONTAINER_NAME${NC}" + echo -e " • View Logs: ${YELLOW}${CONTAINER_ENGINE} logs $CONTAINER_NAME${NC}" + echo -e " • Stop Ollama: ${YELLOW}pkill ollama${NC}" + + echo -e "\n${BOLD}Using Llama Stack Client:${NC}" + echo -e "1. Activate the virtual environment: ${YELLOW}source $VENV_DIR/bin/activate${NC}" + echo -e "2. Set the server URL: ${YELLOW}export LLAMA_STACK_BASE_URL=http://localhost:$PORT${NC}" + echo -e "3. Run client commands: ${YELLOW}llama-stack-client --help${NC}" + + # Run a test inference + run_test_inference else - echo -e "${RED}Failed to start the container. 
Please check Docker logs.${NC}" + echo -e "${RED}Error: Failed to start Llama Stack server.${NC}" exit 1 fi } # Main installation flow main() { + print_banner check_prerequisites + install_ollama + start_ollama + pull_models setup_llama_stack_cli - download_models - run_docker_container + run_llama_stack } # Run main function
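+# Example follow-up once the script has finished (assumed commands; adjust paths as needed):
+#   source "$HOME/.venv/llama-stack/bin/activate"
+#   llama-stack-client models list
+#   llama-stack-client inference chat-completion --message "hello, what model are you?"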