mirror of https://github.com/meta-llama/llama-stack.git
synced 2025-08-02 08:44:44 +00:00

ollama distro install

This commit is contained in:
parent 6d4a4438ac
commit 6d5d1480c9

1 changed file with 321 additions and 110 deletions

install.sh (431 lines changed)
@@ -1,4 +1,4 @@
#!/bin/bash
#!/usr/bin/env bash

# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
@@ -18,172 +18,383 @@ BOLD='\033[1m'

# Default values
PORT=5001
INFERENCE_MODEL="meta-llama/Llama-3.2-3B-Instruct"
SAFETY_MODEL="meta-llama/Llama-Guard-3-1B"
# PROMPT_GUARD_MODEL="meta-llama/Prompt-Guard-86M" # Commented out as it may be deprecated
OLLAMA_MODEL_ALIAS="llama3.2:3b-instruct-fp16"
OLLAMA_URL="http://localhost:11434"
CONTAINER_ENGINE=""
# Banner
echo -e "${BOLD}==================================================${NC}"
echo -e "${BOLD} Llama Stack Meta Reference Installation ${NC}"
echo -e "${BOLD}==================================================${NC}"

# Functions
print_banner() {
    echo -e "${BOLD}==================================================${NC}"
    echo -e "${BOLD} Llama Stack Ollama Distribution Setup ${NC}"
    echo -e "${BOLD}==================================================${NC}"
}

check_command() {
    command -v "$1" &> /dev/null
}

# Function to check prerequisites
check_prerequisites() {
    echo -e "\n${BOLD}Checking prerequisites...${NC}"
    # Check Docker
    if ! command -v docker &> /dev/null; then
        echo -e "${RED}Error: Docker is not installed. Please install Docker first.${NC}"
        echo "Visit https://docs.docker.com/get-docker/ for installation instructions."
    # Check for container engine (Docker or Podman)
    if check_command docker; then
        echo -e "${GREEN}✓${NC} Docker is installed"
        CONTAINER_ENGINE="docker"
    elif check_command podman; then
        echo -e "${GREEN}✓${NC} Podman is installed"
        CONTAINER_ENGINE="podman"
    else
        echo -e "${RED}Error: Neither Docker nor Podman is installed. Please install one of them first.${NC}"
        echo "Visit https://docs.docker.com/get-docker/ or https://podman.io/getting-started/installation for installation instructions."
        exit 1
    fi
    echo -e "${GREEN}✓${NC} Docker is installed"

    # Check Python
    if ! command -v python3 &> /dev/null; then
        echo -e "${YELLOW}Warning: Python 3 is not found. Will use Docker for all operations.${NC}"
        HAS_PYTHON=false
    else
    # Check Python and pip
    if check_command python3; then
        PYTHON_VERSION=$(python3 -c 'import sys; print(".".join(map(str, sys.version_info[:2])))')
        if [[ $(echo "$PYTHON_VERSION >= 3.10" | bc) -eq 1 ]]; then
        if [[ $(echo "$PYTHON_VERSION >= 3.10" | bc -l) -eq 1 ]]; then
            echo -e "${GREEN}✓${NC} Python $PYTHON_VERSION is installed"
            HAS_PYTHON=true
        else
            echo -e "${YELLOW}Warning: Python $PYTHON_VERSION detected. Python 3.10+ recommended.${NC}"
            HAS_PYTHON=false
        fi
    else
        echo -e "${YELLOW}Warning: Python 3 is not found. Will use container for operations.${NC}"
        HAS_PYTHON=false
    fi
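
One note on the version gate above: `bc` compares the operands as decimal numbers, so "3.9 >= 3.10" evaluates to true and a Python 3.9 install would pass the 3.10 check. A minimal alternative sketch, assuming GNU coreutils' `sort -V` is available:

    # Sketch: numeric-safe version gate (assumes sort -V from GNU coreutils).
    MIN_VERSION="3.10"
    if [ "$(printf '%s\n' "$MIN_VERSION" "$PYTHON_VERSION" | sort -V | head -n1)" = "$MIN_VERSION" ]; then
        HAS_PYTHON=true    # PYTHON_VERSION is >= 3.10
    else
        HAS_PYTHON=false
    fi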

    # Check NVIDIA GPU
    if ! command -v nvidia-smi &> /dev/null; then
        echo -e "${RED}Warning: NVIDIA GPU drivers not detected.${NC}"
        echo -e "${YELLOW}This distribution is designed to run on NVIDIA GPUs and may not work on your system.${NC}"
        echo -e "It may still be useful for testing the installation process, but model loading will likely fail."
        echo -e "For production use, please install on a system with NVIDIA GPUs and proper drivers."
    # Check pip
    if [ "$HAS_PYTHON" = true ]; then
        if check_command pip || check_command pip3; then
            echo -e "${GREEN}✓${NC} pip is installed"
            HAS_PIP=true
        else
            echo -e "${YELLOW}Warning: pip is not found. Will use container for operations.${NC}"
            HAS_PIP=false
            HAS_PYTHON=false
        fi
    fi
}

        read -p "Do you want to continue anyway? This may not work! (y/N): " CONTINUE
        if [[ ! "$CONTINUE" =~ ^[Yy]$ ]]; then
            echo "Installation aborted."
# Function to install Ollama
install_ollama() {
    echo -e "\n${BOLD}Installing Ollama...${NC}"

    if check_command ollama; then
        echo -e "${GREEN}✓${NC} Ollama is already installed"
    else
        echo "Installing Ollama..."
        curl -fsSL https://ollama.com/install.sh | sh

        if [ $? -eq 0 ]; then
            echo -e "${GREEN}✓${NC} Ollama installed successfully"
        else
            echo -e "${RED}Error: Failed to install Ollama.${NC}"
            exit 1
        fi
        echo -e "${YELLOW}Continuing without NVIDIA GPU. Expect issues.${NC}"
    else
        echo -e "${GREEN}✓${NC} NVIDIA GPU detected"
    fi
}
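
Since the function above pipes a remote script straight into `sh`, one common hardening, sketched here rather than part of the commit, is to download the installer, inspect it, and only then run it:

    # Sketch: fetch the Ollama installer for review instead of curl | sh.
    TMP_INSTALL="$(mktemp)"
    curl -fsSL https://ollama.com/install.sh -o "$TMP_INSTALL"
    less "$TMP_INSTALL"    # inspect before executing
    sh "$TMP_INSTALL" && rm -f "$TMP_INSTALL"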

# Function to set up Python environment and install llama-stack
setup_llama_stack_cli() {
    echo -e "\n${BOLD}Setting up llama-stack CLI...${NC}"
# Function to start Ollama server
start_ollama() {
    echo -e "\n${BOLD}Starting Ollama server...${NC}"

    if [ "$HAS_PYTHON" = true ]; then
        # Create virtual environment
        echo "Creating Python virtual environment..."
        VENV_DIR="$HOME/.venv/llama-stack"
        python3 -m venv "$VENV_DIR"
        source "$VENV_DIR/bin/activate"

        # Install pip and llama-stack
        echo "Installing llama-stack package..."
        pip install --upgrade pip
        pip install llama-stack

        echo -e "${GREEN}✓${NC} llama-stack CLI installed in virtual environment"
        LLAMA_CMD="$VENV_DIR/bin/llama"
    # Check if Ollama is already running
    if curl -s "$OLLAMA_URL" &> /dev/null; then
        echo -e "${GREEN}✓${NC} Ollama server is already running"
    else
        echo -e "${YELLOW}Using Docker for llama-stack CLI operations${NC}"
        LLAMA_CMD="docker run --rm -v $HOME/.llama:/root/.llama llamastack/distribution-meta-reference-gpu llama"
        echo "Starting Ollama server..."
        ollama serve &

        # Wait for Ollama server to start
        MAX_RETRIES=30
        RETRY_COUNT=0

        while ! curl -s "$OLLAMA_URL" &> /dev/null; do
            sleep 1
            RETRY_COUNT=$((RETRY_COUNT + 1))

            if [ $RETRY_COUNT -ge $MAX_RETRIES ]; then
                echo -e "${RED}Error: Ollama server failed to start after $MAX_RETRIES seconds.${NC}"
                exit 1
            fi
        done

        echo -e "${GREEN}✓${NC} Ollama server started successfully"
    fi
}
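
The wait loop above treats any response from $OLLAMA_URL as success. A slightly stricter probe, a sketch using standard curl flags, rejects HTTP error responses and bounds each attempt (the script's RETRY_COUNT cap would still apply):

    # Sketch: stricter readiness probe; --fail rejects HTTP error codes,
    # --max-time keeps a single attempt from hanging.
    while ! curl --silent --fail --max-time 2 "$OLLAMA_URL" > /dev/null; do
        sleep 1
    done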

# Function to download models
download_models() {
    echo -e "\n${BOLD}Downloading Llama models...${NC}"
# Function to pull models
pull_models() {
    echo -e "\n${BOLD}Pulling and running Llama model in Ollama...${NC}"

    # Prompt for META_URL if not provided
    echo -e "Please enter your META_URL for model downloads."
    echo -e "${YELLOW}Note: You can get this URL from Meta's website when you're approved for model access.${NC}"
    read -p "META_URL: " META_URL

    if [ -z "$META_URL" ]; then
        echo -e "${RED}No META_URL provided. Cannot download models.${NC}"
    # Pull model
    echo "Pulling $INFERENCE_MODEL model as $OLLAMA_MODEL_ALIAS..."
    ollama pull $OLLAMA_MODEL_ALIAS
    if [ $? -ne 0 ]; then
        echo -e "${RED}Error: Failed to pull $OLLAMA_MODEL_ALIAS model.${NC}"
        exit 1
    fi

    echo "Downloading $INFERENCE_MODEL..."
    $LLAMA_CMD model download --source meta --model-id "$INFERENCE_MODEL" --meta-url "$META_URL"
    # Kill any existing model processes
    pkill -f "ollama run $OLLAMA_MODEL_ALIAS" || true

    echo "Downloading $SAFETY_MODEL..."
    $LLAMA_CMD model download --source meta --model-id "$SAFETY_MODEL" --meta-url "$META_URL"
    # Start model in background
    echo "Starting inference model..."
    nohup ollama run $OLLAMA_MODEL_ALIAS --keepalive 60m > /dev/null 2>&1 &

    # Prompt Guard model may be deprecated
    # echo "Downloading $PROMPT_GUARD_MODEL..."
    # $LLAMA_CMD model download --source meta --model-id "$PROMPT_GUARD_MODEL" --meta-url "$META_URL"
    # Verify model is running by checking the Ollama API
    echo "Waiting for model to start (this may take a minute)..."

    echo -e "${GREEN}✓${NC} Models downloaded successfully"
    MAX_RETRIES=30
    RETRY_DELAY=2

    # Wait for model to appear in the Ollama API
    for i in $(seq 1 $MAX_RETRIES); do
        echo -n "."
        MODELS_RUNNING=$(curl -s "$OLLAMA_URL/api/ps" | grep -E "$OLLAMA_MODEL_ALIAS" | wc -l)

        if [ "$MODELS_RUNNING" -ge 1 ]; then
            echo -e "\n${GREEN}✓${NC} Model is running successfully"
            break
        fi

        if [ $i -eq $MAX_RETRIES ]; then
            echo -e "\n${RED}Error: Model failed to start within the expected time.${NC}"
            exit 1
        fi

        sleep $RETRY_DELAY
    done
}
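
The `grep -E` check above treats $OLLAMA_MODEL_ALIAS as a regular expression, so the dot in "llama3.2" matches any character. An exact-name sketch, assuming jq is installed and that /api/ps keeps its models[].name response shape:

    # Sketch: exact-name check against Ollama's /api/ps listing (assumes jq).
    if curl -s "$OLLAMA_URL/api/ps" \
        | jq -e --arg m "$OLLAMA_MODEL_ALIAS" '.models[]? | select(.name == $m)' > /dev/null; then
        echo "Model is running"
    fi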

# Function to run the Docker container
run_docker_container() {
    echo -e "\n${BOLD}Setting up Docker container...${NC}"
# Function to set up Python environment and install llama-stack-client
setup_llama_stack_cli() {
    echo -e "\n${BOLD}Setting up llama-stack environment...${NC}"

    # Pull the latest image
    echo "Pulling llamastack/distribution-meta-reference-gpu image..."
    docker pull llamastack/distribution-meta-reference-gpu
    # Create virtual environment
    echo "Creating Python virtual environment..."
    VENV_DIR="$HOME/.venv/llama-stack"

    # Run the container
    echo "Starting container on port $PORT..."

    # Check if NVIDIA GPU is available
    if command -v nvidia-smi &> /dev/null; then
        # With GPU
        echo "Using NVIDIA GPU for Docker container..."
        docker run \
            -d \
            --name llama-stack-meta \
            -p $PORT:$PORT \
            -v $HOME/.llama:/root/.llama \
            --gpus all \
            llamastack/distribution-meta-reference-gpu \
            --port $PORT \
            --env INFERENCE_MODEL=$INFERENCE_MODEL \
            --env SAFETY_MODEL=$SAFETY_MODEL
    if [ -d "$VENV_DIR" ]; then
        echo "Virtual environment already exists at $VENV_DIR"
    else
        # Without GPU (may not work)
        echo -e "${YELLOW}Warning: Running without GPU support. This will likely fail for model loading!${NC}"
        docker run \
            -d \
            --name llama-stack-meta \
            -p $PORT:$PORT \
            -v $HOME/.llama:/root/.llama \
            llamastack/distribution-meta-reference-gpu \
            --port $PORT \
            --env INFERENCE_MODEL=$INFERENCE_MODEL \
            --env SAFETY_MODEL=$SAFETY_MODEL
        python3 -m venv "$VENV_DIR"
        if [ $? -ne 0 ]; then
            echo -e "${RED}Error: Failed to create virtual environment.${NC}"
            exit 1
        else
            echo -e "${GREEN}✓${NC} Virtual environment created successfully"
        fi
    fi

    # Check if container started successfully
    # Activate virtual environment and install packages
    source "$VENV_DIR/bin/activate"

    echo "Installing llama-stack-client..."
    pip install --upgrade pip
    pip install llama-stack-client

    if [ $? -eq 0 ]; then
        echo -e "${GREEN}✓${NC} Llama Stack Meta Reference is now running!"
        echo -e "${GREEN}✓${NC} llama-stack-client installed successfully"

        # Configure the client to point to the correct server
        echo "Configuring llama-stack-client..."
        llama-stack-client configure --endpoint "http://localhost:$PORT"

        if [ $? -eq 0 ]; then
            echo -e "${GREEN}✓${NC} llama-stack-client configured to use http://localhost:$PORT"
            # Set environment variable for CLI use
            export LLAMA_STACK_BASE_URL="http://localhost:$PORT"
            # Add to shell config if it exists
            if [ -f "$HOME/.bashrc" ]; then
                grep -q "LLAMA_STACK_BASE_URL" "$HOME/.bashrc" || echo "export LLAMA_STACK_BASE_URL=\"http://localhost:$PORT\"" >> "$HOME/.bashrc"
            elif [ -f "$HOME/.zshrc" ]; then
                grep -q "LLAMA_STACK_BASE_URL" "$HOME/.zshrc" || echo "export LLAMA_STACK_BASE_URL=\"http://localhost:$PORT\"" >> "$HOME/.zshrc"
            fi
        else
            echo -e "${YELLOW}Warning: Failed to configure llama-stack-client. You may need to run 'llama-stack-client configure --endpoint http://localhost:$PORT' manually.${NC}"
        fi
    else
        echo -e "${RED}Error: Failed to install llama-stack-client.${NC}"
        exit 1
    fi
}
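
To verify the client setup from a fresh shell, the script's own commands and paths can be replayed manually:

    # Manual check, re-using the commands and defaults from this script.
    source "$HOME/.venv/llama-stack/bin/activate"
    export LLAMA_STACK_BASE_URL="http://localhost:5001"
    llama-stack-client configure --endpoint "$LLAMA_STACK_BASE_URL"
    llama-stack-client inference chat-completion --message "hello, what model are you?"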

# Function to run a test inference
run_test_inference() {
    # Run a test inference to verify everything is working
    echo -e "\n${BOLD}Running test inference...${NC}"

    # Show the query being sent
    TEST_QUERY="hello, what model are you?"
    echo -e "${BOLD}Query:${NC} \"$TEST_QUERY\""

    # Send the query and capture the result
    echo -e "${BOLD}Sending request...${NC}"
    TEST_RESULT=$(llama-stack-client inference chat-completion --message "$TEST_QUERY" 2>&1)

    # Display the full result
    echo -e "\n${BOLD}Response:${NC}"
    echo "$TEST_RESULT"

    if [[ $? -eq 0 && "$TEST_RESULT" == *"content"* ]]; then
        echo -e "\n${GREEN}✓${NC} Test inference successful! Response received from the model."
        echo -e "${BOLD}Everything is working correctly!${NC}"
    else
        echo -e "\n${YELLOW}Warning: Test inference might have failed.${NC}"
        echo -e "You can try running a test manually after activation:"
        echo -e "${YELLOW}source $VENV_DIR/bin/activate${NC}"
        echo -e "${YELLOW}llama-stack-client inference chat-completion --message \"hello, what model are you?\"${NC}"
    fi
}
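
One catch in run_test_inference: by the time $? is tested it reflects the echo that printed the response, not the client call, so the status half of the check is always true. A sketch that captures the status at call time:

    # Sketch: record the client's exit status immediately after the call.
    TEST_RESULT=$(llama-stack-client inference chat-completion --message "$TEST_QUERY" 2>&1)
    TEST_STATUS=$?
    echo "$TEST_RESULT"
    if [ $TEST_STATUS -eq 0 ] && [[ "$TEST_RESULT" == *"content"* ]]; then
        echo "Test inference successful"
    fi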

# Function to run the llama-stack server
run_llama_stack() {
    echo -e "\n${BOLD}Starting Llama Stack server...${NC}"

    mkdir -p "$HOME/.llama"

    # Check if container already exists
    CONTAINER_NAME="llama-stack-ollama"
    CONTAINER_EXISTS=false
    CONTAINER_RUNNING=false

    if [ "$CONTAINER_ENGINE" = "docker" ]; then
        if docker ps -a --format '{{.Names}}' | grep -q "^$CONTAINER_NAME$"; then
            CONTAINER_EXISTS=true
            if docker ps --format '{{.Names}}' | grep -q "^$CONTAINER_NAME$"; then
                CONTAINER_RUNNING=true
            fi
        fi
    elif [ "$CONTAINER_ENGINE" = "podman" ]; then
        if podman ps -a --format '{{.Names}}' | grep -q "^$CONTAINER_NAME$"; then
            CONTAINER_EXISTS=true
            if podman ps --format '{{.Names}}' | grep -q "^$CONTAINER_NAME$"; then
                CONTAINER_RUNNING=true
            fi
        fi
    fi

    # Handle existing container
    if [ "$CONTAINER_EXISTS" = true ]; then
        if [ "$CONTAINER_RUNNING" = true ]; then
            echo -e "${YELLOW}Container $CONTAINER_NAME is already running${NC}"
            echo -e "${GREEN}✓${NC} Llama Stack server is already running"

            echo -e "\n${BOLD}Access Information:${NC}"
            echo -e " • API URL: ${GREEN}http://localhost:$PORT${NC}"
            echo -e " • Inference Model: ${GREEN}$INFERENCE_MODEL${NC}"
            echo -e " • Ollama URL: ${GREEN}$OLLAMA_URL${NC}"

            echo -e "\n${BOLD}Management Commands:${NC}"
            echo -e " • Stop Llama Stack: ${YELLOW}${CONTAINER_ENGINE} stop $CONTAINER_NAME${NC}"
            echo -e " • Start Llama Stack: ${YELLOW}${CONTAINER_ENGINE} start $CONTAINER_NAME${NC}"
            echo -e " • View Logs: ${YELLOW}${CONTAINER_ENGINE} logs $CONTAINER_NAME${NC}"
            echo -e " • Stop Ollama: ${YELLOW}pkill ollama${NC}"

            # Run a test inference
            run_test_inference

            return 0
        else
            echo -e "${YELLOW}Container $CONTAINER_NAME exists but is not running${NC}"
            if [ "$CONTAINER_ENGINE" = "docker" ]; then
                echo "Removing existing container..."
                docker rm $CONTAINER_NAME
            elif [ "$CONTAINER_ENGINE" = "podman" ]; then
                echo "Removing existing container..."
                podman rm $CONTAINER_NAME
            fi
        fi
    fi

    # Set the correct host value based on container engine
    if [ "$CONTAINER_ENGINE" = "docker" ]; then
        if [[ "$OSTYPE" == "linux-gnu"* ]]; then
            # Linux with Docker should use host network
            echo "Running Llama Stack server on Linux with Docker..."
            docker run -d \
                --name $CONTAINER_NAME \
                -p $PORT:$PORT \
                -v "$HOME/.llama:/root/.llama" \
                --network=host \
                llamastack/distribution-ollama \
                --port $PORT \
                --env INFERENCE_MODEL=$INFERENCE_MODEL \
                --env OLLAMA_URL=http://localhost:11434
        else
            # macOS/Windows with Docker should use host.docker.internal
            echo "Running Llama Stack server with Docker..."
            docker run -d \
                --name $CONTAINER_NAME \
                -p $PORT:$PORT \
                -v "$HOME/.llama:/root/.llama" \
                llamastack/distribution-ollama \
                --port $PORT \
                --env INFERENCE_MODEL=$INFERENCE_MODEL \
                --env OLLAMA_URL=http://host.docker.internal:11434
        fi
    elif [ "$CONTAINER_ENGINE" = "podman" ]; then
        # Check podman version for proper host naming
        PODMAN_VERSION=$(podman --version | awk '{print $3}')
        if [[ $(echo "$PODMAN_VERSION >= 4.7.0" | bc -l) -eq 1 ]]; then
            HOST_NAME="host.docker.internal"
        else
            HOST_NAME="host.containers.internal"
        fi
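        # Note: bc -l cannot parse a three-component version string such as
        # "4.7.0" (two decimal points is a syntax error), so the comparison
        # above produces no output and HOST_NAME falls back to
        # host.containers.internal on every Podman version. A sort -V
        # comparison, as sketched after the Python check, would be more
        # robust here.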

        echo "Running Llama Stack server with Podman..."
        podman run -d \
            --name $CONTAINER_NAME \
            -p $PORT:$PORT \
            -v "$HOME/.llama:/root/.llama:Z" \
            llamastack/distribution-ollama \
            --port $PORT \
            --env INFERENCE_MODEL=$INFERENCE_MODEL \
            --env OLLAMA_URL=http://$HOST_NAME:11434
    fi

    if [ $? -eq 0 ]; then
        echo -e "${GREEN}✓${NC} Llama Stack server started successfully"

        echo -e "\n${BOLD}Setup Complete!${NC}"
        echo -e "\n${BOLD}Access Information:${NC}"
        echo -e " • API URL: ${GREEN}http://localhost:$PORT${NC}"
        echo -e " • Inference Model: ${GREEN}$INFERENCE_MODEL${NC}"
        echo -e " • Safety Model: ${GREEN}$SAFETY_MODEL${NC}"
        echo -e " • Ollama URL: ${GREEN}$OLLAMA_URL${NC}"

        echo -e "\n${BOLD}Management Commands:${NC}"
        echo -e " • Stop server: ${YELLOW}docker stop llama-stack-meta${NC}"
        echo -e " • Start server: ${YELLOW}docker start llama-stack-meta${NC}"
        echo -e " • View logs: ${YELLOW}docker logs llama-stack-meta${NC}"
        echo -e " • Stop Llama Stack: ${YELLOW}${CONTAINER_ENGINE} stop $CONTAINER_NAME${NC}"
        echo -e " • Start Llama Stack: ${YELLOW}${CONTAINER_ENGINE} start $CONTAINER_NAME${NC}"
        echo -e " • View Logs: ${YELLOW}${CONTAINER_ENGINE} logs $CONTAINER_NAME${NC}"
        echo -e " • Stop Ollama: ${YELLOW}pkill ollama${NC}"

        echo -e "\n${BOLD}Using Llama Stack Client:${NC}"
        echo -e "1. Activate the virtual environment: ${YELLOW}source $VENV_DIR/bin/activate${NC}"
        echo -e "2. Set the server URL: ${YELLOW}export LLAMA_STACK_BASE_URL=http://localhost:$PORT${NC}"
        echo -e "3. Run client commands: ${YELLOW}llama-stack-client --help${NC}"

        # Run a test inference
        run_test_inference
    else
        echo -e "${RED}Failed to start the container. Please check Docker logs.${NC}"
        echo -e "${RED}Error: Failed to start Llama Stack server.${NC}"
        exit 1
    fi
}
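
Because `docker run -d` returns as soon as the container is created, the success check above does not guarantee the server is ready. A minimal engine-agnostic readiness probe, sketched here assuming bash's built-in /dev/tcp support (no endpoint path is assumed):

    # Sketch: wait for the Llama Stack port to accept TCP connections.
    for i in $(seq 1 30); do
        if (exec 3<>"/dev/tcp/localhost/$PORT") 2>/dev/null; then
            echo "Port $PORT is accepting connections"
            break
        fi
        sleep 2
    done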

# Main installation flow
main() {
    print_banner
    check_prerequisites
    install_ollama
    start_ollama
    pull_models
    setup_llama_stack_cli
    download_models
    run_docker_container
    run_llama_stack
}

# Run main function