llama-stack-mirror/install.sh

#!/usr/bin/env bash
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
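# Strict mode: exit on error or unset variable, propagate pipeline failures,
# and inherit the ERR trap in functions and subshells (-E).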
set -Eeuo pipefail
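# Tunables: host ports, model tag, server image, and readiness timeout (seconds).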
PORT=8321
OLLAMA_PORT=11434
MODEL_ALIAS="llama3.2:1b"
SERVER_IMAGE="llamastack/distribution-ollama:0.2.2"
WAIT_TIMEOUT=300
log(){ printf "\e[1;32m%s\e[0m\n" "$*"; }
die(){ printf "\e[1;31m❌ %s\e[0m\n" "$*" >&2; exit 1; }
command -v docker >/dev/null || die "Docker is required but not found."
# clean up any leftovers from earlier runs
for name in ollama-server llama-stack; do
  docker ps -aq --filter "name=^${name}$" | xargs -r docker rm -f
done
###############################################################################
# 1. Ollama
###############################################################################
log "🦙 Starting Ollama…"
docker run -d --name ollama-server -p "$OLLAMA_PORT:11434" \
  -v ollama-models:/root/.ollama \
  ollama/ollama >/dev/null
log "⏳ Waiting for Ollama daemon…"
timeout "$WAIT_TIMEOUT" bash -c \
"until curl -fsS http://localhost:${OLLAMA_PORT}/ 2>/dev/null | grep -q 'Ollama'; do sleep 1; done" \
|| die "Ollama did not become ready in ${WAIT_TIMEOUT}s"
if ! docker exec ollama-server ollama list | grep -q "$MODEL_ALIAS"; then
log "📦 Pulling model $MODEL_ALIAS"
docker exec ollama-server ollama pull "$MODEL_ALIAS"
fi
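# Warm-start the model in the background; --keepalive 60m keeps it resident
# in memory so the first real request does not pay the model-load cost.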
log "🚀 Launching model runtime…"
docker exec -d ollama-server ollama run "$MODEL_ALIAS" --keepalive 60m
###############################################################################
# 2. LlamaStack
###############################################################################
log "🦙📦 Starting LlamaStack…"
docker run -d --name llama-stack \
-p "$PORT:$PORT" \
--add-host=host.docker.internal:host-gateway \
"$SERVER_IMAGE" \
--port "$PORT" \
--env INFERENCE_MODEL="$MODEL_ALIAS" \
--env OLLAMA_URL="http://host.docker.internal:${OLLAMA_PORT}" >/dev/null
log "⏳ Waiting for LlamaStack API…"
timeout "$WAIT_TIMEOUT" bash -c \
"until curl -fsS http://localhost:${PORT}/v1/health 2>/dev/null | grep -q 'OK'; do sleep 1; done" \
|| die "LlamaStack did not become ready in ${WAIT_TIMEOUT}s"
###############################################################################
# Done
###############################################################################
log ""
log "🎉 LlamaStack is ready!"
log "👉 API endpoint: http://localhost:${PORT}"