#!/usr/bin/env bash
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
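# Strict mode: -E propagates the ERR trap into functions and subshells, -e
# aborts on any error, -u treats unset variables as errors, and pipefail
# fails a pipeline if any stage fails.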
set -Eeuo pipefail
PORT=8321
OLLAMA_PORT=11434
MODEL_ALIAS="llama3.2:3b"
SERVER_IMAGE="llamastack/distribution-ollama:0.2.2"
WAIT_TIMEOUT=300
NETWORK="llama-stack-net"
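# Colored output helpers: log prints green status lines, die prints a red
# error and exits.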
log(){ printf "\e[1;32m%s\e[0m\n" "$*"; }
die(){ printf "\e[1;31m❌ %s\e[0m\n" "$*" >&2; exit 1; }
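# Prefer Docker if it is installed; otherwise fall back to Podman.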
if command -v docker &> /dev/null; then
  ENGINE="docker"
elif command -v podman &> /dev/null; then
  ENGINE="podman"
else
  die "Docker or Podman is required. Install Docker: https://docs.docker.com/get-docker/ or Podman: https://podman.io/getting-started/installation"
fi
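# A shared network lets the two containers reach each other by container name.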
if ! $ENGINE network ls --filter "name=^${NETWORK}$" --format '{{.Name}}' | grep -q "^${NETWORK}$"; then
  log "Creating ${ENGINE} network: ${NETWORK}"
  $ENGINE network create "${NETWORK}"
fi
# Clean up any leftovers from earlier runs
for name in ollama-server llama-stack; do
  $ENGINE ps -aq --filter "name=^${name}$" | xargs -r $ENGINE rm -f
done
###############################################################################
# 1. Ollama
###############################################################################
log "🦙 Starting Ollama…"
$ENGINE run -d --name ollama-server \
  --network "${NETWORK}" \
  -p "${OLLAMA_PORT}:11434" \
  -v ollama-models:/root/.ollama \
  ollama/ollama >/dev/null
log "⏳ Waiting for Ollama daemon…"
timeout "$WAIT_TIMEOUT" bash -c \
"until curl -fsS http://localhost:${OLLAMA_PORT}/ 2>/dev/null | grep -q 'Ollama'; do sleep 1; done" \
|| die "Ollama did not become ready in ${WAIT_TIMEOUT}s"
if ! $ENGINE exec ollama-server ollama list | grep -q "$MODEL_ALIAS"; then
  log "📦 Pulling model $MODEL_ALIAS"
  $ENGINE exec ollama-server ollama pull "$MODEL_ALIAS"
fi
log "🚀 Launching model runtime…"
$ENGINE exec -d ollama-server ollama run "$MODEL_ALIAS" --keepalive 60m
###############################################################################
# 2. LlamaStack
###############################################################################
log "🦙📦 Starting LlamaStack…"
$ENGINE run -d --name llama-stack \
  --network "${NETWORK}" \
  -p "${PORT}:${PORT}" \
  "${SERVER_IMAGE}" \
  --port "${PORT}" \
  --env INFERENCE_MODEL="${MODEL_ALIAS}" \
  --env OLLAMA_URL="http://ollama-server:${OLLAMA_PORT}" >/dev/null
log "⏳ Waiting for LlamaStack API…"
timeout "$WAIT_TIMEOUT" bash -c \
"until curl -fsS http://localhost:${PORT}/v1/health 2>/dev/null | grep -q 'OK'; do sleep 1; done" \
|| die "LlamaStack did not become ready in ${WAIT_TIMEOUT}s"
###############################################################################
# Done
###############################################################################
log ""
log "🎉 LlamaStack is ready!"
log "👉 API endpoint: http://localhost:${PORT}"
# Note: if you're calling from another container on the "${NETWORK}" network,
# you can use the internal DNS name http://llama-stack:${PORT}
log ""