mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-12-04 10:10:36 +00:00
chore: add telemetry setup to install.sh
# What does this PR do? ## Test Plan
This commit is contained in:
parent
d709eeb33f
commit
6e83f07d12
1 changed file with 130 additions and 5 deletions
|
|
@@ -5,25 +5,37 @@
|
||||||
# This source code is licensed under the terms described in the LICENSE file in
|
# This source code is licensed under the terms described in the LICENSE file in
|
||||||
# the root directory of this source tree.
|
# the root directory of this source tree.
|
||||||
|
|
||||||
[ -z "$BASH_VERSION" ] && {
|
[ -z "${BASH_VERSION:-}" ] && exec /usr/bin/env bash "$0" "$@"
|
||||||
echo "This script must be run with bash" >&2
|
if set -o | grep -Eq 'posix[[:space:]]+on'; then
|
||||||
exit 1
|
exec /usr/bin/env bash "$0" "$@"
|
||||||
}
|
fi
|
||||||
|
|
||||||
set -Eeuo pipefail
|
set -Eeuo pipefail
|
||||||
|
|
||||||
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||||
|
TELEMETRY_DIR="${SCRIPT_DIR}/telemetry"
|
||||||
|
TELEMETRY_REMOTE_BASE_DEFAULT="https://raw.githubusercontent.com/meta-llama/llama-stack/main/scripts/telemetry"
|
||||||
|
|
||||||
PORT=8321
|
PORT=8321
|
||||||
OLLAMA_PORT=11434
|
OLLAMA_PORT=11434
|
||||||
MODEL_ALIAS="llama3.2:3b"
|
MODEL_ALIAS="llama3.2:3b"
|
||||||
SERVER_IMAGE="docker.io/llamastack/distribution-starter:latest"
|
SERVER_IMAGE="docker.io/llamastack/distribution-starter:latest"
|
||||||
WAIT_TIMEOUT=30
|
WAIT_TIMEOUT=30
|
||||||
TEMP_LOG=""
|
TEMP_LOG=""
|
||||||
|
WITH_TELEMETRY=true
|
||||||
|
TELEMETRY_SERVICE_NAME="llama-stack"
|
||||||
|
TELEMETRY_SINKS="otel_trace,otel_metric"
|
||||||
|
OTEL_EXPORTER_OTLP_ENDPOINT="http://otel-collector:4318"
|
||||||
|
TEMP_TELEMETRY_DIR=""
|
||||||
|
|
||||||
# Cleanup function to remove temporary files
|
# Cleanup function to remove temporary files
|
||||||
cleanup() {
|
cleanup() {
|
||||||
if [ -n "$TEMP_LOG" ] && [ -f "$TEMP_LOG" ]; then
|
if [ -n "$TEMP_LOG" ] && [ -f "$TEMP_LOG" ]; then
|
||||||
rm -f "$TEMP_LOG"
|
rm -f "$TEMP_LOG"
|
||||||
fi
|
fi
|
||||||
|
if [ -n "$TEMP_TELEMETRY_DIR" ] && [ -d "$TEMP_TELEMETRY_DIR" ]; then
|
||||||
|
rm -rf "$TEMP_TELEMETRY_DIR"
|
||||||
|
fi
|
||||||
}
|
}
|
||||||
|
|
||||||
# Set up trap to clean up on exit, error, or interrupt
|
# Set up trap to clean up on exit, error, or interrupt
|
||||||
|
|
@@ -89,6 +101,12 @@ Options:
|
||||||
-m, --model MODEL Model alias to use (default: ${MODEL_ALIAS})
|
-m, --model MODEL Model alias to use (default: ${MODEL_ALIAS})
|
||||||
-i, --image IMAGE Server image (default: ${SERVER_IMAGE})
|
-i, --image IMAGE Server image (default: ${SERVER_IMAGE})
|
||||||
-t, --timeout SECONDS Service wait timeout in seconds (default: ${WAIT_TIMEOUT})
|
-t, --timeout SECONDS Service wait timeout in seconds (default: ${WAIT_TIMEOUT})
|
||||||
|
--with-telemetry Provision Jaeger, OTEL Collector, Prometheus, and Grafana (default: enabled)
|
||||||
|
--no-telemetry, --without-telemetry
|
||||||
|
Skip provisioning the telemetry stack
|
||||||
|
--telemetry-service NAME Service name reported to telemetry (default: ${TELEMETRY_SERVICE_NAME})
|
||||||
|
--telemetry-sinks SINKS Comma-separated telemetry sinks (default: ${TELEMETRY_SINKS})
|
||||||
|
--otel-endpoint URL OTLP endpoint provided to Llama Stack (default: ${OTEL_EXPORTER_OTLP_ENDPOINT})
|
||||||
-h, --help Show this help message
|
-h, --help Show this help message
|
||||||
|
|
||||||
For more information:
|
For more information:
|
||||||
|
|
@@ -127,6 +145,26 @@ while [[ $# -gt 0 ]]; do
|
||||||
WAIT_TIMEOUT="$2"
|
WAIT_TIMEOUT="$2"
|
||||||
shift 2
|
shift 2
|
||||||
;;
|
;;
|
||||||
|
--with-telemetry)
|
||||||
|
WITH_TELEMETRY=true
|
||||||
|
shift
|
||||||
|
;;
|
||||||
|
--no-telemetry|--without-telemetry)
|
||||||
|
WITH_TELEMETRY=false
|
||||||
|
shift
|
||||||
|
;;
|
||||||
|
--telemetry-service)
|
||||||
|
TELEMETRY_SERVICE_NAME="$2"
|
||||||
|
shift 2
|
||||||
|
;;
|
||||||
|
--telemetry-sinks)
|
||||||
|
TELEMETRY_SINKS="$2"
|
||||||
|
shift 2
|
||||||
|
;;
|
||||||
|
--otel-endpoint)
|
||||||
|
OTEL_EXPORTER_OTLP_ENDPOINT="$2"
|
||||||
|
shift 2
|
||||||
|
;;
|
||||||
*)
|
*)
|
||||||
die "Unknown option: $1"
|
die "Unknown option: $1"
|
||||||
;;
|
;;
|
||||||
|
|
@@ -171,7 +209,11 @@ if [ "$ENGINE" = "podman" ] && [ "$(uname -s)" = "Darwin" ]; then
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Clean up any leftovers from earlier runs
|
# Clean up any leftovers from earlier runs
|
||||||
for name in ollama-server llama-stack; do
|
containers=(ollama-server llama-stack)
|
||||||
|
if [ "$WITH_TELEMETRY" = true ]; then
|
||||||
|
containers+=(jaeger otel-collector prometheus grafana)
|
||||||
|
fi
|
||||||
|
for name in "${containers[@]}"; do
|
||||||
ids=$($ENGINE ps -aq --filter "name=^${name}$")
|
ids=$($ENGINE ps -aq --filter "name=^${name}$")
|
||||||
if [ -n "$ids" ]; then
|
if [ -n "$ids" ]; then
|
||||||
log "⚠️ Found existing container(s) for '${name}', removing..."
|
log "⚠️ Found existing container(s) for '${name}', removing..."
|
||||||
|
|
@@ -191,6 +233,72 @@ if ! $ENGINE network inspect llama-net >/dev/null 2>&1; then
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
###############################################################################
|
||||||
|
# Telemetry Stack
|
||||||
|
###############################################################################
|
||||||
|
if [ "$WITH_TELEMETRY" = true ]; then
|
||||||
|
TELEMETRY_ASSETS_DIR="$TELEMETRY_DIR"
|
||||||
|
if [ ! -d "$TELEMETRY_ASSETS_DIR" ]; then
|
||||||
|
TELEMETRY_REMOTE_BASE="${TELEMETRY_REMOTE_BASE:-$TELEMETRY_REMOTE_BASE_DEFAULT}"
|
||||||
|
TEMP_TELEMETRY_DIR="$(mktemp -d)"
|
||||||
|
TELEMETRY_ASSETS_DIR="$TEMP_TELEMETRY_DIR"
|
||||||
|
log "📥 Fetching telemetry assets from ${TELEMETRY_REMOTE_BASE}..."
|
||||||
|
for asset in otel-collector-config.yaml prometheus.yml grafana-datasources.yaml; do
|
||||||
|
if ! curl -fsSL "${TELEMETRY_REMOTE_BASE}/${asset}" -o "${TELEMETRY_ASSETS_DIR}/${asset}"; then
|
||||||
|
die "Failed to download telemetry asset: ${asset}"
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
fi
|
||||||
|
|
||||||
|
log "📡 Starting telemetry stack..."
|
||||||
|
|
||||||
|
if ! execute_with_log $ENGINE run -d "${PLATFORM_OPTS[@]}" --name jaeger \
|
||||||
|
--network llama-net \
|
||||||
|
-e COLLECTOR_ZIPKIN_HOST_PORT=:9411 \
|
||||||
|
-p 16686:16686 \
|
||||||
|
-p 14250:14250 \
|
||||||
|
-p 9411:9411 \
|
||||||
|
docker.io/jaegertracing/all-in-one:latest > /dev/null 2>&1; then
|
||||||
|
die "Jaeger startup failed"
|
||||||
|
fi
|
||||||
|
|
||||||
|
if ! execute_with_log $ENGINE run -d "${PLATFORM_OPTS[@]}" --name otel-collector \
|
||||||
|
--network llama-net \
|
||||||
|
-p 4318:4318 \
|
||||||
|
-p 4317:4317 \
|
||||||
|
-p 9464:9464 \
|
||||||
|
-p 13133:13133 \
|
||||||
|
-v "${TELEMETRY_ASSETS_DIR}/otel-collector-config.yaml:/etc/otel-collector-config.yaml:Z" \
|
||||||
|
docker.io/otel/opentelemetry-collector-contrib:latest \
|
||||||
|
--config /etc/otel-collector-config.yaml > /dev/null 2>&1; then
|
||||||
|
die "OpenTelemetry Collector startup failed"
|
||||||
|
fi
|
||||||
|
|
||||||
|
if ! execute_with_log $ENGINE run -d "${PLATFORM_OPTS[@]}" --name prometheus \
|
||||||
|
--network llama-net \
|
||||||
|
-p 9090:9090 \
|
||||||
|
-v "${TELEMETRY_ASSETS_DIR}/prometheus.yml:/etc/prometheus/prometheus.yml:Z" \
|
||||||
|
docker.io/prom/prometheus:latest \
|
||||||
|
--config.file=/etc/prometheus/prometheus.yml \
|
||||||
|
--storage.tsdb.path=/prometheus \
|
||||||
|
--web.console.libraries=/etc/prometheus/console_libraries \
|
||||||
|
--web.console.templates=/etc/prometheus/consoles \
|
||||||
|
--storage.tsdb.retention.time=200h \
|
||||||
|
--web.enable-lifecycle > /dev/null 2>&1; then
|
||||||
|
die "Prometheus startup failed"
|
||||||
|
fi
|
||||||
|
|
||||||
|
if ! execute_with_log $ENGINE run -d "${PLATFORM_OPTS[@]}" --name grafana \
|
||||||
|
--network llama-net \
|
||||||
|
-p 3000:3000 \
|
||||||
|
-e GF_SECURITY_ADMIN_PASSWORD=admin \
|
||||||
|
-e GF_USERS_ALLOW_SIGN_UP=false \
|
||||||
|
-v "${TELEMETRY_ASSETS_DIR}/grafana-datasources.yaml:/etc/grafana/provisioning/datasources/datasources.yaml:Z" \
|
||||||
|
docker.io/grafana/grafana:11.0.0 > /dev/null 2>&1; then
|
||||||
|
die "Grafana startup failed"
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
###############################################################################
|
###############################################################################
|
||||||
# 1. Ollama
|
# 1. Ollama
|
||||||
###############################################################################
|
###############################################################################
|
||||||
|
|
@@ -218,9 +326,19 @@ fi
|
||||||
###############################################################################
|
###############################################################################
|
||||||
# 2. Llama‑Stack
|
# 2. Llama‑Stack
|
||||||
###############################################################################
|
###############################################################################
|
||||||
|
server_env_opts=()
|
||||||
|
if [ "$WITH_TELEMETRY" = true ]; then
|
||||||
|
server_env_opts+=(
|
||||||
|
-e TELEMETRY_SINKS="${TELEMETRY_SINKS}"
|
||||||
|
-e OTEL_EXPORTER_OTLP_ENDPOINT="${OTEL_EXPORTER_OTLP_ENDPOINT}"
|
||||||
|
-e OTEL_SERVICE_NAME="${TELEMETRY_SERVICE_NAME}"
|
||||||
|
)
|
||||||
|
fi
|
||||||
|
|
||||||
cmd=( run -d "${PLATFORM_OPTS[@]}" --name llama-stack \
|
cmd=( run -d "${PLATFORM_OPTS[@]}" --name llama-stack \
|
||||||
--network llama-net \
|
--network llama-net \
|
||||||
-p "${PORT}:${PORT}" \
|
-p "${PORT}:${PORT}" \
|
||||||
|
"${server_env_opts[@]}" \
|
||||||
-e OLLAMA_URL="http://ollama-server:${OLLAMA_PORT}" \
|
-e OLLAMA_URL="http://ollama-server:${OLLAMA_PORT}" \
|
||||||
"${SERVER_IMAGE}" --port "${PORT}")
|
"${SERVER_IMAGE}" --port "${PORT}")
|
||||||
|
|
||||||
|
|
@@ -244,5 +362,12 @@ log "👉 API endpoint: http://localhost:${PORT}"
|
||||||
log "📖 Documentation: https://llamastack.github.io/latest/references/api_reference/index.html"
|
log "📖 Documentation: https://llamastack.github.io/latest/references/api_reference/index.html"
|
||||||
log "💻 To access the llama stack CLI, exec into the container:"
|
log "💻 To access the llama stack CLI, exec into the container:"
|
||||||
log " $ENGINE exec -ti llama-stack bash"
|
log " $ENGINE exec -ti llama-stack bash"
|
||||||
|
if [ "$WITH_TELEMETRY" = true ]; then
|
||||||
|
log "📡 Telemetry dashboards:"
|
||||||
|
log " Jaeger UI: http://localhost:16686"
|
||||||
|
log " Prometheus UI: http://localhost:9090"
|
||||||
|
log " Grafana UI: http://localhost:3000 (admin/admin)"
|
||||||
|
log " OTEL Collector: http://localhost:4318"
|
||||||
|
fi
|
||||||
log "🐛 Report an issue @ https://github.com/llamastack/llama-stack/issues if you think it's a bug"
|
log "🐛 Report an issue @ https://github.com/llamastack/llama-stack/issues if you think it's a bug"
|
||||||
log ""
|
log ""
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue