feat(ci): add support for docker:distro in tests (#3832)
Some checks failed
Integration Auth Tests / test-matrix (oauth2_token) (push) Failing after 1s
SqlStore Integration Tests / test-postgres (3.13) (push) Failing after 0s
SqlStore Integration Tests / test-postgres (3.12) (push) Failing after 0s
Test External Providers Installed via Module / test-external-providers-from-module (venv) (push) Has been skipped
Python Package Build Test / build (3.13) (push) Failing after 2s
Test Llama Stack Build / generate-matrix (push) Successful in 6s
Unit Tests / unit-tests (3.12) (push) Failing after 5s
Test Llama Stack Build / build-single-provider (push) Failing after 9s
Test Llama Stack Build / build-ubi9-container-distribution (push) Failing after 10s
Vector IO Integration Tests / test-matrix (push) Failing after 14s
Unit Tests / unit-tests (3.13) (push) Failing after 7s
Test External API and Providers / test-external (venv) (push) Failing after 12s
API Conformance Tests / check-schema-compatibility (push) Successful in 19s
Test Llama Stack Build / build (push) Failing after 7s
Integration Tests (Replay) / Integration Tests (, , , client=, ) (push) Failing after 26s
Test Llama Stack Build / build-custom-container-distribution (push) Failing after 25s
Python Package Build Test / build (3.12) (push) Failing after 33s
UI Tests / ui-tests (22) (push) Successful in 1m26s
Pre-commit / pre-commit (push) Successful in 2m18s

Also includes a critical bug fix so that test recordings can be found inside docker containers.
This commit is contained in:
Ashwin Bharambe 2025-10-16 19:33:13 -07:00 committed by GitHub
parent b3099d40e2
commit cd152f4240
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 932 additions and 12 deletions

370
scripts/diagnose_recordings.py Executable file
View file

@@ -0,0 +1,370 @@
#!/usr/bin/env python3
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
"""
Diagnostic tool for debugging test recording issues.
Usage:
# Find where a hash would be looked up
./scripts/diagnose_recordings.py find-hash 7526c930eab04ce337496a26cd15f2591d7943035f2527182861643da9b837a7
# Show what's in a recording file
./scripts/diagnose_recordings.py show tests/integration/agents/recordings/7526c930....json
# List all recordings for a test
./scripts/diagnose_recordings.py list-test "tests/integration/agents/test_agents.py::test_custom_tool"
# Explain lookup paths for a test
./scripts/diagnose_recordings.py explain-paths --test-id "tests/integration/agents/test_agents.py::test_foo"
# Compare request hash computation
./scripts/diagnose_recordings.py compute-hash --endpoint /v1/chat/completions --method POST --body '{"model":"llama3.2:3b"}' --test-id "..."
"""
import argparse
import json
import sys
from pathlib import Path
# Add parent directory to path to import from llama_stack
REPO_ROOT = Path(__file__).parent.parent
sys.path.insert(0, str(REPO_ROOT))
try:
from llama_stack.testing.api_recorder import normalize_inference_request
except ImportError:
normalize_inference_request = None
def find_hash(hash_value: str, base_dir: Path | None = None, test_id: str | None = None):
"""Find where a hash would be looked up and what exists"""
if base_dir is None:
base_dir = REPO_ROOT / "tests/integration/common"
print(f"Searching for hash: {hash_value}\n")
print(f"Base dir: {base_dir} (absolute={base_dir.is_absolute()})")
# Compute test directory
if test_id:
test_file = test_id.split("::")[0]
test_dir = Path(test_file).parent
if base_dir.is_absolute():
repo_root = base_dir.parent.parent.parent
test_recordings_dir = repo_root / test_dir / "recordings"
else:
test_recordings_dir = test_dir / "recordings"
print(f"Test ID: {test_id}")
print(f"Test dir: {test_recordings_dir}\n")
else:
test_recordings_dir = base_dir / "recordings"
print("No test ID provided, using base dir\n")
# Check primary location
response_file = f"{hash_value}.json"
response_path = test_recordings_dir / response_file
print("Checking primary location:")
print(f" {response_path}")
if response_path.exists():
print(" EXISTS")
print("\nFound! Contents:")
show_recording(response_path)
return True
else:
print(" Does not exist")
# Check fallback location
fallback_dir = base_dir / "recordings"
fallback_path = fallback_dir / response_file
print("\nChecking fallback location:")
print(f" {fallback_path}")
if fallback_path.exists():
print(" EXISTS")
print("\nFound in fallback! Contents:")
show_recording(fallback_path)
return True
else:
print(" Does not exist")
# Show what files DO exist
print(f"\nFiles in test directory ({test_recordings_dir}):")
if test_recordings_dir.exists():
json_files = list(test_recordings_dir.glob("*.json"))
if json_files:
for f in json_files[:20]:
print(f" - {f.name}")
if len(json_files) > 20:
print(f" ... and {len(json_files) - 20} more")
else:
print(" (empty)")
else:
print(" Directory does not exist")
print(f"\nFiles in fallback directory ({fallback_dir}):")
if fallback_dir.exists():
json_files = list(fallback_dir.glob("*.json"))
if json_files:
for f in json_files[:20]:
print(f" - {f.name}")
if len(json_files) > 20:
print(f" ... and {len(json_files) - 20} more")
else:
print(" (empty)")
else:
print(" Directory does not exist")
# Try partial hash match
print("\nLooking for partial matches (first 16 chars)...")
partial = hash_value[:16]
matches = []
for dir_to_search in [test_recordings_dir, fallback_dir]:
if dir_to_search.exists():
for f in dir_to_search.glob("*.json"):
if f.stem.startswith(partial):
matches.append(f)
if matches:
print(f"Found {len(matches)} partial match(es):")
for m in matches:
print(f" {m}")
else:
print("No partial matches found")
return False
def show_recording(file_path: Path):
    """Pretty-print the request/response stored in one recording JSON file."""
    if not file_path.exists():
        print(f"File does not exist: {file_path}")
        return

    with open(file_path) as fh:
        record = json.load(fh)

    print(f"\nRecording: {file_path.name}\n")
    print(f"Test ID: {record.get('test_id', 'N/A')}")

    # Request summary: fixed set of top-level fields.
    request = record.get("request", {})
    print("\nRequest:")
    for label, key in (("Method", "method"), ("URL", "url"), ("Endpoint", "endpoint"), ("Model", "model")):
        print(f" {label}: {request.get(key, 'N/A')}")

    body = request.get("body", {})
    if body:
        print("\nRequest Body:")
        print(f" Model: {body.get('model', 'N/A')}")
        print(f" Stream: {body.get('stream', 'N/A')}")
        if "messages" in body:
            print(f" Messages: {len(body['messages'])} message(s)")
            # Show at most the first three messages, truncating long text.
            for idx, message in enumerate(body["messages"][:3]):
                role = message.get("role", "unknown")
                content = message.get("content", "")
                if isinstance(content, str):
                    snippet = content if len(content) <= 80 else content[:80] + "..."
                else:
                    snippet = f"[{type(content).__name__}]"
                print(f" [{idx}] {role}: {snippet}")
        if "tools" in body:
            print(f" Tools: {len(body['tools'])} tool(s)")

    response = record.get("response", {})
    if response:
        print("\nResponse:")
        print(f" Is streaming: {response.get('is_streaming', False)}")
        response_body = response.get("body", {})
        if isinstance(response_body, dict):
            # Recordings wrap serialized objects as {"__type__": ..., "__data__": ...}.
            if "__type__" in response_body:
                print(f" Type: {response_body['__type__']}")
            if "__data__" in response_body:
                payload = response_body["__data__"]
                if "choices" in payload:
                    print(f" Choices: {len(payload['choices'])}")
                if "usage" in payload:
                    usage = payload["usage"]
                    print(f" Usage: in={usage.get('input_tokens')}, out={usage.get('output_tokens')}")
def list_test_recordings(test_id: str, base_dir: Path | None = None):
"""List all recordings for a specific test"""
if base_dir is None:
base_dir = REPO_ROOT / "tests/integration/common"
test_file = test_id.split("::")[0]
test_dir = Path(test_file).parent
if base_dir.is_absolute():
repo_root = base_dir.parent.parent.parent
test_recordings_dir = repo_root / test_dir / "recordings"
else:
test_recordings_dir = test_dir / "recordings"
print(f"Recordings for test: {test_id}\n")
print(f"Directory: {test_recordings_dir}\n")
if not test_recordings_dir.exists():
print("Directory does not exist")
return
# Find all recordings for this specific test
recordings = []
for f in test_recordings_dir.glob("*.json"):
try:
with open(f) as fp:
data = json.load(fp)
if data.get("test_id") == test_id:
recordings.append((f, data))
except Exception as e:
print(f"Could not read {f.name}: {e}")
if not recordings:
print("No recordings found for this exact test ID")
print("\nAll files in directory:")
for f in test_recordings_dir.glob("*.json"):
print(f" - {f.name}")
return
print(f"Found {len(recordings)} recording(s):\n")
for f, data in recordings:
req = data.get("request", {})
print(f" {f.name}")
print(f" Endpoint: {req.get('endpoint', 'N/A')}")
print(f" Model: {req.get('model', 'N/A')}")
print("")
def explain_paths(test_id: str | None = None, base_dir: Path | None = None):
"""Explain where recordings would be searched"""
if base_dir is None:
base_dir = REPO_ROOT / "tests/integration/common"
print("Recording Lookup Path Explanation\n")
print(f"Base directory: {base_dir}")
print(f" Absolute: {base_dir.is_absolute()}")
print("")
if test_id:
print(f"Test ID: {test_id}")
test_file = test_id.split("::")[0]
print(f" Test file: {test_file}")
test_dir = Path(test_file).parent
print(f" Test dir (relative): {test_dir}")
if base_dir.is_absolute():
repo_root = base_dir.parent.parent.parent
print(f" Repo root: {repo_root}")
test_recordings_dir = repo_root / test_dir / "recordings"
print(f" Test recordings dir (absolute): {test_recordings_dir}")
else:
test_recordings_dir = test_dir / "recordings"
print(f" Test recordings dir (relative): {test_recordings_dir}")
print("\nLookup order for recordings:")
print(f" 1. Test-specific: {test_recordings_dir}/<hash>.json")
print(f" 2. Fallback: {base_dir}/recordings/<hash>.json")
else:
print("No test ID provided")
print("\nLookup location:")
print(f" {base_dir}/recordings/<hash>.json")
def compute_hash(endpoint: str, method: str, body_json: str, test_id: str | None = None):
    """Compute and display the recording hash the recorder would use for a request."""
    # normalize_inference_request is imported at module level inside a
    # try/except; it is None when llama_stack is not importable.
    if normalize_inference_request is None:
        print("Could not import normalize_inference_request from llama_stack.testing.api_recorder")
        print("Make sure you're running from the repo root with proper PYTHONPATH")
        return

    try:
        body = json.loads(body_json)
    except json.JSONDecodeError as err:
        print(f"Invalid JSON in body: {err}")
        return

    # The recorder hashes full URLs, so wrap the endpoint in a dummy host.
    url = f"http://example.com{endpoint}"

    # Hash computation is test-scoped for most endpoints, so install the
    # test context first when a test id was supplied.
    if test_id:
        from llama_stack.core.testing_context import set_test_context

        set_test_context(test_id)

    computed = normalize_inference_request(method, url, {}, body)
    print("Hash Computation\n")
    print(f"Method: {method}")
    print(f"Endpoint: {endpoint}")
    print(f"Test ID: {test_id or 'None (excluded from hash for model-list endpoints)'}")
    print("\nBody:")
    print(json.dumps(body, indent=2))
    print(f"\nComputed Hash: {computed}")
    print(f"\nLooking for file: {computed}.json")
def main():
    """CLI entry point: build the argument parser and dispatch the chosen command."""
    parser = argparse.ArgumentParser(
        description="Diagnostic tool for test recording issues",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=__doc__,
    )
    commands = parser.add_subparsers(dest="command", help="Command to run")

    # find-hash command
    sub = commands.add_parser("find-hash", help="Find where a hash would be looked up")
    sub.add_argument("hash", help="Hash value to search for (full or partial)")
    sub.add_argument("--test-id", help="Test ID to determine search paths")
    sub.add_argument("--base-dir", type=Path, help="Base directory (default: tests/integration/common)")

    # show command
    sub = commands.add_parser("show", help="Show contents of a recording file")
    sub.add_argument("file", type=Path, help="Path to recording JSON file")

    # list-test command
    sub = commands.add_parser("list-test", help="List all recordings for a test")
    sub.add_argument("test_id", help="Full test ID (e.g., tests/integration/agents/test_agents.py::test_foo)")
    sub.add_argument("--base-dir", type=Path, help="Base directory (default: tests/integration/common)")

    # explain-paths command
    sub = commands.add_parser("explain-paths", help="Explain where recordings are searched")
    sub.add_argument("--test-id", help="Test ID to show paths for")
    sub.add_argument("--base-dir", type=Path, help="Base directory (default: tests/integration/common)")

    # compute-hash command
    sub = commands.add_parser("compute-hash", help="Compute hash for a request")
    sub.add_argument("--endpoint", required=True, help="Endpoint path (e.g., /v1/chat/completions)")
    sub.add_argument("--method", default="POST", help="HTTP method (default: POST)")
    sub.add_argument("--body", required=True, help="Request body as JSON string")
    sub.add_argument("--test-id", help="Test ID (affects hash for non-model-list endpoints)")

    args = parser.parse_args()
    if not args.command:
        parser.print_help()
        return

    # Dispatch table; lambdas defer name resolution until a command is chosen.
    handlers = {
        "find-hash": lambda: find_hash(args.hash, args.base_dir, args.test_id),
        "show": lambda: show_recording(args.file),
        "list-test": lambda: list_test_recordings(args.test_id, args.base_dir),
        "explain-paths": lambda: explain_paths(args.test_id, args.base_dir),
        "compute-hash": lambda: compute_hash(args.endpoint, args.method, args.body, args.test_id),
    }
    handlers[args.command]()
# Script entry point: run the CLI when executed directly (not on import).
if __name__ == "__main__":
    main()

358
scripts/docker.sh Executable file
View file

@@ -0,0 +1,358 @@
#!/bin/bash
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
# Abort on errors, unset variables, and failures anywhere in a pipeline.
set -euo pipefail
# Docker container management script for Llama Stack
# Allows starting/stopping/restarting a Llama Stack docker container for testing
# Default values
DISTRO=""                # required for start/restart; inferred from running containers for stop/status/logs
PORT=8321
INFERENCE_MODE="replay"  # forwarded to the container as LLAMA_STACK_TEST_INFERENCE_MODE
COMMAND=""
USE_COPY_NOT_MOUNT=false # true: bake source into the image (CI); false: bind-mount for live dev
NO_REBUILD=false         # true: reuse an existing image instead of rebuilding
# Function to display usage
usage() {
    # Heredoc body is user-visible help output; $0 expands to the script path.
    cat <<EOF
Usage: $0 COMMAND [OPTIONS]
Commands:
start Build and start the docker container
stop Stop and remove the docker container
restart Restart the docker container
status Check if the container is running
logs Show container logs (add -f to follow)
Options:
--distro STRING Distribution name (e.g., 'ci-tests', 'starter') (required for start/restart)
--port NUMBER Port to run on (default: 8321)
--inference-mode STRING Inference mode: replay, record-if-missing or record (default: replay)
--copy-source Copy source into image instead of mounting (default: auto-detect CI, otherwise mount)
--no-rebuild Skip building the image, just start the container (default: false)
--help Show this help message
Examples:
# Start a docker container (local dev mode - mounts source, builds image)
$0 start --distro ci-tests
# Start without rebuilding (uses existing image)
$0 start --distro ci-tests --no-rebuild
# Start with source copied into image (like CI)
$0 start --distro ci-tests --copy-source
# Start with custom port
$0 start --distro starter --port 8080
# Check status
$0 status --distro ci-tests
# View logs
$0 logs --distro ci-tests
# Stop container
$0 stop --distro ci-tests
# Restart container
$0 restart --distro ci-tests
Note: In CI environments (detected via CI or GITHUB_ACTIONS env vars), source is
automatically copied into the image. Locally, source is mounted for live development
unless --copy-source is specified.
EOF
}
# Parse command (first positional arg)
if [[ $# -eq 0 ]]; then
    echo "Error: Command required"
    usage
    exit 1
fi
COMMAND="$1"
shift
# Validate command
case "$COMMAND" in
    start | stop | restart | status | logs) ;;
    # --help is accepted in the command position as a convenience.
    --help)
        usage
        exit 0
        ;;
    *)
        echo "Error: Unknown command: $COMMAND"
        usage
        exit 1
        ;;
esac
# Parse options
# Value-taking options consume two positional args (shift 2); flags consume one.
while [[ $# -gt 0 ]]; do
    case $1 in
        --distro)
            DISTRO="$2"
            shift 2
            ;;
        --port)
            PORT="$2"
            shift 2
            ;;
        --inference-mode)
            INFERENCE_MODE="$2"
            shift 2
            ;;
        --copy-source)
            USE_COPY_NOT_MOUNT=true
            shift
            ;;
        --no-rebuild)
            NO_REBUILD=true
            shift
            ;;
        --help)
            usage
            exit 0
            ;;
        *)
            echo "Unknown option: $1"
            usage
            exit 1
            ;;
    esac
done
# Validate required parameters for commands that need them
if [[ "$COMMAND" != "stop" && "$COMMAND" != "status" && "$COMMAND" != "logs" ]]; then
    if [[ -z "$DISTRO" ]]; then
        echo "Error: --distro is required for '$COMMAND' command"
        usage
        exit 1
    fi
fi
# If distro not provided for stop/status/logs, try to infer from running containers
if [[ -z "$DISTRO" && ("$COMMAND" == "stop" || "$COMMAND" == "status" || "$COMMAND" == "logs") ]]; then
    # Look for any llama-stack-test-* container
    # NOTE(review): `docker ps -a` also matches stopped containers, so the
    # inferred distro may belong to a container that is not currently running.
    RUNNING_CONTAINERS=$(docker ps -a --filter "name=llama-stack-test-" --format "{{.Names}}" | head -1)
    if [[ -n "$RUNNING_CONTAINERS" ]]; then
        DISTRO=$(echo "$RUNNING_CONTAINERS" | sed 's/llama-stack-test-//')
        echo "Found running container for distro: $DISTRO"
    else
        echo "Error: --distro is required (no running containers found)"
        usage
        exit 1
    fi
fi
# Remove docker: prefix if present
# Allows passing the same `docker:distro` value used by the test runner.
DISTRO=$(echo "$DISTRO" | sed 's/^docker://')
CONTAINER_NAME="llama-stack-test-$DISTRO"
# Function to check if container is running
# The ^...$ anchors force an exact name match rather than a substring match.
is_container_running() {
    docker ps --filter "name=^${CONTAINER_NAME}$" --format '{{.Names}}' | grep -q "^${CONTAINER_NAME}$"
}
# Function to check if container exists (running or stopped)
container_exists() {
    docker ps -a --filter "name=^${CONTAINER_NAME}$" --format '{{.Names}}' | grep -q "^${CONTAINER_NAME}$"
}
# Function to stop and remove container
stop_container() {
    if container_exists; then
        echo "Stopping container: $CONTAINER_NAME"
        # `|| true` keeps cleanup best-effort under `set -e`.
        docker stop "$CONTAINER_NAME" 2>/dev/null || true
        echo "Removing container: $CONTAINER_NAME"
        docker rm "$CONTAINER_NAME" 2>/dev/null || true
        echo "✅ Container stopped and removed"
    else
        echo "⚠️ Container $CONTAINER_NAME does not exist"
    fi
}
# Function to build docker image
build_image() {
    echo "=== Building Docker Image for distribution: $DISTRO ==="
    # Get the repo root (parent of scripts directory)
    SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
    REPO_ROOT=$(cd "$SCRIPT_DIR/.." && pwd)
    # Determine whether to copy or mount source
    # Copy in CI or if explicitly requested, otherwise mount for live development
    BUILD_ENV="LLAMA_STACK_DIR=$REPO_ROOT"
    if [[ "$USE_COPY_NOT_MOUNT" == "true" ]] || [[ "${CI:-false}" == "true" ]] || [[ "${GITHUB_ACTIONS:-false}" == "true" ]]; then
        echo "Copying source into image (USE_COPY_NOT_MOUNT=true, CI=${CI:-false}, GITHUB_ACTIONS=${GITHUB_ACTIONS:-false})"
        BUILD_ENV="USE_COPY_NOT_MOUNT=true $BUILD_ENV"
    else
        echo "Will mount source for live development"
    fi
    # eval lets the env assignments in BUILD_ENV prefix the build command.
    if ! eval "$BUILD_ENV llama stack build --distro '$DISTRO' --image-type container"; then
        echo "❌ Failed to build Docker image"
        exit 1
    fi
    echo "✅ Docker image built successfully"
}
# Function to start container
start_container() {
    # Check if already running
    if is_container_running; then
        echo "⚠️ Container $CONTAINER_NAME is already running"
        echo "URL: http://localhost:$PORT"
        exit 0
    fi
    # Stop and remove if exists but not running
    if container_exists; then
        echo "Removing existing stopped container..."
        docker rm "$CONTAINER_NAME" 2>/dev/null || true
    fi
    # Build the image unless --no-rebuild was specified
    if [[ "$NO_REBUILD" == "true" ]]; then
        echo "Skipping build (--no-rebuild specified)"
        # Check if image exists (with or without localhost/ prefix)
        if ! docker images --format "{{.Repository}}:{{.Tag}}" | grep -q "distribution-$DISTRO:dev$"; then
            echo "❌ Error: Image distribution-$DISTRO:dev does not exist"
            echo "Either build it first without --no-rebuild, or run: llama stack build --distro $DISTRO --image-type container"
            exit 1
        fi
        echo "✅ Found existing image for distribution-$DISTRO:dev"
    else
        build_image
    fi
    echo ""
    echo "=== Starting Docker Container ==="
    # Get the repo root for volume mount
    SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
    REPO_ROOT=$(cd "$SCRIPT_DIR/.." && pwd)
    # Determine the actual image name (may have localhost/ prefix)
    IMAGE_NAME=$(docker images --format "{{.Repository}}:{{.Tag}}" | grep "distribution-$DISTRO:dev$" | head -1)
    if [[ -z "$IMAGE_NAME" ]]; then
        echo "❌ Error: Could not find image for distribution-$DISTRO:dev"
        exit 1
    fi
    echo "Using image: $IMAGE_NAME"
    # Build environment variables for docker run
    DOCKER_ENV_VARS=""
    DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e LLAMA_STACK_TEST_INFERENCE_MODE=$INFERENCE_MODE"
    DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e LLAMA_STACK_TEST_STACK_CONFIG_TYPE=server"
    # Set default OLLAMA_URL if not provided
    # On macOS/Windows, use host.docker.internal to reach host from container
    # On Linux with --network host, use localhost
    if [[ "$(uname)" == "Darwin" ]] || [[ "$(uname)" == *"MINGW"* ]]; then
        OLLAMA_URL="${OLLAMA_URL:-http://host.docker.internal:11434}"
    else
        OLLAMA_URL="${OLLAMA_URL:-http://localhost:11434}"
    fi
    DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e OLLAMA_URL=$OLLAMA_URL"
    # Pass through API keys if they exist
    [ -n "${TOGETHER_API_KEY:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e TOGETHER_API_KEY=$TOGETHER_API_KEY"
    [ -n "${FIREWORKS_API_KEY:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e FIREWORKS_API_KEY=$FIREWORKS_API_KEY"
    [ -n "${TAVILY_SEARCH_API_KEY:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e TAVILY_SEARCH_API_KEY=$TAVILY_SEARCH_API_KEY"
    [ -n "${OPENAI_API_KEY:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e OPENAI_API_KEY=$OPENAI_API_KEY"
    [ -n "${ANTHROPIC_API_KEY:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e ANTHROPIC_API_KEY=$ANTHROPIC_API_KEY"
    [ -n "${GROQ_API_KEY:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e GROQ_API_KEY=$GROQ_API_KEY"
    [ -n "${GEMINI_API_KEY:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e GEMINI_API_KEY=$GEMINI_API_KEY"
    [ -n "${SAFETY_MODEL:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e SAFETY_MODEL=$SAFETY_MODEL"
    [ -n "${SQLITE_STORE_DIR:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e SQLITE_STORE_DIR=$SQLITE_STORE_DIR"
    # Use --network host on Linux only (macOS doesn't support it properly)
    NETWORK_MODE=""
    if [[ "$(uname)" != "Darwin" ]] && [[ "$(uname)" != *"MINGW"* ]]; then
        NETWORK_MODE="--network host"
    fi
    # NETWORK_MODE and DOCKER_ENV_VARS are intentionally unquoted so they
    # word-split into separate docker arguments.
    # NOTE(review): with --network host, Docker ignores the -p mapping below
    # (ports are shared with the host directly) -- verify this is intended.
    docker run -d $NETWORK_MODE --name "$CONTAINER_NAME" \
        -p $PORT:$PORT \
        $DOCKER_ENV_VARS \
        -v "$REPO_ROOT":/app/llama-stack-source \
        "$IMAGE_NAME" \
        --port $PORT
    echo "Waiting for container to start..."
    # Poll the health endpoint for up to 30 seconds before giving up.
    for i in {1..30}; do
        if curl -s http://localhost:$PORT/v1/health 2>/dev/null | grep -q "OK"; then
            echo "✅ Container started successfully"
            echo ""
            echo "=== Container Information ==="
            echo "Container name: $CONTAINER_NAME"
            echo "URL: http://localhost:$PORT"
            echo "Health check: http://localhost:$PORT/v1/health"
            echo ""
            echo "To view logs: $0 logs --distro $DISTRO"
            echo "To stop: $0 stop --distro $DISTRO"
            return 0
        fi
        if [[ $i -eq 30 ]]; then
            echo "❌ Container failed to start within timeout"
            echo "Showing container logs:"
            docker logs "$CONTAINER_NAME"
            exit 1
        fi
        sleep 1
    done
}
# Execute command
case "$COMMAND" in
    start)
        start_container
        ;;
    stop)
        stop_container
        ;;
    restart)
        echo "Restarting container: $CONTAINER_NAME"
        stop_container
        echo ""
        start_container
        ;;
    status)
        if is_container_running; then
            echo "✅ Container $CONTAINER_NAME is running"
            echo "URL: http://localhost:$PORT"
            # Try to get the actual port from the container
            ACTUAL_PORT=$(docker port "$CONTAINER_NAME" 2>/dev/null | grep "8321/tcp" | cut -d':' -f2 | head -1)
            if [[ -n "$ACTUAL_PORT" ]]; then
                echo "Port: $ACTUAL_PORT"
            fi
        elif container_exists; then
            echo "⚠️ Container $CONTAINER_NAME exists but is not running"
            echo "Start it with: $0 start --distro $DISTRO"
        else
            echo "❌ Container $CONTAINER_NAME does not exist"
            echo "Start it with: $0 start --distro $DISTRO"
        fi
        ;;
    logs)
        if container_exists; then
            echo "=== Logs for $CONTAINER_NAME ==="
            # Check if -f flag was passed after 'logs' command
            # NOTE(review): the option loop above rejects "-f" as an unknown
            # option and exits, so this branch appears unreachable as written
            # -- verify how `logs -f` is expected to reach here.
            if [[ "${1:-}" == "-f" || "${1:-}" == "--follow" ]]; then
                docker logs --tail 100 --follow "$CONTAINER_NAME"
            else
                docker logs --tail 100 "$CONTAINER_NAME"
            fi
        else
            echo "❌ Container $CONTAINER_NAME does not exist"
            exit 1
        fi
        ;;
esac

View file

@@ -42,9 +42,12 @@ Setups are defined in tests/integration/setups.py and provide global configurati
You can also specify subdirectories (of tests/integration) to select tests from, which will override the suite.
Examples:
# Basic inference tests with ollama
# Basic inference tests with ollama (server mode)
$0 --stack-config server:ci-tests --suite base --setup ollama
# Basic inference tests with docker
$0 --stack-config docker:ci-tests --suite base --setup ollama
# Multiple test directories with vllm
$0 --stack-config server:ci-tests --subdirs 'inference,agents' --setup vllm
@@ -153,7 +156,7 @@ echo "Setting SQLITE_STORE_DIR: $SQLITE_STORE_DIR"
# Determine stack config type for api_recorder test isolation
if [[ "$COLLECT_ONLY" == false ]]; then
if [[ "$STACK_CONFIG" == server:* ]]; then
if [[ "$STACK_CONFIG" == server:* ]] || [[ "$STACK_CONFIG" == docker:* ]]; then
export LLAMA_STACK_TEST_STACK_CONFIG_TYPE="server"
echo "Setting stack config type: server"
else
@@ -229,6 +232,104 @@ if [[ "$STACK_CONFIG" == *"server:"* && "$COLLECT_ONLY" == false ]]; then
trap stop_server EXIT ERR INT TERM
fi
# Start Docker Container if needed
# Handles stack configs of the form `docker:<distro>`: builds the image,
# runs it, waits for health, then rewrites STACK_CONFIG to the server URL.
if [[ "$STACK_CONFIG" == *"docker:"* && "$COLLECT_ONLY" == false ]]; then
  # Cleanup handler, installed via trap after a successful start below.
  stop_container() {
    echo "Stopping Docker container..."
    container_name="llama-stack-test-$DISTRO"
    if docker ps -a --format '{{.Names}}' | grep -q "^${container_name}$"; then
      echo "Stopping and removing container: $container_name"
      docker stop "$container_name" 2>/dev/null || true
      docker rm "$container_name" 2>/dev/null || true
    else
      echo "No container named $container_name found"
    fi
    echo "Docker container stopped"
  }
  # Extract distribution name from docker:distro format
  DISTRO=$(echo "$STACK_CONFIG" | sed 's/^docker://')
  export LLAMA_STACK_PORT=8321
  echo "=== Building Docker Image for distribution: $DISTRO ==="
  # Set LLAMA_STACK_DIR to repo root
  # USE_COPY_NOT_MOUNT copies files into image (for CI), otherwise mounts for live development
  BUILD_ENV="LLAMA_STACK_DIR=$ROOT_DIR"
  if [[ "${CI:-false}" == "true" ]] || [[ "${GITHUB_ACTIONS:-false}" == "true" ]]; then
    echo "CI detected (CI=$CI, GITHUB_ACTIONS=$GITHUB_ACTIONS): copying source into image"
    BUILD_ENV="USE_COPY_NOT_MOUNT=true $BUILD_ENV"
  else
    echo "Local mode: will mount source for live development"
  fi
  eval "$BUILD_ENV llama stack build --distro '$DISTRO' --image-type container"
  # NOTE(review): if the enclosing script runs under `set -e`, a failed eval
  # exits before this $? check can fire -- confirm against the full script.
  if [ $? -ne 0 ]; then
    echo "❌ Failed to build Docker image"
    exit 1
  fi
  echo ""
  echo "=== Starting Docker Container ==="
  container_name="llama-stack-test-$DISTRO"
  # Stop and remove existing container if it exists
  docker stop "$container_name" 2>/dev/null || true
  docker rm "$container_name" 2>/dev/null || true
  # Build environment variables for docker run
  DOCKER_ENV_VARS=""
  DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e LLAMA_STACK_TEST_INFERENCE_MODE=$INFERENCE_MODE"
  DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e LLAMA_STACK_TEST_STACK_CONFIG_TYPE=server"
  # Pass through API keys if they exist
  [ -n "${TOGETHER_API_KEY:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e TOGETHER_API_KEY=$TOGETHER_API_KEY"
  [ -n "${FIREWORKS_API_KEY:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e FIREWORKS_API_KEY=$FIREWORKS_API_KEY"
  [ -n "${TAVILY_SEARCH_API_KEY:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e TAVILY_SEARCH_API_KEY=$TAVILY_SEARCH_API_KEY"
  [ -n "${OPENAI_API_KEY:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e OPENAI_API_KEY=$OPENAI_API_KEY"
  [ -n "${ANTHROPIC_API_KEY:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e ANTHROPIC_API_KEY=$ANTHROPIC_API_KEY"
  [ -n "${GROQ_API_KEY:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e GROQ_API_KEY=$GROQ_API_KEY"
  [ -n "${GEMINI_API_KEY:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e GEMINI_API_KEY=$GEMINI_API_KEY"
  [ -n "${OLLAMA_URL:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e OLLAMA_URL=$OLLAMA_URL"
  [ -n "${SAFETY_MODEL:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e SAFETY_MODEL=$SAFETY_MODEL"
  # Determine the actual image name (may have localhost/ prefix)
  IMAGE_NAME=$(docker images --format "{{.Repository}}:{{.Tag}}" | grep "distribution-$DISTRO:dev$" | head -1)
  if [[ -z "$IMAGE_NAME" ]]; then
    echo "❌ Error: Could not find image for distribution-$DISTRO:dev"
    exit 1
  fi
  echo "Using image: $IMAGE_NAME"
  # DOCKER_ENV_VARS is intentionally unquoted so it word-splits into args.
  docker run -d --network host --name "$container_name" \
    -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
    $DOCKER_ENV_VARS \
    -v $ROOT_DIR:/app/llama-stack-source \
    "$IMAGE_NAME" \
    --port $LLAMA_STACK_PORT
  echo "Waiting for Docker container to start..."
  # Poll the health endpoint for up to 30 seconds.
  for i in {1..30}; do
    if curl -s http://localhost:$LLAMA_STACK_PORT/v1/health 2>/dev/null | grep -q "OK"; then
      echo "✅ Docker container started successfully"
      break
    fi
    if [[ $i -eq 30 ]]; then
      echo "❌ Docker container failed to start"
      echo "Container logs:"
      docker logs "$container_name"
      exit 1
    fi
    sleep 1
  done
  echo ""
  # Update STACK_CONFIG to point to the running container
  STACK_CONFIG="http://localhost:$LLAMA_STACK_PORT"
  # NOTE(review): the trap is installed only after a successful start, so a
  # container that fails its health check is not cleaned up here -- verify.
  trap stop_container EXIT ERR INT TERM
fi
# Run tests
echo "=== Running Integration Tests ==="
EXCLUDE_TESTS="builtin_tool or safety_with_image or code_interpreter or test_rag"