feat(ci): add support for docker:distro in tests

Also a critical bug fix so test recordings can be found inside docker
2025-12-12 12:06:04 +00:00 · 2025-10-16 14:31:10 -07:00 · 2025-10-16 14:31:10 -07:00 · 1fc0fd5935
commit 1fc0fd5935
parent 07ff15d917
5 changed files with 477 additions and 8 deletions
--- a/llama_stack/core/build_container.sh
+++ b/llama_stack/core/build_container.sh
@ -338,7 +338,7 @@ fi
 # Add other required commands generic to all containers
 add_to_container << EOF
-RUN mkdir -p /.llama /.cache && chmod -R g+rw /app /.llama /.cache
+RUN mkdir -p /.llama /.cache && chmod -R g+rw /.llama /.cache && (chmod -R g+rw /app 2>/dev/null || true)
 EOF
 printf "Containerfile created successfully in %s/Containerfile\n\n" "$TEMP_DIR"
--- a/llama_stack/testing/api_recorder.py
+++ b/llama_stack/testing/api_recorder.py
@ -355,8 +355,14 @@ class ResponseStorage:
            test_file = test_id.split("::")[0]  # Remove test function part
            test_dir = Path(test_file).parent  # Get parent directory
-            # Put recordings in a "recordings" subdirectory of the test's parent dir
+            # Make it absolute by resolving against base_dir
-            # e.g., "tests/integration/inference" -> "tests/integration/inference/recordings"
+            # If base_dir is absolute, use it as the root, otherwise resolve relative to cwd
            if self.base_dir.is_absolute():
                # base_dir is something like /app/llama-stack-source/tests/integration/common
                # We need to go up to the repo root and then back down to the test dir
                repo_root = self.base_dir.parent.parent.parent  # go up from common -> integration -> tests -> repo
                return repo_root / test_dir / "recordings"
            else:
                return test_dir / "recordings"
        else:
            # Fallback for non-test contexts
--- a/scripts/docker.sh
+++ b/scripts/docker.sh
@ -0,0 +1,358 @@
 #!/bin/bash
 # Copyright (c) Meta Platforms, Inc. and affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 set -euo pipefail
 # Docker container management script for Llama Stack
 # Allows starting/stopping/restarting a Llama Stack docker container for testing
 # Default values
 DISTRO=""
 PORT=8321
 INFERENCE_MODE="replay"
 COMMAND=""
 USE_COPY_NOT_MOUNT=false
 NO_REBUILD=false
 # Function to display usage
 usage() {
    # Print the command-line help text to stdout.
    # The heredoc delimiter is unquoted, so $0 below expands to the path the
    # script was invoked with. This function only prints; callers decide
    # whether to exit afterwards and with which status.
    cat <<EOF
 Usage: $0 COMMAND [OPTIONS]
 Commands:
    start       Build and start the docker container
    stop        Stop and remove the docker container
    restart     Restart the docker container
    status      Check if the container is running
    logs        Show container logs (add -f to follow)
 Options:
    --distro STRING          Distribution name (e.g., 'ci-tests', 'starter') (required for start/restart)
    --port NUMBER            Port to run on (default: 8321)
    --inference-mode STRING  Inference mode: replay, record-if-missing or record (default: replay)
    --copy-source            Copy source into image instead of mounting (default: auto-detect CI, otherwise mount)
    --no-rebuild             Skip building the image, just start the container (default: false)
    --help                   Show this help message
 Examples:
    # Start a docker container (local dev mode - mounts source, builds image)
    $0 start --distro ci-tests
    # Start without rebuilding (uses existing image)
    $0 start --distro ci-tests --no-rebuild
    # Start with source copied into image (like CI)
    $0 start --distro ci-tests --copy-source
    # Start with custom port
    $0 start --distro starter --port 8080
    # Check status
    $0 status --distro ci-tests
    # View logs
    $0 logs --distro ci-tests
    # Stop container
    $0 stop --distro ci-tests
    # Restart container
    $0 restart --distro ci-tests
 Note: In CI environments (detected via CI or GITHUB_ACTIONS env vars), source is
      automatically copied into the image. Locally, source is mounted for live development
      unless --copy-source is specified.
 EOF
 }
 # Parse command (first positional arg)
 # The first positional argument selects the sub-command; without one there is
 # nothing to do, so show the help text and fail.
 if (($# == 0)); then
    echo "Error: Command required"
    usage
    exit 1
 fi
 COMMAND="$1"
 shift
 # A bare --help in command position is honoured before validation.
 if [[ "$COMMAND" == "--help" ]]; then
    usage
    exit 0
 fi
 # Anything other than the five known sub-commands is rejected.
 case "$COMMAND" in
 start | stop | restart | status | logs) ;;
 *)
    echo "Error: Unknown command: $COMMAND"
    usage
    exit 1
    ;;
 esac
 # Parse options
 # Options are consumed left to right. Value-taking options need a following
 # argument; flags stand alone. FOLLOW_FLAG remembers a -f/--follow given to
 # the 'logs' command so it can be restored as $1 once parsing is finished.
 FOLLOW_FLAG=""
 while [[ $# -gt 0 ]]; do
    case $1 in
    --distro | --port | --inference-mode)
        # With `set -u`, reading a missing $2 would abort the script with a
        # cryptic "unbound variable" message; report the real problem instead.
        if [[ $# -lt 2 ]]; then
            echo "Error: $1 requires a value"
            usage
            exit 1
        fi
        case $1 in
        --distro) DISTRO="$2" ;;
        --port) PORT="$2" ;;
        --inference-mode) INFERENCE_MODE="$2" ;;
        esac
        shift 2
        ;;
    --copy-source)
        USE_COPY_NOT_MOUNT=true
        shift
        ;;
    --no-rebuild)
        NO_REBUILD=true
        shift
        ;;
    -f | --follow)
        # The usage text advertises "logs ... (add -f to follow)"; for any
        # other command the flag stays an unknown option, as before.
        if [[ "$COMMAND" != "logs" ]]; then
            echo "Unknown option: $1"
            usage
            exit 1
        fi
        FOLLOW_FLAG="$1"
        shift
        ;;
    --help)
        usage
        exit 0
        ;;
    *)
        echo "Unknown option: $1"
        usage
        exit 1
        ;;
    esac
 done
 # Every argument has been shifted away by now. The 'logs' command inspects
 # ${1:-} for -f/--follow after parsing, so put the flag back as the sole
 # remaining positional parameter.
 if [[ -n "$FOLLOW_FLAG" ]]; then
    set -- "$FOLLOW_FLAG"
 fi
 # Validate required parameters for commands that need them
 # Without an explicit --distro, start/restart cannot proceed, while
 # stop/status/logs fall back to the first llama-stack-test-* container that
 # docker knows about (running or stopped, because of `ps -a`).
 if [[ -z "$DISTRO" ]]; then
    case "$COMMAND" in
    stop | status | logs)
        RUNNING_CONTAINERS=$(docker ps -a --filter "name=llama-stack-test-" --format "{{.Names}}" | head -1)
        if [[ -z "$RUNNING_CONTAINERS" ]]; then
            echo "Error: --distro is required (no running containers found)"
            usage
            exit 1
        fi
        # Drop the first occurrence of the container-name prefix to recover the distro.
        DISTRO="${RUNNING_CONTAINERS/llama-stack-test-/}"
        echo "Found running container for distro: $DISTRO"
        ;;
    *)
        echo "Error: --distro is required for '$COMMAND' command"
        usage
        exit 1
        ;;
    esac
 fi
 # Accept both "ci-tests" and "docker:ci-tests" by stripping a leading docker: prefix.
 DISTRO="${DISTRO#docker:}"
 CONTAINER_NAME="llama-stack-test-$DISTRO"
 # Function to check if container is running
 is_container_running() {
    # Succeed (exit 0) only when a *running* container is named exactly
    # $CONTAINER_NAME.
    #
    # The docker output is captured first instead of being piped into
    # `grep -q`: with `set -o pipefail`, grep exiting on its first match can
    # make the producer die from SIGPIPE and turn a real match into a failure.
    # -x/-F compare whole lines literally, so characters such as '.' in the
    # name are not treated as regex metacharacters.
    local names
    names=$(docker ps --filter "name=^${CONTAINER_NAME}$" --format '{{.Names}}') || return 1
    grep -qxF -- "$CONTAINER_NAME" <<<"$names"
 }
 # Function to check if container exists (running or stopped)
 container_exists() {
    # Succeed (exit 0) when a container named exactly $CONTAINER_NAME exists,
    # whether it is running or stopped (`docker ps -a`).
    #
    # The docker output is captured first instead of being piped into
    # `grep -q`: with `set -o pipefail`, grep exiting on its first match can
    # make the producer die from SIGPIPE and turn a real match into a failure.
    # -x/-F compare whole lines literally, so characters such as '.' in the
    # name are not treated as regex metacharacters.
    local names
    names=$(docker ps -a --filter "name=^${CONTAINER_NAME}$" --format '{{.Names}}') || return 1
    grep -qxF -- "$CONTAINER_NAME" <<<"$names"
 }
 # Function to stop and remove container
 stop_container() {
    # Stop and remove $CONTAINER_NAME if docker knows about it; otherwise just
    # print a warning. Failures of `docker stop` / `docker rm` are deliberately
    # ignored so that an already-stopped container is still cleaned up.
    if ! container_exists; then
        echo "⚠️  Container $CONTAINER_NAME does not exist"
        return
    fi
    echo "Stopping container: $CONTAINER_NAME"
    docker stop "$CONTAINER_NAME" 2>/dev/null || true
    echo "Removing container: $CONTAINER_NAME"
    docker rm "$CONTAINER_NAME" 2>/dev/null || true
    echo "✅ Container stopped and removed"
 }
 # Function to build docker image
 build_image() {
    # Build the container image for $DISTRO via `llama stack build`.
    #
    # Reads:  DISTRO, USE_COPY_NOT_MOUNT, and the CI / GITHUB_ACTIONS env vars.
    # Sets:   SCRIPT_DIR and REPO_ROOT (globals).
    # Exits the script with status 1 when the build command fails.
    echo "=== Building Docker Image for distribution: $DISTRO ==="
    # Get the repo root (parent of scripts directory)
    SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
    REPO_ROOT=$(cd "$SCRIPT_DIR/.." && pwd)
    # Environment for the build command, kept as an array of NAME=value words
    # so a repo path containing spaces or shell metacharacters is passed
    # through verbatim (the array is never empty).
    local -a build_env
    build_env=("LLAMA_STACK_DIR=$REPO_ROOT")
    # Copy in CI or if explicitly requested, otherwise mount for live development
    if [[ "$USE_COPY_NOT_MOUNT" == "true" ]] || [[ "${CI:-false}" == "true" ]] || [[ "${GITHUB_ACTIONS:-false}" == "true" ]]; then
        echo "Copying source into image (USE_COPY_NOT_MOUNT=true, CI=${CI:-false}, GITHUB_ACTIONS=${GITHUB_ACTIONS:-false})"
        build_env+=("USE_COPY_NOT_MOUNT=true")
    else
        echo "Will mount source for live development"
    fi
    # `env NAME=value... cmd` replaces the previous `eval` of a hand-built
    # string, which re-parsed $REPO_ROOT and $DISTRO as shell code.
    if ! env "${build_env[@]}" llama stack build --distro "$DISTRO" --image-type container; then
        echo "❌ Failed to build Docker image"
        exit 1
    fi
    echo "✅ Docker image built successfully"
 }
 # Function to start container
 # Helper: print the first local image whose "repository:tag" ends in
 # distribution-$DISTRO:dev (the repository may carry a localhost/ prefix).
 # Prints nothing when there is no such image. The image list is captured
 # before matching so that, under `set -o pipefail`, neither a no-match grep
 # nor an early-exiting consumer can abort or falsify the lookup.
 _find_distro_image() {
    local image_list
    image_list=$(docker images --format "{{.Repository}}:{{.Tag}}") || return 1
    grep -m1 "distribution-$DISTRO:dev$" <<<"$image_list" || true
 }
 start_container() {
    # Build (unless --no-rebuild) and start the test container, then wait up
    # to ~30 seconds for its health endpoint to answer.
    #
    # Reads:  CONTAINER_NAME, DISTRO, PORT, INFERENCE_MODE, NO_REBUILD and the
    #         optional API-key / config environment variables listed below.
    # Sets:   SCRIPT_DIR, REPO_ROOT, IMAGE_NAME, OLLAMA_URL (globals).
    # Returns 0 once the health check passes; exits 0 if the container is
    # already running; exits 1 on a missing image or a startup timeout.

    # Check if already running
    if is_container_running; then
        echo "⚠️  Container $CONTAINER_NAME is already running"
        echo "URL: http://localhost:$PORT"
        exit 0
    fi
    # Stop and remove if exists but not running
    if container_exists; then
        echo "Removing existing stopped container..."
        docker rm "$CONTAINER_NAME" 2>/dev/null || true
    fi
    # Build the image unless --no-rebuild was specified
    if [[ "$NO_REBUILD" == "true" ]]; then
        echo "Skipping build (--no-rebuild specified)"
        # Check if image exists (with or without localhost/ prefix)
        if [[ -z "$(_find_distro_image)" ]]; then
            echo "❌ Error: Image distribution-$DISTRO:dev does not exist"
            echo "Either build it first without --no-rebuild, or run: llama stack build --distro $DISTRO --image-type container"
            exit 1
        fi
        echo "✅ Found existing image for distribution-$DISTRO:dev"
    else
        build_image
    fi
    echo ""
    echo "=== Starting Docker Container ==="
    # Get the repo root for volume mount
    SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
    REPO_ROOT=$(cd "$SCRIPT_DIR/.." && pwd)
    # Determine the actual image name (may have localhost/ prefix). The
    # `|| true` keeps `set -e` from aborting before the error message below.
    IMAGE_NAME=$(_find_distro_image) || true
    if [[ -z "$IMAGE_NAME" ]]; then
        echo "❌ Error: Could not find image for distribution-$DISTRO:dev"
        exit 1
    fi
    echo "Using image: $IMAGE_NAME"
    # Arguments for `docker run`, collected in an array so that values
    # containing spaces or glob characters reach docker as single words.
    local -a run_args
    run_args=(-d)
    # Use --network host on Linux only (macOS doesn't support it properly)
    if [[ "$(uname)" != "Darwin" ]] && [[ "$(uname)" != *"MINGW"* ]]; then
        run_args+=(--network host)
    fi
    run_args+=(--name "$CONTAINER_NAME" -p "$PORT:$PORT")
    run_args+=(-e "LLAMA_STACK_TEST_INFERENCE_MODE=$INFERENCE_MODE")
    run_args+=(-e "LLAMA_STACK_TEST_STACK_CONFIG_TYPE=server")
    # Set default OLLAMA_URL if not provided
    # On macOS/Windows, use host.docker.internal to reach host from container
    # On Linux with --network host, use localhost
    if [[ "$(uname)" == "Darwin" ]] || [[ "$(uname)" == *"MINGW"* ]]; then
        OLLAMA_URL="${OLLAMA_URL:-http://host.docker.internal:11434}"
    else
        OLLAMA_URL="${OLLAMA_URL:-http://localhost:11434}"
    fi
    run_args+=(-e "OLLAMA_URL=$OLLAMA_URL")
    # Pass through API keys and settings only when they are set and non-empty.
    # ${!name:-} is indirect expansion: the value of the variable called $name.
    local passthrough_name
    for passthrough_name in TOGETHER_API_KEY FIREWORKS_API_KEY TAVILY_SEARCH_API_KEY \
        OPENAI_API_KEY ANTHROPIC_API_KEY GROQ_API_KEY GEMINI_API_KEY \
        SAFETY_MODEL SQLITE_STORE_DIR; do
        if [[ -n "${!passthrough_name:-}" ]]; then
            run_args+=(-e "${passthrough_name}=${!passthrough_name}")
        fi
    done
    run_args+=(-v "$REPO_ROOT":/app/llama-stack-source)
    docker run "${run_args[@]}" "$IMAGE_NAME" --port "$PORT"
    echo "Waiting for container to start..."
    local i
    for i in {1..30}; do
        if curl -s "http://localhost:$PORT/v1/health" 2>/dev/null | grep -q "OK"; then
            echo "✅ Container started successfully"
            echo ""
            echo "=== Container Information ==="
            echo "Container name: $CONTAINER_NAME"
            echo "URL: http://localhost:$PORT"
            echo "Health check: http://localhost:$PORT/v1/health"
            echo ""
            echo "To view logs: $0 logs --distro $DISTRO"
            echo "To stop: $0 stop --distro $DISTRO"
            return 0
        fi
        # Give up after the 30th failed probe and show why the server is unhealthy.
        if [[ $i -eq 30 ]]; then
            echo "❌ Container failed to start within timeout"
            echo "Showing container logs:"
            docker logs "$CONTAINER_NAME"
            exit 1
        fi
        sleep 1
    done
 }
 # Execute command
 # Dispatch on the sub-command validated during argument parsing.
 case "$COMMAND" in
 start)
    start_container
    ;;
 stop)
    stop_container
    ;;
 restart)
    echo "Restarting container: $CONTAINER_NAME"
    stop_container
    echo ""
    start_container
    ;;
 status)
    if is_container_running; then
        echo "✅ Container $CONTAINER_NAME is running"
        echo "URL: http://localhost:$PORT"
        # Try to get the actual published port from the container. Any TCP
        # mapping is accepted rather than a hard-coded 8321, so containers
        # started with a custom --port are reported correctly. A container
        # without published ports yields no match; `|| true` keeps
        # `set -e` / `pipefail` from aborting the script in that case.
        PORT_MAPPING=$(docker port "$CONTAINER_NAME" 2>/dev/null | grep -m1 "/tcp") || true
        # A mapping line looks like "8321/tcp -> 0.0.0.0:8321"; keep what follows the last ':'.
        ACTUAL_PORT="${PORT_MAPPING##*:}"
        if [[ -n "$ACTUAL_PORT" ]]; then
            echo "Port: $ACTUAL_PORT"
        fi
    elif container_exists; then
        echo "⚠️  Container $CONTAINER_NAME exists but is not running"
        echo "Start it with: $0 start --distro $DISTRO"
    else
        echo "❌ Container $CONTAINER_NAME does not exist"
        echo "Start it with: $0 start --distro $DISTRO"
    fi
    ;;
 logs)
    if container_exists; then
        echo "=== Logs for $CONTAINER_NAME ==="
        # Follow the log stream when -f/--follow is the first remaining
        # positional argument; otherwise print the last 100 lines and return.
        if [[ "${1:-}" == "-f" || "${1:-}" == "--follow" ]]; then
            docker logs --tail 100 --follow "$CONTAINER_NAME"
        else
            docker logs --tail 100 "$CONTAINER_NAME"
        fi
    else
        echo "❌ Container $CONTAINER_NAME does not exist"
        exit 1
    fi
    ;;
 esac
--- a/scripts/integration-tests.sh
+++ b/scripts/integration-tests.sh
@ -42,9 +42,12 @@ Setups are defined in tests/integration/setups.py and provide global configurati
 You can also specify subdirectories (of tests/integration) to select tests from, which will override the suite.
 Examples:
-    # Basic inference tests with ollama
+    # Basic inference tests with ollama (server mode)
    $0 --stack-config server:ci-tests --suite base --setup ollama
    # Basic inference tests with docker
    $0 --stack-config docker:ci-tests --suite base --setup ollama
    # Multiple test directories with vllm
    $0 --stack-config server:ci-tests --subdirs 'inference,agents' --setup vllm
@ -153,7 +156,7 @@ echo "Setting SQLITE_STORE_DIR: $SQLITE_STORE_DIR"
 # Determine stack config type for api_recorder test isolation
 if [[ "$COLLECT_ONLY" == false ]]; then
-    if [[ "$STACK_CONFIG" == server:* ]]; then
+    if [[ "$STACK_CONFIG" == server:* ]] || [[ "$STACK_CONFIG" == docker:* ]]; then
        export LLAMA_STACK_TEST_STACK_CONFIG_TYPE="server"
        echo "Setting stack config type: server"
    else
@ -229,6 +232,104 @@ if [[ "$STACK_CONFIG" == *"server:"* && "$COLLECT_ONLY" == false ]]; then
    trap stop_server EXIT ERR INT TERM
 fi
 # Start Docker Container if needed
 if [[ "$STACK_CONFIG" == *"docker:"* && "$COLLECT_ONLY" == false ]]; then
    # Cleanup handler: stop and remove the test container if it exists.
    # Installed as a trap below, so it must never fail itself.
    stop_container() {
        echo "Stopping Docker container..."
        container_name="llama-stack-test-$DISTRO"
        if docker ps -a --format '{{.Names}}' | grep -q "^${container_name}$"; then
            echo "Stopping and removing container: $container_name"
            docker stop "$container_name" 2>/dev/null || true
            docker rm "$container_name" 2>/dev/null || true
        else
            echo "No container named $container_name found"
        fi
        echo "Docker container stopped"
    }
    # Extract distribution name from docker:distro format
    DISTRO="${STACK_CONFIG#docker:}"
    export LLAMA_STACK_PORT=8321
    echo "=== Building Docker Image for distribution: $DISTRO ==="
    # Set LLAMA_STACK_DIR to repo root
    # USE_COPY_NOT_MOUNT copies files into image (for CI), otherwise mounts for live development
    # The build environment is an array of NAME=value words so that a repo
    # path with spaces or shell metacharacters is passed through verbatim.
    BUILD_ENV_ARGS=("LLAMA_STACK_DIR=$ROOT_DIR")
    if [[ "${CI:-false}" == "true" ]] || [[ "${GITHUB_ACTIONS:-false}" == "true" ]]; then
        # Use the same defaults as the condition above: expanding an unset CI
        # or GITHUB_ACTIONS directly would abort the script if nounset is on.
        echo "CI detected (CI=${CI:-false}, GITHUB_ACTIONS=${GITHUB_ACTIONS:-false}): copying source into image"
        BUILD_ENV_ARGS+=("USE_COPY_NOT_MOUNT=true")
    else
        echo "Local mode: will mount source for live development"
    fi
    # Test the build command directly: a separate `$?` check afterwards is
    # never reached when errexit is active, and `env` avoids re-parsing
    # $ROOT_DIR / $DISTRO as shell code the way `eval` did.
    if ! env "${BUILD_ENV_ARGS[@]}" llama stack build --distro "$DISTRO" --image-type container; then
        echo "❌ Failed to build Docker image"
        exit 1
    fi
    echo ""
    echo "=== Starting Docker Container ==="
    container_name="llama-stack-test-$DISTRO"
    # Stop and remove existing container if it exists
    docker stop "$container_name" 2>/dev/null || true
    docker rm "$container_name" 2>/dev/null || true
    # Build environment variables for docker run (as an array, so values
    # containing spaces or glob characters stay single words)
    DOCKER_ENV_ARGS=(
        -e "LLAMA_STACK_TEST_INFERENCE_MODE=$INFERENCE_MODE"
        -e "LLAMA_STACK_TEST_STACK_CONFIG_TYPE=server"
    )
    # Pass through API keys and settings only when they are set and non-empty.
    # ${!name:-} is indirect expansion: the value of the variable called $name.
    for passthrough_name in TOGETHER_API_KEY FIREWORKS_API_KEY TAVILY_SEARCH_API_KEY \
        OPENAI_API_KEY ANTHROPIC_API_KEY GROQ_API_KEY GEMINI_API_KEY \
        OLLAMA_URL SAFETY_MODEL; do
        if [[ -n "${!passthrough_name:-}" ]]; then
            DOCKER_ENV_ARGS+=(-e "${passthrough_name}=${!passthrough_name}")
        fi
    done
    # Determine the actual image name (may have localhost/ prefix). The list
    # is captured first and `|| true` tolerates a no-match grep, so a missing
    # image reaches the error message below instead of aborting silently
    # under errexit/pipefail.
    IMAGE_LIST=$(docker images --format "{{.Repository}}:{{.Tag}}")
    IMAGE_NAME=$(grep -m1 "distribution-$DISTRO:dev$" <<<"$IMAGE_LIST" || true)
    if [[ -z "$IMAGE_NAME" ]]; then
        echo "❌ Error: Could not find image for distribution-$DISTRO:dev"
        exit 1
    fi
    echo "Using image: $IMAGE_NAME"
    # Install the cleanup trap before the container is created, so a failed
    # or timed-out startup does not leave the container behind.
    trap stop_container EXIT ERR INT TERM
    docker run -d --network host --name "$container_name" \
        -p "$LLAMA_STACK_PORT:$LLAMA_STACK_PORT" \
        "${DOCKER_ENV_ARGS[@]}" \
        -v "$ROOT_DIR":/app/llama-stack-source \
        "$IMAGE_NAME" \
        --port "$LLAMA_STACK_PORT"
    echo "Waiting for Docker container to start..."
    for i in {1..30}; do
        if curl -s "http://localhost:$LLAMA_STACK_PORT/v1/health" 2>/dev/null | grep -q "OK"; then
            echo "✅ Docker container started successfully"
            break
        fi
        # Give up after the 30th failed probe and show the container's output.
        if [[ $i -eq 30 ]]; then
            echo "❌ Docker container failed to start"
            echo "Container logs:"
            docker logs "$container_name"
            exit 1
        fi
        sleep 1
    done
    echo ""
    # Update STACK_CONFIG to point to the running container
    STACK_CONFIG="http://localhost:$LLAMA_STACK_PORT"
 fi
 # Run tests
 echo "=== Running Integration Tests ==="
 EXCLUDE_TESTS="builtin_tool or safety_with_image or code_interpreter or test_rag"
--- a/tests/integration/conftest.py
+++ b/tests/integration/conftest.py
@ -42,7 +42,9 @@ def pytest_sessionstart(session):
    # Set test stack config type for api_recorder test isolation
    stack_config = session.config.getoption("--stack-config", default=None)
-    if stack_config and (stack_config.startswith("server:") or stack_config.startswith("http")):
+    if stack_config and (
        stack_config.startswith("server:") or stack_config.startswith("docker:") or stack_config.startswith("http")
    ):
        os.environ["LLAMA_STACK_TEST_STACK_CONFIG_TYPE"] = "server"
        logger.info(f"Test stack config type: server (stack_config={stack_config})")
    else:
@ -139,7 +141,9 @@ def pytest_addoption(parser):
            a 'pointer' to the stack. this can be either be:
            (a) a template name like `starter`, or
            (b) a path to a run.yaml file, or
-            (c) an adhoc config spec, e.g. `inference=fireworks,safety=llama-guard,agents=meta-reference`
+            (c) an adhoc config spec, e.g. `inference=fireworks,safety=llama-guard,agents=meta-reference`, or
            (d) a server config like `server:ci-tests`, or
            (e) a docker config like `docker:ci-tests` (builds and runs container)
            """
        ),
    )