From cd152f424025df2509749e5b9aebeb17597891ca Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Thu, 16 Oct 2025 19:33:13 -0700 Subject: [PATCH] feat(ci): add support for docker:distro in tests (#3832) Also a critical bug fix so test recordings can be found inside docker --- llama_stack/core/build_container.sh | 2 +- llama_stack/core/testing_context.py | 5 + llama_stack/testing/api_recorder.py | 96 +++++++- scripts/diagnose_recordings.py | 370 ++++++++++++++++++++++++++++ scripts/docker.sh | 358 +++++++++++++++++++++++++++ scripts/integration-tests.sh | 105 +++++++- tests/integration/conftest.py | 8 +- 7 files changed, 932 insertions(+), 12 deletions(-) create mode 100755 scripts/diagnose_recordings.py create mode 100755 scripts/docker.sh diff --git a/llama_stack/core/build_container.sh b/llama_stack/core/build_container.sh index 09878f535..03ed846d9 100755 --- a/llama_stack/core/build_container.sh +++ b/llama_stack/core/build_container.sh @@ -338,7 +338,7 @@ fi # Add other require item commands genearic to all containers add_to_container << EOF -RUN mkdir -p /.llama /.cache && chmod -R g+rw /app /.llama /.cache +RUN mkdir -p /.llama /.cache && chmod -R g+rw /.llama /.cache && (chmod -R g+rw /app 2>/dev/null || true) EOF printf "Containerfile created successfully in %s/Containerfile\n\n" "$TEMP_DIR" diff --git a/llama_stack/core/testing_context.py b/llama_stack/core/testing_context.py index 23cef751b..6c3e09987 100644 --- a/llama_stack/core/testing_context.py +++ b/llama_stack/core/testing_context.py @@ -42,3 +42,8 @@ def sync_test_context_from_provider_data(): return TEST_CONTEXT.set(provider_data["__test_id"]) return None + + +def is_debug_mode() -> bool: + """Check if test recording debug mode is enabled via LLAMA_STACK_TEST_DEBUG env var.""" + return os.environ.get("LLAMA_STACK_TEST_DEBUG", "").lower() in ("1", "true", "yes") diff --git a/llama_stack/testing/api_recorder.py b/llama_stack/testing/api_recorder.py index b0d68fd8a..9e272ca3a 100644 --- a/llama_stack/testing/api_recorder.py +++ b/llama_stack/testing/api_recorder.py @@ -37,7 +37,7 @@ _id_counters: dict[str, dict[str, int]] = {} # Test context uses ContextVar since it changes per-test and needs async isolation from openai.types.completion_choice import CompletionChoice -from llama_stack.core.testing_context import get_test_context +from llama_stack.core.testing_context import get_test_context, is_debug_mode # update the "finish_reason" field, since its type definition is wrong (no None is accepted) CompletionChoice.model_fields["finish_reason"].annotation = Literal["stop", "length", "content_filter"] | None @@ -146,6 +146,7 @@ def normalize_inference_request(method: str, url: str, headers: dict[str, Any], body_for_hash = _normalize_body_for_hash(body) + test_id = get_test_context() normalized: dict[str, Any] = { "method": method.upper(), "endpoint": parsed.path, @@ -154,10 +155,20 @@ def normalize_inference_request(method: str, url: str, headers: dict[str, Any], # Include test_id for isolation, except for shared infrastructure endpoints if parsed.path not in ("/api/tags", "/v1/models"): - normalized["test_id"] = get_test_context() + normalized["test_id"] = test_id normalized_json = json.dumps(normalized, sort_keys=True) - return hashlib.sha256(normalized_json.encode()).hexdigest() + request_hash = hashlib.sha256(normalized_json.encode()).hexdigest() + + if is_debug_mode(): + logger.info("[RECORDING DEBUG] Hash computation:") + logger.info(f" Test ID: {test_id}") + logger.info(f" Method: {method.upper()}") + logger.info(f" Endpoint: {parsed.path}") + logger.info(f" Model: {body.get('model', 'N/A')}") + logger.info(f" Computed hash: {request_hash}") + + return request_hash def normalize_tool_request(provider_name: str, tool_name: str, kwargs: dict[str, Any]) -> str: @@ -212,6 +223,11 @@ def patch_httpx_for_test_id(): provider_data["__test_id"] = test_id request.headers["X-LlamaStack-Provider-Data"] = json.dumps(provider_data) + if is_debug_mode(): + logger.info("[RECORDING DEBUG] Injected test ID into request header:") + logger.info(f" Test ID: {test_id}") + logger.info(f" URL: {request.url}") + return None LlamaStackClient._prepare_request = patched_prepare_request @@ -355,12 +371,35 @@ class ResponseStorage: test_file = test_id.split("::")[0] # Remove test function part test_dir = Path(test_file).parent # Get parent directory - # Put recordings in a "recordings" subdirectory of the test's parent dir - # e.g., "tests/integration/inference" -> "tests/integration/inference/recordings" - return test_dir / "recordings" + if self.base_dir.is_absolute(): + repo_root = self.base_dir.parent.parent.parent + result = repo_root / test_dir / "recordings" + if is_debug_mode(): + logger.info("[RECORDING DEBUG] Path resolution (absolute base_dir):") + logger.info(f" Test ID: {test_id}") + logger.info(f" Base dir: {self.base_dir}") + logger.info(f" Repo root: {repo_root}") + logger.info(f" Test file: {test_file}") + logger.info(f" Test dir: {test_dir}") + logger.info(f" Recordings dir: {result}") + return result + else: + result = test_dir / "recordings" + if is_debug_mode(): + logger.info("[RECORDING DEBUG] Path resolution (relative base_dir):") + logger.info(f" Test ID: {test_id}") + logger.info(f" Base dir: {self.base_dir}") + logger.info(f" Test dir: {test_dir}") + logger.info(f" Recordings dir: {result}") + return result else: # Fallback for non-test contexts - return self.base_dir / "recordings" + result = self.base_dir / "recordings" + if is_debug_mode(): + logger.info("[RECORDING DEBUG] Path resolution (no test context):") + logger.info(f" Base dir: {self.base_dir}") + logger.info(f" Recordings dir: {result}") + return result def _ensure_directory(self): """Ensure test-specific directories exist.""" @@ -395,6 +434,13 @@ class ResponseStorage: response_path = responses_dir / response_file + if is_debug_mode(): + logger.info("[RECORDING DEBUG] Storing recording:") + logger.info(f" Request hash: {request_hash}") + logger.info(f" File: {response_path}") + logger.info(f" Test ID: {get_test_context()}") + logger.info(f" Endpoint: {endpoint}") + # Save response to JSON file with metadata with open(response_path, "w") as f: json.dump( @@ -423,16 +469,33 @@ class ResponseStorage: test_dir = self._get_test_dir() response_path = test_dir / response_file + if is_debug_mode(): + logger.info("[RECORDING DEBUG] Looking up recording:") + logger.info(f" Request hash: {request_hash}") + logger.info(f" Primary path: {response_path}") + logger.info(f" Primary exists: {response_path.exists()}") + if response_path.exists(): + if is_debug_mode(): + logger.info(" Found in primary location") return _recording_from_file(response_path) # Fallback to base recordings directory (for session-level recordings) fallback_dir = self.base_dir / "recordings" fallback_path = fallback_dir / response_file + if is_debug_mode(): + logger.info(f" Fallback path: {fallback_path}") + logger.info(f" Fallback exists: {fallback_path.exists()}") + if fallback_path.exists(): + if is_debug_mode(): + logger.info(" Found in fallback location") return _recording_from_file(fallback_path) + if is_debug_mode(): + logger.info(" Recording not found in either location") + return None def _model_list_responses(self, request_hash: str) -> list[dict[str, Any]]: @@ -588,6 +651,13 @@ async def _patched_inference_method(original_method, self, client_type, endpoint mode = _current_mode storage = _current_storage + if is_debug_mode(): + logger.info("[RECORDING DEBUG] Entering inference method:") + logger.info(f" Mode: {mode}") + logger.info(f" Client type: {client_type}") + logger.info(f" Endpoint: {endpoint}") + logger.info(f" Test context: {get_test_context()}") + if mode == APIRecordingMode.LIVE or storage is None: if endpoint == "/v1/models": return original_method(self, *args, **kwargs) @@ -643,6 +713,18 @@ async def _patched_inference_method(original_method, self, client_type, endpoint return response_body elif mode == APIRecordingMode.REPLAY: # REPLAY mode requires recording to exist + if is_debug_mode(): + logger.error("[RECORDING DEBUG] Recording not found!") + logger.error(f" Mode: {mode}") + logger.error(f" Request hash: {request_hash}") + logger.error(f" Method: {method}") + logger.error(f" URL: {url}") + logger.error(f" Endpoint: {endpoint}") + logger.error(f" Model: {body.get('model', 'unknown')}") + logger.error(f" Test context: {get_test_context()}") + logger.error( + f" Stack config type: {os.environ.get('LLAMA_STACK_TEST_STACK_CONFIG_TYPE', 'library_client')}" + ) raise RuntimeError( f"Recording not found for request hash: {request_hash}\n" f"Model: {body.get('model', 'unknown')} | Request: {method} {url}\n" diff --git a/scripts/diagnose_recordings.py b/scripts/diagnose_recordings.py new file mode 100755 index 000000000..9c255f6f0 --- /dev/null +++ b/scripts/diagnose_recordings.py @@ -0,0 +1,370 @@ +#!/usr/bin/env python3 +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +""" +Diagnostic tool for debugging test recording issues. + +Usage: + # Find where a hash would be looked up + ./scripts/diagnose_recordings.py find-hash 7526c930eab04ce337496a26cd15f2591d7943035f2527182861643da9b837a7 + + # Show what's in a recording file + ./scripts/diagnose_recordings.py show tests/integration/agents/recordings/7526c930....json + + # List all recordings for a test + ./scripts/diagnose_recordings.py list-test "tests/integration/agents/test_agents.py::test_custom_tool" + + # Explain lookup paths for a test + ./scripts/diagnose_recordings.py explain-paths --test-id "tests/integration/agents/test_agents.py::test_foo" + + # Compare request hash computation + ./scripts/diagnose_recordings.py compute-hash --endpoint /v1/chat/completions --method POST --body '{"model":"llama3.2:3b"}' --test-id "..." +""" + +import argparse +import json +import sys +from pathlib import Path + +# Add parent directory to path to import from llama_stack +REPO_ROOT = Path(__file__).parent.parent +sys.path.insert(0, str(REPO_ROOT)) + +try: + from llama_stack.testing.api_recorder import normalize_inference_request +except ImportError: + normalize_inference_request = None + + +def find_hash(hash_value: str, base_dir: Path | None = None, test_id: str | None = None): + """Find where a hash would be looked up and what exists""" + if base_dir is None: + base_dir = REPO_ROOT / "tests/integration/common" + + print(f"Searching for hash: {hash_value}\n") + print(f"Base dir: {base_dir} (absolute={base_dir.is_absolute()})") + + # Compute test directory + if test_id: + test_file = test_id.split("::")[0] + test_dir = Path(test_file).parent + + if base_dir.is_absolute(): + repo_root = base_dir.parent.parent.parent + test_recordings_dir = repo_root / test_dir / "recordings" + else: + test_recordings_dir = test_dir / "recordings" + print(f"Test ID: {test_id}") + print(f"Test dir: {test_recordings_dir}\n") + else: + test_recordings_dir = base_dir / "recordings" + print("No test ID provided, using base dir\n") + + # Check primary location + response_file = f"{hash_value}.json" + response_path = test_recordings_dir / response_file + + print("Checking primary location:") + print(f" {response_path}") + if response_path.exists(): + print(" EXISTS") + print("\nFound! Contents:") + show_recording(response_path) + return True + else: + print(" Does not exist") + + # Check fallback location + fallback_dir = base_dir / "recordings" + fallback_path = fallback_dir / response_file + + print("\nChecking fallback location:") + print(f" {fallback_path}") + if fallback_path.exists(): + print(" EXISTS") + print("\nFound in fallback! Contents:") + show_recording(fallback_path) + return True + else: + print(" Does not exist") + + # Show what files DO exist + print(f"\nFiles in test directory ({test_recordings_dir}):") + if test_recordings_dir.exists(): + json_files = list(test_recordings_dir.glob("*.json")) + if json_files: + for f in json_files[:20]: + print(f" - {f.name}") + if len(json_files) > 20: + print(f" ... and {len(json_files) - 20} more") + else: + print(" (empty)") + else: + print(" Directory does not exist") + + print(f"\nFiles in fallback directory ({fallback_dir}):") + if fallback_dir.exists(): + json_files = list(fallback_dir.glob("*.json")) + if json_files: + for f in json_files[:20]: + print(f" - {f.name}") + if len(json_files) > 20: + print(f" ... and {len(json_files) - 20} more") + else: + print(" (empty)") + else: + print(" Directory does not exist") + + # Try partial hash match + print("\nLooking for partial matches (first 16 chars)...") + partial = hash_value[:16] + matches = [] + + for dir_to_search in [test_recordings_dir, fallback_dir]: + if dir_to_search.exists(): + for f in dir_to_search.glob("*.json"): + if f.stem.startswith(partial): + matches.append(f) + + if matches: + print(f"Found {len(matches)} partial match(es):") + for m in matches: + print(f" {m}") + else: + print("No partial matches found") + + return False + + +def show_recording(file_path: Path): + """Show contents of a recording file""" + if not file_path.exists(): + print(f"File does not exist: {file_path}") + return + + with open(file_path) as f: + data = json.load(f) + + print(f"\nRecording: {file_path.name}\n") + print(f"Test ID: {data.get('test_id', 'N/A')}") + print("\nRequest:") + req = data.get("request", {}) + print(f" Method: {req.get('method', 'N/A')}") + print(f" URL: {req.get('url', 'N/A')}") + print(f" Endpoint: {req.get('endpoint', 'N/A')}") + print(f" Model: {req.get('model', 'N/A')}") + + body = req.get("body", {}) + if body: + print("\nRequest Body:") + print(f" Model: {body.get('model', 'N/A')}") + print(f" Stream: {body.get('stream', 'N/A')}") + if "messages" in body: + print(f" Messages: {len(body['messages'])} message(s)") + for i, msg in enumerate(body["messages"][:3]): + role = msg.get("role", "unknown") + content = msg.get("content", "") + if isinstance(content, str): + preview = content[:80] + "..." if len(content) > 80 else content + else: + preview = f"[{type(content).__name__}]" + print(f" [{i}] {role}: {preview}") + if "tools" in body: + print(f" Tools: {len(body['tools'])} tool(s)") + + response = data.get("response", {}) + if response: + print("\nResponse:") + print(f" Is streaming: {response.get('is_streaming', False)}") + response_body = response.get("body", {}) + if isinstance(response_body, dict): + if "__type__" in response_body: + print(f" Type: {response_body['__type__']}") + if "__data__" in response_body: + response_data = response_body["__data__"] + if "choices" in response_data: + print(f" Choices: {len(response_data['choices'])}") + if "usage" in response_data: + usage = response_data["usage"] + print(f" Usage: in={usage.get('input_tokens')}, out={usage.get('output_tokens')}") + + +def list_test_recordings(test_id: str, base_dir: Path | None = None): + """List all recordings for a specific test""" + if base_dir is None: + base_dir = REPO_ROOT / "tests/integration/common" + + test_file = test_id.split("::")[0] + test_dir = Path(test_file).parent + + if base_dir.is_absolute(): + repo_root = base_dir.parent.parent.parent + test_recordings_dir = repo_root / test_dir / "recordings" + else: + test_recordings_dir = test_dir / "recordings" + + print(f"Recordings for test: {test_id}\n") + print(f"Directory: {test_recordings_dir}\n") + + if not test_recordings_dir.exists(): + print("Directory does not exist") + return + + # Find all recordings for this specific test + recordings = [] + for f in test_recordings_dir.glob("*.json"): + try: + with open(f) as fp: + data = json.load(fp) + if data.get("test_id") == test_id: + recordings.append((f, data)) + except Exception as e: + print(f"Could not read {f.name}: {e}") + + if not recordings: + print("No recordings found for this exact test ID") + print("\nAll files in directory:") + for f in test_recordings_dir.glob("*.json"): + print(f" - {f.name}") + return + + print(f"Found {len(recordings)} recording(s):\n") + for f, data in recordings: + req = data.get("request", {}) + print(f" {f.name}") + print(f" Endpoint: {req.get('endpoint', 'N/A')}") + print(f" Model: {req.get('model', 'N/A')}") + print("") + + +def explain_paths(test_id: str | None = None, base_dir: Path | None = None): + """Explain where recordings would be searched""" + if base_dir is None: + base_dir = REPO_ROOT / "tests/integration/common" + + print("Recording Lookup Path Explanation\n") + print(f"Base directory: {base_dir}") + print(f" Absolute: {base_dir.is_absolute()}") + print("") + + if test_id: + print(f"Test ID: {test_id}") + test_file = test_id.split("::")[0] + print(f" Test file: {test_file}") + + test_dir = Path(test_file).parent + print(f" Test dir (relative): {test_dir}") + + if base_dir.is_absolute(): + repo_root = base_dir.parent.parent.parent + print(f" Repo root: {repo_root}") + test_recordings_dir = repo_root / test_dir / "recordings" + print(f" Test recordings dir (absolute): {test_recordings_dir}") + else: + test_recordings_dir = test_dir / "recordings" + print(f" Test recordings dir (relative): {test_recordings_dir}") + + print("\nLookup order for recordings:") + print(f" 1. Test-specific: {test_recordings_dir}/.json") + print(f" 2. Fallback: {base_dir}/recordings/.json") + + else: + print("No test ID provided") + print("\nLookup location:") + print(f" {base_dir}/recordings/.json") + + +def compute_hash(endpoint: str, method: str, body_json: str, test_id: str | None = None): + """Compute hash for a request""" + if normalize_inference_request is None: + print("Could not import normalize_inference_request from llama_stack.testing.api_recorder") + print("Make sure you're running from the repo root with proper PYTHONPATH") + return + + try: + body = json.loads(body_json) + except json.JSONDecodeError as e: + print(f"Invalid JSON in body: {e}") + return + + # Create a fake URL with the endpoint + url = f"http://example.com{endpoint}" + + # Set test context if provided + if test_id: + from llama_stack.core.testing_context import set_test_context + + set_test_context(test_id) + + hash_result = normalize_inference_request(method, url, {}, body) + + print("Hash Computation\n") + print(f"Method: {method}") + print(f"Endpoint: {endpoint}") + print(f"Test ID: {test_id or 'None (excluded from hash for model-list endpoints)'}") + print("\nBody:") + print(json.dumps(body, indent=2)) + print(f"\nComputed Hash: {hash_result}") + print(f"\nLooking for file: {hash_result}.json") + + +def main(): + parser = argparse.ArgumentParser( + description="Diagnostic tool for test recording issues", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=__doc__, + ) + + subparsers = parser.add_subparsers(dest="command", help="Command to run") + + # find-hash command + find_parser = subparsers.add_parser("find-hash", help="Find where a hash would be looked up") + find_parser.add_argument("hash", help="Hash value to search for (full or partial)") + find_parser.add_argument("--test-id", help="Test ID to determine search paths") + find_parser.add_argument("--base-dir", type=Path, help="Base directory (default: tests/integration/common)") + + # show command + show_parser = subparsers.add_parser("show", help="Show contents of a recording file") + show_parser.add_argument("file", type=Path, help="Path to recording JSON file") + + # list-test command + list_parser = subparsers.add_parser("list-test", help="List all recordings for a test") + list_parser.add_argument("test_id", help="Full test ID (e.g., tests/integration/agents/test_agents.py::test_foo)") + list_parser.add_argument("--base-dir", type=Path, help="Base directory (default: tests/integration/common)") + + # explain-paths command + explain_parser = subparsers.add_parser("explain-paths", help="Explain where recordings are searched") + explain_parser.add_argument("--test-id", help="Test ID to show paths for") + explain_parser.add_argument("--base-dir", type=Path, help="Base directory (default: tests/integration/common)") + + # compute-hash command + hash_parser = subparsers.add_parser("compute-hash", help="Compute hash for a request") + hash_parser.add_argument("--endpoint", required=True, help="Endpoint path (e.g., /v1/chat/completions)") + hash_parser.add_argument("--method", default="POST", help="HTTP method (default: POST)") + hash_parser.add_argument("--body", required=True, help="Request body as JSON string") + hash_parser.add_argument("--test-id", help="Test ID (affects hash for non-model-list endpoints)") + + args = parser.parse_args() + + if not args.command: + parser.print_help() + return + + if args.command == "find-hash": + find_hash(args.hash, args.base_dir, args.test_id) + elif args.command == "show": + show_recording(args.file) + elif args.command == "list-test": + list_test_recordings(args.test_id, args.base_dir) + elif args.command == "explain-paths": + explain_paths(args.test_id, args.base_dir) + elif args.command == "compute-hash": + compute_hash(args.endpoint, args.method, args.body, args.test_id) + + +if __name__ == "__main__": + main() diff --git a/scripts/docker.sh b/scripts/docker.sh new file mode 100755 index 000000000..1ba1d9adf --- /dev/null +++ b/scripts/docker.sh @@ -0,0 +1,358 @@ +#!/bin/bash +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +set -euo pipefail + +# Docker container management script for Llama Stack +# Allows starting/stopping/restarting a Llama Stack docker container for testing + +# Default values +DISTRO="" +PORT=8321 +INFERENCE_MODE="replay" +COMMAND="" +USE_COPY_NOT_MOUNT=false +NO_REBUILD=false + +# Function to display usage +usage() { + cat </dev/null || true + echo "Removing container: $CONTAINER_NAME" + docker rm "$CONTAINER_NAME" 2>/dev/null || true + echo "✅ Container stopped and removed" + else + echo "⚠️ Container $CONTAINER_NAME does not exist" + fi +} + +# Function to build docker image +build_image() { + echo "=== Building Docker Image for distribution: $DISTRO ===" + # Get the repo root (parent of scripts directory) + SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) + REPO_ROOT=$(cd "$SCRIPT_DIR/.." && pwd) + + # Determine whether to copy or mount source + # Copy in CI or if explicitly requested, otherwise mount for live development + BUILD_ENV="LLAMA_STACK_DIR=$REPO_ROOT" + if [[ "$USE_COPY_NOT_MOUNT" == "true" ]] || [[ "${CI:-false}" == "true" ]] || [[ "${GITHUB_ACTIONS:-false}" == "true" ]]; then + echo "Copying source into image (USE_COPY_NOT_MOUNT=true, CI=${CI:-false}, GITHUB_ACTIONS=${GITHUB_ACTIONS:-false})" + BUILD_ENV="USE_COPY_NOT_MOUNT=true $BUILD_ENV" + else + echo "Will mount source for live development" + fi + + if ! eval "$BUILD_ENV llama stack build --distro '$DISTRO' --image-type container"; then + echo "❌ Failed to build Docker image" + exit 1 + fi + echo "✅ Docker image built successfully" +} + +# Function to start container +start_container() { + # Check if already running + if is_container_running; then + echo "⚠️ Container $CONTAINER_NAME is already running" + echo "URL: http://localhost:$PORT" + exit 0 + fi + + # Stop and remove if exists but not running + if container_exists; then + echo "Removing existing stopped container..." + docker rm "$CONTAINER_NAME" 2>/dev/null || true + fi + + # Build the image unless --no-rebuild was specified + if [[ "$NO_REBUILD" == "true" ]]; then + echo "Skipping build (--no-rebuild specified)" + # Check if image exists (with or without localhost/ prefix) + if ! docker images --format "{{.Repository}}:{{.Tag}}" | grep -q "distribution-$DISTRO:dev$"; then + echo "❌ Error: Image distribution-$DISTRO:dev does not exist" + echo "Either build it first without --no-rebuild, or run: llama stack build --distro $DISTRO --image-type container" + exit 1 + fi + echo "✅ Found existing image for distribution-$DISTRO:dev" + else + build_image + fi + + echo "" + echo "=== Starting Docker Container ===" + + # Get the repo root for volume mount + SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) + REPO_ROOT=$(cd "$SCRIPT_DIR/.." && pwd) + + # Determine the actual image name (may have localhost/ prefix) + IMAGE_NAME=$(docker images --format "{{.Repository}}:{{.Tag}}" | grep "distribution-$DISTRO:dev$" | head -1) + if [[ -z "$IMAGE_NAME" ]]; then + echo "❌ Error: Could not find image for distribution-$DISTRO:dev" + exit 1 + fi + echo "Using image: $IMAGE_NAME" + + # Build environment variables for docker run + DOCKER_ENV_VARS="" + DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e LLAMA_STACK_TEST_INFERENCE_MODE=$INFERENCE_MODE" + DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e LLAMA_STACK_TEST_STACK_CONFIG_TYPE=server" + + # Set default OLLAMA_URL if not provided + # On macOS/Windows, use host.docker.internal to reach host from container + # On Linux with --network host, use localhost + if [[ "$(uname)" == "Darwin" ]] || [[ "$(uname)" == *"MINGW"* ]]; then + OLLAMA_URL="${OLLAMA_URL:-http://host.docker.internal:11434}" + else + OLLAMA_URL="${OLLAMA_URL:-http://localhost:11434}" + fi + DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e OLLAMA_URL=$OLLAMA_URL" + + # Pass through API keys if they exist + [ -n "${TOGETHER_API_KEY:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e TOGETHER_API_KEY=$TOGETHER_API_KEY" + [ -n "${FIREWORKS_API_KEY:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e FIREWORKS_API_KEY=$FIREWORKS_API_KEY" + [ -n "${TAVILY_SEARCH_API_KEY:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e TAVILY_SEARCH_API_KEY=$TAVILY_SEARCH_API_KEY" + [ -n "${OPENAI_API_KEY:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e OPENAI_API_KEY=$OPENAI_API_KEY" + [ -n "${ANTHROPIC_API_KEY:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e ANTHROPIC_API_KEY=$ANTHROPIC_API_KEY" + [ -n "${GROQ_API_KEY:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e GROQ_API_KEY=$GROQ_API_KEY" + [ -n "${GEMINI_API_KEY:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e GEMINI_API_KEY=$GEMINI_API_KEY" + [ -n "${SAFETY_MODEL:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e SAFETY_MODEL=$SAFETY_MODEL" + [ -n "${SQLITE_STORE_DIR:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e SQLITE_STORE_DIR=$SQLITE_STORE_DIR" + + # Use --network host on Linux only (macOS doesn't support it properly) + NETWORK_MODE="" + if [[ "$(uname)" != "Darwin" ]] && [[ "$(uname)" != *"MINGW"* ]]; then + NETWORK_MODE="--network host" + fi + + docker run -d $NETWORK_MODE --name "$CONTAINER_NAME" \ + -p $PORT:$PORT \ + $DOCKER_ENV_VARS \ + -v "$REPO_ROOT":/app/llama-stack-source \ + "$IMAGE_NAME" \ + --port $PORT + + echo "Waiting for container to start..." + for i in {1..30}; do + if curl -s http://localhost:$PORT/v1/health 2>/dev/null | grep -q "OK"; then + echo "✅ Container started successfully" + echo "" + echo "=== Container Information ===" + echo "Container name: $CONTAINER_NAME" + echo "URL: http://localhost:$PORT" + echo "Health check: http://localhost:$PORT/v1/health" + echo "" + echo "To view logs: $0 logs --distro $DISTRO" + echo "To stop: $0 stop --distro $DISTRO" + return 0 + fi + if [[ $i -eq 30 ]]; then + echo "❌ Container failed to start within timeout" + echo "Showing container logs:" + docker logs "$CONTAINER_NAME" + exit 1 + fi + sleep 1 + done +} + +# Execute command +case "$COMMAND" in +start) + start_container + ;; +stop) + stop_container + ;; +restart) + echo "Restarting container: $CONTAINER_NAME" + stop_container + echo "" + start_container + ;; +status) + if is_container_running; then + echo "✅ Container $CONTAINER_NAME is running" + echo "URL: http://localhost:$PORT" + # Try to get the actual port from the container + ACTUAL_PORT=$(docker port "$CONTAINER_NAME" 2>/dev/null | grep "8321/tcp" | cut -d':' -f2 | head -1) + if [[ -n "$ACTUAL_PORT" ]]; then + echo "Port: $ACTUAL_PORT" + fi + elif container_exists; then + echo "⚠️ Container $CONTAINER_NAME exists but is not running" + echo "Start it with: $0 start --distro $DISTRO" + else + echo "❌ Container $CONTAINER_NAME does not exist" + echo "Start it with: $0 start --distro $DISTRO" + fi + ;; +logs) + if container_exists; then + echo "=== Logs for $CONTAINER_NAME ===" + # Check if -f flag was passed after 'logs' command + if [[ "${1:-}" == "-f" || "${1:-}" == "--follow" ]]; then + docker logs --tail 100 --follow "$CONTAINER_NAME" + else + docker logs --tail 100 "$CONTAINER_NAME" + fi + else + echo "❌ Container $CONTAINER_NAME does not exist" + exit 1 + fi + ;; +esac diff --git a/scripts/integration-tests.sh b/scripts/integration-tests.sh index 138f1d144..e19a5cc55 100755 --- a/scripts/integration-tests.sh +++ b/scripts/integration-tests.sh @@ -42,9 +42,12 @@ Setups are defined in tests/integration/setups.py and provide global configurati You can also specify subdirectories (of tests/integration) to select tests from, which will override the suite. Examples: - # Basic inference tests with ollama + # Basic inference tests with ollama (server mode) $0 --stack-config server:ci-tests --suite base --setup ollama + # Basic inference tests with docker + $0 --stack-config docker:ci-tests --suite base --setup ollama + # Multiple test directories with vllm $0 --stack-config server:ci-tests --subdirs 'inference,agents' --setup vllm @@ -153,7 +156,7 @@ echo "Setting SQLITE_STORE_DIR: $SQLITE_STORE_DIR" # Determine stack config type for api_recorder test isolation if [[ "$COLLECT_ONLY" == false ]]; then - if [[ "$STACK_CONFIG" == server:* ]]; then + if [[ "$STACK_CONFIG" == server:* ]] || [[ "$STACK_CONFIG" == docker:* ]]; then export LLAMA_STACK_TEST_STACK_CONFIG_TYPE="server" echo "Setting stack config type: server" else @@ -229,6 +232,104 @@ if [[ "$STACK_CONFIG" == *"server:"* && "$COLLECT_ONLY" == false ]]; then trap stop_server EXIT ERR INT TERM fi +# Start Docker Container if needed +if [[ "$STACK_CONFIG" == *"docker:"* && "$COLLECT_ONLY" == false ]]; then + stop_container() { + echo "Stopping Docker container..." + container_name="llama-stack-test-$DISTRO" + if docker ps -a --format '{{.Names}}' | grep -q "^${container_name}$"; then + echo "Stopping and removing container: $container_name" + docker stop "$container_name" 2>/dev/null || true + docker rm "$container_name" 2>/dev/null || true + else + echo "No container named $container_name found" + fi + echo "Docker container stopped" + } + + # Extract distribution name from docker:distro format + DISTRO=$(echo "$STACK_CONFIG" | sed 's/^docker://') + export LLAMA_STACK_PORT=8321 + + echo "=== Building Docker Image for distribution: $DISTRO ===" + # Set LLAMA_STACK_DIR to repo root + # USE_COPY_NOT_MOUNT copies files into image (for CI), otherwise mounts for live development + BUILD_ENV="LLAMA_STACK_DIR=$ROOT_DIR" + if [[ "${CI:-false}" == "true" ]] || [[ "${GITHUB_ACTIONS:-false}" == "true" ]]; then + echo "CI detected (CI=$CI, GITHUB_ACTIONS=$GITHUB_ACTIONS): copying source into image" + BUILD_ENV="USE_COPY_NOT_MOUNT=true $BUILD_ENV" + else + echo "Local mode: will mount source for live development" + fi + + eval "$BUILD_ENV llama stack build --distro '$DISTRO' --image-type container" + + if [ $? -ne 0 ]; then + echo "❌ Failed to build Docker image" + exit 1 + fi + + echo "" + echo "=== Starting Docker Container ===" + container_name="llama-stack-test-$DISTRO" + + # Stop and remove existing container if it exists + docker stop "$container_name" 2>/dev/null || true + docker rm "$container_name" 2>/dev/null || true + + # Build environment variables for docker run + DOCKER_ENV_VARS="" + DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e LLAMA_STACK_TEST_INFERENCE_MODE=$INFERENCE_MODE" + DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e LLAMA_STACK_TEST_STACK_CONFIG_TYPE=server" + + # Pass through API keys if they exist + [ -n "${TOGETHER_API_KEY:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e TOGETHER_API_KEY=$TOGETHER_API_KEY" + [ -n "${FIREWORKS_API_KEY:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e FIREWORKS_API_KEY=$FIREWORKS_API_KEY" + [ -n "${TAVILY_SEARCH_API_KEY:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e TAVILY_SEARCH_API_KEY=$TAVILY_SEARCH_API_KEY" + [ -n "${OPENAI_API_KEY:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e OPENAI_API_KEY=$OPENAI_API_KEY" + [ -n "${ANTHROPIC_API_KEY:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e ANTHROPIC_API_KEY=$ANTHROPIC_API_KEY" + [ -n "${GROQ_API_KEY:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e GROQ_API_KEY=$GROQ_API_KEY" + [ -n "${GEMINI_API_KEY:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e GEMINI_API_KEY=$GEMINI_API_KEY" + [ -n "${OLLAMA_URL:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e OLLAMA_URL=$OLLAMA_URL" + [ -n "${SAFETY_MODEL:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e SAFETY_MODEL=$SAFETY_MODEL" + + # Determine the actual image name (may have localhost/ prefix) + IMAGE_NAME=$(docker images --format "{{.Repository}}:{{.Tag}}" | grep "distribution-$DISTRO:dev$" | head -1) + if [[ -z "$IMAGE_NAME" ]]; then + echo "❌ Error: Could not find image for distribution-$DISTRO:dev" + exit 1 + fi + echo "Using image: $IMAGE_NAME" + + docker run -d --network host --name "$container_name" \ + -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ + $DOCKER_ENV_VARS \ + -v $ROOT_DIR:/app/llama-stack-source \ + "$IMAGE_NAME" \ + --port $LLAMA_STACK_PORT + + echo "Waiting for Docker container to start..." + for i in {1..30}; do + if curl -s http://localhost:$LLAMA_STACK_PORT/v1/health 2>/dev/null | grep -q "OK"; then + echo "✅ Docker container started successfully" + break + fi + if [[ $i -eq 30 ]]; then + echo "❌ Docker container failed to start" + echo "Container logs:" + docker logs "$container_name" + exit 1 + fi + sleep 1 + done + echo "" + + # Update STACK_CONFIG to point to the running container + STACK_CONFIG="http://localhost:$LLAMA_STACK_PORT" + + trap stop_container EXIT ERR INT TERM +fi + # Run tests echo "=== Running Integration Tests ===" EXCLUDE_TESTS="builtin_tool or safety_with_image or code_interpreter or test_rag" diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index 7734b8654..3137de0de 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -42,7 +42,9 @@ def pytest_sessionstart(session): # Set test stack config type for api_recorder test isolation stack_config = session.config.getoption("--stack-config", default=None) - if stack_config and (stack_config.startswith("server:") or stack_config.startswith("http")): + if stack_config and ( + stack_config.startswith("server:") or stack_config.startswith("docker:") or stack_config.startswith("http") + ): os.environ["LLAMA_STACK_TEST_STACK_CONFIG_TYPE"] = "server" logger.info(f"Test stack config type: server (stack_config={stack_config})") else: @@ -139,7 +141,9 @@ def pytest_addoption(parser): a 'pointer' to the stack. this can be either be: (a) a template name like `starter`, or (b) a path to a run.yaml file, or - (c) an adhoc config spec, e.g. `inference=fireworks,safety=llama-guard,agents=meta-reference` + (c) an adhoc config spec, e.g. `inference=fireworks,safety=llama-guard,agents=meta-reference`, or + (d) a server config like `server:ci-tests`, or + (e) a docker config like `docker:ci-tests` (builds and runs container) """ ), )