feat(ci): add support for docker:distro in tests (#3832)
Some checks failed
Integration Auth Tests / test-matrix (oauth2_token) (push) Failing after 1s
SqlStore Integration Tests / test-postgres (3.13) (push) Failing after 0s
SqlStore Integration Tests / test-postgres (3.12) (push) Failing after 0s
Test External Providers Installed via Module / test-external-providers-from-module (venv) (push) Has been skipped
Python Package Build Test / build (3.13) (push) Failing after 2s
Test Llama Stack Build / generate-matrix (push) Successful in 6s
Unit Tests / unit-tests (3.12) (push) Failing after 5s
Test Llama Stack Build / build-single-provider (push) Failing after 9s
Test Llama Stack Build / build-ubi9-container-distribution (push) Failing after 10s
Vector IO Integration Tests / test-matrix (push) Failing after 14s
Unit Tests / unit-tests (3.13) (push) Failing after 7s
Test External API and Providers / test-external (venv) (push) Failing after 12s
API Conformance Tests / check-schema-compatibility (push) Successful in 19s
Test Llama Stack Build / build (push) Failing after 7s
Integration Tests (Replay) / Integration Tests (push) Failing after 26s
Test Llama Stack Build / build-custom-container-distribution (push) Failing after 25s
Python Package Build Test / build (3.12) (push) Failing after 33s
UI Tests / ui-tests (22) (push) Successful in 1m26s
Pre-commit / pre-commit (push) Successful in 2m18s

Also includes a critical bug fix so that test recordings can be found inside docker containers.
Ashwin Bharambe 2025-10-16 19:33:13 -07:00 committed by GitHub
parent b3099d40e2
commit cd152f4240
7 changed files with 932 additions and 12 deletions


@@ -338,7 +338,7 @@ fi
# Add other required commands generic to all containers
add_to_container << EOF
RUN mkdir -p /.llama /.cache && chmod -R g+rw /app /.llama /.cache
RUN mkdir -p /.llama /.cache && chmod -R g+rw /.llama /.cache && (chmod -R g+rw /app 2>/dev/null || true)
EOF
printf "Containerfile created successfully in %s/Containerfile\n\n" "$TEMP_DIR"


@@ -42,3 +42,8 @@ def sync_test_context_from_provider_data():
return TEST_CONTEXT.set(provider_data["__test_id"])
return None
def is_debug_mode() -> bool:
"""Check if test recording debug mode is enabled via LLAMA_STACK_TEST_DEBUG env var."""
return os.environ.get("LLAMA_STACK_TEST_DEBUG", "").lower() in ("1", "true", "yes")
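A hedged usage sketch — the runner script path is an assumption; any entry point that exercises the recorder honors the same variable:

    # "1", "true", or "yes" (case-insensitive) enables the [RECORDING DEBUG] logs added below
    LLAMA_STACK_TEST_DEBUG=1 ./scripts/integration-tests.sh --stack-config docker:ci-tests --suite base --setup ollama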


@@ -37,7 +37,7 @@ _id_counters: dict[str, dict[str, int]] = {}
# Test context uses ContextVar since it changes per-test and needs async isolation
from openai.types.completion_choice import CompletionChoice
from llama_stack.core.testing_context import get_test_context
from llama_stack.core.testing_context import get_test_context, is_debug_mode
# update the "finish_reason" field, since its type definition is wrong (it does not accept None)
CompletionChoice.model_fields["finish_reason"].annotation = Literal["stop", "length", "content_filter"] | None
@@ -146,6 +146,7 @@ def normalize_inference_request(method: str, url: str, headers: dict[str, Any],
body_for_hash = _normalize_body_for_hash(body)
test_id = get_test_context()
normalized: dict[str, Any] = {
"method": method.upper(),
"endpoint": parsed.path,
@@ -154,10 +155,20 @@ def normalize_inference_request(method: str, url: str, headers: dict[str, Any],
# Include test_id for isolation, except for shared infrastructure endpoints
if parsed.path not in ("/api/tags", "/v1/models"):
normalized["test_id"] = get_test_context()
normalized["test_id"] = test_id
normalized_json = json.dumps(normalized, sort_keys=True)
return hashlib.sha256(normalized_json.encode()).hexdigest()
request_hash = hashlib.sha256(normalized_json.encode()).hexdigest()
if is_debug_mode():
logger.info("[RECORDING DEBUG] Hash computation:")
logger.info(f" Test ID: {test_id}")
logger.info(f" Method: {method.upper()}")
logger.info(f" Endpoint: {parsed.path}")
logger.info(f" Model: {body.get('model', 'N/A')}")
logger.info(f" Computed hash: {request_hash}")
return request_hash
def normalize_tool_request(provider_name: str, tool_name: str, kwargs: dict[str, Any]) -> str:
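The hash can be recomputed offline with the diagnose_recordings.py script added later in this commit; an illustrative invocation (body and test ID are placeholders):

    ./scripts/diagnose_recordings.py compute-hash \
      --endpoint /v1/chat/completions --method POST \
      --body '{"model":"llama3.2:3b","stream":false}' \
      --test-id "tests/integration/agents/test_agents.py::test_foo"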
@@ -212,6 +223,11 @@ def patch_httpx_for_test_id():
provider_data["__test_id"] = test_id
request.headers["X-LlamaStack-Provider-Data"] = json.dumps(provider_data)
if is_debug_mode():
logger.info("[RECORDING DEBUG] Injected test ID into request header:")
logger.info(f" Test ID: {test_id}")
logger.info(f" URL: {request.url}")
return None
LlamaStackClient._prepare_request = patched_prepare_request
@@ -355,12 +371,35 @@ class ResponseStorage:
test_file = test_id.split("::")[0] # Remove test function part
test_dir = Path(test_file).parent # Get parent directory
# Put recordings in a "recordings" subdirectory of the test's parent dir
# e.g., "tests/integration/inference" -> "tests/integration/inference/recordings"
return test_dir / "recordings"
if self.base_dir.is_absolute():
repo_root = self.base_dir.parent.parent.parent
result = repo_root / test_dir / "recordings"
if is_debug_mode():
logger.info("[RECORDING DEBUG] Path resolution (absolute base_dir):")
logger.info(f" Test ID: {test_id}")
logger.info(f" Base dir: {self.base_dir}")
logger.info(f" Repo root: {repo_root}")
logger.info(f" Test file: {test_file}")
logger.info(f" Test dir: {test_dir}")
logger.info(f" Recordings dir: {result}")
return result
else:
result = test_dir / "recordings"
if is_debug_mode():
logger.info("[RECORDING DEBUG] Path resolution (relative base_dir):")
logger.info(f" Test ID: {test_id}")
logger.info(f" Base dir: {self.base_dir}")
logger.info(f" Test dir: {test_dir}")
logger.info(f" Recordings dir: {result}")
return result
else:
# Fallback for non-test contexts
return self.base_dir / "recordings"
result = self.base_dir / "recordings"
if is_debug_mode():
logger.info("[RECORDING DEBUG] Path resolution (no test context):")
logger.info(f" Base dir: {self.base_dir}")
logger.info(f" Recordings dir: {result}")
return result
def _ensure_directory(self):
"""Ensure test-specific directories exist."""
@@ -395,6 +434,13 @@ class ResponseStorage:
response_path = responses_dir / response_file
if is_debug_mode():
logger.info("[RECORDING DEBUG] Storing recording:")
logger.info(f" Request hash: {request_hash}")
logger.info(f" File: {response_path}")
logger.info(f" Test ID: {get_test_context()}")
logger.info(f" Endpoint: {endpoint}")
# Save response to JSON file with metadata
with open(response_path, "w") as f:
json.dump(
@@ -423,16 +469,33 @@ class ResponseStorage:
test_dir = self._get_test_dir()
response_path = test_dir / response_file
if is_debug_mode():
logger.info("[RECORDING DEBUG] Looking up recording:")
logger.info(f" Request hash: {request_hash}")
logger.info(f" Primary path: {response_path}")
logger.info(f" Primary exists: {response_path.exists()}")
if response_path.exists():
if is_debug_mode():
logger.info(" Found in primary location")
return _recording_from_file(response_path)
# Fallback to base recordings directory (for session-level recordings)
fallback_dir = self.base_dir / "recordings"
fallback_path = fallback_dir / response_file
if is_debug_mode():
logger.info(f" Fallback path: {fallback_path}")
logger.info(f" Fallback exists: {fallback_path.exists()}")
if fallback_path.exists():
if is_debug_mode():
logger.info(" Found in fallback location")
return _recording_from_file(fallback_path)
if is_debug_mode():
logger.info(" Recording not found in either location")
return None
def _model_list_responses(self, request_hash: str) -> list[dict[str, Any]]:
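The find-hash subcommand of the new diagnostic script walks these same two locations (test-specific directory first, then the base-dir fallback); for example, using the sample hash from its docstring:

    ./scripts/diagnose_recordings.py find-hash 7526c930eab04ce337496a26cd15f2591d7943035f2527182861643da9b837a7 \
      --test-id "tests/integration/agents/test_agents.py::test_foo"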
@@ -588,6 +651,13 @@ async def _patched_inference_method(original_method, self, client_type, endpoint
mode = _current_mode
storage = _current_storage
if is_debug_mode():
logger.info("[RECORDING DEBUG] Entering inference method:")
logger.info(f" Mode: {mode}")
logger.info(f" Client type: {client_type}")
logger.info(f" Endpoint: {endpoint}")
logger.info(f" Test context: {get_test_context()}")
if mode == APIRecordingMode.LIVE or storage is None:
if endpoint == "/v1/models":
return original_method(self, *args, **kwargs)
@@ -643,6 +713,18 @@ async def _patched_inference_method(original_method, self, client_type, endpoint
return response_body
elif mode == APIRecordingMode.REPLAY:
# REPLAY mode requires recording to exist
if is_debug_mode():
logger.error("[RECORDING DEBUG] Recording not found!")
logger.error(f" Mode: {mode}")
logger.error(f" Request hash: {request_hash}")
logger.error(f" Method: {method}")
logger.error(f" URL: {url}")
logger.error(f" Endpoint: {endpoint}")
logger.error(f" Model: {body.get('model', 'unknown')}")
logger.error(f" Test context: {get_test_context()}")
logger.error(
f" Stack config type: {os.environ.get('LLAMA_STACK_TEST_STACK_CONFIG_TYPE', 'library_client')}"
)
raise RuntimeError(
f"Recording not found for request hash: {request_hash}\n"
f"Model: {body.get('model', 'unknown')} | Request: {method} {url}\n"

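When replay fails like this, the missing entries can be regenerated by rerunning against a live backend in record mode, e.g. with the docker helper added below (distro name illustrative):

    ./scripts/docker.sh start --distro ci-tests --inference-mode record             # re-record everything
    ./scripts/docker.sh start --distro ci-tests --inference-mode record-if-missing  # only fill in gaps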
scripts/diagnose_recordings.py (Executable file, +370 lines)

@@ -0,0 +1,370 @@
#!/usr/bin/env python3
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
"""
Diagnostic tool for debugging test recording issues.
Usage:
# Find where a hash would be looked up
./scripts/diagnose_recordings.py find-hash 7526c930eab04ce337496a26cd15f2591d7943035f2527182861643da9b837a7
# Show what's in a recording file
./scripts/diagnose_recordings.py show tests/integration/agents/recordings/7526c930....json
# List all recordings for a test
./scripts/diagnose_recordings.py list-test "tests/integration/agents/test_agents.py::test_custom_tool"
# Explain lookup paths for a test
./scripts/diagnose_recordings.py explain-paths --test-id "tests/integration/agents/test_agents.py::test_foo"
# Compare request hash computation
./scripts/diagnose_recordings.py compute-hash --endpoint /v1/chat/completions --method POST --body '{"model":"llama3.2:3b"}' --test-id "..."
"""
import argparse
import json
import sys
from pathlib import Path
# Add parent directory to path to import from llama_stack
REPO_ROOT = Path(__file__).parent.parent
sys.path.insert(0, str(REPO_ROOT))
try:
from llama_stack.testing.api_recorder import normalize_inference_request
except ImportError:
normalize_inference_request = None
def find_hash(hash_value: str, base_dir: Path | None = None, test_id: str | None = None):
"""Find where a hash would be looked up and what exists"""
if base_dir is None:
base_dir = REPO_ROOT / "tests/integration/common"
print(f"Searching for hash: {hash_value}\n")
print(f"Base dir: {base_dir} (absolute={base_dir.is_absolute()})")
# Compute test directory
if test_id:
test_file = test_id.split("::")[0]
test_dir = Path(test_file).parent
if base_dir.is_absolute():
repo_root = base_dir.parent.parent.parent
test_recordings_dir = repo_root / test_dir / "recordings"
else:
test_recordings_dir = test_dir / "recordings"
print(f"Test ID: {test_id}")
print(f"Test dir: {test_recordings_dir}\n")
else:
test_recordings_dir = base_dir / "recordings"
print("No test ID provided, using base dir\n")
# Check primary location
response_file = f"{hash_value}.json"
response_path = test_recordings_dir / response_file
print("Checking primary location:")
print(f" {response_path}")
if response_path.exists():
print(" EXISTS")
print("\nFound! Contents:")
show_recording(response_path)
return True
else:
print(" Does not exist")
# Check fallback location
fallback_dir = base_dir / "recordings"
fallback_path = fallback_dir / response_file
print("\nChecking fallback location:")
print(f" {fallback_path}")
if fallback_path.exists():
print(" EXISTS")
print("\nFound in fallback! Contents:")
show_recording(fallback_path)
return True
else:
print(" Does not exist")
# Show what files DO exist
print(f"\nFiles in test directory ({test_recordings_dir}):")
if test_recordings_dir.exists():
json_files = list(test_recordings_dir.glob("*.json"))
if json_files:
for f in json_files[:20]:
print(f" - {f.name}")
if len(json_files) > 20:
print(f" ... and {len(json_files) - 20} more")
else:
print(" (empty)")
else:
print(" Directory does not exist")
print(f"\nFiles in fallback directory ({fallback_dir}):")
if fallback_dir.exists():
json_files = list(fallback_dir.glob("*.json"))
if json_files:
for f in json_files[:20]:
print(f" - {f.name}")
if len(json_files) > 20:
print(f" ... and {len(json_files) - 20} more")
else:
print(" (empty)")
else:
print(" Directory does not exist")
# Try partial hash match
print("\nLooking for partial matches (first 16 chars)...")
partial = hash_value[:16]
matches = []
for dir_to_search in [test_recordings_dir, fallback_dir]:
if dir_to_search.exists():
for f in dir_to_search.glob("*.json"):
if f.stem.startswith(partial):
matches.append(f)
if matches:
print(f"Found {len(matches)} partial match(es):")
for m in matches:
print(f" {m}")
else:
print("No partial matches found")
return False
def show_recording(file_path: Path):
"""Show contents of a recording file"""
if not file_path.exists():
print(f"File does not exist: {file_path}")
return
with open(file_path) as f:
data = json.load(f)
print(f"\nRecording: {file_path.name}\n")
print(f"Test ID: {data.get('test_id', 'N/A')}")
print("\nRequest:")
req = data.get("request", {})
print(f" Method: {req.get('method', 'N/A')}")
print(f" URL: {req.get('url', 'N/A')}")
print(f" Endpoint: {req.get('endpoint', 'N/A')}")
print(f" Model: {req.get('model', 'N/A')}")
body = req.get("body", {})
if body:
print("\nRequest Body:")
print(f" Model: {body.get('model', 'N/A')}")
print(f" Stream: {body.get('stream', 'N/A')}")
if "messages" in body:
print(f" Messages: {len(body['messages'])} message(s)")
for i, msg in enumerate(body["messages"][:3]):
role = msg.get("role", "unknown")
content = msg.get("content", "")
if isinstance(content, str):
preview = content[:80] + "..." if len(content) > 80 else content
else:
preview = f"[{type(content).__name__}]"
print(f" [{i}] {role}: {preview}")
if "tools" in body:
print(f" Tools: {len(body['tools'])} tool(s)")
response = data.get("response", {})
if response:
print("\nResponse:")
print(f" Is streaming: {response.get('is_streaming', False)}")
response_body = response.get("body", {})
if isinstance(response_body, dict):
if "__type__" in response_body:
print(f" Type: {response_body['__type__']}")
if "__data__" in response_body:
response_data = response_body["__data__"]
if "choices" in response_data:
print(f" Choices: {len(response_data['choices'])}")
if "usage" in response_data:
usage = response_data["usage"]
print(f" Usage: in={usage.get('input_tokens')}, out={usage.get('output_tokens')}")
def list_test_recordings(test_id: str, base_dir: Path | None = None):
"""List all recordings for a specific test"""
if base_dir is None:
base_dir = REPO_ROOT / "tests/integration/common"
test_file = test_id.split("::")[0]
test_dir = Path(test_file).parent
if base_dir.is_absolute():
repo_root = base_dir.parent.parent.parent
test_recordings_dir = repo_root / test_dir / "recordings"
else:
test_recordings_dir = test_dir / "recordings"
print(f"Recordings for test: {test_id}\n")
print(f"Directory: {test_recordings_dir}\n")
if not test_recordings_dir.exists():
print("Directory does not exist")
return
# Find all recordings for this specific test
recordings = []
for f in test_recordings_dir.glob("*.json"):
try:
with open(f) as fp:
data = json.load(fp)
if data.get("test_id") == test_id:
recordings.append((f, data))
except Exception as e:
print(f"Could not read {f.name}: {e}")
if not recordings:
print("No recordings found for this exact test ID")
print("\nAll files in directory:")
for f in test_recordings_dir.glob("*.json"):
print(f" - {f.name}")
return
print(f"Found {len(recordings)} recording(s):\n")
for f, data in recordings:
req = data.get("request", {})
print(f" {f.name}")
print(f" Endpoint: {req.get('endpoint', 'N/A')}")
print(f" Model: {req.get('model', 'N/A')}")
print("")
def explain_paths(test_id: str | None = None, base_dir: Path | None = None):
"""Explain where recordings would be searched"""
if base_dir is None:
base_dir = REPO_ROOT / "tests/integration/common"
print("Recording Lookup Path Explanation\n")
print(f"Base directory: {base_dir}")
print(f" Absolute: {base_dir.is_absolute()}")
print("")
if test_id:
print(f"Test ID: {test_id}")
test_file = test_id.split("::")[0]
print(f" Test file: {test_file}")
test_dir = Path(test_file).parent
print(f" Test dir (relative): {test_dir}")
if base_dir.is_absolute():
repo_root = base_dir.parent.parent.parent
print(f" Repo root: {repo_root}")
test_recordings_dir = repo_root / test_dir / "recordings"
print(f" Test recordings dir (absolute): {test_recordings_dir}")
else:
test_recordings_dir = test_dir / "recordings"
print(f" Test recordings dir (relative): {test_recordings_dir}")
print("\nLookup order for recordings:")
print(f" 1. Test-specific: {test_recordings_dir}/<hash>.json")
print(f" 2. Fallback: {base_dir}/recordings/<hash>.json")
else:
print("No test ID provided")
print("\nLookup location:")
print(f" {base_dir}/recordings/<hash>.json")
def compute_hash(endpoint: str, method: str, body_json: str, test_id: str | None = None):
"""Compute hash for a request"""
if normalize_inference_request is None:
print("Could not import normalize_inference_request from llama_stack.testing.api_recorder")
print("Make sure you're running from the repo root with proper PYTHONPATH")
return
try:
body = json.loads(body_json)
except json.JSONDecodeError as e:
print(f"Invalid JSON in body: {e}")
return
# Create a fake URL with the endpoint
url = f"http://example.com{endpoint}"
# Set test context if provided
if test_id:
from llama_stack.core.testing_context import set_test_context
set_test_context(test_id)
hash_result = normalize_inference_request(method, url, {}, body)
print("Hash Computation\n")
print(f"Method: {method}")
print(f"Endpoint: {endpoint}")
print(f"Test ID: {test_id or 'None (excluded from hash for model-list endpoints)'}")
print("\nBody:")
print(json.dumps(body, indent=2))
print(f"\nComputed Hash: {hash_result}")
print(f"\nLooking for file: {hash_result}.json")
def main():
parser = argparse.ArgumentParser(
description="Diagnostic tool for test recording issues",
formatter_class=argparse.RawDescriptionHelpFormatter,
epilog=__doc__,
)
subparsers = parser.add_subparsers(dest="command", help="Command to run")
# find-hash command
find_parser = subparsers.add_parser("find-hash", help="Find where a hash would be looked up")
find_parser.add_argument("hash", help="Hash value to search for (full or partial)")
find_parser.add_argument("--test-id", help="Test ID to determine search paths")
find_parser.add_argument("--base-dir", type=Path, help="Base directory (default: tests/integration/common)")
# show command
show_parser = subparsers.add_parser("show", help="Show contents of a recording file")
show_parser.add_argument("file", type=Path, help="Path to recording JSON file")
# list-test command
list_parser = subparsers.add_parser("list-test", help="List all recordings for a test")
list_parser.add_argument("test_id", help="Full test ID (e.g., tests/integration/agents/test_agents.py::test_foo)")
list_parser.add_argument("--base-dir", type=Path, help="Base directory (default: tests/integration/common)")
# explain-paths command
explain_parser = subparsers.add_parser("explain-paths", help="Explain where recordings are searched")
explain_parser.add_argument("--test-id", help="Test ID to show paths for")
explain_parser.add_argument("--base-dir", type=Path, help="Base directory (default: tests/integration/common)")
# compute-hash command
hash_parser = subparsers.add_parser("compute-hash", help="Compute hash for a request")
hash_parser.add_argument("--endpoint", required=True, help="Endpoint path (e.g., /v1/chat/completions)")
hash_parser.add_argument("--method", default="POST", help="HTTP method (default: POST)")
hash_parser.add_argument("--body", required=True, help="Request body as JSON string")
hash_parser.add_argument("--test-id", help="Test ID (affects hash for non-model-list endpoints)")
args = parser.parse_args()
if not args.command:
parser.print_help()
return
if args.command == "find-hash":
find_hash(args.hash, args.base_dir, args.test_id)
elif args.command == "show":
show_recording(args.file)
elif args.command == "list-test":
list_test_recordings(args.test_id, args.base_dir)
elif args.command == "explain-paths":
explain_paths(args.test_id, args.base_dir)
elif args.command == "compute-hash":
compute_hash(args.endpoint, args.method, args.body, args.test_id)
if __name__ == "__main__":
main()

scripts/docker.sh (Executable file, +358 lines)

@@ -0,0 +1,358 @@
#!/bin/bash
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
set -euo pipefail
# Docker container management script for Llama Stack
# Allows starting/stopping/restarting a Llama Stack docker container for testing
# Default values
DISTRO=""
PORT=8321
INFERENCE_MODE="replay"
COMMAND=""
USE_COPY_NOT_MOUNT=false
NO_REBUILD=false
# Function to display usage
usage() {
cat <<EOF
Usage: $0 COMMAND [OPTIONS]
Commands:
start Build and start the docker container
stop Stop and remove the docker container
restart Restart the docker container
status Check if the container is running
logs Show container logs (add -f to follow)
Options:
--distro STRING Distribution name (e.g., 'ci-tests', 'starter') (required for start/restart)
--port NUMBER Port to run on (default: 8321)
--inference-mode STRING Inference mode: replay, record-if-missing or record (default: replay)
--copy-source Copy source into image instead of mounting (default: auto-detect CI, otherwise mount)
--no-rebuild Skip building the image, just start the container (default: false)
--help Show this help message
Examples:
# Start a docker container (local dev mode - mounts source, builds image)
$0 start --distro ci-tests
# Start without rebuilding (uses existing image)
$0 start --distro ci-tests --no-rebuild
# Start with source copied into image (like CI)
$0 start --distro ci-tests --copy-source
# Start with custom port
$0 start --distro starter --port 8080
# Check status
$0 status --distro ci-tests
# View logs
$0 logs --distro ci-tests
# Stop container
$0 stop --distro ci-tests
# Restart container
$0 restart --distro ci-tests
Note: In CI environments (detected via CI or GITHUB_ACTIONS env vars), source is
automatically copied into the image. Locally, source is mounted for live development
unless --copy-source is specified.
EOF
}
# Parse command (first positional arg)
if [[ $# -eq 0 ]]; then
echo "Error: Command required"
usage
exit 1
fi
COMMAND="$1"
shift
# Validate command
case "$COMMAND" in
start | stop | restart | status | logs) ;;
--help)
usage
exit 0
;;
*)
echo "Error: Unknown command: $COMMAND"
usage
exit 1
;;
esac
# Parse options
while [[ $# -gt 0 ]]; do
case $1 in
--distro)
DISTRO="$2"
shift 2
;;
--port)
PORT="$2"
shift 2
;;
--inference-mode)
INFERENCE_MODE="$2"
shift 2
;;
--copy-source)
USE_COPY_NOT_MOUNT=true
shift
;;
--no-rebuild)
NO_REBUILD=true
shift
;;
--help)
usage
exit 0
;;
*)
echo "Unknown option: $1"
usage
exit 1
;;
esac
done
# Validate required parameters for commands that need them
if [[ "$COMMAND" != "stop" && "$COMMAND" != "status" && "$COMMAND" != "logs" ]]; then
if [[ -z "$DISTRO" ]]; then
echo "Error: --distro is required for '$COMMAND' command"
usage
exit 1
fi
fi
# If distro not provided for stop/status/logs, try to infer from running containers
if [[ -z "$DISTRO" && ("$COMMAND" == "stop" || "$COMMAND" == "status" || "$COMMAND" == "logs") ]]; then
# Look for any llama-stack-test-* container
RUNNING_CONTAINERS=$(docker ps -a --filter "name=llama-stack-test-" --format "{{.Names}}" | head -1)
if [[ -n "$RUNNING_CONTAINERS" ]]; then
DISTRO=$(echo "$RUNNING_CONTAINERS" | sed 's/llama-stack-test-//')
echo "Found running container for distro: $DISTRO"
else
echo "Error: --distro is required (no running containers found)"
usage
exit 1
fi
fi
# Remove docker: prefix if present
DISTRO=$(echo "$DISTRO" | sed 's/^docker://')
CONTAINER_NAME="llama-stack-test-$DISTRO"
# Function to check if container is running
is_container_running() {
docker ps --filter "name=^${CONTAINER_NAME}$" --format '{{.Names}}' | grep -q "^${CONTAINER_NAME}$"
}
# Function to check if container exists (running or stopped)
container_exists() {
docker ps -a --filter "name=^${CONTAINER_NAME}$" --format '{{.Names}}' | grep -q "^${CONTAINER_NAME}$"
}
# Function to stop and remove container
stop_container() {
if container_exists; then
echo "Stopping container: $CONTAINER_NAME"
docker stop "$CONTAINER_NAME" 2>/dev/null || true
echo "Removing container: $CONTAINER_NAME"
docker rm "$CONTAINER_NAME" 2>/dev/null || true
echo "✅ Container stopped and removed"
else
echo "⚠️ Container $CONTAINER_NAME does not exist"
fi
}
# Function to build docker image
build_image() {
echo "=== Building Docker Image for distribution: $DISTRO ==="
# Get the repo root (parent of scripts directory)
SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
REPO_ROOT=$(cd "$SCRIPT_DIR/.." && pwd)
# Determine whether to copy or mount source
# Copy in CI or if explicitly requested, otherwise mount for live development
BUILD_ENV="LLAMA_STACK_DIR=$REPO_ROOT"
if [[ "$USE_COPY_NOT_MOUNT" == "true" ]] || [[ "${CI:-false}" == "true" ]] || [[ "${GITHUB_ACTIONS:-false}" == "true" ]]; then
echo "Copying source into image (USE_COPY_NOT_MOUNT=true, CI=${CI:-false}, GITHUB_ACTIONS=${GITHUB_ACTIONS:-false})"
BUILD_ENV="USE_COPY_NOT_MOUNT=true $BUILD_ENV"
else
echo "Will mount source for live development"
fi
if ! eval "$BUILD_ENV llama stack build --distro '$DISTRO' --image-type container"; then
echo "❌ Failed to build Docker image"
exit 1
fi
echo "✅ Docker image built successfully"
}
# Function to start container
start_container() {
# Check if already running
if is_container_running; then
echo "⚠️ Container $CONTAINER_NAME is already running"
echo "URL: http://localhost:$PORT"
exit 0
fi
# Stop and remove if exists but not running
if container_exists; then
echo "Removing existing stopped container..."
docker rm "$CONTAINER_NAME" 2>/dev/null || true
fi
# Build the image unless --no-rebuild was specified
if [[ "$NO_REBUILD" == "true" ]]; then
echo "Skipping build (--no-rebuild specified)"
# Check if image exists (with or without localhost/ prefix)
if ! docker images --format "{{.Repository}}:{{.Tag}}" | grep -q "distribution-$DISTRO:dev$"; then
echo "❌ Error: Image distribution-$DISTRO:dev does not exist"
echo "Either build it first without --no-rebuild, or run: llama stack build --distro $DISTRO --image-type container"
exit 1
fi
echo "✅ Found existing image for distribution-$DISTRO:dev"
else
build_image
fi
echo ""
echo "=== Starting Docker Container ==="
# Get the repo root for volume mount
SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
REPO_ROOT=$(cd "$SCRIPT_DIR/.." && pwd)
# Determine the actual image name (may have localhost/ prefix)
IMAGE_NAME=$(docker images --format "{{.Repository}}:{{.Tag}}" | grep "distribution-$DISTRO:dev$" | head -1)
if [[ -z "$IMAGE_NAME" ]]; then
echo "❌ Error: Could not find image for distribution-$DISTRO:dev"
exit 1
fi
echo "Using image: $IMAGE_NAME"
# Build environment variables for docker run
DOCKER_ENV_VARS=""
DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e LLAMA_STACK_TEST_INFERENCE_MODE=$INFERENCE_MODE"
DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e LLAMA_STACK_TEST_STACK_CONFIG_TYPE=server"
# Set default OLLAMA_URL if not provided
# On macOS/Windows, use host.docker.internal to reach host from container
# On Linux with --network host, use localhost
if [[ "$(uname)" == "Darwin" ]] || [[ "$(uname)" == *"MINGW"* ]]; then
OLLAMA_URL="${OLLAMA_URL:-http://host.docker.internal:11434}"
else
OLLAMA_URL="${OLLAMA_URL:-http://localhost:11434}"
fi
DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e OLLAMA_URL=$OLLAMA_URL"
# Pass through API keys if they exist
[ -n "${TOGETHER_API_KEY:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e TOGETHER_API_KEY=$TOGETHER_API_KEY"
[ -n "${FIREWORKS_API_KEY:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e FIREWORKS_API_KEY=$FIREWORKS_API_KEY"
[ -n "${TAVILY_SEARCH_API_KEY:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e TAVILY_SEARCH_API_KEY=$TAVILY_SEARCH_API_KEY"
[ -n "${OPENAI_API_KEY:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e OPENAI_API_KEY=$OPENAI_API_KEY"
[ -n "${ANTHROPIC_API_KEY:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e ANTHROPIC_API_KEY=$ANTHROPIC_API_KEY"
[ -n "${GROQ_API_KEY:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e GROQ_API_KEY=$GROQ_API_KEY"
[ -n "${GEMINI_API_KEY:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e GEMINI_API_KEY=$GEMINI_API_KEY"
[ -n "${SAFETY_MODEL:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e SAFETY_MODEL=$SAFETY_MODEL"
[ -n "${SQLITE_STORE_DIR:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e SQLITE_STORE_DIR=$SQLITE_STORE_DIR"
# Use --network host on Linux only (macOS doesn't support it properly)
NETWORK_MODE=""
if [[ "$(uname)" != "Darwin" ]] && [[ "$(uname)" != *"MINGW"* ]]; then
NETWORK_MODE="--network host"
fi
docker run -d $NETWORK_MODE --name "$CONTAINER_NAME" \
-p $PORT:$PORT \
$DOCKER_ENV_VARS \
-v "$REPO_ROOT":/app/llama-stack-source \
"$IMAGE_NAME" \
--port $PORT
echo "Waiting for container to start..."
for i in {1..30}; do
if curl -s http://localhost:$PORT/v1/health 2>/dev/null | grep -q "OK"; then
echo "✅ Container started successfully"
echo ""
echo "=== Container Information ==="
echo "Container name: $CONTAINER_NAME"
echo "URL: http://localhost:$PORT"
echo "Health check: http://localhost:$PORT/v1/health"
echo ""
echo "To view logs: $0 logs --distro $DISTRO"
echo "To stop: $0 stop --distro $DISTRO"
return 0
fi
if [[ $i -eq 30 ]]; then
echo "❌ Container failed to start within timeout"
echo "Showing container logs:"
docker logs "$CONTAINER_NAME"
exit 1
fi
sleep 1
done
}
# Execute command
case "$COMMAND" in
start)
start_container
;;
stop)
stop_container
;;
restart)
echo "Restarting container: $CONTAINER_NAME"
stop_container
echo ""
start_container
;;
status)
if is_container_running; then
echo "✅ Container $CONTAINER_NAME is running"
echo "URL: http://localhost:$PORT"
# Try to get the actual port from the container
ACTUAL_PORT=$(docker port "$CONTAINER_NAME" 2>/dev/null | grep "8321/tcp" | cut -d':' -f2 | head -1)
if [[ -n "$ACTUAL_PORT" ]]; then
echo "Port: $ACTUAL_PORT"
fi
elif container_exists; then
echo "⚠️ Container $CONTAINER_NAME exists but is not running"
echo "Start it with: $0 start --distro $DISTRO"
else
echo "❌ Container $CONTAINER_NAME does not exist"
echo "Start it with: $0 start --distro $DISTRO"
fi
;;
logs)
if container_exists; then
echo "=== Logs for $CONTAINER_NAME ==="
# Check if -f flag was passed after 'logs' command
if [[ "${1:-}" == "-f" || "${1:-}" == "--follow" ]]; then
docker logs --tail 100 --follow "$CONTAINER_NAME"
else
docker logs --tail 100 "$CONTAINER_NAME"
fi
else
echo "❌ Container $CONTAINER_NAME does not exist"
exit 1
fi
;;
esac


@@ -42,9 +42,12 @@ Setups are defined in tests/integration/setups.py and provide global configurati
You can also specify subdirectories (of tests/integration) to select tests from, which will override the suite.
Examples:
# Basic inference tests with ollama
# Basic inference tests with ollama (server mode)
$0 --stack-config server:ci-tests --suite base --setup ollama
# Basic inference tests with docker
$0 --stack-config docker:ci-tests --suite base --setup ollama
# Multiple test directories with vllm
$0 --stack-config server:ci-tests --subdirs 'inference,agents' --setup vllm
@@ -153,7 +156,7 @@ echo "Setting SQLITE_STORE_DIR: $SQLITE_STORE_DIR"
# Determine stack config type for api_recorder test isolation
if [[ "$COLLECT_ONLY" == false ]]; then
if [[ "$STACK_CONFIG" == server:* ]]; then
if [[ "$STACK_CONFIG" == server:* ]] || [[ "$STACK_CONFIG" == docker:* ]]; then
export LLAMA_STACK_TEST_STACK_CONFIG_TYPE="server"
echo "Setting stack config type: server"
else
@@ -229,6 +232,104 @@ if [[ "$STACK_CONFIG" == *"server:"* && "$COLLECT_ONLY" == false ]]; then
trap stop_server EXIT ERR INT TERM
fi
# Start Docker Container if needed
if [[ "$STACK_CONFIG" == *"docker:"* && "$COLLECT_ONLY" == false ]]; then
stop_container() {
echo "Stopping Docker container..."
container_name="llama-stack-test-$DISTRO"
if docker ps -a --format '{{.Names}}' | grep -q "^${container_name}$"; then
echo "Stopping and removing container: $container_name"
docker stop "$container_name" 2>/dev/null || true
docker rm "$container_name" 2>/dev/null || true
else
echo "No container named $container_name found"
fi
echo "Docker container stopped"
}
# Extract distribution name from docker:distro format
DISTRO=$(echo "$STACK_CONFIG" | sed 's/^docker://')
export LLAMA_STACK_PORT=8321
echo "=== Building Docker Image for distribution: $DISTRO ==="
# Set LLAMA_STACK_DIR to repo root
# USE_COPY_NOT_MOUNT copies files into image (for CI), otherwise mounts for live development
BUILD_ENV="LLAMA_STACK_DIR=$ROOT_DIR"
if [[ "${CI:-false}" == "true" ]] || [[ "${GITHUB_ACTIONS:-false}" == "true" ]]; then
echo "CI detected (CI=$CI, GITHUB_ACTIONS=$GITHUB_ACTIONS): copying source into image"
BUILD_ENV="USE_COPY_NOT_MOUNT=true $BUILD_ENV"
else
echo "Local mode: will mount source for live development"
fi
eval "$BUILD_ENV llama stack build --distro '$DISTRO' --image-type container"
if [ $? -ne 0 ]; then
echo "❌ Failed to build Docker image"
exit 1
fi
echo ""
echo "=== Starting Docker Container ==="
container_name="llama-stack-test-$DISTRO"
# Stop and remove existing container if it exists
docker stop "$container_name" 2>/dev/null || true
docker rm "$container_name" 2>/dev/null || true
# Build environment variables for docker run
DOCKER_ENV_VARS=""
DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e LLAMA_STACK_TEST_INFERENCE_MODE=$INFERENCE_MODE"
DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e LLAMA_STACK_TEST_STACK_CONFIG_TYPE=server"
# Pass through API keys if they exist
[ -n "${TOGETHER_API_KEY:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e TOGETHER_API_KEY=$TOGETHER_API_KEY"
[ -n "${FIREWORKS_API_KEY:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e FIREWORKS_API_KEY=$FIREWORKS_API_KEY"
[ -n "${TAVILY_SEARCH_API_KEY:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e TAVILY_SEARCH_API_KEY=$TAVILY_SEARCH_API_KEY"
[ -n "${OPENAI_API_KEY:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e OPENAI_API_KEY=$OPENAI_API_KEY"
[ -n "${ANTHROPIC_API_KEY:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e ANTHROPIC_API_KEY=$ANTHROPIC_API_KEY"
[ -n "${GROQ_API_KEY:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e GROQ_API_KEY=$GROQ_API_KEY"
[ -n "${GEMINI_API_KEY:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e GEMINI_API_KEY=$GEMINI_API_KEY"
[ -n "${OLLAMA_URL:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e OLLAMA_URL=$OLLAMA_URL"
[ -n "${SAFETY_MODEL:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e SAFETY_MODEL=$SAFETY_MODEL"
# Determine the actual image name (may have localhost/ prefix)
IMAGE_NAME=$(docker images --format "{{.Repository}}:{{.Tag}}" | grep "distribution-$DISTRO:dev$" | head -1)
if [[ -z "$IMAGE_NAME" ]]; then
echo "❌ Error: Could not find image for distribution-$DISTRO:dev"
exit 1
fi
echo "Using image: $IMAGE_NAME"
docker run -d --network host --name "$container_name" \
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
$DOCKER_ENV_VARS \
-v $ROOT_DIR:/app/llama-stack-source \
"$IMAGE_NAME" \
--port $LLAMA_STACK_PORT
echo "Waiting for Docker container to start..."
for i in {1..30}; do
if curl -s http://localhost:$LLAMA_STACK_PORT/v1/health 2>/dev/null | grep -q "OK"; then
echo "✅ Docker container started successfully"
break
fi
if [[ $i -eq 30 ]]; then
echo "❌ Docker container failed to start"
echo "Container logs:"
docker logs "$container_name"
exit 1
fi
sleep 1
done
echo ""
# Update STACK_CONFIG to point to the running container
STACK_CONFIG="http://localhost:$LLAMA_STACK_PORT"
trap stop_container EXIT ERR INT TERM
fi
# Run tests
echo "=== Running Integration Tests ==="
EXCLUDE_TESTS="builtin_tool or safety_with_image or code_interpreter or test_rag"


@@ -42,7 +42,9 @@ def pytest_sessionstart(session):
# Set test stack config type for api_recorder test isolation
stack_config = session.config.getoption("--stack-config", default=None)
if stack_config and (stack_config.startswith("server:") or stack_config.startswith("http")):
if stack_config and (
stack_config.startswith("server:") or stack_config.startswith("docker:") or stack_config.startswith("http")
):
os.environ["LLAMA_STACK_TEST_STACK_CONFIG_TYPE"] = "server"
logger.info(f"Test stack config type: server (stack_config={stack_config})")
else:
@@ -139,7 +141,9 @@ def pytest_addoption(parser):
a 'pointer' to the stack. this can be either be:
(a) a template name like `starter`, or
(b) a path to a run.yaml file, or
(c) an adhoc config spec, e.g. `inference=fireworks,safety=llama-guard,agents=meta-reference`
(c) an adhoc config spec, e.g. `inference=fireworks,safety=llama-guard,agents=meta-reference`, or
(d) a server config like `server:ci-tests`, or
(e) a docker config like `docker:ci-tests` (builds and runs container)
"""
),
)
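With that in place, a docker-backed run can also be launched straight from pytest; a minimal sketch (test path assumed):

    # builds the distribution container, runs it, and points the test client at it
    pytest tests/integration --stack-config docker:ci-tests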