feat(ci): add support for docker:distro in tests (#3832)
Some checks failed
Integration Auth Tests / test-matrix (oauth2_token) (push) Failing after 1s
SqlStore Integration Tests / test-postgres (3.13) (push) Failing after 0s
SqlStore Integration Tests / test-postgres (3.12) (push) Failing after 0s
Test External Providers Installed via Module / test-external-providers-from-module (venv) (push) Has been skipped
Python Package Build Test / build (3.13) (push) Failing after 2s
Test Llama Stack Build / generate-matrix (push) Successful in 6s
Unit Tests / unit-tests (3.12) (push) Failing after 5s
Test Llama Stack Build / build-single-provider (push) Failing after 9s
Test Llama Stack Build / build-ubi9-container-distribution (push) Failing after 10s
Vector IO Integration Tests / test-matrix (push) Failing after 14s
Unit Tests / unit-tests (3.13) (push) Failing after 7s
Test External API and Providers / test-external (venv) (push) Failing after 12s
API Conformance Tests / check-schema-compatibility (push) Successful in 19s
Test Llama Stack Build / build (push) Failing after 7s
Integration Tests (Replay) / Integration Tests (, , , client=, ) (push) Failing after 26s
Test Llama Stack Build / build-custom-container-distribution (push) Failing after 25s
Python Package Build Test / build (3.12) (push) Failing after 33s
UI Tests / ui-tests (22) (push) Successful in 1m26s
Pre-commit / pre-commit (push) Successful in 2m18s

Also includes a critical bug fix so that test recordings can be found inside Docker.
This commit is contained in:
Ashwin Bharambe 2025-10-16 19:33:13 -07:00 committed by GitHub
parent b3099d40e2
commit cd152f4240
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 932 additions and 12 deletions

View file

@ -338,7 +338,7 @@ fi
# Add other required commands generic to all containers
add_to_container << EOF
RUN mkdir -p /.llama /.cache && chmod -R g+rw /app /.llama /.cache
RUN mkdir -p /.llama /.cache && chmod -R g+rw /.llama /.cache && (chmod -R g+rw /app 2>/dev/null || true)
EOF
printf "Containerfile created successfully in %s/Containerfile\n\n" "$TEMP_DIR"

View file

@ -42,3 +42,8 @@ def sync_test_context_from_provider_data():
return TEST_CONTEXT.set(provider_data["__test_id"])
return None
def is_debug_mode() -> bool:
    """Report whether test-recording debug mode is enabled.

    Reads the ``LLAMA_STACK_TEST_DEBUG`` environment variable on every call.

    Returns:
        ``True`` when the variable's value, ignoring case and surrounding
        whitespace, is one of ``1``, ``true`` or ``yes``; ``False`` otherwise,
        including when the variable is unset or empty.
    """
    # Strip before comparing so that values picked up from env files or CI
    # configuration with stray whitespace or CRLF line endings still enable
    # debug mode instead of being silently treated as "off".
    raw_value = os.environ.get("LLAMA_STACK_TEST_DEBUG", "")
    return raw_value.strip().lower() in ("1", "true", "yes")

View file

@ -37,7 +37,7 @@ _id_counters: dict[str, dict[str, int]] = {}
# Test context uses ContextVar since it changes per-test and needs async isolation
from openai.types.completion_choice import CompletionChoice
from llama_stack.core.testing_context import get_test_context
from llama_stack.core.testing_context import get_test_context, is_debug_mode
# update the "finish_reason" field, since its type definition is wrong (no None is accepted)
CompletionChoice.model_fields["finish_reason"].annotation = Literal["stop", "length", "content_filter"] | None
@ -146,6 +146,7 @@ def normalize_inference_request(method: str, url: str, headers: dict[str, Any],
body_for_hash = _normalize_body_for_hash(body)
test_id = get_test_context()
normalized: dict[str, Any] = {
"method": method.upper(),
"endpoint": parsed.path,
@ -154,10 +155,20 @@ def normalize_inference_request(method: str, url: str, headers: dict[str, Any],
# Include test_id for isolation, except for shared infrastructure endpoints
if parsed.path not in ("/api/tags", "/v1/models"):
normalized["test_id"] = get_test_context()
normalized["test_id"] = test_id
normalized_json = json.dumps(normalized, sort_keys=True)
return hashlib.sha256(normalized_json.encode()).hexdigest()
request_hash = hashlib.sha256(normalized_json.encode()).hexdigest()
if is_debug_mode():
logger.info("[RECORDING DEBUG] Hash computation:")
logger.info(f" Test ID: {test_id}")
logger.info(f" Method: {method.upper()}")
logger.info(f" Endpoint: {parsed.path}")
logger.info(f" Model: {body.get('model', 'N/A')}")
logger.info(f" Computed hash: {request_hash}")
return request_hash
def normalize_tool_request(provider_name: str, tool_name: str, kwargs: dict[str, Any]) -> str:
@ -212,6 +223,11 @@ def patch_httpx_for_test_id():
provider_data["__test_id"] = test_id
request.headers["X-LlamaStack-Provider-Data"] = json.dumps(provider_data)
if is_debug_mode():
logger.info("[RECORDING DEBUG] Injected test ID into request header:")
logger.info(f" Test ID: {test_id}")
logger.info(f" URL: {request.url}")
return None
LlamaStackClient._prepare_request = patched_prepare_request
@ -355,12 +371,35 @@ class ResponseStorage:
test_file = test_id.split("::")[0] # Remove test function part
test_dir = Path(test_file).parent # Get parent directory
# Put recordings in a "recordings" subdirectory of the test's parent dir
# e.g., "tests/integration/inference" -> "tests/integration/inference/recordings"
return test_dir / "recordings"
if self.base_dir.is_absolute():
repo_root = self.base_dir.parent.parent.parent
result = repo_root / test_dir / "recordings"
if is_debug_mode():
logger.info("[RECORDING DEBUG] Path resolution (absolute base_dir):")
logger.info(f" Test ID: {test_id}")
logger.info(f" Base dir: {self.base_dir}")
logger.info(f" Repo root: {repo_root}")
logger.info(f" Test file: {test_file}")
logger.info(f" Test dir: {test_dir}")
logger.info(f" Recordings dir: {result}")
return result
else:
result = test_dir / "recordings"
if is_debug_mode():
logger.info("[RECORDING DEBUG] Path resolution (relative base_dir):")
logger.info(f" Test ID: {test_id}")
logger.info(f" Base dir: {self.base_dir}")
logger.info(f" Test dir: {test_dir}")
logger.info(f" Recordings dir: {result}")
return result
else:
# Fallback for non-test contexts
return self.base_dir / "recordings"
result = self.base_dir / "recordings"
if is_debug_mode():
logger.info("[RECORDING DEBUG] Path resolution (no test context):")
logger.info(f" Base dir: {self.base_dir}")
logger.info(f" Recordings dir: {result}")
return result
def _ensure_directory(self):
"""Ensure test-specific directories exist."""
@ -395,6 +434,13 @@ class ResponseStorage:
response_path = responses_dir / response_file
if is_debug_mode():
logger.info("[RECORDING DEBUG] Storing recording:")
logger.info(f" Request hash: {request_hash}")
logger.info(f" File: {response_path}")
logger.info(f" Test ID: {get_test_context()}")
logger.info(f" Endpoint: {endpoint}")
# Save response to JSON file with metadata
with open(response_path, "w") as f:
json.dump(
@ -423,16 +469,33 @@ class ResponseStorage:
test_dir = self._get_test_dir()
response_path = test_dir / response_file
if is_debug_mode():
logger.info("[RECORDING DEBUG] Looking up recording:")
logger.info(f" Request hash: {request_hash}")
logger.info(f" Primary path: {response_path}")
logger.info(f" Primary exists: {response_path.exists()}")
if response_path.exists():
if is_debug_mode():
logger.info(" Found in primary location")
return _recording_from_file(response_path)
# Fallback to base recordings directory (for session-level recordings)
fallback_dir = self.base_dir / "recordings"
fallback_path = fallback_dir / response_file
if is_debug_mode():
logger.info(f" Fallback path: {fallback_path}")
logger.info(f" Fallback exists: {fallback_path.exists()}")
if fallback_path.exists():
if is_debug_mode():
logger.info(" Found in fallback location")
return _recording_from_file(fallback_path)
if is_debug_mode():
logger.info(" Recording not found in either location")
return None
def _model_list_responses(self, request_hash: str) -> list[dict[str, Any]]:
@ -588,6 +651,13 @@ async def _patched_inference_method(original_method, self, client_type, endpoint
mode = _current_mode
storage = _current_storage
if is_debug_mode():
logger.info("[RECORDING DEBUG] Entering inference method:")
logger.info(f" Mode: {mode}")
logger.info(f" Client type: {client_type}")
logger.info(f" Endpoint: {endpoint}")
logger.info(f" Test context: {get_test_context()}")
if mode == APIRecordingMode.LIVE or storage is None:
if endpoint == "/v1/models":
return original_method(self, *args, **kwargs)
@ -643,6 +713,18 @@ async def _patched_inference_method(original_method, self, client_type, endpoint
return response_body
elif mode == APIRecordingMode.REPLAY:
# REPLAY mode requires recording to exist
if is_debug_mode():
logger.error("[RECORDING DEBUG] Recording not found!")
logger.error(f" Mode: {mode}")
logger.error(f" Request hash: {request_hash}")
logger.error(f" Method: {method}")
logger.error(f" URL: {url}")
logger.error(f" Endpoint: {endpoint}")
logger.error(f" Model: {body.get('model', 'unknown')}")
logger.error(f" Test context: {get_test_context()}")
logger.error(
f" Stack config type: {os.environ.get('LLAMA_STACK_TEST_STACK_CONFIG_TYPE', 'library_client')}"
)
raise RuntimeError(
f"Recording not found for request hash: {request_hash}\n"
f"Model: {body.get('model', 'unknown')} | Request: {method} {url}\n"