# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

"""
End-to-end tests for the OpenTelemetry inline provider.

What this does:
- Boots a mock OTLP collector and a mock vLLM server
- Starts a real Llama Stack server with the inline OTel provider
- Calls real HTTP APIs against it
- Verifies traces, metrics, and custom metric names (non-empty)
"""

# ============================================================================
# IMPORTS
# ============================================================================

import os
import socket
import subprocess
import time
from typing import Any

import pytest
import requests
import yaml
from pydantic import BaseModel, Field

# Mock servers are in the mocking/ subdirectory
from .mocking import (
    MockOTLPCollector,
    MockServerConfig,
    MockVLLMServer,
    start_mock_servers_async,
    stop_mock_servers,
)

# ============================================================================
# DATA MODELS
# ============================================================================


class TelemetryTestCase(BaseModel):
    """
    Pydantic model defining expected telemetry for an API call.

    **TO ADD A NEW TEST CASE:** Add to TEST_CASES list below.
    """

    name: str = Field(description="Unique test case identifier")
    http_method: str = Field(description="HTTP method (GET, POST, etc.)")
    api_path: str = Field(description="API path (e.g., '/v1/models')")
    request_body: dict[str, Any] | None = Field(default=None)
    expected_http_status: int = Field(default=200)
    expected_trace_exports: int = Field(default=1, description="Minimum number of trace exports expected")
    expected_metric_exports: int = Field(default=0, description="Minimum number of metric exports expected")
    should_have_error_span: bool = Field(default=False)
    expected_metrics: list[str] = Field(
        default_factory=list, description="List of metric names that should be captured"
    )
    expected_min_spans: int | None = Field(
        default=None, description="If set, minimum number of spans expected in the new trace(s) generated by this test"
    )


# ============================================================================
# TEST CONFIGURATION
# **TO ADD NEW TESTS:** Add TelemetryTestCase instances here
# ============================================================================

# Custom metric names (defined in llama_stack/providers/inline/telemetry/otel/otel.py)

CUSTOM_METRICS_BASE = [
    "http.server.request.duration",
    "http.server.request.count",
]

CUSTOM_METRICS_STREAMING = [
    "http.server.streaming.duration",
    "http.server.streaming.count",
]

TEST_CASES = [
    TelemetryTestCase(
        name="models_list",
        http_method="GET",
        api_path="/v1/models",
        expected_trace_exports=1,  # Single trace with 2-3 spans (GET, http send)
        expected_metric_exports=1,  # Metrics export periodically, but we'll wait for them
        expected_metrics=[],  # First request: middleware may not be initialized yet
        expected_min_spans=2,
    ),
    TelemetryTestCase(
        name="chat_completion",
        http_method="POST",
        api_path="/v1/chat/completions",
        request_body={
            "model": "meta-llama/Llama-3.2-1B-Instruct",
            "messages": [{"role": "user", "content": "Hello!"}],
        },
        expected_trace_exports=1,  # Single trace with 4 spans (POST, http receive, 2x http send)
        expected_metric_exports=1,  # Metrics export periodically
        expected_metrics=CUSTOM_METRICS_BASE,
        expected_min_spans=3,
    ),
    TelemetryTestCase(
        name="chat_completion_streaming",
        http_method="POST",
        api_path="/v1/chat/completions",
        request_body={
            "model": "meta-llama/Llama-3.2-1B-Instruct",
            "messages": [{"role": "user", "content": "Streaming test"}],
            "stream": True,  # Enable streaming response
        },
        expected_trace_exports=1,  # Single trace with streaming spans
        expected_metric_exports=1,  # Metrics export periodically
        # Validate both base and streaming metrics with polling
        expected_metrics=CUSTOM_METRICS_BASE + CUSTOM_METRICS_STREAMING,
        expected_min_spans=4,
    ),
]
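
# For reference, a new case could be appended to TEST_CASES along these lines.
# This is an illustrative sketch only (not registered); the name and expectation
# values are hypothetical and would need to match the telemetry the endpoint
# actually emits:
#
#     TelemetryTestCase(
#         name="models_list_repeat",
#         http_method="GET",
#         api_path="/v1/models",
#         expected_trace_exports=1,
#         expected_metric_exports=1,
#         expected_metrics=CUSTOM_METRICS_BASE,
#         expected_min_spans=2,
#     ),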


# ============================================================================
# TEST INFRASTRUCTURE
# ============================================================================


class TelemetryTestRunner:
    """
    Executes TelemetryTestCase instances against a real Llama Stack server.

    **HOW IT WORKS:**
    1. Makes a real HTTP request to the stack
    2. Waits for telemetry export
    3. Verifies exports were sent to the mock collector
    4. Validates custom metrics by name (if expected_metrics is specified)
    5. Ensures metrics have non-empty data points
    """

    def __init__(
        self,
        base_url: str,
        collector: MockOTLPCollector,
        poll_timeout_seconds: float = 8.0,
        poll_interval_seconds: float = 0.1,
    ):
        self.base_url = base_url
        self.collector = collector
        self.poll_timeout_seconds = poll_timeout_seconds  # how long to wait for telemetry to be exported
        self.poll_interval_seconds = poll_interval_seconds  # how often to poll for telemetry

    def run_test_case(self, test_case: TelemetryTestCase, verbose: bool = False) -> bool:
        """Execute a single test case and verify telemetry."""
        initial_traces = self.collector.get_trace_count()
        prior_trace_ids = self.collector.get_all_trace_ids()
        initial_metrics = self.collector.get_metric_count()

        if verbose:
            print(f"\n--- {test_case.name} ---")
            print(f" {test_case.http_method} {test_case.api_path}")
            if test_case.expected_metrics:
                print(f" Expected custom metrics: {', '.join(test_case.expected_metrics)}")

        # Make real HTTP request to Llama Stack
        is_streaming_test = test_case.request_body and test_case.request_body.get("stream", False)
        try:
            url = f"{self.base_url}{test_case.api_path}"

            # Streaming requests need longer timeout to complete
            timeout = 10 if is_streaming_test else 5

            if test_case.http_method == "GET":
                response = requests.get(url, timeout=timeout)
            elif test_case.http_method == "POST":
                response = requests.post(url, json=test_case.request_body or {}, timeout=timeout)
            else:
                response = requests.request(test_case.http_method, url, timeout=timeout)

            if verbose:
                print(f" HTTP Response: {response.status_code}")

            status_match = response.status_code == test_case.expected_http_status

        except requests.exceptions.RequestException as e:
            if verbose:
                print(f" Request exception: {type(e).__name__}")
            # For streaming requests, exceptions are expected due to mock server behavior
            # The important part is whether telemetry metrics were captured
            status_match = is_streaming_test  # Pass streaming tests, fail non-streaming

        # Validation state shared with compute_status() across polling attempts
        missing_metrics: list[str] = []
        empty_metrics: list[str] = []
        new_trace_ids: set[str] = set()

        def compute_status() -> tuple[bool, bool, bool, bool]:
            traces_ok_local = (self.collector.get_trace_count() - initial_traces) >= test_case.expected_trace_exports
            metrics_count_ok_local = (
                self.collector.get_metric_count() - initial_metrics
            ) >= test_case.expected_metric_exports

            metrics_ok_local = True
            if test_case.expected_metrics:
                missing_metrics.clear()
                empty_metrics.clear()
                for metric_name in test_case.expected_metrics:
                    if not self.collector.has_metric(metric_name):
                        missing_metrics.append(metric_name)
                    else:
                        data_points = self.collector.get_metric_by_name(metric_name)
                        if len(data_points) == 0:
                            empty_metrics.append(metric_name)
                metrics_ok_local = len(missing_metrics) == 0 and len(empty_metrics) == 0

            spans_ok_local = True
            if test_case.expected_min_spans is not None:
                nonlocal new_trace_ids
                new_trace_ids = self.collector.get_new_trace_ids(prior_trace_ids)
                if not new_trace_ids:
                    spans_ok_local = False
                else:
                    counts = self.collector.get_trace_span_counts()
                    min_spans: int = int(test_case.expected_min_spans or 0)
                    spans_ok_local = all(counts.get(tid, 0) >= min_spans for tid in new_trace_ids)

            return traces_ok_local, metrics_count_ok_local, metrics_ok_local, spans_ok_local

        # Poll until all telemetry expectations are met or timeout (single loop for speed)
        start = time.time()
        traces_ok, metrics_count_ok, metrics_by_name_validated, spans_ok = compute_status()
        while time.time() - start < self.poll_timeout_seconds:
            if traces_ok and metrics_count_ok and metrics_by_name_validated and spans_ok:
                break
            time.sleep(self.poll_interval_seconds)
            traces_ok, metrics_count_ok, metrics_by_name_validated, spans_ok = compute_status()

        result = bool(
            (status_match or is_streaming_test)
            and traces_ok
            and metrics_count_ok
            and metrics_by_name_validated
            and spans_ok
        )

        if verbose:
            total_http_requests = len(getattr(self.collector, "all_http_requests", []))
            print(f" [DEBUG] OTLP POST requests: {total_http_requests}")
            print(
                f" Expected: >={test_case.expected_trace_exports} traces, >={test_case.expected_metric_exports} metrics"
            )
            print(
                f" Actual: {self.collector.get_trace_count() - initial_traces} traces, {self.collector.get_metric_count() - initial_metrics} metrics"
            )

            if test_case.expected_metrics:
                print(" Custom metrics:")
                for metric_name in test_case.expected_metrics:
                    n = len(self.collector.get_metric_by_name(metric_name))
                    status = "✓" if n > 0 else "✗"
                    print(f" {status} {metric_name}: {n}")
                if missing_metrics:
                    print(f" Missing: {missing_metrics}")
                if empty_metrics:
                    print(f" Empty: {empty_metrics}")

            if test_case.expected_min_spans is not None:
                counts = self.collector.get_trace_span_counts()
                span_counts = {tid: counts[tid] for tid in new_trace_ids}
                print(f" New trace IDs: {sorted(new_trace_ids)}")
                print(f" Span counts: {span_counts}")

            print(f" Result: {'PASS' if result else 'FAIL'}")

        return result

    def run_all_test_cases(self, test_cases: list[TelemetryTestCase], verbose: bool = True) -> dict[str, bool]:
        """Run all test cases and return results."""
        results = {}
        for test_case in test_cases:
            results[test_case.name] = self.run_test_case(test_case, verbose=verbose)
        return results
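
    # Minimal usage sketch of this runner (it mirrors test_all_test_cases_via_runner
    # at the bottom of this file); `base_url` and `collector` are assumed to come
    # from the llama_stack_server fixture:
    #
    #     runner = TelemetryTestRunner(base_url, collector)
    #     results = runner.run_all_test_cases(TEST_CASES, verbose=True)
    #     assert all(results.values())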


# ============================================================================
# HELPER FUNCTIONS
# ============================================================================


def is_port_available(port: int) -> bool:
    """Check if a TCP port is available for binding."""
    try:
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
            sock.bind(("localhost", port))
            return True
    except OSError:
        return False


# ============================================================================
# PYTEST FIXTURES
# ============================================================================


@pytest.fixture(scope="module")
def mock_servers():
    """
    Fixture: Start all mock servers in parallel using async harness.

    **TO ADD A NEW MOCK SERVER:**
    Just add a MockServerConfig to the MOCK_SERVERS list below.
    """
    import asyncio

    # ========================================================================
    # MOCK SERVER CONFIGURATION
    # **TO ADD A NEW MOCK:** Just add a MockServerConfig instance below
    #
    # Example:
    #   MockServerConfig(
    #       name="Mock MyService",
    #       server_class=MockMyService,  # Must inherit from MockServerBase
    #       init_kwargs={"port": 9000, "param": "value"},
    #   ),
    # ========================================================================
    mock_servers_config = [
        MockServerConfig(
            name="Mock OTLP Collector",
            server_class=MockOTLPCollector,
            init_kwargs={"port": 4318},
        ),
        MockServerConfig(
            name="Mock vLLM Server",
            server_class=MockVLLMServer,
            init_kwargs={
                "port": 8000,
                "models": ["meta-llama/Llama-3.2-1B-Instruct"],
            },
        ),
        # Add more mock servers here - they will start in parallel automatically!
    ]

    # Start all servers in parallel
    servers = asyncio.run(start_mock_servers_async(mock_servers_config))

    # Verify vLLM models
    models_response = requests.get("http://localhost:8000/v1/models", timeout=1)
    models_data = models_response.json()
    print(f"[INFO] Mock vLLM serving {len(models_data['data'])} models: {[m['id'] for m in models_data['data']]}")

    yield servers

    # Stop all servers
    stop_mock_servers(servers)


@pytest.fixture(scope="module")
def mock_otlp_collector(mock_servers):
    """Convenience fixture to get OTLP collector from mock_servers."""
    return mock_servers["Mock OTLP Collector"]


@pytest.fixture(scope="module")
def mock_vllm_server(mock_servers):
    """Convenience fixture to get vLLM server from mock_servers."""
    return mock_servers["Mock vLLM Server"]


@pytest.fixture(scope="module")
def llama_stack_server(tmp_path_factory, mock_otlp_collector, mock_vllm_server):
    """
    Fixture: Start a real Llama Stack server with the inline OTel provider.

    **THIS IS THE MAIN FIXTURE** - it runs:
        opentelemetry-instrument llama stack run run.yaml --port <port>

    **TO MODIFY STACK CONFIG:** Edit the run_config dict below
    """
    config_dir = tmp_path_factory.mktemp("otel-stack-config")

    # Ensure mock vLLM is ready and accessible before starting Llama Stack
    print("\n[INFO] Verifying mock vLLM is accessible at http://localhost:8000...")
    try:
        vllm_models = requests.get("http://localhost:8000/v1/models", timeout=2)
        print(f"[INFO] Mock vLLM models endpoint response: {vllm_models.status_code}")
    except Exception as e:
        pytest.fail(f"Mock vLLM not accessible before starting Llama Stack: {e}")

    # Create run.yaml with inference and telemetry providers
    run_config = {
        "image_name": "test-otel-e2e",
        "apis": ["inference"],
        "providers": {
            "inference": [
                {
                    "provider_id": "vllm",
                    "provider_type": "remote::vllm",
                    "config": {
                        "url": "http://localhost:8000/v1",
                    },
                },
            ],
        },
        "instrumentation": {
            "provider": "otel",  # Discriminator for Pydantic
            "config": {
                "service_name": "llama-stack-e2e-test",
                "span_processor": "simple",
            },
        },
        "server": {
            "host": "127.0.0.1",
        },
        "models": [
            {
                "model_id": "meta-llama/Llama-3.2-1B-Instruct",
                "provider_id": "vllm",
            }
        ],
    }

    config_file = config_dir / "run.yaml"
    with open(config_file, "w") as f:
        yaml.dump(run_config, f)
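
    # For reference, the run.yaml written above is roughly equivalent to the
    # following (illustrative only; yaml.dump may order keys and quote values
    # differently):
    #
    #     image_name: test-otel-e2e
    #     apis: [inference]
    #     providers:
    #       inference:
    #         - provider_id: vllm
    #           provider_type: remote::vllm
    #           config:
    #             url: http://localhost:8000/v1
    #     instrumentation:
    #       provider: otel
    #       config:
    #         service_name: llama-stack-e2e-test
    #         span_processor: simple
    #     server:
    #       host: 127.0.0.1
    #     models:
    #       - model_id: meta-llama/Llama-3.2-1B-Instruct
    #         provider_id: vllm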

    # Find available port for Llama Stack
    port = 5555
    while not is_port_available(port) and port < 5600:
        port += 1

    if port >= 5600:
        pytest.skip("No available ports for test server")

    # Set environment variables for OTel instrumentation
    # NOTE: These only affect the subprocess, not other tests
    env = os.environ.copy()
    env["OTEL_EXPORTER_OTLP_ENDPOINT"] = "http://localhost:4318"
    env["OTEL_EXPORTER_OTLP_PROTOCOL"] = "http/protobuf"  # Ensure correct protocol
    env["OTEL_SERVICE_NAME"] = "llama-stack-e2e-test"
    env["OTEL_SPAN_PROCESSOR"] = "simple"  # Force simple processor for immediate export
    env["LLAMA_STACK_PORT"] = str(port)
    env["OTEL_PYTHON_LOGGING_AUTO_INSTRUMENTATION_ENABLED"] = "true"

    # Configure fast metric export for testing (default is 60 seconds)
    # This makes metrics export every 500ms instead of every 60 seconds
    env["OTEL_METRIC_EXPORT_INTERVAL"] = "500"  # milliseconds
    env["OTEL_METRIC_EXPORT_TIMEOUT"] = "1000"  # milliseconds

    # Disable inference recording to ensure real requests to our mock vLLM
    # This is critical - without this, Llama Stack replays cached responses
    # Safe to remove here as it only affects the subprocess environment
    if "LLAMA_STACK_TEST_INFERENCE_MODE" in env:
        del env["LLAMA_STACK_TEST_INFERENCE_MODE"]

    # Start server with automatic instrumentation
    cmd = [
        "opentelemetry-instrument",  # ← Automatic instrumentation wrapper
        "llama",
        "stack",
        "run",
        str(config_file),
        "--port",
        str(port),
    ]

    print(f"\n[INFO] Starting Llama Stack with OTel instrumentation on port {port}")
    print(f"[INFO] Command: {' '.join(cmd)}")

    process = subprocess.Popen(
        cmd,
        env=env,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,  # Merge stderr into stdout
        text=True,
    )

    # Wait for server to start
    max_wait = 30
    base_url = f"http://127.0.0.1:{port}"
    startup_output = []

    import select

    for i in range(max_wait):
        # Collect server output non-blocking
        if process.stdout and select.select([process.stdout], [], [], 0)[0]:
            line = process.stdout.readline()
            if line:
                startup_output.append(line)

        try:
            response = requests.get(f"{base_url}/v1/health", timeout=1)
            if response.status_code == 200:
                print(f"[INFO] Server ready at {base_url}")
                # Print relevant initialization logs
                print(f"[DEBUG] Captured {len(startup_output)} lines of server output")
                relevant_logs = [
                    line
                    for line in startup_output
                    if any(keyword in line.lower() for keyword in ["telemetry", "otel", "provider", "error creating"])
                ]
                if relevant_logs:
                    print("[DEBUG] Relevant server logs:")
                    for log in relevant_logs[-10:]:  # Last 10 relevant lines
                        print(f" {log.strip()}")
                time.sleep(0.5)
                break
        except requests.exceptions.RequestException:
            if i == max_wait - 1:
                process.terminate()
                stdout, _ = process.communicate(timeout=5)
                pytest.fail(f"Server failed to start.\nOutput: {stdout}")
            time.sleep(1)

    yield {
        "base_url": base_url,
        "port": port,
        "collector": mock_otlp_collector,
        "vllm_server": mock_vllm_server,
    }

    # Cleanup
    print("\n[INFO] Stopping Llama Stack server")
    process.terminate()
    try:
        process.wait(timeout=5)
    except subprocess.TimeoutExpired:
        process.kill()


# ============================================================================
# TESTS: End-to-End with Real Stack
# **THESE RUN SLOW** - marked with @pytest.mark.slow
# **TO ADD NEW E2E TESTS:** Add methods to this class
# ============================================================================


@pytest.mark.slow
class TestOTelE2E:
    """
    End-to-end tests with a real Llama Stack server.

    These tests verify the complete flow:
    - Real Llama Stack with inline OTel provider
    - Real API calls
    - Automatic trace and metric collection
    - Mock OTLP collector captures exports
    """

    def test_server_starts_with_auto_instrumentation(self, llama_stack_server):
        """Verify server starts successfully with inline OTel provider."""
        base_url = llama_stack_server["base_url"]

        # Try different health check endpoints
        health_endpoints = ["/health", "/v1/health", "/"]
        server_responding = False

        for endpoint in health_endpoints:
            try:
                response = requests.get(f"{base_url}{endpoint}", timeout=5)
                print(f"\n[DEBUG] {endpoint} -> {response.status_code}")
                if response.status_code == 200:
                    server_responding = True
                    break
            except Exception as e:
                print(f"[DEBUG] {endpoint} failed: {e}")

        assert server_responding, f"Server not responding on any endpoint at {base_url}"

        print(f"\n[PASS] Llama Stack running with OTel at {base_url}")

    def test_all_test_cases_via_runner(self, llama_stack_server):
        """
        **MAIN TEST:** Run all TelemetryTestCase instances with custom metrics validation.

        This executes every test case defined in the TEST_CASES list and validates that:
        1. Traces are exported to the collector
        2. Metrics are exported to the collector
        3. Custom metrics (defined in CUSTOM_METRICS_BASE, CUSTOM_METRICS_STREAMING)
           are captured by name with non-empty data points

        Each test case specifies which metrics to validate via its expected_metrics field.

        **TO ADD MORE TESTS:**
        - Add a TelemetryTestCase to TEST_CASES (see the commented example after the list)
        - Reference CUSTOM_METRICS_BASE or CUSTOM_METRICS_STREAMING in expected_metrics
        - See the existing test cases for examples

        **TO ADD NEW METRICS:**
        - Add the metric to otel.py
        - Add the metric name to CUSTOM_METRICS_BASE or CUSTOM_METRICS_STREAMING
        - Update the test cases that should validate it
        """
        base_url = llama_stack_server["base_url"]
        collector = llama_stack_server["collector"]

        # Create test runner
        runner = TelemetryTestRunner(base_url, collector)

        # Execute all test cases (set verbose=False for cleaner output)
        results = runner.run_all_test_cases(TEST_CASES, verbose=False)

        print(f"\n{'=' * 50}\nTEST CASE SUMMARY\n{'=' * 50}")
        passed = sum(1 for p in results.values() if p)
        total = len(results)
        print(f"Passed: {passed}/{total}\n")

        failed = [name for name, ok in results.items() if not ok]
        for name, ok in results.items():
            print(f" {'[PASS]' if ok else '[FAIL]'} {name}")

        print(f"{'=' * 50}\n")
        assert not failed, f"Some test cases failed: {failed}"