Repository: https://github.com/meta-llama/llama-stack.git

commit 2b7a765d02 (parent 4aa2dc110d)
fix(pr specific): passes pre-commit

20 changed files with 547 additions and 516 deletions
@@ -34,7 +34,7 @@ import os
 import socket
 import subprocess
 import time
-from typing import Any, Dict, List
+from typing import Any

 import pytest
 import requests
@@ -44,28 +44,28 @@ from pydantic import BaseModel, Field
 # Mock servers are in the mocking/ subdirectory
 from .mocking import (
     MockOTLPCollector,
-    MockVLLMServer,
     MockServerConfig,
+    MockVLLMServer,
     start_mock_servers_async,
     stop_mock_servers,
 )


 # ============================================================================
 # DATA MODELS
 # ============================================================================


 class TelemetryTestCase(BaseModel):
     """
     Pydantic model defining expected telemetry for an API call.

     **TO ADD A NEW TEST CASE:** Add to TEST_CASES list below.
     """

     name: str = Field(description="Unique test case identifier")
     http_method: str = Field(description="HTTP method (GET, POST, etc.)")
     api_path: str = Field(description="API path (e.g., '/v1/models')")
-    request_body: Dict[str, Any] | None = Field(default=None)
+    request_body: dict[str, Any] | None = Field(default=None)
     expected_http_status: int = Field(default=200)
     expected_trace_exports: int = Field(default=1, description="Minimum number of trace exports expected")
     expected_metric_exports: int = Field(default=0, description="Minimum number of metric exports expected")
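As a point of reference between hunks, a TEST_CASES entry built from this model might look like the sketch below. The field names follow TelemetryTestCase as defined above; the specific path and expected counts are illustrative assumptions, not values taken from this commit.

# Illustrative sketch only, not part of this diff; values are assumptions.
EXAMPLE_CASE = TelemetryTestCase(
    name="list_models",            # hypothetical identifier
    http_method="GET",
    api_path="/v1/models",         # same example path used in the Field description above
    request_body=None,
    expected_http_status=200,
    expected_trace_exports=1,      # at least one trace export expected
    expected_metric_exports=0,     # no metric exports required for this call
)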
@@ -103,71 +103,74 @@ TEST_CASES = [
 # TEST INFRASTRUCTURE
 # ============================================================================


 class TelemetryTestRunner:
     """
     Executes TelemetryTestCase instances against real Llama Stack.

     **HOW IT WORKS:**
     1. Makes real HTTP request to the stack
     2. Waits for telemetry export
     3. Verifies exports were sent to mock collector
     """

     def __init__(self, base_url: str, collector: MockOTLPCollector):
         self.base_url = base_url
         self.collector = collector

     def run_test_case(self, test_case: TelemetryTestCase, verbose: bool = False) -> bool:
         """Execute a single test case and verify telemetry."""
         initial_traces = self.collector.get_trace_count()
         initial_metrics = self.collector.get_metric_count()

         if verbose:
             print(f"\n--- {test_case.name} ---")
             print(f" {test_case.http_method} {test_case.api_path}")

         # Make real HTTP request to Llama Stack
         try:
             url = f"{self.base_url}{test_case.api_path}"

             if test_case.http_method == "GET":
                 response = requests.get(url, timeout=5)
             elif test_case.http_method == "POST":
                 response = requests.post(url, json=test_case.request_body or {}, timeout=5)
             else:
                 response = requests.request(test_case.http_method, url, timeout=5)

             if verbose:
                 print(f" HTTP Response: {response.status_code}")

             status_match = response.status_code == test_case.expected_http_status

         except requests.exceptions.RequestException as e:
             if verbose:
                 print(f" Request failed: {e}")
             status_match = False

         # Wait for automatic instrumentation to export telemetry
         # Traces export immediately, metrics export every 1 second (configured via env var)
         time.sleep(2.0)  # Wait for both traces and metrics to export

         # Verify traces were exported to mock collector
         new_traces = self.collector.get_trace_count() - initial_traces
         traces_exported = new_traces >= test_case.expected_trace_exports

         # Verify metrics were exported (if expected)
         new_metrics = self.collector.get_metric_count() - initial_metrics
         metrics_exported = new_metrics >= test_case.expected_metric_exports

         if verbose:
-            print(f" Expected: >={test_case.expected_trace_exports} trace exports, >={test_case.expected_metric_exports} metric exports")
+            print(
+                f" Expected: >={test_case.expected_trace_exports} trace exports, >={test_case.expected_metric_exports} metric exports"
+            )
             print(f" Actual: {new_traces} trace exports, {new_metrics} metric exports")
             result = status_match and traces_exported and metrics_exported
             print(f" Result: {'PASS' if result else 'FAIL'}")

         return status_match and traces_exported and metrics_exported

-    def run_all_test_cases(self, test_cases: List[TelemetryTestCase], verbose: bool = True) -> Dict[str, bool]:
+    def run_all_test_cases(self, test_cases: list[TelemetryTestCase], verbose: bool = True) -> dict[str, bool]:
         """Run all test cases and return results."""
         results = {}
         for test_case in test_cases:
@@ -179,11 +182,12 @@ class TelemetryTestRunner:
 # HELPER FUNCTIONS
 # ============================================================================


 def is_port_available(port: int) -> bool:
     """Check if a TCP port is available for binding."""
     try:
         with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
-            sock.bind(('localhost', port))
+            sock.bind(("localhost", port))
             return True
     except OSError:
         return False
@@ -193,20 +197,21 @@ def is_port_available(port: int) -> bool:
 # PYTEST FIXTURES
 # ============================================================================


 @pytest.fixture(scope="module")
 def mock_servers():
     """
     Fixture: Start all mock servers in parallel using async harness.

     **TO ADD A NEW MOCK SERVER:**
     Just add a MockServerConfig to the MOCK_SERVERS list below.
     """
     import asyncio

     # ========================================================================
     # MOCK SERVER CONFIGURATION
     # **TO ADD A NEW MOCK:** Just add a MockServerConfig instance below
     #
     #
     # Example:
     #   MockServerConfig(
     #       name="Mock MyService",
@@ -214,7 +219,7 @@ def mock_servers():
     #       init_kwargs={"port": 9000, "param": "value"},
     #   ),
     # ========================================================================
-    MOCK_SERVERS = [
+    mock_servers_config = [
         MockServerConfig(
             name="Mock OTLP Collector",
             server_class=MockOTLPCollector,
@@ -230,17 +235,17 @@ def mock_servers():
         ),
         # Add more mock servers here - they will start in parallel automatically!
     ]

     # Start all servers in parallel
-    servers = asyncio.run(start_mock_servers_async(MOCK_SERVERS))
+    servers = asyncio.run(start_mock_servers_async(mock_servers_config))

     # Verify vLLM models
     models_response = requests.get("http://localhost:8000/v1/models", timeout=1)
     models_data = models_response.json()
     print(f"[INFO] Mock vLLM serving {len(models_data['data'])} models: {[m['id'] for m in models_data['data']]}")

     yield servers

     # Stop all servers
     stop_mock_servers(servers)
@@ -261,22 +266,22 @@ def mock_vllm_server(mock_servers):
 def llama_stack_server(tmp_path_factory, mock_otlp_collector, mock_vllm_server):
     """
     Fixture: Start real Llama Stack server with automatic OTel instrumentation.

     **THIS IS THE MAIN FIXTURE** - it runs:
     opentelemetry-instrument llama stack run --config run.yaml

     **TO MODIFY STACK CONFIG:** Edit run_config dict below
     """
     config_dir = tmp_path_factory.mktemp("otel-stack-config")

     # Ensure mock vLLM is ready and accessible before starting Llama Stack
-    print(f"\n[INFO] Verifying mock vLLM is accessible at http://localhost:8000...")
+    print("\n[INFO] Verifying mock vLLM is accessible at http://localhost:8000...")
     try:
         vllm_models = requests.get("http://localhost:8000/v1/models", timeout=2)
         print(f"[INFO] Mock vLLM models endpoint response: {vllm_models.status_code}")
     except Exception as e:
         pytest.fail(f"Mock vLLM not accessible before starting Llama Stack: {e}")

     # Create run.yaml with inference provider
     # **TO ADD MORE PROVIDERS:** Add to providers dict
     run_config = {
@@ -300,19 +305,19 @@ def llama_stack_server(tmp_path_factory, mock_otlp_collector, mock_vllm_server):
                 }
             ],
     }

     config_file = config_dir / "run.yaml"
     with open(config_file, "w") as f:
         yaml.dump(run_config, f)

     # Find available port for Llama Stack
     port = 5555
     while not is_port_available(port) and port < 5600:
         port += 1

     if port >= 5600:
         pytest.skip("No available ports for test server")

     # Set environment variables for OTel instrumentation
     # NOTE: These only affect the subprocess, not other tests
     env = os.environ.copy()
@@ -321,29 +326,32 @@ def llama_stack_server(tmp_path_factory, mock_otlp_collector, mock_vllm_server):
     env["OTEL_SERVICE_NAME"] = "llama-stack-e2e-test"
     env["LLAMA_STACK_PORT"] = str(port)
     env["OTEL_PYTHON_LOGGING_AUTO_INSTRUMENTATION_ENABLED"] = "true"

     # Configure fast metric export for testing (default is 60 seconds)
     # This makes metrics export every 500ms instead of every 60 seconds
     env["OTEL_METRIC_EXPORT_INTERVAL"] = "500"  # milliseconds
     env["OTEL_METRIC_EXPORT_TIMEOUT"] = "1000"  # milliseconds

     # Disable inference recording to ensure real requests to our mock vLLM
     # This is critical - without this, Llama Stack replays cached responses
     # Safe to remove here as it only affects the subprocess environment
     if "LLAMA_STACK_TEST_INFERENCE_MODE" in env:
         del env["LLAMA_STACK_TEST_INFERENCE_MODE"]

     # Start server with automatic instrumentation
     cmd = [
         "opentelemetry-instrument",  # ← Automatic instrumentation wrapper
-        "llama", "stack", "run",
+        "llama",
+        "stack",
+        "run",
         str(config_file),
-        "--port", str(port),
+        "--port",
+        str(port),
     ]

     print(f"\n[INFO] Starting Llama Stack with OTel instrumentation on port {port}")
     print(f"[INFO] Command: {' '.join(cmd)}")

     process = subprocess.Popen(
         cmd,
         env=env,
@@ -351,11 +359,11 @@ def llama_stack_server(tmp_path_factory, mock_otlp_collector, mock_vllm_server):
         stderr=subprocess.PIPE,
         text=True,
     )

     # Wait for server to start
     max_wait = 30
     base_url = f"http://localhost:{port}"

     for i in range(max_wait):
         try:
             response = requests.get(f"{base_url}/v1/health", timeout=1)
@@ -368,16 +376,16 @@ def llama_stack_server(tmp_path_factory, mock_otlp_collector, mock_vllm_server):
             stdout, stderr = process.communicate(timeout=5)
             pytest.fail(f"Server failed to start.\nStdout: {stdout}\nStderr: {stderr}")
         time.sleep(1)

     yield {
-        'base_url': base_url,
-        'port': port,
-        'collector': mock_otlp_collector,
-        'vllm_server': mock_vllm_server,
+        "base_url": base_url,
+        "port": port,
+        "collector": mock_otlp_collector,
+        "vllm_server": mock_vllm_server,
     }

     # Cleanup
-    print(f"\n[INFO] Stopping Llama Stack server")
+    print("\n[INFO] Stopping Llama Stack server")
     process.terminate()
     try:
         process.wait(timeout=5)
@@ -391,26 +399,27 @@ def llama_stack_server(tmp_path_factory, mock_otlp_collector, mock_vllm_server):
 # **TO ADD NEW E2E TESTS:** Add methods to this class
 # ============================================================================


 @pytest.mark.slow
 class TestOTelE2E:
     """
     End-to-end tests with real Llama Stack server.

     These tests verify the complete flow:
     - Real Llama Stack with opentelemetry-instrument
     - Real API calls
     - Real automatic instrumentation
     - Mock OTLP collector captures exports
     """

     def test_server_starts_with_auto_instrumentation(self, llama_stack_server):
         """Verify server starts successfully with opentelemetry-instrument."""
-        base_url = llama_stack_server['base_url']
+        base_url = llama_stack_server["base_url"]

         # Try different health check endpoints
         health_endpoints = ["/health", "/v1/health", "/"]
         server_responding = False

         for endpoint in health_endpoints:
             try:
                 response = requests.get(f"{base_url}{endpoint}", timeout=5)
@@ -420,36 +429,36 @@ class TestOTelE2E:
                 break
             except Exception as e:
                 print(f"[DEBUG] {endpoint} failed: {e}")

         assert server_responding, f"Server not responding on any endpoint at {base_url}"

         print(f"\n[PASS] Llama Stack running with OTel at {base_url}")

     def test_all_test_cases_via_runner(self, llama_stack_server):
         """
         **MAIN TEST:** Run all TelemetryTestCase instances.

         This executes all test cases defined in TEST_CASES list.
         **TO ADD MORE TESTS:** Add to TEST_CASES at top of file
         """
-        base_url = llama_stack_server['base_url']
-        collector = llama_stack_server['collector']
+        base_url = llama_stack_server["base_url"]
+        collector = llama_stack_server["collector"]

         # Create test runner
         runner = TelemetryTestRunner(base_url, collector)

         # Execute all test cases
         results = runner.run_all_test_cases(TEST_CASES, verbose=True)

         # Print summary
-        print(f"\n{'='*50}")
-        print(f"TEST CASE SUMMARY")
-        print(f"{'='*50}")
+        print(f"\n{'=' * 50}")
+        print("TEST CASE SUMMARY")
+        print(f"{'=' * 50}")
         passed = sum(1 for p in results.values() if p)
         total = len(results)
         print(f"Passed: {passed}/{total}\n")

         for name, result in results.items():
             status = "[PASS]" if result else "[FAIL]"
             print(f" {status} {name}")
-        print(f"{'='*50}\n")
+        print(f"{'=' * 50}\n")
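Putting the pieces together, a minimal sketch of driving the runner directly is shown below. It assumes `stack` stands for the dict yielded by the llama_stack_server fixture (its "base_url" and "collector" keys appear in the diff above); the helper name run_suite is hypothetical and not part of this commit.

# Illustrative sketch only, not part of this commit.
# `stack` is assumed to be the dict yielded by the llama_stack_server fixture.
def run_suite(stack: dict) -> None:
    # Drive all declared cases against the running stack and its mock collector.
    runner = TelemetryTestRunner(stack["base_url"], stack["collector"])
    results = runner.run_all_test_cases(TEST_CASES, verbose=True)
    failed = [name for name, ok in results.items() if not ok]
    assert not failed, f"Failing telemetry cases: {failed}"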