From 0a6c180631b69554674f6fe08b32f8bc7720d1d9 Mon Sep 17 00:00:00 2001
From: Emilio Garcia
Date: Thu, 30 Oct 2025 13:37:41 -0400
Subject: [PATCH] fix(tests): metrics test improved to avoid race conditions

---
 tests/integration/telemetry/collectors/base.py  | 15 ++++++++++++++-
 tests/integration/telemetry/conftest.py         |  3 ---
 tests/integration/telemetry/test_completions.py |  2 +-
 3 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/tests/integration/telemetry/collectors/base.py b/tests/integration/telemetry/collectors/base.py
index 963da5b8e..50580ce05 100644
--- a/tests/integration/telemetry/collectors/base.py
+++ b/tests/integration/telemetry/collectors/base.py
@@ -168,6 +168,7 @@ class BaseTelemetryCollector:
         expected_count: int | None = None,
         timeout: float = 5.0,
         poll_interval: float = 0.05,
+        expect_model_id: str | None = None,
     ) -> dict[str, MetricStub]:
         """Get metrics with polling until metrics are available or timeout is reached."""
 
@@ -175,6 +176,7 @@ class BaseTelemetryCollector:
         deadline = time.time() + timeout
         min_count = expected_count if expected_count is not None else 1
         accumulated_metrics = {}
+        count_metrics_with_model_id = 0
 
         while time.time() < deadline:
             current_metrics = self._snapshot_metrics()
@@ -183,12 +185,21 @@ class BaseTelemetryCollector:
                 metric_name = metric.name
                 if metric_name not in accumulated_metrics:
                     accumulated_metrics[metric_name] = metric
+                    if (
+                        expect_model_id
+                        and metric.attributes
+                        and metric.attributes.get("model_id") == expect_model_id
+                    ):
+                        count_metrics_with_model_id += 1
                 else:
                     accumulated_metrics[metric_name] = metric
 
             # Check if we have enough metrics
             if len(accumulated_metrics) >= min_count:
-                return accumulated_metrics
+                if not expect_model_id:
+                    return accumulated_metrics
+                if count_metrics_with_model_id >= min_count:
+                    return accumulated_metrics
 
             time.sleep(poll_interval)
 
@@ -346,6 +357,8 @@ class BaseTelemetryCollector:
         return None
 
     def clear(self) -> None:
+        # prevent race conditions between tests caused by 200ms metric collection interval
+        time.sleep(0.3)
         self._clear_impl()
 
     def _snapshot_spans(self) -> tuple[SpanStub, ...]:  # pragma: no cover - interface hook
diff --git a/tests/integration/telemetry/conftest.py b/tests/integration/telemetry/conftest.py
index d6ed31412..fd9224ae4 100644
--- a/tests/integration/telemetry/conftest.py
+++ b/tests/integration/telemetry/conftest.py
@@ -7,7 +7,6 @@
 """Telemetry test configuration supporting both library and server test modes."""
 
 import os
-import time
 
 import pytest
 
@@ -60,8 +59,6 @@ def llama_stack_client(telemetry_test_collector, request):
 @pytest.fixture
 def mock_otlp_collector(telemetry_test_collector):
     """Provides access to telemetry data and clears between tests."""
-    # prevent race conditions between tests caused by 200ms metric collection interval
-    time.sleep(0.3)
     telemetry_test_collector.clear()
     try:
         yield telemetry_test_collector
diff --git a/tests/integration/telemetry/test_completions.py b/tests/integration/telemetry/test_completions.py
index d1b97ef34..695f0c036 100644
--- a/tests/integration/telemetry/test_completions.py
+++ b/tests/integration/telemetry/test_completions.py
@@ -109,7 +109,7 @@ def test_telemetry_format_completeness(mock_otlp_collector, llama_stack_client,
 
     # Verify token usage metrics in response using polling
     expected_metrics = ["completion_tokens", "total_tokens", "prompt_tokens"]
-    metrics = mock_otlp_collector.get_metrics(expected_count=len(expected_metrics))
+    metrics = mock_otlp_collector.get_metrics(expected_count=len(expected_metrics), expect_model_id=text_model_id)
    assert len(metrics) > 0, "No metrics found within timeout"
 
     # Filter metrics to only those from the specific model used in the request
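
Usage note (illustrative, not part of the patch): a minimal sketch of how a test is expected to call the extended helper after this change, based on the test_completions.py hunk above. mock_otlp_collector and text_model_id are the pytest fixtures that the existing test already assumes.

    # Poll until all three token metrics tagged with the model under test have arrived,
    # rather than returning as soon as any metric names show up.
    expected_metrics = ["completion_tokens", "total_tokens", "prompt_tokens"]
    metrics = mock_otlp_collector.get_metrics(
        expected_count=len(expected_metrics),
        expect_model_id=text_model_id,
    )
    assert len(metrics) > 0, "No metrics found within timeout"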