fix(tests): metrics test improved to avoid race conditions

Emilio Garcia 2025-10-30 13:37:41 -04:00
parent 25051f1bf0
commit 0a6c180631
3 changed files with 15 additions and 5 deletions

View file

@@ -168,6 +168,7 @@ class BaseTelemetryCollector:
         expected_count: int | None = None,
         timeout: float = 5.0,
         poll_interval: float = 0.05,
+        expect_model_id: str | None = None,
     ) -> dict[str, MetricStub]:
         """Get metrics with polling until metrics are available or timeout is reached."""
@@ -175,6 +176,7 @@
         deadline = time.time() + timeout
         min_count = expected_count if expected_count is not None else 1
         accumulated_metrics = {}
+        count_metrics_with_model_id = 0
         while time.time() < deadline:
             current_metrics = self._snapshot_metrics()
@@ -183,12 +185,21 @@
                 metric_name = metric.name
                 if metric_name not in accumulated_metrics:
                     accumulated_metrics[metric_name] = metric
+                    if (
+                        expect_model_id
+                        and metric.attributes
+                        and metric.attributes.get("model_id") == expect_model_id
+                    ):
+                        count_metrics_with_model_id += 1
+                else:
+                    accumulated_metrics[metric_name] = metric
             # Check if we have enough metrics
             if len(accumulated_metrics) >= min_count:
-                return accumulated_metrics
+                if not expect_model_id:
+                    return accumulated_metrics
+                if count_metrics_with_model_id >= min_count:
+                    return accumulated_metrics
             time.sleep(poll_interval)
@@ -346,6 +357,8 @@ class BaseTelemetryCollector:
         return None

     def clear(self) -> None:
+        # prevent race conditions between tests caused by 200ms metric collection interval
+        time.sleep(0.3)
         self._clear_impl()

     def _snapshot_spans(self) -> tuple[SpanStub, ...]:  # pragma: no cover - interface hook
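
Read together, the hunks above let get_metrics() gate not only on how many distinct metrics have arrived but also on how many of them carry the expected model_id attribute, while clear() now absorbs the settling delay itself. Below is a minimal, self-contained sketch of that polling flow; MetricStub and the snapshot callable here are simplified stand-ins for the collector's real types, so treat it as an illustration of the accumulate-and-count logic rather than the repository's exact code.

# Sketch only: MetricStub and snapshot_metrics are simplified stand-ins.
import time
from dataclasses import dataclass


@dataclass
class MetricStub:
    name: str
    value: float = 0.0
    attributes: dict | None = None


def get_metrics_with_polling(
    snapshot_metrics,  # zero-arg callable returning dict[str, MetricStub]
    expected_count: int | None = None,
    timeout: float = 5.0,
    poll_interval: float = 0.05,
    expect_model_id: str | None = None,
) -> dict[str, MetricStub]:
    """Poll until enough metrics arrive, optionally requiring a model_id tag."""
    deadline = time.time() + timeout
    min_count = expected_count if expected_count is not None else 1
    accumulated_metrics: dict[str, MetricStub] = {}
    count_metrics_with_model_id = 0
    while time.time() < deadline:
        for metric in snapshot_metrics().values():
            if metric.name not in accumulated_metrics:
                accumulated_metrics[metric.name] = metric
                # Count a newly seen metric only when it carries the model_id
                # the caller asked about.
                if (
                    expect_model_id
                    and metric.attributes
                    and metric.attributes.get("model_id") == expect_model_id
                ):
                    count_metrics_with_model_id += 1
            else:
                # Refresh an already-seen metric with its latest snapshot value.
                accumulated_metrics[metric.name] = metric
        if len(accumulated_metrics) >= min_count:
            # Without a model filter, the distinct-name count alone suffices;
            # with one, also require that many metrics tagged with the model.
            if not expect_model_id:
                return accumulated_metrics
            if count_metrics_with_model_id >= min_count:
                return accumulated_metrics
        time.sleep(poll_interval)
    return accumulated_metrics


# Example driver with a static snapshot source:
stub = {"total_tokens": MetricStub("total_tokens", 42.0, {"model_id": "m1"})}
print(get_metrics_with_polling(lambda: stub, expected_count=1, expect_model_id="m1"))

Counting only newly seen metrics keeps the model_id tally aligned with the per-name accumulation, so both thresholds measure distinct metrics rather than repeated snapshots of the same one.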

View file

@@ -7,7 +7,6 @@
 """Telemetry test configuration supporting both library and server test modes."""

 import os
-import time

 import pytest
@@ -60,8 +59,6 @@ def llama_stack_client(telemetry_test_collector, request):
 @pytest.fixture
 def mock_otlp_collector(telemetry_test_collector):
     """Provides access to telemetry data and clears between tests."""
-    # prevent race conditions between tests caused by 200ms metric collection interval
-    time.sleep(0.3)
     telemetry_test_collector.clear()
     try:
         yield telemetry_test_collector
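
With the 0.3 s settling delay moved into the collector's clear(), the fixture no longer needs its own time import or sleep, and every caller of clear() gets the race protection for free. A hedged sketch of the resulting fixture shape follows; the hunk truncates inside the try block, so the teardown path is omitted here.

import pytest


@pytest.fixture
def mock_otlp_collector(telemetry_test_collector):
    """Provides access to telemetry data and clears between tests."""
    # clear() now sleeps past the 200ms metric collection interval itself,
    # so no explicit time.sleep(0.3) is needed at the fixture level.
    telemetry_test_collector.clear()
    yield telemetry_test_collector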

View file

@@ -109,7 +109,7 @@ def test_telemetry_format_completeness(mock_otlp_collector, llama_stack_client,
     # Verify token usage metrics in response using polling
     expected_metrics = ["completion_tokens", "total_tokens", "prompt_tokens"]
-    metrics = mock_otlp_collector.get_metrics(expected_count=len(expected_metrics))
+    metrics = mock_otlp_collector.get_metrics(expected_count=len(expected_metrics), expect_model_id=text_model_id)
     assert len(metrics) > 0, "No metrics found within timeout"

     # Filter metrics to only those from the specific model used in the request
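
The hunk cuts off at the filtering comment. A hypothetical continuation of that step, assuming the returned MetricStub objects expose the same attributes mapping the collector hunk checks:

# Hypothetical continuation; the filter below is illustrative, not the
# repository's exact code.
model_metrics = {
    name: metric
    for name, metric in metrics.items()
    if metric.attributes and metric.attributes.get("model_id") == text_model_id
}
assert set(expected_metrics) <= set(model_metrics), "missing per-model token metrics"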