From 0a6c180631b69554674f6fe08b32f8bc7720d1d9 Mon Sep 17 00:00:00 2001
From: Emilio Garcia
Date: Thu, 30 Oct 2025 13:37:41 -0400
Subject: [PATCH] fix(tests): metrics test improved to avoid race conditions

---
 tests/integration/telemetry/collectors/base.py  | 15 ++++++++++++++-
 tests/integration/telemetry/conftest.py         |  3 ---
 tests/integration/telemetry/test_completions.py |  2 +-
 3 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/tests/integration/telemetry/collectors/base.py b/tests/integration/telemetry/collectors/base.py
index 963da5b8e..50580ce05 100644
--- a/tests/integration/telemetry/collectors/base.py
+++ b/tests/integration/telemetry/collectors/base.py
@@ -168,6 +168,7 @@ class BaseTelemetryCollector:
         expected_count: int | None = None,
         timeout: float = 5.0,
         poll_interval: float = 0.05,
+        expect_model_id: str | None = None,
     ) -> dict[str, MetricStub]:
         """Get metrics with polling until metrics are available or timeout is reached."""
 
@@ -175,6 +176,7 @@ class BaseTelemetryCollector:
         deadline = time.time() + timeout
         min_count = expected_count if expected_count is not None else 1
         accumulated_metrics = {}
+        count_metrics_with_model_id = 0
 
         while time.time() < deadline:
             current_metrics = self._snapshot_metrics()
@@ -183,12 +185,21 @@ class BaseTelemetryCollector:
                 metric_name = metric.name
                 if metric_name not in accumulated_metrics:
                     accumulated_metrics[metric_name] = metric
+                    if (
+                        expect_model_id
+                        and metric.attributes
+                        and metric.attributes.get("model_id") == expect_model_id
+                    ):
+                        count_metrics_with_model_id += 1
                 else:
                     accumulated_metrics[metric_name] = metric
 
             # Check if we have enough metrics
             if len(accumulated_metrics) >= min_count:
-                return accumulated_metrics
+                if not expect_model_id:
+                    return accumulated_metrics
+                if count_metrics_with_model_id >= min_count:
+                    return accumulated_metrics
 
             time.sleep(poll_interval)
 
@@ -346,6 +357,8 @@ class BaseTelemetryCollector:
         return None
 
     def clear(self) -> None:
+        # prevent race conditions between tests caused by 200ms metric collection interval
+        time.sleep(0.3)
         self._clear_impl()
 
     def _snapshot_spans(self) -> tuple[SpanStub, ...]:  # pragma: no cover - interface hook
diff --git a/tests/integration/telemetry/conftest.py b/tests/integration/telemetry/conftest.py
index d6ed31412..fd9224ae4 100644
--- a/tests/integration/telemetry/conftest.py
+++ b/tests/integration/telemetry/conftest.py
@@ -7,7 +7,6 @@
 """Telemetry test configuration supporting both library and server test modes."""
 
 import os
-import time
 
 import pytest
 
@@ -60,8 +59,6 @@ def llama_stack_client(telemetry_test_collector, request):
 @pytest.fixture
 def mock_otlp_collector(telemetry_test_collector):
     """Provides access to telemetry data and clears between tests."""
-    # prevent race conditions between tests caused by 200ms metric collection interval
-    time.sleep(0.3)
     telemetry_test_collector.clear()
     try:
         yield telemetry_test_collector
diff --git a/tests/integration/telemetry/test_completions.py b/tests/integration/telemetry/test_completions.py
index d1b97ef34..695f0c036 100644
--- a/tests/integration/telemetry/test_completions.py
+++ b/tests/integration/telemetry/test_completions.py
@@ -109,7 +109,7 @@ def test_telemetry_format_completeness(mock_otlp_collector, llama_stack_client,
 
     # Verify token usage metrics in response using polling
     expected_metrics = ["completion_tokens", "total_tokens", "prompt_tokens"]
-    metrics = mock_otlp_collector.get_metrics(expected_count=len(expected_metrics))
+    metrics = mock_otlp_collector.get_metrics(expected_count=len(expected_metrics), expect_model_id=text_model_id)
    assert len(metrics) > 0, "No metrics found within timeout"
 
     # Filter metrics to only those from the specific model used in the request
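
Usage note (illustrative, not part of the patch): a minimal sketch of how a test is expected to call the extended helper after this change, based on the test_completions.py hunk above. mock_otlp_collector and text_model_id are the pytest fixtures that the existing test already assumes.

    # Poll until all three token metrics tagged with the model under test have arrived,
    # rather than returning as soon as any metric names show up.
    expected_metrics = ["completion_tokens", "total_tokens", "prompt_tokens"]
    metrics = mock_otlp_collector.get_metrics(
        expected_count=len(expected_metrics),
        expect_model_id=text_model_id,
    )
    assert len(metrics) > 0, "No metrics found within timeout"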