fix(test): telemetry otlp fixture prevents race conditions contaminating test data

This commit is contained in:
Emilio Garcia 2025-10-30 14:09:44 -04:00
parent 0a6c180631
commit 661bd28e86
2 changed files with 30 additions and 2 deletions

View file

@ -357,8 +357,6 @@ class BaseTelemetryCollector:
return None
def clear(self) -> None:
# prevent race conditions between tests caused by 200ms metric collection interval
time.sleep(0.3)
self._clear_impl()
def _snapshot_spans(self) -> tuple[SpanStub, ...]: # pragma: no cover - interface hook

View file

@ -9,6 +9,7 @@
import gzip
import os
import threading
import time
from http.server import BaseHTTPRequestHandler, HTTPServer
from socketserver import ThreadingMixIn
@ -78,6 +79,35 @@ class OtlpHttpTestCollector(BaseTelemetryCollector):
return tuple(self._metrics) if self._metrics else None
def _clear_impl(self) -> None:
"""Clear telemetry over a period of time to prevent race conditions between tests."""
with self._lock:
self._spans.clear()
self._metrics.clear()
# Prevent race conditions where telemetry arrives after clear() but before
# the test starts, causing contamination between tests
deadline = time.time() + 2.0 # Maximum wait time
last_span_count = 0
last_metric_count = 0
stable_iterations = 0
while time.time() < deadline:
with self._lock:
current_span_count = len(self._spans)
current_metric_count = len(self._metrics)
if current_span_count == last_span_count and current_metric_count == last_metric_count:
stable_iterations += 1
if stable_iterations >= 4: # 4 * 50ms = 200ms of stability
break
else:
stable_iterations = 0
last_span_count = current_span_count
last_metric_count = current_metric_count
time.sleep(0.05)
# Final clear to remove any telemetry that arrived during stabilization
with self._lock:
self._spans.clear()
self._metrics.clear()