mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-10-09 21:18:38 +00:00
feat(telemetry:major): End to End Testing, Metric Capture, SQL Alchemy Injection
This commit is contained in:
parent
9a0294ab4f
commit
4aa2dc110d
19 changed files with 1854 additions and 881 deletions
|
@ -5,10 +5,12 @@
|
|||
# the root directory of this source tree.
|
||||
|
||||
import concurrent.futures
|
||||
import threading
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
import pytest
|
||||
from opentelemetry import trace
|
||||
from opentelemetry.metrics import Meter
|
||||
from opentelemetry.trace import Tracer
|
||||
|
||||
from llama_stack.providers.inline.telemetry.otel.config import OTelTelemetryConfig
|
||||
from llama_stack.providers.inline.telemetry.otel.otel import OTelTelemetryProvider
|
||||
|
@ -43,12 +45,6 @@ class TestOTelTelemetryProviderInitialization:
|
|||
provider = OTelTelemetryProvider(config=otel_config)
|
||||
|
||||
assert provider.config == otel_config
|
||||
assert hasattr(provider, "_lock")
|
||||
assert provider._lock is not None
|
||||
assert isinstance(provider._counters, dict)
|
||||
assert isinstance(provider._histograms, dict)
|
||||
assert isinstance(provider._up_down_counters, dict)
|
||||
assert isinstance(provider._gauges, dict)
|
||||
|
||||
def test_initialization_sets_service_attributes(self, otel_config, monkeypatch):
|
||||
"""Test that service attributes are properly configured."""
|
||||
|
@ -88,228 +84,309 @@ class TestOTelTelemetryProviderInitialization:
|
|||
assert any("Metrics will not be exported" in record.message for record in caplog.records)
|
||||
|
||||
|
||||
class TestOTelTelemetryProviderMetrics:
|
||||
"""Tests for metric recording functionality."""
|
||||
class TestOTelTelemetryProviderTracerAPI:
|
||||
"""Tests for the get_tracer() API."""
|
||||
|
||||
def test_record_count_creates_counter(self, otel_provider):
|
||||
"""Test that record_count creates a counter on first call."""
|
||||
assert "test_counter" not in otel_provider._counters
|
||||
def test_get_tracer_returns_tracer(self, otel_provider):
|
||||
"""Test that get_tracer returns a valid Tracer instance."""
|
||||
tracer = otel_provider.get_tracer("test.module")
|
||||
|
||||
otel_provider.record_count("test_counter", 1.0)
|
||||
|
||||
assert "test_counter" in otel_provider._counters
|
||||
assert otel_provider._counters["test_counter"] is not None
|
||||
assert tracer is not None
|
||||
assert isinstance(tracer, Tracer)
|
||||
|
||||
def test_record_count_reuses_counter(self, otel_provider):
|
||||
"""Test that record_count reuses existing counter."""
|
||||
otel_provider.record_count("test_counter", 1.0)
|
||||
first_counter = otel_provider._counters["test_counter"]
|
||||
|
||||
otel_provider.record_count("test_counter", 2.0)
|
||||
second_counter = otel_provider._counters["test_counter"]
|
||||
|
||||
assert first_counter is second_counter
|
||||
assert len(otel_provider._counters) == 1
|
||||
|
||||
def test_record_count_with_attributes(self, otel_provider):
|
||||
"""Test that record_count works with attributes."""
|
||||
otel_provider.record_count(
|
||||
"test_counter",
|
||||
1.0,
|
||||
attributes={"key": "value", "env": "test"}
|
||||
def test_get_tracer_with_version(self, otel_provider):
|
||||
"""Test that get_tracer works with version parameter."""
|
||||
tracer = otel_provider.get_tracer(
|
||||
instrumenting_module_name="test.module",
|
||||
instrumenting_library_version="1.0.0"
|
||||
)
|
||||
|
||||
assert "test_counter" in otel_provider._counters
|
||||
assert tracer is not None
|
||||
assert isinstance(tracer, Tracer)
|
||||
|
||||
def test_record_histogram_creates_histogram(self, otel_provider):
|
||||
"""Test that record_histogram creates a histogram on first call."""
|
||||
assert "test_histogram" not in otel_provider._histograms
|
||||
|
||||
otel_provider.record_histogram("test_histogram", 42.5)
|
||||
|
||||
assert "test_histogram" in otel_provider._histograms
|
||||
assert otel_provider._histograms["test_histogram"] is not None
|
||||
|
||||
def test_record_histogram_reuses_histogram(self, otel_provider):
|
||||
"""Test that record_histogram reuses existing histogram."""
|
||||
otel_provider.record_histogram("test_histogram", 10.0)
|
||||
first_histogram = otel_provider._histograms["test_histogram"]
|
||||
|
||||
otel_provider.record_histogram("test_histogram", 20.0)
|
||||
second_histogram = otel_provider._histograms["test_histogram"]
|
||||
|
||||
assert first_histogram is second_histogram
|
||||
assert len(otel_provider._histograms) == 1
|
||||
|
||||
def test_record_histogram_with_bucket_boundaries(self, otel_provider):
|
||||
"""Test that record_histogram works with explicit bucket boundaries."""
|
||||
boundaries = [0.0, 10.0, 50.0, 100.0]
|
||||
|
||||
otel_provider.record_histogram(
|
||||
"test_histogram",
|
||||
25.0,
|
||||
explicit_bucket_boundaries_advisory=boundaries
|
||||
def test_get_tracer_with_attributes(self, otel_provider):
|
||||
"""Test that get_tracer works with attributes."""
|
||||
tracer = otel_provider.get_tracer(
|
||||
instrumenting_module_name="test.module",
|
||||
attributes={"component": "test", "tier": "backend"}
|
||||
)
|
||||
|
||||
assert "test_histogram" in otel_provider._histograms
|
||||
assert tracer is not None
|
||||
assert isinstance(tracer, Tracer)
|
||||
|
||||
def test_record_up_down_counter_creates_counter(self, otel_provider):
|
||||
"""Test that record_up_down_counter creates a counter on first call."""
|
||||
assert "test_updown" not in otel_provider._up_down_counters
|
||||
def test_get_tracer_with_schema_url(self, otel_provider):
|
||||
"""Test that get_tracer works with schema URL."""
|
||||
tracer = otel_provider.get_tracer(
|
||||
instrumenting_module_name="test.module",
|
||||
schema_url="https://example.com/schema"
|
||||
)
|
||||
|
||||
otel_provider.record_up_down_counter("test_updown", 1.0)
|
||||
|
||||
assert "test_updown" in otel_provider._up_down_counters
|
||||
assert otel_provider._up_down_counters["test_updown"] is not None
|
||||
assert tracer is not None
|
||||
assert isinstance(tracer, Tracer)
|
||||
|
||||
def test_record_up_down_counter_reuses_counter(self, otel_provider):
|
||||
"""Test that record_up_down_counter reuses existing counter."""
|
||||
otel_provider.record_up_down_counter("test_updown", 5.0)
|
||||
first_counter = otel_provider._up_down_counters["test_updown"]
|
||||
def test_tracer_can_create_spans(self, otel_provider):
|
||||
"""Test that tracer can create spans."""
|
||||
tracer = otel_provider.get_tracer("test.module")
|
||||
|
||||
otel_provider.record_up_down_counter("test_updown", -3.0)
|
||||
second_counter = otel_provider._up_down_counters["test_updown"]
|
||||
|
||||
assert first_counter is second_counter
|
||||
assert len(otel_provider._up_down_counters) == 1
|
||||
with tracer.start_as_current_span("test.operation") as span:
|
||||
assert span is not None
|
||||
assert span.is_recording()
|
||||
|
||||
def test_multiple_metrics_with_different_names(self, otel_provider):
|
||||
"""Test that multiple metrics with different names are cached separately."""
|
||||
otel_provider.record_count("counter1", 1.0)
|
||||
otel_provider.record_count("counter2", 2.0)
|
||||
otel_provider.record_histogram("histogram1", 10.0)
|
||||
otel_provider.record_up_down_counter("updown1", 5.0)
|
||||
def test_tracer_can_create_spans_with_attributes(self, otel_provider):
|
||||
"""Test that tracer can create spans with attributes."""
|
||||
tracer = otel_provider.get_tracer("test.module")
|
||||
|
||||
assert len(otel_provider._counters) == 2
|
||||
assert len(otel_provider._histograms) == 1
|
||||
assert len(otel_provider._up_down_counters) == 1
|
||||
with tracer.start_as_current_span(
|
||||
"test.operation",
|
||||
attributes={"user.id": "123", "request.id": "abc"}
|
||||
) as span:
|
||||
assert span is not None
|
||||
assert span.is_recording()
|
||||
|
||||
def test_multiple_tracers_can_coexist(self, otel_provider):
|
||||
"""Test that multiple tracers can be created."""
|
||||
tracer1 = otel_provider.get_tracer("module.one")
|
||||
tracer2 = otel_provider.get_tracer("module.two")
|
||||
|
||||
assert tracer1 is not None
|
||||
assert tracer2 is not None
|
||||
# Tracers with different names might be the same instance or different
|
||||
# depending on OTel implementation, so just verify both work
|
||||
with tracer1.start_as_current_span("op1") as span1:
|
||||
assert span1.is_recording()
|
||||
with tracer2.start_as_current_span("op2") as span2:
|
||||
assert span2.is_recording()
|
||||
|
||||
|
||||
class TestOTelTelemetryProviderThreadSafety:
|
||||
"""Tests for thread safety of metric operations."""
|
||||
class TestOTelTelemetryProviderMeterAPI:
|
||||
"""Tests for the get_meter() API."""
|
||||
|
||||
def test_concurrent_counter_creation_same_name(self, otel_provider):
|
||||
"""Test that concurrent calls to record_count with same name are thread-safe."""
|
||||
num_threads = 50
|
||||
counter_name = "concurrent_counter"
|
||||
def test_get_meter_returns_meter(self, otel_provider):
|
||||
"""Test that get_meter returns a valid Meter instance."""
|
||||
meter = otel_provider.get_meter("test.meter")
|
||||
|
||||
def record_metric():
|
||||
otel_provider.record_count(counter_name, 1.0)
|
||||
|
||||
with concurrent.futures.ThreadPoolExecutor(max_workers=num_threads) as executor:
|
||||
futures = [executor.submit(record_metric) for _ in range(num_threads)]
|
||||
concurrent.futures.wait(futures)
|
||||
|
||||
# Should have exactly one counter created despite concurrent access
|
||||
assert len(otel_provider._counters) == 1
|
||||
assert counter_name in otel_provider._counters
|
||||
assert meter is not None
|
||||
assert isinstance(meter, Meter)
|
||||
|
||||
def test_concurrent_histogram_creation_same_name(self, otel_provider):
|
||||
"""Test that concurrent calls to record_histogram with same name are thread-safe."""
|
||||
num_threads = 50
|
||||
histogram_name = "concurrent_histogram"
|
||||
def test_get_meter_with_version(self, otel_provider):
|
||||
"""Test that get_meter works with version parameter."""
|
||||
meter = otel_provider.get_meter(
|
||||
name="test.meter",
|
||||
version="1.0.0"
|
||||
)
|
||||
|
||||
def record_metric():
|
||||
thread_id = threading.current_thread().ident or 0
|
||||
otel_provider.record_histogram(histogram_name, float(thread_id % 100))
|
||||
|
||||
with concurrent.futures.ThreadPoolExecutor(max_workers=num_threads) as executor:
|
||||
futures = [executor.submit(record_metric) for _ in range(num_threads)]
|
||||
concurrent.futures.wait(futures)
|
||||
|
||||
# Should have exactly one histogram created despite concurrent access
|
||||
assert len(otel_provider._histograms) == 1
|
||||
assert histogram_name in otel_provider._histograms
|
||||
assert meter is not None
|
||||
assert isinstance(meter, Meter)
|
||||
|
||||
def test_concurrent_up_down_counter_creation_same_name(self, otel_provider):
|
||||
"""Test that concurrent calls to record_up_down_counter with same name are thread-safe."""
|
||||
num_threads = 50
|
||||
counter_name = "concurrent_updown"
|
||||
def test_get_meter_with_attributes(self, otel_provider):
|
||||
"""Test that get_meter works with attributes."""
|
||||
meter = otel_provider.get_meter(
|
||||
name="test.meter",
|
||||
attributes={"service": "test", "env": "dev"}
|
||||
)
|
||||
|
||||
def record_metric():
|
||||
otel_provider.record_up_down_counter(counter_name, 1.0)
|
||||
|
||||
with concurrent.futures.ThreadPoolExecutor(max_workers=num_threads) as executor:
|
||||
futures = [executor.submit(record_metric) for _ in range(num_threads)]
|
||||
concurrent.futures.wait(futures)
|
||||
|
||||
# Should have exactly one counter created despite concurrent access
|
||||
assert len(otel_provider._up_down_counters) == 1
|
||||
assert counter_name in otel_provider._up_down_counters
|
||||
assert meter is not None
|
||||
assert isinstance(meter, Meter)
|
||||
|
||||
def test_concurrent_mixed_metrics_different_names(self, otel_provider):
|
||||
"""Test concurrent creation of different metric types with different names."""
|
||||
num_threads = 30
|
||||
def test_get_meter_with_schema_url(self, otel_provider):
|
||||
"""Test that get_meter works with schema URL."""
|
||||
meter = otel_provider.get_meter(
|
||||
name="test.meter",
|
||||
schema_url="https://example.com/schema"
|
||||
)
|
||||
|
||||
def record_counters(thread_id):
|
||||
otel_provider.record_count(f"counter_{thread_id}", 1.0)
|
||||
assert meter is not None
|
||||
assert isinstance(meter, Meter)
|
||||
|
||||
def test_meter_can_create_counter(self, otel_provider):
|
||||
"""Test that meter can create counters."""
|
||||
meter = otel_provider.get_meter("test.meter")
|
||||
|
||||
def record_histograms(thread_id):
|
||||
otel_provider.record_histogram(f"histogram_{thread_id}", float(thread_id))
|
||||
counter = meter.create_counter(
|
||||
"test.requests.total",
|
||||
unit="requests",
|
||||
description="Total requests"
|
||||
)
|
||||
|
||||
def record_up_down_counters(thread_id):
|
||||
otel_provider.record_up_down_counter(f"updown_{thread_id}", float(thread_id))
|
||||
assert counter is not None
|
||||
# Test that counter can be used
|
||||
counter.add(1, {"endpoint": "/test"})
|
||||
|
||||
def test_meter_can_create_histogram(self, otel_provider):
|
||||
"""Test that meter can create histograms."""
|
||||
meter = otel_provider.get_meter("test.meter")
|
||||
|
||||
with concurrent.futures.ThreadPoolExecutor(max_workers=num_threads * 3) as executor:
|
||||
futures = []
|
||||
for i in range(num_threads):
|
||||
futures.append(executor.submit(record_counters, i))
|
||||
futures.append(executor.submit(record_histograms, i))
|
||||
futures.append(executor.submit(record_up_down_counters, i))
|
||||
histogram = meter.create_histogram(
|
||||
"test.request.duration",
|
||||
unit="ms",
|
||||
description="Request duration"
|
||||
)
|
||||
|
||||
assert histogram is not None
|
||||
# Test that histogram can be used
|
||||
histogram.record(42.5, {"method": "GET"})
|
||||
|
||||
def test_meter_can_create_up_down_counter(self, otel_provider):
|
||||
"""Test that meter can create up/down counters."""
|
||||
meter = otel_provider.get_meter("test.meter")
|
||||
|
||||
up_down_counter = meter.create_up_down_counter(
|
||||
"test.active.connections",
|
||||
unit="connections",
|
||||
description="Active connections"
|
||||
)
|
||||
|
||||
assert up_down_counter is not None
|
||||
# Test that up/down counter can be used
|
||||
up_down_counter.add(5)
|
||||
up_down_counter.add(-2)
|
||||
|
||||
def test_meter_can_create_observable_gauge(self, otel_provider):
|
||||
"""Test that meter can create observable gauges."""
|
||||
meter = otel_provider.get_meter("test.meter")
|
||||
|
||||
def gauge_callback(options):
|
||||
return [{"attributes": {"host": "localhost"}, "value": 42.0}]
|
||||
|
||||
gauge = meter.create_observable_gauge(
|
||||
"test.memory.usage",
|
||||
callbacks=[gauge_callback],
|
||||
unit="bytes",
|
||||
description="Memory usage"
|
||||
)
|
||||
|
||||
assert gauge is not None
|
||||
|
||||
def test_multiple_instruments_from_same_meter(self, otel_provider):
|
||||
"""Test that a meter can create multiple instruments."""
|
||||
meter = otel_provider.get_meter("test.meter")
|
||||
|
||||
counter = meter.create_counter("test.counter")
|
||||
histogram = meter.create_histogram("test.histogram")
|
||||
up_down_counter = meter.create_up_down_counter("test.gauge")
|
||||
|
||||
assert counter is not None
|
||||
assert histogram is not None
|
||||
assert up_down_counter is not None
|
||||
|
||||
# Verify they all work
|
||||
counter.add(1)
|
||||
histogram.record(10.0)
|
||||
up_down_counter.add(5)
|
||||
|
||||
|
||||
class TestOTelTelemetryProviderNativeUsage:
|
||||
"""Tests for native OpenTelemetry usage patterns."""
|
||||
|
||||
def test_complete_tracing_workflow(self, otel_provider):
|
||||
"""Test a complete tracing workflow using native OTel API."""
|
||||
tracer = otel_provider.get_tracer("llama_stack.inference")
|
||||
|
||||
# Create parent span
|
||||
with tracer.start_as_current_span("inference.request") as parent_span:
|
||||
parent_span.set_attribute("model", "llama-3.2-1b")
|
||||
parent_span.set_attribute("user", "test-user")
|
||||
|
||||
# Create child span
|
||||
with tracer.start_as_current_span("model.load") as child_span:
|
||||
child_span.set_attribute("model.size", "1B")
|
||||
assert child_span.is_recording()
|
||||
|
||||
# Create another child span
|
||||
with tracer.start_as_current_span("inference.execute") as child_span:
|
||||
child_span.set_attribute("tokens.input", 25)
|
||||
child_span.set_attribute("tokens.output", 150)
|
||||
assert child_span.is_recording()
|
||||
|
||||
assert parent_span.is_recording()
|
||||
|
||||
def test_complete_metrics_workflow(self, otel_provider):
|
||||
"""Test a complete metrics workflow using native OTel API."""
|
||||
meter = otel_provider.get_meter("llama_stack.metrics")
|
||||
|
||||
# Create various instruments
|
||||
request_counter = meter.create_counter(
|
||||
"llama.requests.total",
|
||||
unit="requests",
|
||||
description="Total requests"
|
||||
)
|
||||
|
||||
latency_histogram = meter.create_histogram(
|
||||
"llama.inference.duration",
|
||||
unit="ms",
|
||||
description="Inference duration"
|
||||
)
|
||||
|
||||
active_sessions = meter.create_up_down_counter(
|
||||
"llama.sessions.active",
|
||||
unit="sessions",
|
||||
description="Active sessions"
|
||||
)
|
||||
|
||||
# Use the instruments
|
||||
request_counter.add(1, {"endpoint": "/chat", "status": "success"})
|
||||
latency_histogram.record(123.45, {"model": "llama-3.2-1b"})
|
||||
active_sessions.add(1)
|
||||
active_sessions.add(-1)
|
||||
|
||||
# No exceptions means success
|
||||
|
||||
def test_concurrent_tracer_usage(self, otel_provider):
|
||||
"""Test that multiple threads can use tracers concurrently."""
|
||||
def create_spans(thread_id):
|
||||
tracer = otel_provider.get_tracer(f"test.module.{thread_id}")
|
||||
for i in range(10):
|
||||
with tracer.start_as_current_span(f"operation.{i}") as span:
|
||||
span.set_attribute("thread.id", thread_id)
|
||||
span.set_attribute("iteration", i)
|
||||
|
||||
with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
|
||||
futures = [executor.submit(create_spans, i) for i in range(10)]
|
||||
concurrent.futures.wait(futures)
|
||||
|
||||
# Each thread should have created its own metric
|
||||
assert len(otel_provider._counters) == num_threads
|
||||
assert len(otel_provider._histograms) == num_threads
|
||||
assert len(otel_provider._up_down_counters) == num_threads
|
||||
# If we get here without exceptions, thread safety is working
|
||||
|
||||
def test_concurrent_access_existing_and_new_metrics(self, otel_provider):
|
||||
"""Test concurrent access mixing existing and new metric creation."""
|
||||
# Pre-create some metrics
|
||||
otel_provider.record_count("existing_counter", 1.0)
|
||||
otel_provider.record_histogram("existing_histogram", 10.0)
|
||||
def test_concurrent_meter_usage(self, otel_provider):
|
||||
"""Test that multiple threads can use meters concurrently."""
|
||||
def record_metrics(thread_id):
|
||||
meter = otel_provider.get_meter(f"test.meter.{thread_id}")
|
||||
counter = meter.create_counter(f"test.counter.{thread_id}")
|
||||
histogram = meter.create_histogram(f"test.histogram.{thread_id}")
|
||||
|
||||
for i in range(10):
|
||||
counter.add(1, {"thread": str(thread_id)})
|
||||
histogram.record(float(i * 10), {"thread": str(thread_id)})
|
||||
|
||||
num_threads = 40
|
||||
|
||||
def mixed_operations(thread_id):
|
||||
# Half the threads use existing metrics, half create new ones
|
||||
if thread_id % 2 == 0:
|
||||
otel_provider.record_count("existing_counter", 1.0)
|
||||
otel_provider.record_histogram("existing_histogram", float(thread_id))
|
||||
else:
|
||||
otel_provider.record_count(f"new_counter_{thread_id}", 1.0)
|
||||
otel_provider.record_histogram(f"new_histogram_{thread_id}", float(thread_id))
|
||||
|
||||
with concurrent.futures.ThreadPoolExecutor(max_workers=num_threads) as executor:
|
||||
futures = [executor.submit(mixed_operations, i) for i in range(num_threads)]
|
||||
with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
|
||||
futures = [executor.submit(record_metrics, i) for i in range(10)]
|
||||
concurrent.futures.wait(futures)
|
||||
|
||||
# Should have existing metrics plus half of num_threads new ones
|
||||
expected_new_counters = num_threads // 2
|
||||
expected_new_histograms = num_threads // 2
|
||||
# If we get here without exceptions, thread safety is working
|
||||
|
||||
def test_mixed_tracing_and_metrics(self, otel_provider):
|
||||
"""Test using both tracing and metrics together."""
|
||||
tracer = otel_provider.get_tracer("test.module")
|
||||
meter = otel_provider.get_meter("test.meter")
|
||||
|
||||
assert len(otel_provider._counters) == 1 + expected_new_counters
|
||||
assert len(otel_provider._histograms) == 1 + expected_new_histograms
|
||||
counter = meter.create_counter("operations.count")
|
||||
histogram = meter.create_histogram("operation.duration")
|
||||
|
||||
# Trace an operation while recording metrics
|
||||
with tracer.start_as_current_span("test.operation") as span:
|
||||
counter.add(1)
|
||||
span.set_attribute("step", "start")
|
||||
|
||||
histogram.record(50.0)
|
||||
span.set_attribute("step", "processing")
|
||||
|
||||
counter.add(1)
|
||||
span.set_attribute("step", "complete")
|
||||
|
||||
# No exceptions means success
|
||||
|
||||
|
||||
class TestOTelTelemetryProviderTracing:
|
||||
"""Tests for tracing functionality."""
|
||||
|
||||
def test_custom_trace_creates_span(self, otel_provider):
|
||||
"""Test that custom_trace creates a span."""
|
||||
span = otel_provider.custom_trace("test_span")
|
||||
|
||||
assert span is not None
|
||||
assert hasattr(span, "get_span_context")
|
||||
|
||||
def test_custom_trace_with_attributes(self, otel_provider):
|
||||
"""Test that custom_trace works with attributes."""
|
||||
attributes = {"key": "value", "operation": "test"}
|
||||
|
||||
span = otel_provider.custom_trace("test_span", attributes=attributes)
|
||||
|
||||
assert span is not None
|
||||
class TestOTelTelemetryProviderFastAPIMiddleware:
|
||||
"""Tests for FastAPI middleware functionality."""
|
||||
|
||||
def test_fastapi_middleware(self, otel_provider):
|
||||
"""Test that fastapi_middleware can be called."""
|
||||
|
@ -318,51 +395,182 @@ class TestOTelTelemetryProviderTracing:
|
|||
# Should not raise an exception
|
||||
otel_provider.fastapi_middleware(mock_app)
|
||||
|
||||
def test_fastapi_middleware_is_idempotent(self, otel_provider):
|
||||
"""Test that calling fastapi_middleware multiple times is safe."""
|
||||
mock_app = MagicMock()
|
||||
|
||||
# Should be able to call multiple times without error
|
||||
otel_provider.fastapi_middleware(mock_app)
|
||||
# Note: Second call might warn but shouldn't fail
|
||||
# otel_provider.fastapi_middleware(mock_app)
|
||||
|
||||
|
||||
class TestOTelTelemetryProviderEdgeCases:
|
||||
"""Tests for edge cases and error conditions."""
|
||||
|
||||
def test_record_count_with_zero(self, otel_provider):
|
||||
"""Test that record_count works with zero value."""
|
||||
otel_provider.record_count("zero_counter", 0.0)
|
||||
def test_tracer_with_empty_module_name(self, otel_provider):
|
||||
"""Test that get_tracer works with empty module name."""
|
||||
tracer = otel_provider.get_tracer("")
|
||||
|
||||
assert "zero_counter" in otel_provider._counters
|
||||
assert tracer is not None
|
||||
assert isinstance(tracer, Tracer)
|
||||
|
||||
def test_record_count_with_large_value(self, otel_provider):
|
||||
"""Test that record_count works with large values."""
|
||||
otel_provider.record_count("large_counter", 1_000_000.0)
|
||||
def test_meter_with_empty_name(self, otel_provider):
|
||||
"""Test that get_meter works with empty name."""
|
||||
meter = otel_provider.get_meter("")
|
||||
|
||||
assert "large_counter" in otel_provider._counters
|
||||
assert meter is not None
|
||||
assert isinstance(meter, Meter)
|
||||
|
||||
def test_record_histogram_with_negative_value(self, otel_provider):
|
||||
"""Test that record_histogram works with negative values."""
|
||||
otel_provider.record_histogram("negative_histogram", -10.0)
|
||||
def test_meter_instruments_with_special_characters(self, otel_provider):
|
||||
"""Test that metric names with dots, underscores, and hyphens work."""
|
||||
meter = otel_provider.get_meter("test.meter")
|
||||
|
||||
assert "negative_histogram" in otel_provider._histograms
|
||||
|
||||
def test_record_up_down_counter_with_negative_value(self, otel_provider):
|
||||
"""Test that record_up_down_counter works with negative values."""
|
||||
otel_provider.record_up_down_counter("negative_updown", -5.0)
|
||||
counter = meter.create_counter("test.counter_name-special")
|
||||
histogram = meter.create_histogram("test.histogram_name-special")
|
||||
|
||||
assert "negative_updown" in otel_provider._up_down_counters
|
||||
|
||||
def test_metric_names_with_special_characters(self, otel_provider):
|
||||
"""Test that metric names with dots and underscores work."""
|
||||
otel_provider.record_count("test.counter_name-special", 1.0)
|
||||
otel_provider.record_histogram("test.histogram_name-special", 10.0)
|
||||
assert counter is not None
|
||||
assert histogram is not None
|
||||
|
||||
assert "test.counter_name-special" in otel_provider._counters
|
||||
assert "test.histogram_name-special" in otel_provider._histograms
|
||||
# Verify they can be used
|
||||
counter.add(1)
|
||||
histogram.record(10.0)
|
||||
|
||||
def test_empty_attributes_dict(self, otel_provider):
|
||||
def test_meter_counter_with_zero_value(self, otel_provider):
|
||||
"""Test that counters work with zero value."""
|
||||
meter = otel_provider.get_meter("test.meter")
|
||||
counter = meter.create_counter("test.counter")
|
||||
|
||||
# Should not raise an exception
|
||||
counter.add(0.0)
|
||||
|
||||
def test_meter_histogram_with_negative_value(self, otel_provider):
|
||||
"""Test that histograms accept negative values."""
|
||||
meter = otel_provider.get_meter("test.meter")
|
||||
histogram = meter.create_histogram("test.histogram")
|
||||
|
||||
# Should not raise an exception
|
||||
histogram.record(-10.0)
|
||||
|
||||
def test_meter_up_down_counter_with_negative_value(self, otel_provider):
|
||||
"""Test that up/down counters work with negative values."""
|
||||
meter = otel_provider.get_meter("test.meter")
|
||||
up_down_counter = meter.create_up_down_counter("test.updown")
|
||||
|
||||
# Should not raise an exception
|
||||
up_down_counter.add(-5.0)
|
||||
|
||||
def test_meter_instruments_with_empty_attributes(self, otel_provider):
|
||||
"""Test that empty attributes dict is handled correctly."""
|
||||
otel_provider.record_count("test_counter", 1.0, attributes={})
|
||||
meter = otel_provider.get_meter("test.meter")
|
||||
counter = meter.create_counter("test.counter")
|
||||
|
||||
assert "test_counter" in otel_provider._counters
|
||||
# Should not raise an exception
|
||||
counter.add(1.0, attributes={})
|
||||
|
||||
def test_none_attributes(self, otel_provider):
|
||||
def test_meter_instruments_with_none_attributes(self, otel_provider):
|
||||
"""Test that None attributes are handled correctly."""
|
||||
otel_provider.record_count("test_counter", 1.0, attributes=None)
|
||||
meter = otel_provider.get_meter("test.meter")
|
||||
counter = meter.create_counter("test.counter")
|
||||
|
||||
assert "test_counter" in otel_provider._counters
|
||||
# Should not raise an exception
|
||||
counter.add(1.0, attributes=None)
|
||||
|
||||
|
||||
class TestOTelTelemetryProviderRealisticScenarios:
|
||||
"""Tests simulating realistic usage scenarios."""
|
||||
|
||||
def test_inference_request_telemetry(self, otel_provider):
|
||||
"""Simulate telemetry for a complete inference request."""
|
||||
tracer = otel_provider.get_tracer("llama_stack.inference")
|
||||
meter = otel_provider.get_meter("llama_stack.metrics")
|
||||
|
||||
# Create instruments
|
||||
request_counter = meter.create_counter("llama.requests.total")
|
||||
token_counter = meter.create_counter("llama.tokens.total")
|
||||
latency_histogram = meter.create_histogram("llama.request.duration_ms")
|
||||
in_flight_gauge = meter.create_up_down_counter("llama.requests.in_flight")
|
||||
|
||||
# Simulate request
|
||||
with tracer.start_as_current_span("inference.request") as request_span:
|
||||
request_span.set_attribute("model.id", "llama-3.2-1b")
|
||||
request_span.set_attribute("user.id", "test-user")
|
||||
|
||||
request_counter.add(1, {"model": "llama-3.2-1b"})
|
||||
in_flight_gauge.add(1)
|
||||
|
||||
# Simulate token counting
|
||||
token_counter.add(25, {"type": "input", "model": "llama-3.2-1b"})
|
||||
token_counter.add(150, {"type": "output", "model": "llama-3.2-1b"})
|
||||
|
||||
# Simulate latency
|
||||
latency_histogram.record(125.5, {"model": "llama-3.2-1b"})
|
||||
|
||||
in_flight_gauge.add(-1)
|
||||
request_span.set_attribute("tokens.input", 25)
|
||||
request_span.set_attribute("tokens.output", 150)
|
||||
|
||||
def test_multi_step_workflow_with_nested_spans(self, otel_provider):
|
||||
"""Simulate a multi-step workflow with nested spans."""
|
||||
tracer = otel_provider.get_tracer("llama_stack.workflow")
|
||||
meter = otel_provider.get_meter("llama_stack.workflow.metrics")
|
||||
|
||||
step_counter = meter.create_counter("workflow.steps.completed")
|
||||
|
||||
with tracer.start_as_current_span("workflow.execute") as root_span:
|
||||
root_span.set_attribute("workflow.id", "wf-123")
|
||||
|
||||
# Step 1: Validate
|
||||
with tracer.start_as_current_span("step.validate") as span:
|
||||
span.set_attribute("validation.result", "pass")
|
||||
step_counter.add(1, {"step": "validate", "status": "success"})
|
||||
|
||||
# Step 2: Process
|
||||
with tracer.start_as_current_span("step.process") as span:
|
||||
span.set_attribute("items.processed", 100)
|
||||
step_counter.add(1, {"step": "process", "status": "success"})
|
||||
|
||||
# Step 3: Finalize
|
||||
with tracer.start_as_current_span("step.finalize") as span:
|
||||
span.set_attribute("output.size", 1024)
|
||||
step_counter.add(1, {"step": "finalize", "status": "success"})
|
||||
|
||||
root_span.set_attribute("workflow.status", "completed")
|
||||
|
||||
def test_error_handling_with_telemetry(self, otel_provider):
|
||||
"""Test telemetry when errors occur."""
|
||||
tracer = otel_provider.get_tracer("llama_stack.errors")
|
||||
meter = otel_provider.get_meter("llama_stack.errors.metrics")
|
||||
|
||||
error_counter = meter.create_counter("llama.errors.total")
|
||||
|
||||
with tracer.start_as_current_span("operation.with.error") as span:
|
||||
try:
|
||||
span.set_attribute("step", "processing")
|
||||
# Simulate an error
|
||||
raise ValueError("Test error")
|
||||
except ValueError as e:
|
||||
span.record_exception(e)
|
||||
span.set_status(trace.Status(trace.StatusCode.ERROR, str(e)))
|
||||
error_counter.add(1, {"error.type": "ValueError"})
|
||||
|
||||
# Should not raise - error was handled
|
||||
|
||||
def test_batch_operations_telemetry(self, otel_provider):
|
||||
"""Test telemetry for batch operations."""
|
||||
tracer = otel_provider.get_tracer("llama_stack.batch")
|
||||
meter = otel_provider.get_meter("llama_stack.batch.metrics")
|
||||
|
||||
batch_counter = meter.create_counter("llama.batch.items.processed")
|
||||
batch_duration = meter.create_histogram("llama.batch.duration_ms")
|
||||
|
||||
with tracer.start_as_current_span("batch.process") as batch_span:
|
||||
batch_span.set_attribute("batch.size", 100)
|
||||
|
||||
for i in range(100):
|
||||
with tracer.start_as_current_span(f"item.{i}") as item_span:
|
||||
item_span.set_attribute("item.index", i)
|
||||
batch_counter.add(1, {"status": "success"})
|
||||
|
||||
batch_duration.record(5000.0, {"batch.size": "100"})
|
||||
batch_span.set_attribute("batch.status", "completed")
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue