mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-10-04 12:07:34 +00:00
fix(telemetry): add integration and unit tests for otel provider
This commit is contained in:
parent
e45e77f7b0
commit
9a0294ab4f
11 changed files with 1052 additions and 30 deletions
368
tests/unit/providers/telemetry/test_otel.py
Normal file
368
tests/unit/providers/telemetry/test_otel.py
Normal file
|
@ -0,0 +1,368 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
import concurrent.futures
|
||||
import threading
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
import pytest
|
||||
|
||||
from llama_stack.providers.inline.telemetry.otel.config import OTelTelemetryConfig
|
||||
from llama_stack.providers.inline.telemetry.otel.otel import OTelTelemetryProvider
|
||||
|
||||
|
||||
@pytest.fixture
def otel_config():
    """Build the baseline OTelTelemetryConfig shared by the tests in this module.

    The config names the service "test-service" at version "1.0.0" in the
    "test" deployment environment and selects the "simple" span processor.
    """
    settings = {
        "service_name": "test-service",
        "service_version": "1.0.0",
        "deployment_environment": "test",
        "span_processor": "simple",
    }
    return OTelTelemetryConfig(**settings)
|
||||
|
||||
|
||||
@pytest.fixture
def otel_provider(otel_config, monkeypatch):
    """Return an OTelTelemetryProvider constructed from ``otel_config``.

    An OTLP endpoint is placed in the environment first so that constructing
    the provider does not emit missing-endpoint warnings.
    """
    # The endpoint has to be set before the provider is instantiated.
    monkeypatch.setenv("OTEL_EXPORTER_OTLP_ENDPOINT", "http://localhost:4318")
    provider = OTelTelemetryProvider(config=otel_config)
    return provider
|
||||
|
||||
|
||||
class TestOTelTelemetryProviderInitialization:
    """Construction-time behaviour of OTelTelemetryProvider."""

    def test_initialization_with_valid_config(self, otel_config, monkeypatch):
        """A valid config yields a provider holding that config, a lock and dict caches."""
        monkeypatch.setenv("OTEL_EXPORTER_OTLP_ENDPOINT", "http://localhost:4318")

        instance = OTelTelemetryProvider(config=otel_config)

        assert instance.config == otel_config
        # The lock attribute must both exist and be set.
        assert getattr(instance, "_lock", None) is not None
        # Every instrument cache is expected to be a plain dict.
        for cache_name in ("_counters", "_histograms", "_up_down_counters", "_gauges"):
            assert isinstance(getattr(instance, cache_name), dict)

    def test_initialization_sets_service_attributes(self, otel_config, monkeypatch):
        """The service identity fields of the config are readable through the provider."""
        monkeypatch.setenv("OTEL_EXPORTER_OTLP_ENDPOINT", "http://localhost:4318")

        stored_config = OTelTelemetryProvider(config=otel_config).config

        assert stored_config.service_name == "test-service"
        assert stored_config.service_version == "1.0.0"
        assert stored_config.deployment_environment == "test"

    def test_initialization_with_batch_processor(self, monkeypatch):
        """The provider can be constructed when the config selects the "batch" span processor."""
        monkeypatch.setenv("OTEL_EXPORTER_OTLP_ENDPOINT", "http://localhost:4318")
        batch_settings = {
            "service_name": "test-service",
            "service_version": "1.0.0",
            "deployment_environment": "test",
            "span_processor": "batch",
        }

        instance = OTelTelemetryProvider(config=OTelTelemetryConfig(**batch_settings))

        assert instance.config.span_processor == "batch"

    def test_warns_when_endpoints_missing(self, otel_config, monkeypatch, caplog):
        """Constructing the provider without any OTLP endpoint logs export warnings."""
        # Make sure none of the endpoint variables leak in from the outer environment.
        for variable in (
            "OTEL_EXPORTER_OTLP_ENDPOINT",
            "OTEL_EXPORTER_OTLP_TRACES_ENDPOINT",
            "OTEL_EXPORTER_OTLP_METRICS_ENDPOINT",
        ):
            monkeypatch.delenv(variable, raising=False)

        OTelTelemetryProvider(config=otel_config)

        # One warning is expected for traces and one for metrics.
        logged = [record.message for record in caplog.records]
        assert any("Traces will not be exported" in message for message in logged)
        assert any("Metrics will not be exported" in message for message in logged)
|
||||
|
||||
|
||||
class TestOTelTelemetryProviderMetrics:
    """Checks that recording metrics populates and reuses the provider's instrument caches."""

    def test_record_count_creates_counter(self, otel_provider):
        """The first record_count call for a name adds a counter entry under that name."""
        name = "test_counter"
        assert name not in otel_provider._counters

        otel_provider.record_count(name, 1.0)

        assert name in otel_provider._counters
        assert otel_provider._counters[name] is not None

    def test_record_count_reuses_counter(self, otel_provider):
        """A second record_count call for the same name keeps the same cached counter."""
        name = "test_counter"

        otel_provider.record_count(name, 1.0)
        before = otel_provider._counters[name]
        otel_provider.record_count(name, 2.0)
        after = otel_provider._counters[name]

        assert before is after
        assert len(otel_provider._counters) == 1

    def test_record_count_with_attributes(self, otel_provider):
        """record_count accepts an attributes mapping and still caches the counter."""
        labels = {"key": "value", "env": "test"}

        otel_provider.record_count("test_counter", 1.0, attributes=labels)

        assert "test_counter" in otel_provider._counters

    def test_record_histogram_creates_histogram(self, otel_provider):
        """The first record_histogram call for a name adds a histogram entry under that name."""
        name = "test_histogram"
        assert name not in otel_provider._histograms

        otel_provider.record_histogram(name, 42.5)

        assert name in otel_provider._histograms
        assert otel_provider._histograms[name] is not None

    def test_record_histogram_reuses_histogram(self, otel_provider):
        """A second record_histogram call for the same name keeps the same cached histogram."""
        name = "test_histogram"

        otel_provider.record_histogram(name, 10.0)
        before = otel_provider._histograms[name]
        otel_provider.record_histogram(name, 20.0)
        after = otel_provider._histograms[name]

        assert before is after
        assert len(otel_provider._histograms) == 1

    def test_record_histogram_with_bucket_boundaries(self, otel_provider):
        """record_histogram accepts explicit bucket boundaries and caches the histogram."""
        otel_provider.record_histogram(
            "test_histogram",
            25.0,
            explicit_bucket_boundaries_advisory=[0.0, 10.0, 50.0, 100.0],
        )

        assert "test_histogram" in otel_provider._histograms

    def test_record_up_down_counter_creates_counter(self, otel_provider):
        """The first record_up_down_counter call for a name adds an entry under that name."""
        name = "test_updown"
        assert name not in otel_provider._up_down_counters

        otel_provider.record_up_down_counter(name, 1.0)

        assert name in otel_provider._up_down_counters
        assert otel_provider._up_down_counters[name] is not None

    def test_record_up_down_counter_reuses_counter(self, otel_provider):
        """A second record_up_down_counter call for the same name keeps the cached instrument."""
        name = "test_updown"

        otel_provider.record_up_down_counter(name, 5.0)
        before = otel_provider._up_down_counters[name]
        otel_provider.record_up_down_counter(name, -3.0)
        after = otel_provider._up_down_counters[name]

        assert before is after
        assert len(otel_provider._up_down_counters) == 1

    def test_multiple_metrics_with_different_names(self, otel_provider):
        """Distinct metric names produce distinct entries in the cache for their type."""
        otel_provider.record_count("counter1", 1.0)
        otel_provider.record_count("counter2", 2.0)
        otel_provider.record_histogram("histogram1", 10.0)
        otel_provider.record_up_down_counter("updown1", 5.0)

        cache_sizes = (
            len(otel_provider._counters),
            len(otel_provider._histograms),
            len(otel_provider._up_down_counters),
        )
        assert cache_sizes[0] == 2
        assert cache_sizes[1] == 1
        assert cache_sizes[2] == 1
|
||||
|
||||
|
||||
class TestOTelTelemetryProviderThreadSafety:
    """Tests for thread safety of metric operations.

    Each test fans calls out over a thread pool and then inspects the
    provider's instrument caches. Worker exceptions are re-raised in the test
    thread so a failing ``record_*`` call cannot be hidden by a cache that
    happens to contain the expected number of entries.
    """

    @staticmethod
    def _run_concurrently(calls, max_workers):
        """Run every call on a thread pool and re-raise the first worker failure.

        Args:
            calls: Iterable of ``(func, *args)`` tuples; each is submitted to
                the pool as ``executor.submit(func, *args)`` in iteration order.
            max_workers: Upper bound on the number of pool threads.

        Raises:
            Exception: Whatever a worker raised, surfaced via ``Future.result()``.
        """
        with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
            futures = [executor.submit(*call) for call in calls]
            # concurrent.futures.wait() alone would drop worker exceptions;
            # result() re-raises them here so the test fails loudly.
            for future in concurrent.futures.as_completed(futures):
                future.result()

    def test_concurrent_counter_creation_same_name(self, otel_provider):
        """Test that concurrent calls to record_count with same name are thread-safe."""
        num_threads = 50
        counter_name = "concurrent_counter"

        def record_metric():
            otel_provider.record_count(counter_name, 1.0)

        self._run_concurrently([(record_metric,)] * num_threads, max_workers=num_threads)

        # Should have exactly one counter created despite concurrent access
        assert len(otel_provider._counters) == 1
        assert counter_name in otel_provider._counters

    def test_concurrent_histogram_creation_same_name(self, otel_provider):
        """Test that concurrent calls to record_histogram with same name are thread-safe."""
        num_threads = 50
        histogram_name = "concurrent_histogram"

        def record_metric():
            # Derive a small, varying sample value from the worker thread's id.
            thread_id = threading.current_thread().ident or 0
            otel_provider.record_histogram(histogram_name, float(thread_id % 100))

        self._run_concurrently([(record_metric,)] * num_threads, max_workers=num_threads)

        # Should have exactly one histogram created despite concurrent access
        assert len(otel_provider._histograms) == 1
        assert histogram_name in otel_provider._histograms

    def test_concurrent_up_down_counter_creation_same_name(self, otel_provider):
        """Test that concurrent calls to record_up_down_counter with same name are thread-safe."""
        num_threads = 50
        counter_name = "concurrent_updown"

        def record_metric():
            otel_provider.record_up_down_counter(counter_name, 1.0)

        self._run_concurrently([(record_metric,)] * num_threads, max_workers=num_threads)

        # Should have exactly one counter created despite concurrent access
        assert len(otel_provider._up_down_counters) == 1
        assert counter_name in otel_provider._up_down_counters

    def test_concurrent_mixed_metrics_different_names(self, otel_provider):
        """Test concurrent creation of different metric types with different names."""
        num_threads = 30

        def record_counters(thread_id):
            otel_provider.record_count(f"counter_{thread_id}", 1.0)

        def record_histograms(thread_id):
            otel_provider.record_histogram(f"histogram_{thread_id}", float(thread_id))

        def record_up_down_counters(thread_id):
            otel_provider.record_up_down_counter(f"updown_{thread_id}", float(thread_id))

        # Interleave the three metric types per index, matching submission order.
        calls = [
            (recorder, i)
            for i in range(num_threads)
            for recorder in (record_counters, record_histograms, record_up_down_counters)
        ]
        self._run_concurrently(calls, max_workers=num_threads * 3)

        # Each thread should have created its own metric
        assert len(otel_provider._counters) == num_threads
        assert len(otel_provider._histograms) == num_threads
        assert len(otel_provider._up_down_counters) == num_threads

    def test_concurrent_access_existing_and_new_metrics(self, otel_provider):
        """Test concurrent access mixing existing and new metric creation."""
        # Pre-create some metrics
        otel_provider.record_count("existing_counter", 1.0)
        otel_provider.record_histogram("existing_histogram", 10.0)

        num_threads = 40

        def mixed_operations(thread_id):
            # Half the threads use existing metrics, half create new ones
            if thread_id % 2 == 0:
                otel_provider.record_count("existing_counter", 1.0)
                otel_provider.record_histogram("existing_histogram", float(thread_id))
            else:
                otel_provider.record_count(f"new_counter_{thread_id}", 1.0)
                otel_provider.record_histogram(f"new_histogram_{thread_id}", float(thread_id))

        self._run_concurrently(
            [(mixed_operations, i) for i in range(num_threads)],
            max_workers=num_threads,
        )

        # Should have existing metrics plus half of num_threads new ones
        expected_new_counters = num_threads // 2
        expected_new_histograms = num_threads // 2

        assert len(otel_provider._counters) == 1 + expected_new_counters
        assert len(otel_provider._histograms) == 1 + expected_new_histograms
|
||||
|
||||
|
||||
class TestOTelTelemetryProviderTracing:
    """Smoke tests for the provider's tracing entry points."""

    def test_custom_trace_creates_span(self, otel_provider):
        """custom_trace returns an object that exposes ``get_span_context``."""
        created = otel_provider.custom_trace("test_span")

        assert created is not None
        assert hasattr(created, "get_span_context")

    def test_custom_trace_with_attributes(self, otel_provider):
        """custom_trace returns a span when given an attributes mapping."""
        created = otel_provider.custom_trace(
            "test_span",
            attributes={"key": "value", "operation": "test"},
        )

        assert created is not None

    def test_fastapi_middleware(self, otel_provider):
        """fastapi_middleware accepts an app object without raising."""
        # A MagicMock stands in for the application; the only check is that the call does not raise.
        otel_provider.fastapi_middleware(MagicMock())
|
||||
|
||||
|
||||
class TestOTelTelemetryProviderEdgeCases:
    """Boundary values and unusual inputs for the metric recording methods."""

    def test_record_count_with_zero(self, otel_provider):
        """Recording a zero count still caches the counter."""
        name = "zero_counter"

        otel_provider.record_count(name, 0.0)

        assert name in otel_provider._counters

    def test_record_count_with_large_value(self, otel_provider):
        """Recording a count of one million still caches the counter."""
        name = "large_counter"

        otel_provider.record_count(name, 1_000_000.0)

        assert name in otel_provider._counters

    def test_record_histogram_with_negative_value(self, otel_provider):
        """Recording a negative histogram sample still caches the histogram."""
        name = "negative_histogram"

        otel_provider.record_histogram(name, -10.0)

        assert name in otel_provider._histograms

    def test_record_up_down_counter_with_negative_value(self, otel_provider):
        """Recording a negative delta still caches the up/down counter."""
        name = "negative_updown"

        otel_provider.record_up_down_counter(name, -5.0)

        assert name in otel_provider._up_down_counters

    def test_metric_names_with_special_characters(self, otel_provider):
        """Metric names containing dots, underscores and hyphens are cached verbatim."""
        counter_name = "test.counter_name-special"
        histogram_name = "test.histogram_name-special"

        otel_provider.record_count(counter_name, 1.0)
        otel_provider.record_histogram(histogram_name, 10.0)

        assert counter_name in otel_provider._counters
        assert histogram_name in otel_provider._histograms

    def test_empty_attributes_dict(self, otel_provider):
        """An empty attributes dict is accepted by record_count."""
        otel_provider.record_count("test_counter", 1.0, attributes={})

        assert "test_counter" in otel_provider._counters

    def test_none_attributes(self, otel_provider):
        """Passing ``attributes=None`` is accepted by record_count."""
        otel_provider.record_count("test_counter", 1.0, attributes=None)

        assert "test_counter" in otel_provider._counters
|
||||
|
Loading…
Add table
Add a link
Reference in a new issue