mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-12-12 20:12:33 +00:00
feat(telemetry:major): End to End Testing, Metric Capture, SQL Alchemy Injection
This commit is contained in:
parent
e815738936
commit
7e3cf1fb20
26 changed files with 2075 additions and 1006 deletions
33
docs/docs/providers/telemetry/inline_otel.mdx
Normal file
33
docs/docs/providers/telemetry/inline_otel.mdx
Normal file
|
|
@ -0,0 +1,33 @@
|
|||
---
|
||||
description: "Native OpenTelemetry provider with full access to OTel Tracer and Meter APIs for advanced instrumentation."
|
||||
sidebar_label: Otel
|
||||
title: inline::otel
|
||||
---
|
||||
|
||||
# inline::otel
|
||||
|
||||
## Description
|
||||
|
||||
Native OpenTelemetry provider with full access to OTel Tracer and Meter APIs for advanced instrumentation.
|
||||
|
||||
## Configuration
|
||||
|
||||
| Field | Type | Required | Default | Description |
|
||||
|-------|------|----------|---------|-------------|
|
||||
| `service_name` | `<class 'str'>` | No | | The name of the service to be monitored.
|
||||
Is overridden by the OTEL_SERVICE_NAME or OTEL_RESOURCE_ATTRIBUTES environment variables. |
|
||||
| `service_version` | `str \| None` | No | | The version of the service to be monitored.
|
||||
Is overriden by the OTEL_RESOURCE_ATTRIBUTES environment variable. |
|
||||
| `deployment_environment` | `str \| None` | No | | The name of the environment of the service to be monitored.
|
||||
Is overriden by the OTEL_RESOURCE_ATTRIBUTES environment variable. |
|
||||
| `span_processor` | `BatchSpanProcessor \| SimpleSpanProcessor \| None` | No | batch | The span processor to use.
|
||||
Is overriden by the OTEL_SPAN_PROCESSOR environment variable. |
|
||||
|
||||
## Sample Configuration
|
||||
|
||||
```yaml
|
||||
service_name: ${env.OTEL_SERVICE_NAME:=llama-stack}
|
||||
service_version: ${env.OTEL_SERVICE_VERSION:=}
|
||||
deployment_environment: ${env.OTEL_DEPLOYMENT_ENVIRONMENT:=}
|
||||
span_processor: ${env.OTEL_SPAN_PROCESSOR:=batch}
|
||||
```
|
||||
|
|
@ -32,7 +32,7 @@ from termcolor import cprint
|
|||
|
||||
from llama_stack.core.build import print_pip_install_help
|
||||
from llama_stack.core.configure import parse_and_maybe_upgrade_config
|
||||
from llama_stack.core.datatypes import Api, BuildConfig, BuildProvider, DistributionSpec
|
||||
from llama_stack.core.datatypes import BuildConfig, BuildProvider, DistributionSpec
|
||||
from llama_stack.core.request_headers import (
|
||||
PROVIDER_DATA_VAR,
|
||||
request_provider_data_context,
|
||||
|
|
@ -49,7 +49,6 @@ from llama_stack.core.utils.context import preserve_contexts_async_generator
|
|||
from llama_stack.core.utils.exec import in_notebook
|
||||
from llama_stack.log import get_logger
|
||||
|
||||
|
||||
logger = get_logger(name=__name__, category="core")
|
||||
|
||||
T = TypeVar("T")
|
||||
|
|
|
|||
|
|
@ -59,7 +59,6 @@ from llama_stack.core.utils.context import preserve_contexts_async_generator
|
|||
from llama_stack.log import get_logger
|
||||
from llama_stack.providers.datatypes import Api
|
||||
|
||||
|
||||
from .auth import AuthenticationMiddleware
|
||||
from .quota import QuotaMiddleware
|
||||
|
||||
|
|
@ -232,9 +231,7 @@ def create_dynamic_typed_route(func: Any, method: str, route: str) -> Callable:
|
|||
|
||||
try:
|
||||
if is_streaming:
|
||||
gen = preserve_contexts_async_generator(
|
||||
sse_generator(func(**kwargs)), [PROVIDER_DATA_VAR]
|
||||
)
|
||||
gen = preserve_contexts_async_generator(sse_generator(func(**kwargs)), [PROVIDER_DATA_VAR])
|
||||
return StreamingResponse(gen, media_type="text/event-stream")
|
||||
else:
|
||||
value = func(**kwargs)
|
||||
|
|
@ -278,7 +275,7 @@ def create_dynamic_typed_route(func: Any, method: str, route: str) -> Callable:
|
|||
]
|
||||
)
|
||||
|
||||
setattr(route_handler, "__signature__", sig.replace(parameters=new_params))
|
||||
route_handler.__signature__ = sig.replace(parameters=new_params)
|
||||
return route_handler
|
||||
|
||||
|
||||
|
|
@ -405,6 +402,7 @@ def create_app() -> StackApp:
|
|||
|
||||
if Api.telemetry in impls:
|
||||
impls[Api.telemetry].fastapi_middleware(app)
|
||||
impls[Api.telemetry].sqlalchemy_instrumentation()
|
||||
|
||||
# Load external APIs if configured
|
||||
external_apis = load_external_apis(config)
|
||||
|
|
|
|||
|
|
@ -359,7 +359,6 @@ class Stack:
|
|||
await refresh_registry_once(impls)
|
||||
self.impls = impls
|
||||
|
||||
|
||||
# safely access impls without raising an exception
|
||||
def get_impls(self) -> dict[Api, Any]:
|
||||
if self.impls is None:
|
||||
|
|
|
|||
|
|
@ -1,4 +1,10 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
# the root directory of this source tree.
|
||||
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
|
|
|||
|
|
@ -4,46 +4,19 @@
|
|||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
from abc import abstractmethod
|
||||
|
||||
from fastapi import FastAPI
|
||||
from pydantic import BaseModel
|
||||
from typing import Any
|
||||
|
||||
|
||||
class TelemetryProvider(BaseModel):
|
||||
"""
|
||||
TelemetryProvider standardizes how telemetry is provided to the application.
|
||||
"""
|
||||
|
||||
@abstractmethod
|
||||
def fastapi_middleware(self, app: FastAPI, *args, **kwargs):
|
||||
"""
|
||||
Injects FastAPI middleware that instruments the application for telemetry.
|
||||
"""
|
||||
...
|
||||
|
||||
@abstractmethod
|
||||
def custom_trace(self, name: str, *args, **kwargs) -> Any:
|
||||
"""
|
||||
Creates a custom trace.
|
||||
"""
|
||||
...
|
||||
|
||||
@abstractmethod
|
||||
def record_count(self, name: str, *args, **kwargs):
|
||||
"""
|
||||
Increments a counter metric.
|
||||
"""
|
||||
...
|
||||
|
||||
@abstractmethod
|
||||
def record_histogram(self, name: str, *args, **kwargs):
|
||||
"""
|
||||
Records a histogram metric.
|
||||
"""
|
||||
...
|
||||
|
||||
@abstractmethod
|
||||
def record_up_down_counter(self, name: str, *args, **kwargs):
|
||||
"""
|
||||
Records an up/down counter metric.
|
||||
"""
|
||||
...
|
||||
|
|
|
|||
|
|
@ -1,20 +0,0 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
from abc import abstractmethod
|
||||
from fastapi import FastAPI
|
||||
from pydantic import BaseModel
|
||||
|
||||
|
||||
class TelemetryProvider(BaseModel):
|
||||
"""
|
||||
TelemetryProvider standardizes how telemetry is provided to the application.
|
||||
"""
|
||||
@abstractmethod
|
||||
def fastapi_middleware(self, app: FastAPI, *args, **kwargs):
|
||||
"""
|
||||
Injects FastAPI middleware that instruments the application for telemetry.
|
||||
"""
|
||||
...
|
||||
|
|
@ -1,15 +1,22 @@
|
|||
from aiohttp import hdrs
|
||||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from typing import Any
|
||||
|
||||
from aiohttp import hdrs
|
||||
|
||||
from llama_stack.apis.datatypes import Api
|
||||
from llama_stack.core.external import ExternalApiSpec
|
||||
from llama_stack.core.server.routes import find_matching_route, initialize_route_impls
|
||||
from llama_stack.log import get_logger
|
||||
from llama_stack.providers.utils.telemetry.tracing import end_trace, start_trace
|
||||
|
||||
|
||||
logger = get_logger(name=__name__, category="telemetry::meta_reference")
|
||||
|
||||
|
||||
class TracingMiddleware:
|
||||
def __init__(
|
||||
self,
|
||||
|
|
|
|||
|
|
@ -10,7 +10,6 @@ import threading
|
|||
from typing import Any, cast
|
||||
|
||||
from fastapi import FastAPI
|
||||
|
||||
from opentelemetry import metrics, trace
|
||||
from opentelemetry.exporter.otlp.proto.http.metric_exporter import OTLPMetricExporter
|
||||
from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
|
||||
|
|
@ -23,11 +22,6 @@ from opentelemetry.semconv.attributes import service_attributes
|
|||
from opentelemetry.trace.propagation.tracecontext import TraceContextTextMapPropagator
|
||||
from opentelemetry.util.types import Attributes
|
||||
|
||||
from llama_stack.core.external import ExternalApiSpec
|
||||
from llama_stack.core.server.tracing import TelemetryProvider
|
||||
from llama_stack.providers.inline.telemetry.meta_reference.middleware import TracingMiddleware
|
||||
|
||||
|
||||
from llama_stack.apis.telemetry import (
|
||||
Event,
|
||||
MetricEvent,
|
||||
|
|
@ -47,10 +41,13 @@ from llama_stack.apis.telemetry import (
|
|||
UnstructuredLogEvent,
|
||||
)
|
||||
from llama_stack.core.datatypes import Api
|
||||
from llama_stack.core.external import ExternalApiSpec
|
||||
from llama_stack.core.server.tracing import TelemetryProvider
|
||||
from llama_stack.log import get_logger
|
||||
from llama_stack.providers.inline.telemetry.meta_reference.console_span_processor import (
|
||||
ConsoleSpanProcessor,
|
||||
)
|
||||
from llama_stack.providers.inline.telemetry.meta_reference.middleware import TracingMiddleware
|
||||
from llama_stack.providers.inline.telemetry.meta_reference.sqlite_span_processor import (
|
||||
SQLiteSpanProcessor,
|
||||
)
|
||||
|
|
@ -381,7 +378,7 @@ class TelemetryAdapter(TelemetryDatasetMixin, Telemetry, TelemetryProvider):
|
|||
max_depth=max_depth,
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
def fastapi_middleware(
|
||||
self,
|
||||
app: FastAPI,
|
||||
|
|
|
|||
|
|
@ -0,0 +1,23 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from .config import OTelTelemetryConfig
|
||||
|
||||
__all__ = ["OTelTelemetryConfig"]
|
||||
|
||||
|
||||
async def get_provider_impl(config: OTelTelemetryConfig, deps):
|
||||
"""
|
||||
Get the OTel telemetry provider implementation.
|
||||
|
||||
This function is called by the Llama Stack registry to instantiate
|
||||
the provider.
|
||||
"""
|
||||
from .otel import OTelTelemetryProvider
|
||||
|
||||
# The provider is synchronously initialized via Pydantic model_post_init
|
||||
# No async initialization needed
|
||||
return OTelTelemetryProvider(config=config)
|
||||
|
|
@ -1,8 +1,13 @@
|
|||
from typing import Literal
|
||||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from typing import Any, Literal
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
type BatchSpanProcessor = Literal["batch"]
|
||||
type SimpleSpanProcessor = Literal["simple"]
|
||||
|
||||
|
|
@ -11,22 +16,35 @@ class OTelTelemetryConfig(BaseModel):
|
|||
"""
|
||||
The configuration for the OpenTelemetry telemetry provider.
|
||||
Most configuration is set using environment variables.
|
||||
See https://opentelemetry.io/docs/specs/otel/configuration/sdk-environment-variables/ for more information.
|
||||
See https://opentelemetry.io/docs/specs/otel/configuration/sdk-configuration-variables/ for more information.
|
||||
"""
|
||||
|
||||
service_name: str = Field(
|
||||
description="""The name of the service to be monitored.
|
||||
description="""The name of the service to be monitored.
|
||||
Is overridden by the OTEL_SERVICE_NAME or OTEL_RESOURCE_ATTRIBUTES environment variables.""",
|
||||
)
|
||||
service_version: str | None = Field(
|
||||
description="""The version of the service to be monitored.
|
||||
Is overriden by the OTEL_RESOURCE_ATTRIBUTES environment variable."""
|
||||
default=None,
|
||||
description="""The version of the service to be monitored.
|
||||
Is overriden by the OTEL_RESOURCE_ATTRIBUTES environment variable.""",
|
||||
)
|
||||
deployment_environment: str | None = Field(
|
||||
description="""The name of the environment of the service to be monitored.
|
||||
Is overriden by the OTEL_RESOURCE_ATTRIBUTES environment variable."""
|
||||
default=None,
|
||||
description="""The name of the environment of the service to be monitored.
|
||||
Is overriden by the OTEL_RESOURCE_ATTRIBUTES environment variable.""",
|
||||
)
|
||||
span_processor: BatchSpanProcessor | SimpleSpanProcessor | None = Field(
|
||||
description="""The span processor to use.
|
||||
description="""The span processor to use.
|
||||
Is overriden by the OTEL_SPAN_PROCESSOR environment variable.""",
|
||||
default="batch"
|
||||
default="batch",
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def sample_run_config(cls, __distro_dir__: str = "") -> dict[str, Any]:
|
||||
"""Sample configuration for use in distributions."""
|
||||
return {
|
||||
"service_name": "${env.OTEL_SERVICE_NAME:=llama-stack}",
|
||||
"service_version": "${env.OTEL_SERVICE_VERSION:=}",
|
||||
"deployment_environment": "${env.OTEL_DEPLOYMENT_ENVIRONMENT:=}",
|
||||
"span_processor": "${env.OTEL_SPAN_PROCESSOR:=batch}",
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,141 +1,301 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
import os
|
||||
import threading
|
||||
import time
|
||||
|
||||
from opentelemetry import trace, metrics
|
||||
from opentelemetry.context.context import Context
|
||||
from opentelemetry.sdk.resources import Attributes, Resource
|
||||
from opentelemetry.sdk.trace import TracerProvider
|
||||
from opentelemetry.sdk.trace.export import BatchSpanProcessor, SimpleSpanProcessor
|
||||
from fastapi import FastAPI
|
||||
from opentelemetry import metrics, trace
|
||||
from opentelemetry.exporter.otlp.proto.http.metric_exporter import OTLPMetricExporter
|
||||
from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
|
||||
from opentelemetry.sdk.metrics import MeterProvider
|
||||
from opentelemetry.metrics import Counter, UpDownCounter, Histogram, ObservableGauge
|
||||
from opentelemetry.instrumentation.fastapi import FastAPIInstrumentor
|
||||
from opentelemetry.trace import Span, SpanKind, _Links
|
||||
from typing import Sequence
|
||||
from pydantic import PrivateAttr
|
||||
from opentelemetry.instrumentation.sqlalchemy import SQLAlchemyInstrumentor
|
||||
from opentelemetry.metrics import Counter, Histogram
|
||||
from opentelemetry.sdk.metrics import MeterProvider
|
||||
from opentelemetry.sdk.metrics.export import PeriodicExportingMetricReader
|
||||
from opentelemetry.sdk.resources import Resource
|
||||
from opentelemetry.sdk.trace import TracerProvider
|
||||
from opentelemetry.sdk.trace.export import (
|
||||
BatchSpanProcessor,
|
||||
SimpleSpanProcessor,
|
||||
SpanExporter,
|
||||
SpanExportResult,
|
||||
)
|
||||
from sqlalchemy import Engine
|
||||
from starlette.types import ASGIApp, Message, Receive, Scope, Send
|
||||
|
||||
from llama_stack.core.telemetry.tracing import TelemetryProvider
|
||||
from llama_stack.core.telemetry.telemetry import TelemetryProvider
|
||||
from llama_stack.log import get_logger
|
||||
|
||||
from .config import OTelTelemetryConfig
|
||||
from fastapi import FastAPI
|
||||
|
||||
|
||||
logger = get_logger(name=__name__, category="telemetry::otel")
|
||||
|
||||
|
||||
class StreamingMetricsMiddleware:
|
||||
"""
|
||||
Pure ASGI middleware to track streaming response metrics.
|
||||
|
||||
This follows Starlette best practices by implementing pure ASGI,
|
||||
which is more efficient and less prone to bugs than BaseHTTPMiddleware.
|
||||
"""
|
||||
|
||||
def __init__(self, app: ASGIApp):
|
||||
self.app = app
|
||||
|
||||
async def __call__(self, scope: Scope, receive: Receive, send: Send):
|
||||
if scope["type"] != "http":
|
||||
await self.app(scope, receive, send)
|
||||
return
|
||||
|
||||
logger.debug(f"StreamingMetricsMiddleware called for {scope.get('method')} {scope.get('path')}")
|
||||
start_time = time.time()
|
||||
|
||||
# Track if this is a streaming response
|
||||
is_streaming = False
|
||||
|
||||
async def send_wrapper(message: Message):
|
||||
nonlocal is_streaming
|
||||
|
||||
# Detect streaming responses by headers
|
||||
if message["type"] == "http.response.start":
|
||||
headers = message.get("headers", [])
|
||||
for name, value in headers:
|
||||
if name == b"content-type" and b"text/event-stream" in value:
|
||||
is_streaming = True
|
||||
# Add streaming attribute to current span
|
||||
current_span = trace.get_current_span()
|
||||
if current_span and current_span.is_recording():
|
||||
current_span.set_attribute("http.response.is_streaming", True)
|
||||
break
|
||||
|
||||
# Record total duration when response body completes
|
||||
elif message["type"] == "http.response.body" and not message.get("more_body", False):
|
||||
if is_streaming:
|
||||
current_span = trace.get_current_span()
|
||||
if current_span and current_span.is_recording():
|
||||
total_duration_ms = (time.time() - start_time) * 1000
|
||||
current_span.set_attribute("http.streaming.total_duration_ms", total_duration_ms)
|
||||
|
||||
await send(message)
|
||||
|
||||
await self.app(scope, receive, send_wrapper)
|
||||
|
||||
|
||||
class MetricsSpanExporter(SpanExporter):
|
||||
"""Records HTTP metrics from span data."""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
request_duration: Histogram,
|
||||
streaming_duration: Histogram,
|
||||
streaming_requests: Counter,
|
||||
request_count: Counter,
|
||||
):
|
||||
self.request_duration = request_duration
|
||||
self.streaming_duration = streaming_duration
|
||||
self.streaming_requests = streaming_requests
|
||||
self.request_count = request_count
|
||||
|
||||
def export(self, spans):
|
||||
logger.debug(f"MetricsSpanExporter.export called with {len(spans)} spans")
|
||||
for span in spans:
|
||||
if not span.attributes or not span.attributes.get("http.method"):
|
||||
continue
|
||||
logger.debug(f"Processing span: {span.name}")
|
||||
|
||||
if span.end_time is None or span.start_time is None:
|
||||
continue
|
||||
|
||||
# Calculate time-to-first-byte duration
|
||||
duration_ns = span.end_time - span.start_time
|
||||
duration_ms = duration_ns / 1_000_000
|
||||
|
||||
# Check if this was a streaming response
|
||||
is_streaming = span.attributes.get("http.response.is_streaming", False)
|
||||
|
||||
attributes = {
|
||||
"http.method": str(span.attributes.get("http.method", "UNKNOWN")),
|
||||
"http.route": str(span.attributes.get("http.route", span.attributes.get("http.target", "/"))),
|
||||
"http.status_code": str(span.attributes.get("http.status_code", 0)),
|
||||
}
|
||||
|
||||
# set distributed trace attributes
|
||||
if span.attributes.get("trace_id"):
|
||||
attributes["trace_id"] = str(span.attributes.get("trace_id"))
|
||||
if span.attributes.get("span_id"):
|
||||
attributes["span_id"] = str(span.attributes.get("span_id"))
|
||||
|
||||
# Record request count and duration
|
||||
logger.debug(f"Recording metrics: duration={duration_ms}ms, attributes={attributes}")
|
||||
self.request_count.add(1, attributes)
|
||||
self.request_duration.record(duration_ms, attributes)
|
||||
logger.debug("Metrics recorded successfully")
|
||||
|
||||
# For streaming, record separately
|
||||
if is_streaming:
|
||||
logger.debug(f"MetricsSpanExporter: Recording streaming metrics for {span.name}")
|
||||
self.streaming_requests.add(1, attributes)
|
||||
|
||||
# If full streaming duration is available
|
||||
stream_total_duration = span.attributes.get("http.streaming.total_duration_ms")
|
||||
if stream_total_duration and isinstance(stream_total_duration, int | float):
|
||||
logger.debug(f"MetricsSpanExporter: Recording streaming duration: {stream_total_duration}ms")
|
||||
self.streaming_duration.record(float(stream_total_duration), attributes)
|
||||
else:
|
||||
logger.warning(
|
||||
"MetricsSpanExporter: Streaming span has no http.streaming.total_duration_ms attribute"
|
||||
)
|
||||
|
||||
return SpanExportResult.SUCCESS
|
||||
|
||||
def shutdown(self):
|
||||
pass
|
||||
|
||||
|
||||
# NOTE: DO NOT ALLOW LLM TO MODIFY THIS WITHOUT TESTING AND SUPERVISION: it frequently breaks otel integrations
|
||||
class OTelTelemetryProvider(TelemetryProvider):
|
||||
"""
|
||||
A simple Open Telemetry native telemetry provider.
|
||||
"""
|
||||
config: OTelTelemetryConfig
|
||||
_counters: dict[str, Counter] = PrivateAttr(default_factory=dict)
|
||||
_up_down_counters: dict[str, UpDownCounter] = PrivateAttr(default_factory=dict)
|
||||
_histograms: dict[str, Histogram] = PrivateAttr(default_factory=dict)
|
||||
_gauges: dict[str, ObservableGauge] = PrivateAttr(default_factory=dict)
|
||||
|
||||
config: OTelTelemetryConfig
|
||||
|
||||
def model_post_init(self, __context):
|
||||
"""Initialize provider after Pydantic validation."""
|
||||
self._lock = threading.Lock()
|
||||
|
||||
attributes: Attributes = {
|
||||
key: value
|
||||
for key, value in {
|
||||
"service.name": self.config.service_name,
|
||||
"service.version": self.config.service_version,
|
||||
"deployment.environment": self.config.deployment_environment,
|
||||
}.items()
|
||||
if value is not None
|
||||
}
|
||||
|
||||
resource = Resource.create(attributes)
|
||||
|
||||
# Configure the tracer provider
|
||||
tracer_provider = TracerProvider(resource=resource)
|
||||
trace.set_tracer_provider(tracer_provider)
|
||||
|
||||
otlp_span_exporter = OTLPSpanExporter()
|
||||
|
||||
# Configure the span processor
|
||||
# Enable batching of spans to reduce the number of requests to the collector
|
||||
if self.config.span_processor == "batch":
|
||||
tracer_provider.add_span_processor(BatchSpanProcessor(otlp_span_exporter))
|
||||
elif self.config.span_processor == "simple":
|
||||
tracer_provider.add_span_processor(SimpleSpanProcessor(otlp_span_exporter))
|
||||
|
||||
meter_provider = MeterProvider(resource=resource)
|
||||
metrics.set_meter_provider(meter_provider)
|
||||
|
||||
# Do not fail the application, but warn the user if the endpoints are not set properly.
|
||||
if not os.environ.get("OTEL_EXPORTER_OTLP_ENDPOINT"):
|
||||
if not os.environ.get("OTEL_EXPORTER_OTLP_TRACES_ENDPOINT"):
|
||||
logger.warning("OTEL_EXPORTER_OTLP_ENDPOINT or OTEL_EXPORTER_OTLP_TRACES_ENDPOINT is not set. Traces will not be exported.")
|
||||
logger.warning(
|
||||
"OTEL_EXPORTER_OTLP_ENDPOINT or OTEL_EXPORTER_OTLP_TRACES_ENDPOINT is not set. Traces will not be exported."
|
||||
)
|
||||
if not os.environ.get("OTEL_EXPORTER_OTLP_METRICS_ENDPOINT"):
|
||||
logger.warning("OTEL_EXPORTER_OTLP_ENDPOINT or OTEL_EXPORTER_OTLP_METRICS_ENDPOINT is not set. Metrics will not be exported.")
|
||||
logger.warning(
|
||||
"OTEL_EXPORTER_OTLP_ENDPOINT or OTEL_EXPORTER_OTLP_METRICS_ENDPOINT is not set. Metrics will not be exported."
|
||||
)
|
||||
|
||||
# Respect OTEL design standards where environment variables get highest precedence
|
||||
service_name = os.environ.get("OTEL_SERVICE_NAME")
|
||||
if not service_name:
|
||||
service_name = self.config.service_name
|
||||
|
||||
# Create resource with service name
|
||||
resource = Resource.create({"service.name": service_name})
|
||||
|
||||
# Configure the tracer provider (always, since llama stack run spawns subprocess without opentelemetry-instrument)
|
||||
tracer_provider = TracerProvider(resource=resource)
|
||||
trace.set_tracer_provider(tracer_provider)
|
||||
|
||||
# Configure OTLP span exporter
|
||||
otlp_span_exporter = OTLPSpanExporter()
|
||||
|
||||
# Add span processor (simple for immediate export, batch for performance)
|
||||
span_processor_type = os.environ.get("OTEL_SPAN_PROCESSOR", "batch")
|
||||
if span_processor_type == "batch":
|
||||
tracer_provider.add_span_processor(BatchSpanProcessor(otlp_span_exporter))
|
||||
else:
|
||||
tracer_provider.add_span_processor(SimpleSpanProcessor(otlp_span_exporter))
|
||||
|
||||
# Configure meter provider with OTLP exporter for metrics
|
||||
metric_reader = PeriodicExportingMetricReader(OTLPMetricExporter())
|
||||
meter_provider = MeterProvider(resource=resource, metric_readers=[metric_reader])
|
||||
metrics.set_meter_provider(meter_provider)
|
||||
|
||||
logger.info(
|
||||
f"Initialized OpenTelemetry provider with service.name={service_name}, span_processor={span_processor_type}"
|
||||
)
|
||||
|
||||
def fastapi_middleware(self, app: FastAPI):
|
||||
FastAPIInstrumentor.instrument_app(app)
|
||||
|
||||
def custom_trace(self,
|
||||
name: str,
|
||||
context: Context | None = None,
|
||||
kind: SpanKind = SpanKind.INTERNAL,
|
||||
attributes: Attributes = {},
|
||||
links: _Links = None,
|
||||
start_time: int | None = None,
|
||||
record_exception: bool = True,
|
||||
set_status_on_exception: bool = True) -> Span:
|
||||
"""
|
||||
Creates a custom tracing span using the Open Telemetry SDK.
|
||||
Instrument FastAPI with OTel for automatic tracing and metrics.
|
||||
|
||||
Captures telemetry for both regular and streaming HTTP requests:
|
||||
- Distributed traces (via FastAPIInstrumentor)
|
||||
- HTTP request metrics (count, duration, status)
|
||||
- Streaming-specific metrics (time-to-first-byte, total stream duration)
|
||||
"""
|
||||
tracer = trace.get_tracer(__name__)
|
||||
return tracer.start_span(name, context, kind, attributes, links, start_time, record_exception, set_status_on_exception)
|
||||
|
||||
# Create meter for HTTP metrics
|
||||
meter = metrics.get_meter("llama_stack.http.server")
|
||||
|
||||
def record_count(self, name: str, amount: int|float, context: Context | None = None, attributes: dict[str, str] | None = None, unit: str = "", description: str = ""):
|
||||
"""
|
||||
Increments a counter metric using the Open Telemetry SDK that are indexed by the meter name.
|
||||
This function is designed to be compatible with other popular telemetry providers design patterns,
|
||||
like Datadog and New Relic.
|
||||
"""
|
||||
meter = metrics.get_meter(__name__)
|
||||
# HTTP Metrics following OTel semantic conventions
|
||||
# https://opentelemetry.io/docs/specs/semconv/http/http-metrics/
|
||||
request_duration = meter.create_histogram(
|
||||
"http.server.request.duration",
|
||||
unit="ms",
|
||||
description="Duration of HTTP requests (time-to-first-byte for streaming)",
|
||||
)
|
||||
|
||||
with self._lock:
|
||||
if name not in self._counters:
|
||||
self._counters[name] = meter.create_counter(name, unit=unit, description=description)
|
||||
counter = self._counters[name]
|
||||
streaming_duration = meter.create_histogram(
|
||||
"http.server.streaming.duration",
|
||||
unit="ms",
|
||||
description="Total duration of streaming responses (from start to stream completion)",
|
||||
)
|
||||
|
||||
counter.add(amount, attributes=attributes, context=context)
|
||||
request_count = meter.create_counter(
|
||||
"http.server.request.count", unit="requests", description="Total number of HTTP requests"
|
||||
)
|
||||
|
||||
streaming_requests = meter.create_counter(
|
||||
"http.server.streaming.count", unit="requests", description="Number of streaming requests"
|
||||
)
|
||||
|
||||
def record_histogram(self, name: str, value: int|float, context: Context | None = None, attributes: dict[str, str] | None = None, unit: str = "", description: str = "", explicit_bucket_boundaries_advisory: Sequence[float] | None = None):
|
||||
"""
|
||||
Records a histogram metric using the Open Telemetry SDK that are indexed by the meter name.
|
||||
This function is designed to be compatible with other popular telemetry providers design patterns,
|
||||
like Datadog and New Relic.
|
||||
"""
|
||||
meter = metrics.get_meter(__name__)
|
||||
# Hook to enrich spans and record initial metrics
|
||||
def server_request_hook(span, scope):
|
||||
"""
|
||||
Called by FastAPIInstrumentor for each request.
|
||||
|
||||
with self._lock:
|
||||
if name not in self._histograms:
|
||||
self._histograms[name] = meter.create_histogram(name, unit=unit, description=description, explicit_bucket_boundaries_advisory=explicit_bucket_boundaries_advisory)
|
||||
histogram = self._histograms[name]
|
||||
This only reads from scope (ASGI dict), never touches request body.
|
||||
Safe to use without interfering with body parsing.
|
||||
"""
|
||||
method = scope.get("method", "UNKNOWN")
|
||||
path = scope.get("path", "/")
|
||||
|
||||
histogram.record(value, attributes=attributes, context=context)
|
||||
# Add custom attributes
|
||||
span.set_attribute("service.component", "llama-stack-api")
|
||||
span.set_attribute("http.request", path)
|
||||
span.set_attribute("http.method", method)
|
||||
|
||||
attributes = {
|
||||
"http.request": path,
|
||||
"http.method": method,
|
||||
"trace_id": span.attributes.get("trace_id", ""),
|
||||
"span_id": span.attributes.get("span_id", ""),
|
||||
}
|
||||
|
||||
def record_up_down_counter(self, name: str, value: int|float, context: Context | None = None, attributes: dict[str, str] | None = None, unit: str = "", description: str = ""):
|
||||
"""
|
||||
Records an up/down counter metric using the Open Telemetry SDK that are indexed by the meter name.
|
||||
This function is designed to be compatible with other popular telemetry providers design patterns,
|
||||
like Datadog and New Relic.
|
||||
"""
|
||||
meter = metrics.get_meter(__name__)
|
||||
request_count.add(1, attributes)
|
||||
logger.debug(f"server_request_hook: recorded request_count for {method} {path}, attributes={attributes}")
|
||||
|
||||
with self._lock:
|
||||
if name not in self._up_down_counters:
|
||||
self._up_down_counters[name] = meter.create_up_down_counter(name, unit=unit, description=description)
|
||||
up_down_counter = self._up_down_counters[name]
|
||||
# NOTE: This is called BEFORE routes are added to the app
|
||||
# FastAPIInstrumentor.instrument_app() patches build_middleware_stack(),
|
||||
# which will be called on first request (after routes are added), so hooks should work.
|
||||
logger.debug("Instrumenting FastAPI (routes will be added later)")
|
||||
FastAPIInstrumentor.instrument_app(
|
||||
app,
|
||||
server_request_hook=server_request_hook,
|
||||
)
|
||||
logger.debug(f"FastAPI instrumented: {getattr(app, '_is_instrumented_by_opentelemetry', False)}")
|
||||
|
||||
up_down_counter.add(value, attributes=attributes, context=context)
|
||||
# Add pure ASGI middleware for streaming metrics (always add, regardless of instrumentation)
|
||||
app.add_middleware(StreamingMetricsMiddleware)
|
||||
|
||||
# Add metrics span processor
|
||||
provider = trace.get_tracer_provider()
|
||||
logger.debug(f"TracerProvider: {provider}")
|
||||
if isinstance(provider, TracerProvider):
|
||||
metrics_exporter = MetricsSpanExporter(
|
||||
request_duration=request_duration,
|
||||
streaming_duration=streaming_duration,
|
||||
streaming_requests=streaming_requests,
|
||||
request_count=request_count,
|
||||
)
|
||||
provider.add_span_processor(BatchSpanProcessor(metrics_exporter))
|
||||
logger.debug("Added MetricsSpanExporter as BatchSpanProcessor")
|
||||
else:
|
||||
logger.warning(
|
||||
f"TracerProvider is not TracerProvider instance, it's {type(provider)}. MetricsSpanExporter not added."
|
||||
)
|
||||
|
|
|
|||
|
|
@ -26,4 +26,16 @@ def available_providers() -> list[ProviderSpec]:
|
|||
config_class="llama_stack.providers.inline.telemetry.meta_reference.config.TelemetryConfig",
|
||||
description="Meta's reference implementation of telemetry and observability using OpenTelemetry.",
|
||||
),
|
||||
InlineProviderSpec(
|
||||
api=Api.telemetry,
|
||||
provider_type="inline::otel",
|
||||
pip_packages=[
|
||||
"opentelemetry-sdk",
|
||||
"opentelemetry-exporter-otlp-proto-http",
|
||||
"opentelemetry-instrumentation-fastapi",
|
||||
],
|
||||
module="llama_stack.providers.inline.telemetry.otel",
|
||||
config_class="llama_stack.providers.inline.telemetry.otel.config.OTelTelemetryConfig",
|
||||
description="Native OpenTelemetry provider with full access to OTel Tracer and Meter APIs for advanced instrumentation.",
|
||||
),
|
||||
]
|
||||
|
|
|
|||
|
|
@ -52,6 +52,7 @@ dependencies = [
|
|||
"sqlalchemy[asyncio]>=2.0.41", # server - for conversations
|
||||
"opentelemetry-semantic-conventions>=0.57b0",
|
||||
"opentelemetry-instrumentation-fastapi>=0.57b0",
|
||||
"opentelemetry-instrumentation-sqlalchemy>=0.57b0",
|
||||
]
|
||||
|
||||
[project.optional-dependencies]
|
||||
|
|
|
|||
5
tests/integration/telemetry/__init__.py
Normal file
5
tests/integration/telemetry/__init__.py
Normal file
|
|
@ -0,0 +1,5 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
149
tests/integration/telemetry/mocking/README.md
Normal file
149
tests/integration/telemetry/mocking/README.md
Normal file
|
|
@ -0,0 +1,149 @@
|
|||
# Mock Server Infrastructure
|
||||
|
||||
This directory contains mock servers for E2E telemetry testing.
|
||||
|
||||
## Structure
|
||||
|
||||
```
|
||||
mocking/
|
||||
├── README.md ← You are here
|
||||
├── __init__.py ← Module exports
|
||||
├── mock_base.py ← Pydantic base class for all mocks
|
||||
├── servers.py ← Mock server implementations
|
||||
└── harness.py ← Async startup harness
|
||||
```
|
||||
|
||||
## Files
|
||||
|
||||
### `mock_base.py` - Base Class
|
||||
Pydantic base model that all mock servers must inherit from.
|
||||
|
||||
**Contract:**
|
||||
```python
|
||||
class MockServerBase(BaseModel):
|
||||
async def await_start(self):
|
||||
# Start server and wait until ready
|
||||
...
|
||||
|
||||
def stop(self):
|
||||
# Stop server and cleanup
|
||||
...
|
||||
```
|
||||
|
||||
### `servers.py` - Mock Implementations
|
||||
Contains:
|
||||
- **MockOTLPCollector** - Receives OTLP telemetry (port 4318)
|
||||
- **MockVLLMServer** - Simulates vLLM inference API (port 8000)
|
||||
|
||||
### `harness.py` - Startup Orchestration
|
||||
Provides:
|
||||
- **MockServerConfig** - Pydantic config for server registration
|
||||
- **start_mock_servers_async()** - Starts servers in parallel
|
||||
- **stop_mock_servers()** - Stops all servers
|
||||
|
||||
## Creating a New Mock Server
|
||||
|
||||
### Step 1: Implement the Server
|
||||
|
||||
Add to `servers.py`:
|
||||
```python
|
||||
class MockRedisServer(MockServerBase):
|
||||
"""Mock Redis server."""
|
||||
|
||||
port: int = Field(default=6379)
|
||||
|
||||
# Non-Pydantic fields
|
||||
server: Any = Field(default=None, exclude=True)
|
||||
|
||||
def model_post_init(self, __context):
|
||||
self.server = None
|
||||
|
||||
async def await_start(self):
|
||||
"""Start Redis mock and wait until ready."""
|
||||
# Start your server
|
||||
self.server = create_redis_server(self.port)
|
||||
self.server.start()
|
||||
|
||||
# Wait for port to be listening
|
||||
for _ in range(10):
|
||||
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
|
||||
if sock.connect_ex(("localhost", self.port)) == 0:
|
||||
sock.close()
|
||||
return # Ready!
|
||||
await asyncio.sleep(0.1)
|
||||
|
||||
def stop(self):
|
||||
if self.server:
|
||||
self.server.stop()
|
||||
```
|
||||
|
||||
### Step 2: Register in Test
|
||||
|
||||
In `test_otel_e2e.py`, add to MOCK_SERVERS list:
|
||||
```python
|
||||
MOCK_SERVERS = [
|
||||
# ... existing servers ...
|
||||
MockServerConfig(
|
||||
name="Mock Redis",
|
||||
server_class=MockRedisServer,
|
||||
init_kwargs={"port": 6379},
|
||||
),
|
||||
]
|
||||
```
|
||||
|
||||
### Step 3: Done!
|
||||
|
||||
The harness automatically:
|
||||
- Creates the server instance
|
||||
- Calls `await_start()` in parallel with other servers
|
||||
- Returns when all are ready
|
||||
- Stops all servers on teardown
|
||||
|
||||
## Benefits
|
||||
|
||||
✅ **Parallel Startup** - All servers start simultaneously
|
||||
✅ **Type-Safe** - Pydantic validation
|
||||
✅ **Simple** - Just implement 2 methods
|
||||
✅ **Fast** - No HTTP polling, direct port checking
|
||||
✅ **Clean** - Async/await pattern
|
||||
|
||||
## Usage in Tests
|
||||
|
||||
```python
|
||||
@pytest.fixture(scope="module")
|
||||
def mock_servers():
|
||||
servers = asyncio.run(start_mock_servers_async(MOCK_SERVERS))
|
||||
yield servers
|
||||
stop_mock_servers(servers)
|
||||
|
||||
|
||||
# Access specific servers
|
||||
@pytest.fixture(scope="module")
|
||||
def mock_redis(mock_servers):
|
||||
return mock_servers["Mock Redis"]
|
||||
```
|
||||
|
||||
## Key Design Decisions
|
||||
|
||||
### Why Pydantic?
|
||||
- Type safety for server configuration
|
||||
- Built-in validation
|
||||
- Clear interface contract
|
||||
|
||||
### Why `await_start()` instead of HTTP `/ready`?
|
||||
- Faster (no HTTP round-trip)
|
||||
- Simpler (direct port checking)
|
||||
- More reliable (internal state, not external endpoint)
|
||||
|
||||
### Why separate harness?
|
||||
- Reusable across different test files
|
||||
- Easy to add new servers
|
||||
- Centralized error handling
|
||||
|
||||
## Examples
|
||||
|
||||
See `test_otel_e2e.py` for real-world usage:
|
||||
- Line ~200: MOCK_SERVERS configuration
|
||||
- Line ~230: Convenience fixtures
|
||||
- Line ~240: Using servers in tests
|
||||
|
||||
28
tests/integration/telemetry/mocking/__init__.py
Normal file
28
tests/integration/telemetry/mocking/__init__.py
Normal file
|
|
@ -0,0 +1,28 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
"""
|
||||
Mock server infrastructure for telemetry E2E testing.
|
||||
|
||||
This module provides:
|
||||
- MockServerBase: Pydantic base class for all mock servers
|
||||
- MockOTLPCollector: Mock OTLP telemetry collector
|
||||
- MockVLLMServer: Mock vLLM inference server
|
||||
- Mock server harness for parallel async startup
|
||||
"""
|
||||
|
||||
from .harness import MockServerConfig, start_mock_servers_async, stop_mock_servers
|
||||
from .mock_base import MockServerBase
|
||||
from .servers import MockOTLPCollector, MockVLLMServer
|
||||
|
||||
__all__ = [
|
||||
"MockServerBase",
|
||||
"MockOTLPCollector",
|
||||
"MockVLLMServer",
|
||||
"MockServerConfig",
|
||||
"start_mock_servers_async",
|
||||
"stop_mock_servers",
|
||||
]
|
||||
106
tests/integration/telemetry/mocking/harness.py
Normal file
106
tests/integration/telemetry/mocking/harness.py
Normal file
|
|
@ -0,0 +1,106 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
"""
|
||||
Mock server startup harness for parallel initialization.
|
||||
|
||||
HOW TO ADD A NEW MOCK SERVER:
|
||||
1. Import your mock server class
|
||||
2. Add it to MOCK_SERVERS list with configuration
|
||||
3. Done! It will start in parallel with others.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
from typing import Any
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from .mock_base import MockServerBase
|
||||
|
||||
|
||||
class MockServerConfig(BaseModel):
|
||||
"""
|
||||
Configuration for a mock server to start.
|
||||
|
||||
**TO ADD A NEW MOCK SERVER:**
|
||||
Just create a MockServerConfig instance with your server class.
|
||||
|
||||
Example:
|
||||
MockServerConfig(
|
||||
name="Mock MyService",
|
||||
server_class=MockMyService,
|
||||
init_kwargs={"port": 9000, "config_param": "value"},
|
||||
)
|
||||
"""
|
||||
|
||||
model_config = {"arbitrary_types_allowed": True}
|
||||
|
||||
name: str = Field(description="Display name for logging")
|
||||
server_class: type = Field(description="Mock server class (must inherit from MockServerBase)")
|
||||
init_kwargs: dict[str, Any] = Field(default_factory=dict, description="Kwargs to pass to server constructor")
|
||||
|
||||
|
||||
async def start_mock_servers_async(mock_servers_config: list[MockServerConfig]) -> dict[str, MockServerBase]:
|
||||
"""
|
||||
Start all mock servers in parallel and wait for them to be ready.
|
||||
|
||||
**HOW IT WORKS:**
|
||||
1. Creates all server instances
|
||||
2. Calls await_start() on all servers in parallel
|
||||
3. Returns when all are ready
|
||||
|
||||
**SIMPLE TO USE:**
|
||||
servers = await start_mock_servers_async([config1, config2, ...])
|
||||
|
||||
Args:
|
||||
mock_servers_config: List of mock server configurations
|
||||
|
||||
Returns:
|
||||
Dict mapping server name to server instance
|
||||
"""
|
||||
servers = {}
|
||||
start_tasks = []
|
||||
|
||||
# Create all servers and prepare start tasks
|
||||
for config in mock_servers_config:
|
||||
server = config.server_class(**config.init_kwargs)
|
||||
servers[config.name] = server
|
||||
start_tasks.append(server.await_start())
|
||||
|
||||
# Start all servers in parallel
|
||||
try:
|
||||
await asyncio.gather(*start_tasks)
|
||||
|
||||
# Print readiness confirmation
|
||||
for name in servers.keys():
|
||||
print(f"[INFO] {name} ready")
|
||||
|
||||
except Exception as e:
|
||||
# If any server fails, stop all servers
|
||||
for server in servers.values():
|
||||
try:
|
||||
server.stop()
|
||||
except Exception:
|
||||
pass
|
||||
raise RuntimeError(f"Failed to start mock servers: {e}") from None
|
||||
|
||||
return servers
|
||||
|
||||
|
||||
def stop_mock_servers(servers: dict[str, Any]):
|
||||
"""
|
||||
Stop all mock servers.
|
||||
|
||||
Args:
|
||||
servers: Dict of server instances from start_mock_servers_async()
|
||||
"""
|
||||
for name, server in servers.items():
|
||||
try:
|
||||
if hasattr(server, "get_request_count"):
|
||||
print(f"\n[INFO] {name} received {server.get_request_count()} requests")
|
||||
server.stop()
|
||||
except Exception as e:
|
||||
print(f"[WARN] Error stopping {name}: {e}")
|
||||
68
tests/integration/telemetry/mocking/mock_base.py
Normal file
68
tests/integration/telemetry/mocking/mock_base.py
Normal file
|
|
@ -0,0 +1,68 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
"""
|
||||
Base class for mock servers with async startup support.
|
||||
|
||||
All mock servers should inherit from MockServerBase and implement await_start().
|
||||
"""
|
||||
|
||||
from abc import abstractmethod
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
|
||||
class MockServerBase(BaseModel):
|
||||
"""
|
||||
Pydantic base model for mock servers.
|
||||
|
||||
**TO CREATE A NEW MOCK SERVER:**
|
||||
1. Inherit from this class
|
||||
2. Implement async def await_start(self)
|
||||
3. Implement def stop(self)
|
||||
4. Done!
|
||||
|
||||
Example:
|
||||
class MyMockServer(MockServerBase):
|
||||
port: int = 8080
|
||||
|
||||
async def await_start(self):
|
||||
# Start your server
|
||||
self.server = create_server()
|
||||
self.server.start()
|
||||
# Wait until ready (can check internal state, no HTTP needed)
|
||||
while not self.server.is_listening():
|
||||
await asyncio.sleep(0.1)
|
||||
|
||||
def stop(self):
|
||||
if self.server:
|
||||
self.server.stop()
|
||||
"""
|
||||
|
||||
model_config = {"arbitrary_types_allowed": True}
|
||||
|
||||
@abstractmethod
|
||||
async def await_start(self):
|
||||
"""
|
||||
Start the server and wait until it's ready.
|
||||
|
||||
This method should:
|
||||
1. Start the server (synchronous or async)
|
||||
2. Wait until the server is fully ready to accept requests
|
||||
3. Return when ready
|
||||
|
||||
Subclasses can check internal state directly - no HTTP polling needed!
|
||||
"""
|
||||
...
|
||||
|
||||
@abstractmethod
|
||||
def stop(self):
|
||||
"""
|
||||
Stop the server and clean up resources.
|
||||
|
||||
This method should gracefully shut down the server.
|
||||
"""
|
||||
...
|
||||
600
tests/integration/telemetry/mocking/servers.py
Normal file
600
tests/integration/telemetry/mocking/servers.py
Normal file
|
|
@ -0,0 +1,600 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
"""
|
||||
Mock servers for OpenTelemetry E2E testing.
|
||||
|
||||
This module provides mock servers for testing telemetry:
|
||||
- MockOTLPCollector: Receives and stores OTLP telemetry exports
|
||||
- MockVLLMServer: Simulates vLLM inference API with valid OpenAI responses
|
||||
|
||||
These mocks allow E2E testing without external dependencies.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import http.server
|
||||
import json
|
||||
import socket
|
||||
import threading
|
||||
import time
|
||||
from collections import defaultdict
|
||||
from typing import Any
|
||||
|
||||
from opentelemetry.proto.collector.metrics.v1.metrics_service_pb2 import (
|
||||
ExportMetricsServiceRequest,
|
||||
)
|
||||
from opentelemetry.proto.collector.trace.v1.trace_service_pb2 import (
|
||||
ExportTraceServiceRequest,
|
||||
)
|
||||
from pydantic import Field
|
||||
|
||||
from .mock_base import MockServerBase
|
||||
|
||||
|
||||
class MockOTLPCollector(MockServerBase):
|
||||
"""
|
||||
Mock OTLP collector HTTP server.
|
||||
|
||||
Receives real OTLP exports from Llama Stack and stores them for verification.
|
||||
Runs on localhost:4318 (standard OTLP HTTP port).
|
||||
|
||||
Usage:
|
||||
collector = MockOTLPCollector()
|
||||
await collector.await_start()
|
||||
# ... run tests ...
|
||||
print(f"Received {collector.get_trace_count()} traces")
|
||||
collector.stop()
|
||||
"""
|
||||
|
||||
port: int = Field(default=4318, description="Port to run collector on")
|
||||
|
||||
# Non-Pydantic fields (set after initialization)
|
||||
traces: list[dict] = Field(default_factory=list, exclude=True)
|
||||
metrics: list[dict] = Field(default_factory=list, exclude=True)
|
||||
all_http_requests: list[dict] = Field(default_factory=list, exclude=True) # Track ALL HTTP requests for debugging
|
||||
server: Any = Field(default=None, exclude=True)
|
||||
server_thread: Any = Field(default=None, exclude=True)
|
||||
|
||||
def model_post_init(self, __context):
|
||||
"""Initialize after Pydantic validation."""
|
||||
self.traces = []
|
||||
self.metrics = []
|
||||
self.server = None
|
||||
self.server_thread = None
|
||||
|
||||
def _create_handler_class(self):
|
||||
"""Create the HTTP handler class for this collector instance."""
|
||||
collector_self = self
|
||||
|
||||
class OTLPHandler(http.server.BaseHTTPRequestHandler):
|
||||
"""HTTP request handler for OTLP requests."""
|
||||
|
||||
def log_message(self, format, *args):
|
||||
"""Suppress HTTP server logs."""
|
||||
pass
|
||||
|
||||
def do_GET(self): # noqa: N802
|
||||
"""Handle GET requests."""
|
||||
# No readiness endpoint needed - using await_start() instead
|
||||
self.send_response(404)
|
||||
self.end_headers()
|
||||
|
||||
def do_POST(self): # noqa: N802
|
||||
"""Handle OTLP POST requests."""
|
||||
content_length = int(self.headers.get("Content-Length", 0))
|
||||
body = self.rfile.read(content_length) if content_length > 0 else b""
|
||||
|
||||
# Track ALL requests for debugging
|
||||
collector_self.all_http_requests.append(
|
||||
{
|
||||
"method": "POST",
|
||||
"path": self.path,
|
||||
"timestamp": time.time(),
|
||||
"body_length": len(body),
|
||||
}
|
||||
)
|
||||
|
||||
# Store the export request
|
||||
if "/v1/traces" in self.path:
|
||||
collector_self.traces.append(
|
||||
{
|
||||
"body": body,
|
||||
"timestamp": time.time(),
|
||||
}
|
||||
)
|
||||
elif "/v1/metrics" in self.path:
|
||||
collector_self.metrics.append(
|
||||
{
|
||||
"body": body,
|
||||
"timestamp": time.time(),
|
||||
}
|
||||
)
|
||||
|
||||
# Always return success (200 OK)
|
||||
self.send_response(200)
|
||||
self.send_header("Content-Type", "application/json")
|
||||
self.end_headers()
|
||||
self.wfile.write(b"{}")
|
||||
|
||||
return OTLPHandler
|
||||
|
||||
async def await_start(self):
|
||||
"""
|
||||
Start the OTLP collector and wait until ready.
|
||||
|
||||
This method is async and can be awaited to ensure the server is ready.
|
||||
"""
|
||||
# Create handler and start the HTTP server
|
||||
handler_class = self._create_handler_class()
|
||||
self.server = http.server.HTTPServer(("localhost", self.port), handler_class)
|
||||
self.server_thread = threading.Thread(target=self.server.serve_forever, daemon=True)
|
||||
self.server_thread.start()
|
||||
|
||||
# Wait for server to be listening on the port
|
||||
for _ in range(10):
|
||||
try:
|
||||
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
|
||||
result = sock.connect_ex(("localhost", self.port))
|
||||
sock.close()
|
||||
if result == 0:
|
||||
# Port is listening
|
||||
return
|
||||
except Exception:
|
||||
pass
|
||||
await asyncio.sleep(0.1)
|
||||
|
||||
raise RuntimeError(f"OTLP collector failed to start on port {self.port}")
|
||||
|
||||
def stop(self):
|
||||
"""Stop the OTLP collector server."""
|
||||
if self.server:
|
||||
self.server.shutdown()
|
||||
self.server.server_close()
|
||||
|
||||
def clear(self):
|
||||
"""Clear all captured telemetry data."""
|
||||
self.traces = []
|
||||
self.metrics = []
|
||||
|
||||
def get_trace_count(self) -> int:
|
||||
"""Get number of trace export requests received."""
|
||||
return len(self.traces)
|
||||
|
||||
def get_metric_count(self) -> int:
|
||||
"""Get number of metric export requests received."""
|
||||
return len(self.metrics)
|
||||
|
||||
def get_all_traces(self) -> list[dict]:
|
||||
"""Get all captured trace exports."""
|
||||
return self.traces
|
||||
|
||||
def get_all_metrics(self) -> list[dict]:
|
||||
"""Get all captured metric exports."""
|
||||
return self.metrics
|
||||
|
||||
# -----------------------------
|
||||
# Trace parsing helpers
|
||||
# -----------------------------
|
||||
def parse_traces(self) -> dict[str, list[dict]]:
|
||||
"""
|
||||
Parse protobuf trace data and return spans grouped by trace ID.
|
||||
|
||||
Returns:
|
||||
Dict mapping trace_id (hex) -> list of span dicts
|
||||
"""
|
||||
trace_id_to_spans: dict[str, list[dict]] = {}
|
||||
|
||||
for export in self.traces:
|
||||
request = ExportTraceServiceRequest()
|
||||
body = export.get("body", b"")
|
||||
try:
|
||||
request.ParseFromString(body)
|
||||
except Exception as e:
|
||||
raise RuntimeError(f"Failed to parse OTLP traces export (len={len(body)}): {e}") from e
|
||||
|
||||
for resource_span in request.resource_spans:
|
||||
for scope_span in resource_span.scope_spans:
|
||||
for span in scope_span.spans:
|
||||
# span.trace_id is bytes; convert to hex string
|
||||
trace_id = (
|
||||
span.trace_id.hex() if isinstance(span.trace_id, bytes | bytearray) else str(span.trace_id)
|
||||
)
|
||||
span_entry = {
|
||||
"name": span.name,
|
||||
"span_id": span.span_id.hex()
|
||||
if isinstance(span.span_id, bytes | bytearray)
|
||||
else str(span.span_id),
|
||||
"start_time_unix_nano": int(getattr(span, "start_time_unix_nano", 0)),
|
||||
"end_time_unix_nano": int(getattr(span, "end_time_unix_nano", 0)),
|
||||
}
|
||||
trace_id_to_spans.setdefault(trace_id, []).append(span_entry)
|
||||
|
||||
return trace_id_to_spans
|
||||
|
||||
def get_all_trace_ids(self) -> set[str]:
|
||||
"""Return set of all trace IDs seen so far."""
|
||||
return set(self.parse_traces().keys())
|
||||
|
||||
def get_trace_span_counts(self) -> dict[str, int]:
|
||||
"""Return span counts per trace ID."""
|
||||
grouped = self.parse_traces()
|
||||
return {tid: len(spans) for tid, spans in grouped.items()}
|
||||
|
||||
def get_new_trace_ids(self, prior_ids: set[str]) -> set[str]:
|
||||
"""Return trace IDs that appeared after prior_ids snapshot."""
|
||||
return self.get_all_trace_ids() - set(prior_ids)
|
||||
|
||||
def parse_metrics(self) -> dict[str, list[Any]]:
|
||||
"""
|
||||
Parse protobuf metric data and return metrics by name.
|
||||
|
||||
Returns:
|
||||
Dict mapping metric names to list of metric data points
|
||||
"""
|
||||
metrics_by_name = defaultdict(list)
|
||||
|
||||
for export in self.metrics:
|
||||
# Parse the protobuf body
|
||||
request = ExportMetricsServiceRequest()
|
||||
body = export.get("body", b"")
|
||||
try:
|
||||
request.ParseFromString(body)
|
||||
except Exception as e:
|
||||
raise RuntimeError(f"Failed to parse OTLP metrics export (len={len(body)}): {e}") from e
|
||||
|
||||
# Extract metrics from the request
|
||||
for resource_metric in request.resource_metrics:
|
||||
for scope_metric in resource_metric.scope_metrics:
|
||||
for metric in scope_metric.metrics:
|
||||
metric_name = metric.name
|
||||
|
||||
# Extract data points based on metric type
|
||||
data_points = []
|
||||
if metric.HasField("gauge"):
|
||||
data_points = list(metric.gauge.data_points)
|
||||
elif metric.HasField("sum"):
|
||||
data_points = list(metric.sum.data_points)
|
||||
elif metric.HasField("histogram"):
|
||||
data_points = list(metric.histogram.data_points)
|
||||
elif metric.HasField("summary"):
|
||||
data_points = list(metric.summary.data_points)
|
||||
|
||||
metrics_by_name[metric_name].extend(data_points)
|
||||
|
||||
return dict(metrics_by_name)
|
||||
|
||||
def get_metric_by_name(self, metric_name: str) -> list[Any]:
|
||||
"""
|
||||
Get all data points for a specific metric by name.
|
||||
|
||||
Args:
|
||||
metric_name: The name of the metric to retrieve
|
||||
|
||||
Returns:
|
||||
List of data points for the metric, or empty list if not found
|
||||
"""
|
||||
metrics = self.parse_metrics()
|
||||
return metrics.get(metric_name, [])
|
||||
|
||||
def has_metric(self, metric_name: str) -> bool:
|
||||
"""
|
||||
Check if a metric with the given name has been captured.
|
||||
|
||||
Args:
|
||||
metric_name: The name of the metric to check
|
||||
|
||||
Returns:
|
||||
True if the metric exists and has data points, False otherwise
|
||||
"""
|
||||
data_points = self.get_metric_by_name(metric_name)
|
||||
return len(data_points) > 0
|
||||
|
||||
def get_all_metric_names(self) -> list[str]:
|
||||
"""
|
||||
Get all unique metric names that have been captured.
|
||||
|
||||
Returns:
|
||||
List of metric names
|
||||
"""
|
||||
return list(self.parse_metrics().keys())
|
||||
|
||||
|
||||
class MockVLLMServer(MockServerBase):
|
||||
"""
|
||||
Mock vLLM inference server with OpenAI-compatible API.
|
||||
|
||||
Returns valid OpenAI Python client response objects for:
|
||||
- Chat completions (/v1/chat/completions)
|
||||
- Text completions (/v1/completions)
|
||||
- Model listing (/v1/models)
|
||||
|
||||
Runs on localhost:8000 (standard vLLM port).
|
||||
|
||||
Usage:
|
||||
server = MockVLLMServer(models=["my-model"])
|
||||
await server.await_start()
|
||||
# ... make inference calls ...
|
||||
print(f"Handled {server.get_request_count()} requests")
|
||||
server.stop()
|
||||
"""
|
||||
|
||||
port: int = Field(default=8000, description="Port to run server on")
|
||||
models: list[str] = Field(
|
||||
default_factory=lambda: ["meta-llama/Llama-3.2-1B-Instruct"], description="List of model IDs to serve"
|
||||
)
|
||||
|
||||
# Non-Pydantic fields
|
||||
requests_received: list[dict] = Field(default_factory=list, exclude=True)
|
||||
server: Any = Field(default=None, exclude=True)
|
||||
server_thread: Any = Field(default=None, exclude=True)
|
||||
|
||||
def model_post_init(self, __context):
|
||||
"""Initialize after Pydantic validation."""
|
||||
self.requests_received = []
|
||||
self.server = None
|
||||
self.server_thread = None
|
||||
|
||||
def _create_handler_class(self):
|
||||
"""Create the HTTP handler class for this vLLM instance."""
|
||||
server_self = self
|
||||
|
||||
class VLLMHandler(http.server.BaseHTTPRequestHandler):
|
||||
"""HTTP request handler for vLLM API."""
|
||||
|
||||
def log_message(self, format, *args):
|
||||
"""Suppress HTTP server logs."""
|
||||
pass
|
||||
|
||||
def log_request(self, code="-", size="-"):
|
||||
"""Log incoming requests for debugging."""
|
||||
print(f"[DEBUG] Mock vLLM received: {self.command} {self.path} -> {code}")
|
||||
|
||||
def do_GET(self): # noqa: N802
|
||||
"""Handle GET requests (models list, health check)."""
|
||||
# Log GET requests too
|
||||
server_self.requests_received.append(
|
||||
{
|
||||
"path": self.path,
|
||||
"method": "GET",
|
||||
"timestamp": time.time(),
|
||||
}
|
||||
)
|
||||
|
||||
if self.path == "/v1/models":
|
||||
response = self._create_models_list_response()
|
||||
self._send_json_response(200, response)
|
||||
|
||||
elif self.path == "/health" or self.path == "/v1/health":
|
||||
self._send_json_response(200, {"status": "healthy"})
|
||||
|
||||
else:
|
||||
self.send_response(404)
|
||||
self.end_headers()
|
||||
|
||||
def do_POST(self): # noqa: N802
|
||||
"""Handle POST requests (chat/text completions)."""
|
||||
content_length = int(self.headers.get("Content-Length", 0))
|
||||
body = self.rfile.read(content_length) if content_length > 0 else b"{}"
|
||||
|
||||
try:
|
||||
request_data = json.loads(body)
|
||||
except Exception:
|
||||
request_data = {}
|
||||
|
||||
# Log the request
|
||||
server_self.requests_received.append(
|
||||
{
|
||||
"path": self.path,
|
||||
"body": request_data,
|
||||
"timestamp": time.time(),
|
||||
}
|
||||
)
|
||||
|
||||
# Route to appropriate handler
|
||||
if "/chat/completions" in self.path:
|
||||
response = self._create_chat_completion_response(request_data)
|
||||
if response is not None: # None means already sent (streaming)
|
||||
self._send_json_response(200, response)
|
||||
|
||||
elif "/completions" in self.path:
|
||||
response = self._create_text_completion_response(request_data)
|
||||
self._send_json_response(200, response)
|
||||
|
||||
else:
|
||||
self._send_json_response(200, {"status": "ok"})
|
||||
|
||||
# ----------------------------------------------------------------
|
||||
# Response Generators
|
||||
# **TO MODIFY RESPONSES:** Edit these methods
|
||||
# ----------------------------------------------------------------
|
||||
|
||||
def _create_models_list_response(self) -> dict:
|
||||
"""Create OpenAI models list response with configured models."""
|
||||
return {
|
||||
"object": "list",
|
||||
"data": [
|
||||
{
|
||||
"id": model_id,
|
||||
"object": "model",
|
||||
"created": int(time.time()),
|
||||
"owned_by": "meta",
|
||||
}
|
||||
for model_id in server_self.models
|
||||
],
|
||||
}
|
||||
|
||||
def _create_chat_completion_response(self, request_data: dict) -> dict | None:
|
||||
"""
|
||||
Create OpenAI ChatCompletion response.
|
||||
|
||||
Returns a valid response matching openai.types.ChatCompletion.
|
||||
Supports both regular and streaming responses.
|
||||
Returns None for streaming responses (already sent via SSE).
|
||||
"""
|
||||
# Check if streaming is requested
|
||||
is_streaming = request_data.get("stream", False)
|
||||
|
||||
if is_streaming:
|
||||
# Return SSE streaming response
|
||||
self.send_response(200)
|
||||
self.send_header("Content-Type", "text/event-stream")
|
||||
self.send_header("Cache-Control", "no-cache")
|
||||
self.send_header("Connection", "keep-alive")
|
||||
self.end_headers()
|
||||
|
||||
# Send streaming chunks
|
||||
chunks = [
|
||||
{
|
||||
"id": "chatcmpl-test",
|
||||
"object": "chat.completion.chunk",
|
||||
"created": int(time.time()),
|
||||
"model": request_data.get("model", "test"),
|
||||
"choices": [{"index": 0, "delta": {"role": "assistant"}, "finish_reason": None}],
|
||||
},
|
||||
{
|
||||
"id": "chatcmpl-test",
|
||||
"object": "chat.completion.chunk",
|
||||
"created": int(time.time()),
|
||||
"model": request_data.get("model", "test"),
|
||||
"choices": [{"index": 0, "delta": {"content": "Test "}, "finish_reason": None}],
|
||||
},
|
||||
{
|
||||
"id": "chatcmpl-test",
|
||||
"object": "chat.completion.chunk",
|
||||
"created": int(time.time()),
|
||||
"model": request_data.get("model", "test"),
|
||||
"choices": [{"index": 0, "delta": {"content": "streaming "}, "finish_reason": None}],
|
||||
},
|
||||
{
|
||||
"id": "chatcmpl-test",
|
||||
"object": "chat.completion.chunk",
|
||||
"created": int(time.time()),
|
||||
"model": request_data.get("model", "test"),
|
||||
"choices": [{"index": 0, "delta": {"content": "response"}, "finish_reason": None}],
|
||||
},
|
||||
{
|
||||
"id": "chatcmpl-test",
|
||||
"object": "chat.completion.chunk",
|
||||
"created": int(time.time()),
|
||||
"model": request_data.get("model", "test"),
|
||||
"choices": [{"index": 0, "delta": {}, "finish_reason": "stop"}],
|
||||
},
|
||||
]
|
||||
|
||||
for chunk in chunks:
|
||||
self.wfile.write(f"data: {json.dumps(chunk)}\n\n".encode())
|
||||
self.wfile.write(b"data: [DONE]\n\n")
|
||||
return None # Already sent response
|
||||
|
||||
# Regular response
|
||||
return {
|
||||
"id": "chatcmpl-test123",
|
||||
"object": "chat.completion",
|
||||
"created": int(time.time()),
|
||||
"model": request_data.get("model", "meta-llama/Llama-3.2-1B-Instruct"),
|
||||
"choices": [
|
||||
{
|
||||
"index": 0,
|
||||
"message": {
|
||||
"role": "assistant",
|
||||
"content": "This is a test response from mock vLLM server.",
|
||||
"tool_calls": None,
|
||||
},
|
||||
"logprobs": None,
|
||||
"finish_reason": "stop",
|
||||
}
|
||||
],
|
||||
"usage": {
|
||||
"prompt_tokens": 25,
|
||||
"completion_tokens": 15,
|
||||
"total_tokens": 40,
|
||||
"completion_tokens_details": None,
|
||||
},
|
||||
"system_fingerprint": None,
|
||||
"service_tier": None,
|
||||
}
|
||||
|
||||
def _create_text_completion_response(self, request_data: dict) -> dict:
|
||||
"""
|
||||
Create OpenAI Completion response.
|
||||
|
||||
Returns a valid response matching openai.types.Completion
|
||||
"""
|
||||
return {
|
||||
"id": "cmpl-test123",
|
||||
"object": "text_completion",
|
||||
"created": int(time.time()),
|
||||
"model": request_data.get("model", "meta-llama/Llama-3.2-1B-Instruct"),
|
||||
"choices": [
|
||||
{
|
||||
"text": "This is a test completion.",
|
||||
"index": 0,
|
||||
"logprobs": None,
|
||||
"finish_reason": "stop",
|
||||
}
|
||||
],
|
||||
"usage": {
|
||||
"prompt_tokens": 10,
|
||||
"completion_tokens": 8,
|
||||
"total_tokens": 18,
|
||||
"completion_tokens_details": None,
|
||||
},
|
||||
"system_fingerprint": None,
|
||||
}
|
||||
|
||||
def _send_json_response(self, status_code: int, data: dict):
|
||||
"""Helper to send JSON response."""
|
||||
self.send_response(status_code)
|
||||
self.send_header("Content-Type", "application/json")
|
||||
self.end_headers()
|
||||
self.wfile.write(json.dumps(data).encode())
|
||||
|
||||
return VLLMHandler
|
||||
|
||||
async def await_start(self):
|
||||
"""
|
||||
Start the vLLM server and wait until ready.
|
||||
|
||||
This method is async and can be awaited to ensure the server is ready.
|
||||
"""
|
||||
# Create handler and start the HTTP server
|
||||
handler_class = self._create_handler_class()
|
||||
self.server = http.server.HTTPServer(("localhost", self.port), handler_class)
|
||||
self.server_thread = threading.Thread(target=self.server.serve_forever, daemon=True)
|
||||
self.server_thread.start()
|
||||
|
||||
# Wait for server to be listening on the port
|
||||
for _ in range(10):
|
||||
try:
|
||||
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
|
||||
result = sock.connect_ex(("localhost", self.port))
|
||||
sock.close()
|
||||
if result == 0:
|
||||
# Port is listening
|
||||
return
|
||||
except Exception:
|
||||
pass
|
||||
await asyncio.sleep(0.1)
|
||||
|
||||
raise RuntimeError(f"vLLM server failed to start on port {self.port}")
|
||||
|
||||
def stop(self):
|
||||
"""Stop the vLLM server."""
|
||||
if self.server:
|
||||
self.server.shutdown()
|
||||
self.server.server_close()
|
||||
|
||||
def clear(self):
|
||||
"""Clear request history."""
|
||||
self.requests_received = []
|
||||
|
||||
def get_request_count(self) -> int:
|
||||
"""Get number of requests received."""
|
||||
return len(self.requests_received)
|
||||
|
||||
def get_all_requests(self) -> list[dict]:
|
||||
"""Get all received requests with their bodies."""
|
||||
return self.requests_received
|
||||
622
tests/integration/telemetry/test_otel_e2e.py
Normal file
622
tests/integration/telemetry/test_otel_e2e.py
Normal file
|
|
@ -0,0 +1,622 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
"""
|
||||
End-to-end tests for the OpenTelemetry inline provider.
|
||||
|
||||
What this does:
|
||||
- Boots mock OTLP and mock vLLM
|
||||
- Starts a real Llama Stack with inline OTel
|
||||
- Calls real HTTP APIs
|
||||
- Verifies traces, metrics, and custom metric names (non-empty)
|
||||
"""
|
||||
|
||||
# ============================================================================
|
||||
# IMPORTS
|
||||
# ============================================================================
|
||||
|
||||
import os
|
||||
import socket
|
||||
import subprocess
|
||||
import time
|
||||
from typing import Any
|
||||
|
||||
import pytest
|
||||
import requests
|
||||
import yaml
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
# Mock servers are in the mocking/ subdirectory
|
||||
from .mocking import (
|
||||
MockOTLPCollector,
|
||||
MockServerConfig,
|
||||
MockVLLMServer,
|
||||
start_mock_servers_async,
|
||||
stop_mock_servers,
|
||||
)
|
||||
|
||||
# ============================================================================
|
||||
# DATA MODELS
|
||||
# ============================================================================
|
||||
|
||||
|
||||
class TelemetryTestCase(BaseModel):
|
||||
"""
|
||||
Pydantic model defining expected telemetry for an API call.
|
||||
|
||||
**TO ADD A NEW TEST CASE:** Add to TEST_CASES list below.
|
||||
"""
|
||||
|
||||
name: str = Field(description="Unique test case identifier")
|
||||
http_method: str = Field(description="HTTP method (GET, POST, etc.)")
|
||||
api_path: str = Field(description="API path (e.g., '/v1/models')")
|
||||
request_body: dict[str, Any] | None = Field(default=None)
|
||||
expected_http_status: int = Field(default=200)
|
||||
expected_trace_exports: int = Field(default=1, description="Minimum number of trace exports expected")
|
||||
expected_metric_exports: int = Field(default=0, description="Minimum number of metric exports expected")
|
||||
should_have_error_span: bool = Field(default=False)
|
||||
expected_metrics: list[str] = Field(
|
||||
default_factory=list, description="List of metric names that should be captured"
|
||||
)
|
||||
expected_min_spans: int | None = Field(
|
||||
default=None, description="If set, minimum number of spans expected in the new trace(s) generated by this test"
|
||||
)
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# TEST CONFIGURATION
|
||||
# **TO ADD NEW TESTS:** Add TelemetryTestCase instances here
|
||||
# ============================================================================
|
||||
|
||||
# Custom metric names (defined in llama_stack/providers/inline/telemetry/otel/otel.py)
|
||||
|
||||
CUSTOM_METRICS_BASE = [
|
||||
"http.server.request.duration",
|
||||
"http.server.request.count",
|
||||
]
|
||||
|
||||
CUSTOM_METRICS_STREAMING = [
|
||||
"http.server.streaming.duration",
|
||||
"http.server.streaming.count",
|
||||
]
|
||||
|
||||
TEST_CASES = [
|
||||
TelemetryTestCase(
|
||||
name="models_list",
|
||||
http_method="GET",
|
||||
api_path="/v1/models",
|
||||
expected_trace_exports=1, # Single trace with 2-3 spans (GET, http send)
|
||||
expected_metric_exports=1, # Metrics export periodically, but we'll wait for them
|
||||
expected_metrics=[], # First request: middleware may not be initialized yet
|
||||
expected_min_spans=2,
|
||||
),
|
||||
TelemetryTestCase(
|
||||
name="chat_completion",
|
||||
http_method="POST",
|
||||
api_path="/v1/chat/completions",
|
||||
request_body={
|
||||
"model": "meta-llama/Llama-3.2-1B-Instruct",
|
||||
"messages": [{"role": "user", "content": "Hello!"}],
|
||||
},
|
||||
expected_trace_exports=1, # Single trace with 4 spans (POST, http receive, 2x http send)
|
||||
expected_metric_exports=1, # Metrics export periodically
|
||||
expected_metrics=CUSTOM_METRICS_BASE,
|
||||
expected_min_spans=3,
|
||||
),
|
||||
TelemetryTestCase(
|
||||
name="chat_completion_streaming",
|
||||
http_method="POST",
|
||||
api_path="/v1/chat/completions",
|
||||
request_body={
|
||||
"model": "meta-llama/Llama-3.2-1B-Instruct",
|
||||
"messages": [{"role": "user", "content": "Streaming test"}],
|
||||
"stream": True, # Enable streaming response
|
||||
},
|
||||
expected_trace_exports=1, # Single trace with streaming spans
|
||||
expected_metric_exports=1, # Metrics export periodically
|
||||
# Validate both base and streaming metrics with polling
|
||||
expected_metrics=CUSTOM_METRICS_BASE + CUSTOM_METRICS_STREAMING,
|
||||
expected_min_spans=4,
|
||||
),
|
||||
]
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# TEST INFRASTRUCTURE
|
||||
# ============================================================================
|
||||
|
||||
|
||||
class TelemetryTestRunner:
|
||||
"""
|
||||
Executes TelemetryTestCase instances against real Llama Stack.
|
||||
|
||||
**HOW IT WORKS:**
|
||||
1. Makes real HTTP request to the stack
|
||||
2. Waits for telemetry export
|
||||
3. Verifies exports were sent to mock collector
|
||||
4. Validates custom metrics by name (if expected_metrics is specified)
|
||||
5. Ensures metrics have non-empty data points
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
base_url: str,
|
||||
collector: MockOTLPCollector,
|
||||
poll_timeout_seconds: float = 8.0,
|
||||
poll_interval_seconds: float = 0.1,
|
||||
):
|
||||
self.base_url = base_url
|
||||
self.collector = collector
|
||||
self.poll_timeout_seconds = poll_timeout_seconds # how long to wait for telemetry to be exported
|
||||
self.poll_interval_seconds = poll_interval_seconds # how often to poll for telemetry
|
||||
|
||||
def run_test_case(self, test_case: TelemetryTestCase, verbose: bool = False) -> bool:
|
||||
"""Execute a single test case and verify telemetry."""
|
||||
initial_traces = self.collector.get_trace_count()
|
||||
prior_trace_ids = self.collector.get_all_trace_ids()
|
||||
initial_metrics = self.collector.get_metric_count()
|
||||
|
||||
if verbose:
|
||||
print(f"\n--- {test_case.name} ---")
|
||||
print(f" {test_case.http_method} {test_case.api_path}")
|
||||
if test_case.expected_metrics:
|
||||
print(f" Expected custom metrics: {', '.join(test_case.expected_metrics)}")
|
||||
|
||||
# Make real HTTP request to Llama Stack
|
||||
is_streaming_test = test_case.request_body and test_case.request_body.get("stream", False)
|
||||
try:
|
||||
url = f"{self.base_url}{test_case.api_path}"
|
||||
|
||||
# Streaming requests need longer timeout to complete
|
||||
timeout = 10 if is_streaming_test else 5
|
||||
|
||||
if test_case.http_method == "GET":
|
||||
response = requests.get(url, timeout=timeout)
|
||||
elif test_case.http_method == "POST":
|
||||
response = requests.post(url, json=test_case.request_body or {}, timeout=timeout)
|
||||
else:
|
||||
response = requests.request(test_case.http_method, url, timeout=timeout)
|
||||
|
||||
if verbose:
|
||||
print(f" HTTP Response: {response.status_code}")
|
||||
|
||||
status_match = response.status_code == test_case.expected_http_status
|
||||
|
||||
except requests.exceptions.RequestException as e:
|
||||
if verbose:
|
||||
print(f" Request exception: {type(e).__name__}")
|
||||
# For streaming requests, exceptions are expected due to mock server behavior
|
||||
# The important part is whether telemetry metrics were captured
|
||||
status_match = is_streaming_test # Pass streaming tests, fail non-streaming
|
||||
|
||||
# Poll until all telemetry expectations are met or timeout (single loop for speed)
|
||||
missing_metrics: list[str] = []
|
||||
empty_metrics: list[str] = []
|
||||
new_trace_ids: set[str] = set()
|
||||
|
||||
def compute_status() -> tuple[bool, bool, bool, bool]:
|
||||
traces_ok_local = (self.collector.get_trace_count() - initial_traces) >= test_case.expected_trace_exports
|
||||
metrics_count_ok_local = (
|
||||
self.collector.get_metric_count() - initial_metrics
|
||||
) >= test_case.expected_metric_exports
|
||||
|
||||
metrics_ok_local = True
|
||||
if test_case.expected_metrics:
|
||||
missing_metrics.clear()
|
||||
empty_metrics.clear()
|
||||
for metric_name in test_case.expected_metrics:
|
||||
if not self.collector.has_metric(metric_name):
|
||||
missing_metrics.append(metric_name)
|
||||
else:
|
||||
data_points = self.collector.get_metric_by_name(metric_name)
|
||||
if len(data_points) == 0:
|
||||
empty_metrics.append(metric_name)
|
||||
metrics_ok_local = len(missing_metrics) == 0 and len(empty_metrics) == 0
|
||||
|
||||
spans_ok_local = True
|
||||
if test_case.expected_min_spans is not None:
|
||||
nonlocal new_trace_ids
|
||||
new_trace_ids = self.collector.get_new_trace_ids(prior_trace_ids)
|
||||
if not new_trace_ids:
|
||||
spans_ok_local = False
|
||||
else:
|
||||
counts = self.collector.get_trace_span_counts()
|
||||
min_spans: int = int(test_case.expected_min_spans or 0)
|
||||
spans_ok_local = all(counts.get(tid, 0) >= min_spans for tid in new_trace_ids)
|
||||
|
||||
return traces_ok_local, metrics_count_ok_local, metrics_ok_local, spans_ok_local
|
||||
|
||||
# Poll until all telemetry expectations are met or timeout (single loop for speed)
|
||||
start = time.time()
|
||||
traces_ok, metrics_count_ok, metrics_by_name_validated, spans_ok = compute_status()
|
||||
while time.time() - start < self.poll_timeout_seconds:
|
||||
if traces_ok and metrics_count_ok and metrics_by_name_validated and spans_ok:
|
||||
break
|
||||
time.sleep(self.poll_interval_seconds)
|
||||
traces_ok, metrics_count_ok, metrics_by_name_validated, spans_ok = compute_status()
|
||||
|
||||
if verbose:
|
||||
total_http_requests = len(getattr(self.collector, "all_http_requests", []))
|
||||
print(f" [DEBUG] OTLP POST requests: {total_http_requests}")
|
||||
print(
|
||||
f" Expected: >={test_case.expected_trace_exports} traces, >={test_case.expected_metric_exports} metrics"
|
||||
)
|
||||
print(
|
||||
f" Actual: {self.collector.get_trace_count() - initial_traces} traces, {self.collector.get_metric_count() - initial_metrics} metrics"
|
||||
)
|
||||
|
||||
if test_case.expected_metrics:
|
||||
print(" Custom metrics:")
|
||||
for metric_name in test_case.expected_metrics:
|
||||
n = len(self.collector.get_metric_by_name(metric_name))
|
||||
status = "✓" if n > 0 else "✗"
|
||||
print(f" {status} {metric_name}: {n}")
|
||||
if missing_metrics:
|
||||
print(f" Missing: {missing_metrics}")
|
||||
if empty_metrics:
|
||||
print(f" Empty: {empty_metrics}")
|
||||
|
||||
if test_case.expected_min_spans is not None:
|
||||
counts = self.collector.get_trace_span_counts()
|
||||
span_counts = {tid: counts[tid] for tid in new_trace_ids}
|
||||
print(f" New trace IDs: {sorted(new_trace_ids)}")
|
||||
print(f" Span counts: {span_counts}")
|
||||
|
||||
result = bool(
|
||||
(status_match or is_streaming_test)
|
||||
and traces_ok
|
||||
and metrics_count_ok
|
||||
and metrics_by_name_validated
|
||||
and spans_ok
|
||||
)
|
||||
print(f" Result: {'PASS' if result else 'FAIL'}")
|
||||
|
||||
return bool(
|
||||
(status_match or is_streaming_test)
|
||||
and traces_ok
|
||||
and metrics_count_ok
|
||||
and metrics_by_name_validated
|
||||
and spans_ok
|
||||
)
|
||||
|
||||
def run_all_test_cases(self, test_cases: list[TelemetryTestCase], verbose: bool = True) -> dict[str, bool]:
|
||||
"""Run all test cases and return results."""
|
||||
results = {}
|
||||
for test_case in test_cases:
|
||||
results[test_case.name] = self.run_test_case(test_case, verbose=verbose)
|
||||
return results
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# HELPER FUNCTIONS
|
||||
# ============================================================================
|
||||
|
||||
|
||||
def is_port_available(port: int) -> bool:
|
||||
"""Check if a TCP port is available for binding."""
|
||||
try:
|
||||
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
|
||||
sock.bind(("localhost", port))
|
||||
return True
|
||||
except OSError:
|
||||
return False
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# PYTEST FIXTURES
|
||||
# ============================================================================
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
|
||||
def mock_servers():
|
||||
"""
|
||||
Fixture: Start all mock servers in parallel using async harness.
|
||||
|
||||
**TO ADD A NEW MOCK SERVER:**
|
||||
Just add a MockServerConfig to the MOCK_SERVERS list below.
|
||||
"""
|
||||
import asyncio
|
||||
|
||||
# ========================================================================
|
||||
# MOCK SERVER CONFIGURATION
|
||||
# **TO ADD A NEW MOCK:** Just add a MockServerConfig instance below
|
||||
#
|
||||
# Example:
|
||||
# MockServerConfig(
|
||||
# name="Mock MyService",
|
||||
# server_class=MockMyService, # Must inherit from MockServerBase
|
||||
# init_kwargs={"port": 9000, "param": "value"},
|
||||
# ),
|
||||
# ========================================================================
|
||||
mock_servers_config = [
|
||||
MockServerConfig(
|
||||
name="Mock OTLP Collector",
|
||||
server_class=MockOTLPCollector,
|
||||
init_kwargs={"port": 4318},
|
||||
),
|
||||
MockServerConfig(
|
||||
name="Mock vLLM Server",
|
||||
server_class=MockVLLMServer,
|
||||
init_kwargs={
|
||||
"port": 8000,
|
||||
"models": ["meta-llama/Llama-3.2-1B-Instruct"],
|
||||
},
|
||||
),
|
||||
# Add more mock servers here - they will start in parallel automatically!
|
||||
]
|
||||
|
||||
# Start all servers in parallel
|
||||
servers = asyncio.run(start_mock_servers_async(mock_servers_config))
|
||||
|
||||
# Verify vLLM models
|
||||
models_response = requests.get("http://localhost:8000/v1/models", timeout=1)
|
||||
models_data = models_response.json()
|
||||
print(f"[INFO] Mock vLLM serving {len(models_data['data'])} models: {[m['id'] for m in models_data['data']]}")
|
||||
|
||||
yield servers
|
||||
|
||||
# Stop all servers
|
||||
stop_mock_servers(servers)
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
|
||||
def mock_otlp_collector(mock_servers):
|
||||
"""Convenience fixture to get OTLP collector from mock_servers."""
|
||||
return mock_servers["Mock OTLP Collector"]
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
|
||||
def mock_vllm_server(mock_servers):
|
||||
"""Convenience fixture to get vLLM server from mock_servers."""
|
||||
return mock_servers["Mock vLLM Server"]
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
|
||||
def llama_stack_server(tmp_path_factory, mock_otlp_collector, mock_vllm_server):
|
||||
"""
|
||||
Fixture: Start real Llama Stack server with inline OTel provider.
|
||||
|
||||
**THIS IS THE MAIN FIXTURE** - it runs:
|
||||
opentelemetry-instrument llama stack run --config run.yaml
|
||||
|
||||
**TO MODIFY STACK CONFIG:** Edit run_config dict below
|
||||
"""
|
||||
config_dir = tmp_path_factory.mktemp("otel-stack-config")
|
||||
|
||||
# Ensure mock vLLM is ready and accessible before starting Llama Stack
|
||||
print("\n[INFO] Verifying mock vLLM is accessible at http://localhost:8000...")
|
||||
try:
|
||||
vllm_models = requests.get("http://localhost:8000/v1/models", timeout=2)
|
||||
print(f"[INFO] Mock vLLM models endpoint response: {vllm_models.status_code}")
|
||||
except Exception as e:
|
||||
pytest.fail(f"Mock vLLM not accessible before starting Llama Stack: {e}")
|
||||
|
||||
# Create run.yaml with inference and telemetry providers
|
||||
# **TO ADD MORE PROVIDERS:** Add to providers dict
|
||||
run_config = {
|
||||
"image_name": "test-otel-e2e",
|
||||
"apis": ["inference"],
|
||||
"providers": {
|
||||
"inference": [
|
||||
{
|
||||
"provider_id": "vllm",
|
||||
"provider_type": "remote::vllm",
|
||||
"config": {
|
||||
"url": "http://localhost:8000/v1",
|
||||
},
|
||||
},
|
||||
],
|
||||
"telemetry": [
|
||||
{
|
||||
"provider_id": "otel",
|
||||
"provider_type": "inline::otel",
|
||||
"config": {
|
||||
"service_name": "llama-stack-e2e-test",
|
||||
"span_processor": "simple",
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
"models": [
|
||||
{
|
||||
"model_id": "meta-llama/Llama-3.2-1B-Instruct",
|
||||
"provider_id": "vllm",
|
||||
}
|
||||
],
|
||||
}
|
||||
|
||||
config_file = config_dir / "run.yaml"
|
||||
with open(config_file, "w") as f:
|
||||
yaml.dump(run_config, f)
|
||||
|
||||
# Find available port for Llama Stack
|
||||
port = 5555
|
||||
while not is_port_available(port) and port < 5600:
|
||||
port += 1
|
||||
|
||||
if port >= 5600:
|
||||
pytest.skip("No available ports for test server")
|
||||
|
||||
# Set environment variables for OTel instrumentation
|
||||
# NOTE: These only affect the subprocess, not other tests
|
||||
env = os.environ.copy()
|
||||
env["OTEL_EXPORTER_OTLP_ENDPOINT"] = "http://localhost:4318"
|
||||
env["OTEL_EXPORTER_OTLP_PROTOCOL"] = "http/protobuf" # Ensure correct protocol
|
||||
env["OTEL_SERVICE_NAME"] = "llama-stack-e2e-test"
|
||||
env["OTEL_SPAN_PROCESSOR"] = "simple" # Force simple processor for immediate export
|
||||
env["LLAMA_STACK_PORT"] = str(port)
|
||||
env["OTEL_PYTHON_LOGGING_AUTO_INSTRUMENTATION_ENABLED"] = "true"
|
||||
|
||||
# Configure fast metric export for testing (default is 60 seconds)
|
||||
# This makes metrics export every 500ms instead of every 60 seconds
|
||||
env["OTEL_METRIC_EXPORT_INTERVAL"] = "500" # milliseconds
|
||||
env["OTEL_METRIC_EXPORT_TIMEOUT"] = "1000" # milliseconds
|
||||
|
||||
# Disable inference recording to ensure real requests to our mock vLLM
|
||||
# This is critical - without this, Llama Stack replays cached responses
|
||||
# Safe to remove here as it only affects the subprocess environment
|
||||
if "LLAMA_STACK_TEST_INFERENCE_MODE" in env:
|
||||
del env["LLAMA_STACK_TEST_INFERENCE_MODE"]
|
||||
|
||||
# Start server with automatic instrumentation
|
||||
cmd = [
|
||||
"opentelemetry-instrument", # ← Automatic instrumentation wrapper
|
||||
"llama",
|
||||
"stack",
|
||||
"run",
|
||||
str(config_file),
|
||||
"--port",
|
||||
str(port),
|
||||
]
|
||||
|
||||
print(f"\n[INFO] Starting Llama Stack with OTel instrumentation on port {port}")
|
||||
print(f"[INFO] Command: {' '.join(cmd)}")
|
||||
|
||||
process = subprocess.Popen(
|
||||
cmd,
|
||||
env=env,
|
||||
stdout=subprocess.PIPE,
|
||||
stderr=subprocess.STDOUT, # Merge stderr into stdout
|
||||
text=True,
|
||||
)
|
||||
|
||||
# Wait for server to start
|
||||
max_wait = 30
|
||||
base_url = f"http://localhost:{port}"
|
||||
startup_output = []
|
||||
|
||||
for i in range(max_wait):
|
||||
# Collect server output non-blocking
|
||||
import select
|
||||
|
||||
if process.stdout and select.select([process.stdout], [], [], 0)[0]:
|
||||
line = process.stdout.readline()
|
||||
if line:
|
||||
startup_output.append(line)
|
||||
|
||||
try:
|
||||
response = requests.get(f"{base_url}/v1/health", timeout=1)
|
||||
if response.status_code == 200:
|
||||
print(f"[INFO] Server ready at {base_url}")
|
||||
# Print relevant initialization logs
|
||||
print(f"[DEBUG] Captured {len(startup_output)} lines of server output")
|
||||
relevant_logs = [
|
||||
line
|
||||
for line in startup_output
|
||||
if any(keyword in line.lower() for keyword in ["telemetry", "otel", "provider", "error creating"])
|
||||
]
|
||||
if relevant_logs:
|
||||
print("[DEBUG] Relevant server logs:")
|
||||
for log in relevant_logs[-10:]: # Last 10 relevant lines
|
||||
print(f" {log.strip()}")
|
||||
time.sleep(0.5)
|
||||
break
|
||||
except requests.exceptions.RequestException:
|
||||
if i == max_wait - 1:
|
||||
process.terminate()
|
||||
stdout, _ = process.communicate(timeout=5)
|
||||
pytest.fail(f"Server failed to start.\nOutput: {stdout}")
|
||||
time.sleep(1)
|
||||
|
||||
yield {
|
||||
"base_url": base_url,
|
||||
"port": port,
|
||||
"collector": mock_otlp_collector,
|
||||
"vllm_server": mock_vllm_server,
|
||||
}
|
||||
|
||||
# Cleanup
|
||||
print("\n[INFO] Stopping Llama Stack server")
|
||||
process.terminate()
|
||||
try:
|
||||
process.wait(timeout=5)
|
||||
except subprocess.TimeoutExpired:
|
||||
process.kill()
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# TESTS: End-to-End with Real Stack
|
||||
# **THESE RUN SLOW** - marked with @pytest.mark.slow
|
||||
# **TO ADD NEW E2E TESTS:** Add methods to this class
|
||||
# ============================================================================
|
||||
|
||||
|
||||
@pytest.mark.slow
|
||||
class TestOTelE2E:
|
||||
"""
|
||||
End-to-end tests with real Llama Stack server.
|
||||
|
||||
These tests verify the complete flow:
|
||||
- Real Llama Stack with inline OTel provider
|
||||
- Real API calls
|
||||
- Automatic trace and metric collection
|
||||
- Mock OTLP collector captures exports
|
||||
"""
|
||||
|
||||
def test_server_starts_with_auto_instrumentation(self, llama_stack_server):
|
||||
"""Verify server starts successfully with inline OTel provider."""
|
||||
base_url = llama_stack_server["base_url"]
|
||||
|
||||
# Try different health check endpoints
|
||||
health_endpoints = ["/health", "/v1/health", "/"]
|
||||
server_responding = False
|
||||
|
||||
for endpoint in health_endpoints:
|
||||
try:
|
||||
response = requests.get(f"{base_url}{endpoint}", timeout=5)
|
||||
print(f"\n[DEBUG] {endpoint} -> {response.status_code}")
|
||||
if response.status_code == 200:
|
||||
server_responding = True
|
||||
break
|
||||
except Exception as e:
|
||||
print(f"[DEBUG] {endpoint} failed: {e}")
|
||||
|
||||
assert server_responding, f"Server not responding on any endpoint at {base_url}"
|
||||
|
||||
print(f"\n[PASS] Llama Stack running with OTel at {base_url}")
|
||||
|
||||
def test_all_test_cases_via_runner(self, llama_stack_server):
|
||||
"""
|
||||
**MAIN TEST:** Run all TelemetryTestCase instances with custom metrics validation.
|
||||
|
||||
This executes all test cases defined in TEST_CASES list and validates:
|
||||
1. Traces are exported to the collector
|
||||
2. Metrics are exported to the collector
|
||||
3. Custom metrics (defined in CUSTOM_METRICS_BASE, CUSTOM_METRICS_STREAMING)
|
||||
are captured by name with non-empty data points
|
||||
|
||||
Each test case specifies which metrics to validate via expected_metrics field.
|
||||
|
||||
**TO ADD MORE TESTS:**
|
||||
- Add TelemetryTestCase to TEST_CASES (line ~132)
|
||||
- Reference CUSTOM_METRICS_BASE or CUSTOM_METRICS_STREAMING in expected_metrics
|
||||
- See examples in existing test cases
|
||||
|
||||
**TO ADD NEW METRICS:**
|
||||
- Add metric to otel.py
|
||||
- Add metric name to CUSTOM_METRICS_BASE or CUSTOM_METRICS_STREAMING (line ~122)
|
||||
- Update test cases that should validate it
|
||||
"""
|
||||
base_url = llama_stack_server["base_url"]
|
||||
collector = llama_stack_server["collector"]
|
||||
|
||||
# Create test runner
|
||||
runner = TelemetryTestRunner(base_url, collector)
|
||||
|
||||
# Execute all test cases (set verbose=False for cleaner output)
|
||||
results = runner.run_all_test_cases(TEST_CASES, verbose=False)
|
||||
|
||||
print(f"\n{'=' * 50}\nTEST CASE SUMMARY\n{'=' * 50}")
|
||||
passed = sum(1 for p in results.values() if p)
|
||||
total = len(results)
|
||||
print(f"Passed: {passed}/{total}\n")
|
||||
|
||||
failed = [name for name, ok in results.items() if not ok]
|
||||
for name, ok in results.items():
|
||||
print(f" {'[PASS]' if ok else '[FAIL]'} {name}")
|
||||
|
||||
print(f"{'=' * 50}\n")
|
||||
assert not failed, f"Some test cases failed: {failed}"
|
||||
|
|
@ -1,532 +0,0 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
"""
|
||||
Integration tests for OpenTelemetry provider.
|
||||
|
||||
These tests verify that the OTel provider correctly:
|
||||
- Initializes within the Llama Stack
|
||||
- Captures expected metrics (counters, histograms, up/down counters)
|
||||
- Captures expected spans/traces
|
||||
- Exports telemetry data to an OTLP collector (in-memory for testing)
|
||||
|
||||
Tests use in-memory exporters to avoid external dependencies and can run in GitHub Actions.
|
||||
"""
|
||||
|
||||
import os
|
||||
import time
|
||||
from collections import defaultdict
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
from opentelemetry.sdk.metrics.export import InMemoryMetricReader
|
||||
from opentelemetry.sdk.trace.export import SimpleSpanProcessor
|
||||
from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter
|
||||
|
||||
from llama_stack.providers.inline.telemetry.otel.config import OTelTelemetryConfig
|
||||
from llama_stack.providers.inline.telemetry.otel.otel import OTelTelemetryProvider
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
|
||||
def in_memory_span_exporter():
|
||||
"""Create an in-memory span exporter to capture traces."""
|
||||
return InMemorySpanExporter()
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
|
||||
def in_memory_metric_reader():
|
||||
"""Create an in-memory metric reader to capture metrics."""
|
||||
return InMemoryMetricReader()
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
|
||||
def otel_provider_with_memory_exporters(in_memory_span_exporter, in_memory_metric_reader):
|
||||
"""
|
||||
Create an OTelTelemetryProvider configured with in-memory exporters.
|
||||
|
||||
This allows us to capture and verify telemetry data without external services.
|
||||
Returns a dict with 'provider', 'span_exporter', and 'metric_reader'.
|
||||
"""
|
||||
# Set mock environment to avoid warnings
|
||||
os.environ["OTEL_EXPORTER_OTLP_ENDPOINT"] = "http://localhost:4318"
|
||||
|
||||
config = OTelTelemetryConfig(
|
||||
service_name="test-llama-stack-otel",
|
||||
service_version="1.0.0-test",
|
||||
deployment_environment="ci-test",
|
||||
span_processor="simple",
|
||||
)
|
||||
|
||||
# Patch the provider to use in-memory exporters
|
||||
with patch.object(
|
||||
OTelTelemetryProvider,
|
||||
'model_post_init',
|
||||
lambda self, _: _init_with_memory_exporters(
|
||||
self, config, in_memory_span_exporter, in_memory_metric_reader
|
||||
)
|
||||
):
|
||||
provider = OTelTelemetryProvider(config=config)
|
||||
yield {
|
||||
'provider': provider,
|
||||
'span_exporter': in_memory_span_exporter,
|
||||
'metric_reader': in_memory_metric_reader
|
||||
}
|
||||
|
||||
|
||||
def _init_with_memory_exporters(provider, config, span_exporter, metric_reader):
|
||||
"""Helper to initialize provider with in-memory exporters."""
|
||||
import threading
|
||||
from opentelemetry import metrics, trace
|
||||
from opentelemetry.sdk.metrics import MeterProvider
|
||||
from opentelemetry.sdk.resources import Attributes, Resource
|
||||
from opentelemetry.sdk.trace import TracerProvider
|
||||
|
||||
# Initialize pydantic private attributes
|
||||
if provider.__pydantic_private__ is None:
|
||||
provider.__pydantic_private__ = {}
|
||||
|
||||
provider._lock = threading.Lock()
|
||||
provider._counters = {}
|
||||
provider._up_down_counters = {}
|
||||
provider._histograms = {}
|
||||
provider._gauges = {}
|
||||
|
||||
# Create resource attributes
|
||||
attributes: Attributes = {
|
||||
key: value
|
||||
for key, value in {
|
||||
"service.name": config.service_name,
|
||||
"service.version": config.service_version,
|
||||
"deployment.environment": config.deployment_environment,
|
||||
}.items()
|
||||
if value is not None
|
||||
}
|
||||
|
||||
resource = Resource.create(attributes)
|
||||
|
||||
# Configure tracer provider with in-memory exporter
|
||||
tracer_provider = TracerProvider(resource=resource)
|
||||
tracer_provider.add_span_processor(SimpleSpanProcessor(span_exporter))
|
||||
trace.set_tracer_provider(tracer_provider)
|
||||
|
||||
# Configure meter provider with in-memory reader
|
||||
meter_provider = MeterProvider(
|
||||
resource=resource,
|
||||
metric_readers=[metric_reader]
|
||||
)
|
||||
metrics.set_meter_provider(meter_provider)
|
||||
|
||||
|
||||
class TestOTelProviderInitialization:
|
||||
"""Test OTel provider initialization within Llama Stack."""
|
||||
|
||||
def test_provider_initializes_successfully(self, otel_provider_with_memory_exporters):
|
||||
"""Test that the OTel provider initializes without errors."""
|
||||
provider = otel_provider_with_memory_exporters['provider']
|
||||
span_exporter = otel_provider_with_memory_exporters['span_exporter']
|
||||
|
||||
assert provider is not None
|
||||
assert provider.config.service_name == "test-llama-stack-otel"
|
||||
assert provider.config.service_version == "1.0.0-test"
|
||||
assert provider.config.deployment_environment == "ci-test"
|
||||
|
||||
def test_provider_has_thread_safety_mechanisms(self, otel_provider_with_memory_exporters):
|
||||
"""Test that the provider has thread-safety mechanisms in place."""
|
||||
provider = otel_provider_with_memory_exporters['provider']
|
||||
|
||||
assert hasattr(provider, "_lock")
|
||||
assert provider._lock is not None
|
||||
assert hasattr(provider, "_counters")
|
||||
assert hasattr(provider, "_histograms")
|
||||
assert hasattr(provider, "_up_down_counters")
|
||||
|
||||
|
||||
class TestOTelMetricsCapture:
|
||||
"""Test that OTel provider captures expected metrics."""
|
||||
|
||||
def test_counter_metric_is_captured(self, otel_provider_with_memory_exporters):
|
||||
"""Test that counter metrics are captured."""
|
||||
provider = otel_provider_with_memory_exporters['provider']
|
||||
metric_reader = otel_provider_with_memory_exporters['metric_reader']
|
||||
|
||||
# Record counter metrics
|
||||
provider.record_count("llama.requests.total", 1.0, attributes={"endpoint": "/chat"})
|
||||
provider.record_count("llama.requests.total", 1.0, attributes={"endpoint": "/chat"})
|
||||
provider.record_count("llama.requests.total", 1.0, attributes={"endpoint": "/embeddings"})
|
||||
|
||||
# Force metric collection - collect() triggers the reader to gather metrics
|
||||
metric_reader.collect()
|
||||
metric_reader.collect()
|
||||
metrics_data = metric_reader.get_metrics_data()
|
||||
|
||||
# Verify metrics were captured
|
||||
assert metrics_data is not None
|
||||
assert len(metrics_data.resource_metrics) > 0
|
||||
|
||||
# Find our counter metric
|
||||
found_counter = False
|
||||
for resource_metric in metrics_data.resource_metrics:
|
||||
for scope_metric in resource_metric.scope_metrics:
|
||||
for metric in scope_metric.metrics:
|
||||
if metric.name == "llama.requests.total":
|
||||
found_counter = True
|
||||
# Verify it's a counter with data points
|
||||
assert hasattr(metric.data, "data_points")
|
||||
assert len(metric.data.data_points) > 0
|
||||
|
||||
assert found_counter, "Counter metric 'llama.requests.total' was not captured"
|
||||
|
||||
def test_histogram_metric_is_captured(self, otel_provider_with_memory_exporters):
|
||||
"""Test that histogram metrics are captured."""
|
||||
provider = otel_provider_with_memory_exporters['provider']
|
||||
metric_reader = otel_provider_with_memory_exporters['metric_reader']
|
||||
|
||||
# Record histogram metrics with various values
|
||||
latencies = [10.5, 25.3, 50.1, 100.7, 250.2]
|
||||
for latency in latencies:
|
||||
provider.record_histogram(
|
||||
"llama.inference.latency",
|
||||
latency,
|
||||
attributes={"model": "llama-3.2"}
|
||||
)
|
||||
|
||||
# Force metric collection
|
||||
metric_reader.collect()
|
||||
metrics_data = metric_reader.get_metrics_data()
|
||||
|
||||
# Find our histogram metric
|
||||
found_histogram = False
|
||||
for resource_metric in metrics_data.resource_metrics:
|
||||
for scope_metric in resource_metric.scope_metrics:
|
||||
for metric in scope_metric.metrics:
|
||||
if metric.name == "llama.inference.latency":
|
||||
found_histogram = True
|
||||
# Verify it's a histogram
|
||||
assert hasattr(metric.data, "data_points")
|
||||
data_point = metric.data.data_points[0]
|
||||
# Histograms should have count and sum
|
||||
assert hasattr(data_point, "count")
|
||||
assert data_point.count == len(latencies)
|
||||
|
||||
assert found_histogram, "Histogram metric 'llama.inference.latency' was not captured"
|
||||
|
||||
def test_up_down_counter_metric_is_captured(self, otel_provider_with_memory_exporters):
|
||||
"""Test that up/down counter metrics are captured."""
|
||||
provider = otel_provider_with_memory_exporters['provider']
|
||||
metric_reader = otel_provider_with_memory_exporters['metric_reader']
|
||||
|
||||
# Record up/down counter metrics
|
||||
provider.record_up_down_counter("llama.active.sessions", 5)
|
||||
provider.record_up_down_counter("llama.active.sessions", 3)
|
||||
provider.record_up_down_counter("llama.active.sessions", -2)
|
||||
|
||||
# Force metric collection
|
||||
metric_reader.collect()
|
||||
metrics_data = metric_reader.get_metrics_data()
|
||||
|
||||
# Find our up/down counter metric
|
||||
found_updown = False
|
||||
for resource_metric in metrics_data.resource_metrics:
|
||||
for scope_metric in resource_metric.scope_metrics:
|
||||
for metric in scope_metric.metrics:
|
||||
if metric.name == "llama.active.sessions":
|
||||
found_updown = True
|
||||
assert hasattr(metric.data, "data_points")
|
||||
assert len(metric.data.data_points) > 0
|
||||
|
||||
assert found_updown, "Up/Down counter metric 'llama.active.sessions' was not captured"
|
||||
|
||||
def test_metrics_with_attributes_are_captured(self, otel_provider_with_memory_exporters):
|
||||
"""Test that metric attributes/labels are preserved."""
|
||||
provider = otel_provider_with_memory_exporters['provider']
|
||||
metric_reader = otel_provider_with_memory_exporters['metric_reader']
|
||||
|
||||
# Record metrics with different attributes
|
||||
provider.record_count("llama.tokens.generated", 150.0, attributes={
|
||||
"model": "llama-3.2-1b",
|
||||
"user": "test-user"
|
||||
})
|
||||
|
||||
# Force metric collection
|
||||
metric_reader.collect()
|
||||
metrics_data = metric_reader.get_metrics_data()
|
||||
|
||||
# Verify attributes are preserved
|
||||
found_with_attributes = False
|
||||
for resource_metric in metrics_data.resource_metrics:
|
||||
for scope_metric in resource_metric.scope_metrics:
|
||||
for metric in scope_metric.metrics:
|
||||
if metric.name == "llama.tokens.generated":
|
||||
data_point = metric.data.data_points[0]
|
||||
# Check attributes - they're already a dict in the SDK
|
||||
attrs = data_point.attributes if isinstance(data_point.attributes, dict) else {}
|
||||
if "model" in attrs and "user" in attrs:
|
||||
found_with_attributes = True
|
||||
assert attrs["model"] == "llama-3.2-1b"
|
||||
assert attrs["user"] == "test-user"
|
||||
|
||||
assert found_with_attributes, "Metrics with attributes were not properly captured"
|
||||
|
||||
def test_multiple_metric_types_coexist(self, otel_provider_with_memory_exporters):
|
||||
"""Test that different metric types can coexist."""
|
||||
provider = otel_provider_with_memory_exporters['provider']
|
||||
metric_reader = otel_provider_with_memory_exporters['metric_reader']
|
||||
|
||||
# Record various metric types
|
||||
provider.record_count("test.counter", 1.0)
|
||||
provider.record_histogram("test.histogram", 42.0)
|
||||
provider.record_up_down_counter("test.gauge", 10)
|
||||
|
||||
# Force metric collection
|
||||
metric_reader.collect()
|
||||
metrics_data = metric_reader.get_metrics_data()
|
||||
|
||||
# Count unique metrics
|
||||
metric_names = set()
|
||||
for resource_metric in metrics_data.resource_metrics:
|
||||
for scope_metric in resource_metric.scope_metrics:
|
||||
for metric in scope_metric.metrics:
|
||||
metric_names.add(metric.name)
|
||||
|
||||
# Should have all three metrics
|
||||
assert "test.counter" in metric_names
|
||||
assert "test.histogram" in metric_names
|
||||
assert "test.gauge" in metric_names
|
||||
|
||||
|
||||
class TestOTelSpansCapture:
|
||||
"""Test that OTel provider captures expected spans/traces."""
|
||||
|
||||
def test_basic_span_is_captured(self, otel_provider_with_memory_exporters):
|
||||
"""Test that basic spans are captured."""
|
||||
provider = otel_provider_with_memory_exporters['provider']
|
||||
metric_reader = otel_provider_with_memory_exporters['metric_reader']
|
||||
span_exporter = otel_provider_with_memory_exporters['span_exporter']
|
||||
|
||||
# Create a span
|
||||
span = provider.custom_trace("llama.inference.request")
|
||||
span.end()
|
||||
|
||||
# Get captured spans
|
||||
spans = span_exporter.get_finished_spans()
|
||||
|
||||
assert len(spans) > 0
|
||||
assert any(span.name == "llama.inference.request" for span in spans)
|
||||
|
||||
def test_span_with_attributes_is_captured(self, otel_provider_with_memory_exporters):
|
||||
"""Test that span attributes are preserved."""
|
||||
provider = otel_provider_with_memory_exporters['provider']
|
||||
span_exporter = otel_provider_with_memory_exporters['span_exporter']
|
||||
|
||||
# Create a span with attributes
|
||||
span = provider.custom_trace(
|
||||
"llama.chat.completion",
|
||||
attributes={
|
||||
"model.id": "llama-3.2-1b",
|
||||
"user.id": "test-user-123",
|
||||
"request.id": "req-abc-123"
|
||||
}
|
||||
)
|
||||
span.end()
|
||||
|
||||
# Get captured spans
|
||||
spans = span_exporter.get_finished_spans()
|
||||
|
||||
# Find our span
|
||||
our_span = None
|
||||
for s in spans:
|
||||
if s.name == "llama.chat.completion":
|
||||
our_span = s
|
||||
break
|
||||
|
||||
assert our_span is not None, "Span 'llama.chat.completion' was not captured"
|
||||
|
||||
# Verify attributes
|
||||
attrs = dict(our_span.attributes)
|
||||
assert attrs.get("model.id") == "llama-3.2-1b"
|
||||
assert attrs.get("user.id") == "test-user-123"
|
||||
assert attrs.get("request.id") == "req-abc-123"
|
||||
|
||||
def test_multiple_spans_are_captured(self, otel_provider_with_memory_exporters):
|
||||
"""Test that multiple spans are captured."""
|
||||
provider = otel_provider_with_memory_exporters['provider']
|
||||
span_exporter = otel_provider_with_memory_exporters['span_exporter']
|
||||
|
||||
# Create multiple spans
|
||||
span_names = [
|
||||
"llama.request.validate",
|
||||
"llama.model.load",
|
||||
"llama.inference.execute",
|
||||
"llama.response.format"
|
||||
]
|
||||
|
||||
for name in span_names:
|
||||
span = provider.custom_trace(name)
|
||||
time.sleep(0.01) # Small delay to ensure ordering
|
||||
span.end()
|
||||
|
||||
# Get captured spans
|
||||
spans = span_exporter.get_finished_spans()
|
||||
captured_names = {span.name for span in spans}
|
||||
|
||||
# Verify all spans were captured
|
||||
for expected_name in span_names:
|
||||
assert expected_name in captured_names, f"Span '{expected_name}' was not captured"
|
||||
|
||||
def test_span_has_service_metadata(self, otel_provider_with_memory_exporters):
|
||||
"""Test that spans include service metadata."""
|
||||
provider = otel_provider_with_memory_exporters['provider']
|
||||
span_exporter = otel_provider_with_memory_exporters['span_exporter']
|
||||
|
||||
# Create a span
|
||||
span = provider.custom_trace("test.span")
|
||||
span.end()
|
||||
|
||||
# Get captured spans
|
||||
spans = span_exporter.get_finished_spans()
|
||||
|
||||
assert len(spans) > 0
|
||||
|
||||
# Check resource attributes
|
||||
span = spans[0]
|
||||
resource_attrs = dict(span.resource.attributes)
|
||||
|
||||
assert resource_attrs.get("service.name") == "test-llama-stack-otel"
|
||||
assert resource_attrs.get("service.version") == "1.0.0-test"
|
||||
assert resource_attrs.get("deployment.environment") == "ci-test"
|
||||
|
||||
|
||||
class TestOTelDataExport:
|
||||
"""Test that telemetry data can be exported to OTLP collector."""
|
||||
|
||||
def test_metrics_are_exportable(self, otel_provider_with_memory_exporters):
|
||||
"""Test that metrics can be exported."""
|
||||
provider = otel_provider_with_memory_exporters['provider']
|
||||
metric_reader = otel_provider_with_memory_exporters['metric_reader']
|
||||
|
||||
# Record metrics
|
||||
provider.record_count("export.test.counter", 5.0)
|
||||
provider.record_histogram("export.test.histogram", 123.45)
|
||||
|
||||
# Force export
|
||||
metric_reader.collect()
|
||||
metrics_data = metric_reader.get_metrics_data()
|
||||
|
||||
# Verify data structure is exportable
|
||||
assert metrics_data is not None
|
||||
assert hasattr(metrics_data, "resource_metrics")
|
||||
assert len(metrics_data.resource_metrics) > 0
|
||||
|
||||
# Verify resource attributes are present (needed for OTLP export)
|
||||
resource = metrics_data.resource_metrics[0].resource
|
||||
assert resource is not None
|
||||
assert len(resource.attributes) > 0
|
||||
|
||||
def test_spans_are_exportable(self, otel_provider_with_memory_exporters):
|
||||
"""Test that spans can be exported."""
|
||||
provider = otel_provider_with_memory_exporters['provider']
|
||||
span_exporter = otel_provider_with_memory_exporters['span_exporter']
|
||||
|
||||
# Create spans
|
||||
span1 = provider.custom_trace("export.test.span1")
|
||||
span1.end()
|
||||
|
||||
span2 = provider.custom_trace("export.test.span2")
|
||||
span2.end()
|
||||
|
||||
# Get exported spans
|
||||
spans = span_exporter.get_finished_spans()
|
||||
|
||||
# Verify spans have required OTLP fields
|
||||
assert len(spans) >= 2
|
||||
for span in spans:
|
||||
assert span.name is not None
|
||||
assert span.context is not None
|
||||
assert span.context.trace_id is not None
|
||||
assert span.context.span_id is not None
|
||||
assert span.resource is not None
|
||||
|
||||
def test_concurrent_export_is_safe(self, otel_provider_with_memory_exporters):
|
||||
"""Test that concurrent metric/span recording doesn't break export."""
|
||||
import concurrent.futures
|
||||
provider = otel_provider_with_memory_exporters['provider']
|
||||
metric_reader = otel_provider_with_memory_exporters['metric_reader']
|
||||
span_exporter = otel_provider_with_memory_exporters['span_exporter']
|
||||
|
||||
def record_data(thread_id):
|
||||
for i in range(10):
|
||||
provider.record_count(f"concurrent.counter.{thread_id}", 1.0)
|
||||
span = provider.custom_trace(f"concurrent.span.{thread_id}.{i}")
|
||||
span.end()
|
||||
|
||||
# Record from multiple threads
|
||||
with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
|
||||
futures = [executor.submit(record_data, i) for i in range(5)]
|
||||
concurrent.futures.wait(futures)
|
||||
|
||||
# Verify export still works
|
||||
metric_reader.collect()
|
||||
metrics_data = metric_reader.get_metrics_data()
|
||||
spans = span_exporter.get_finished_spans()
|
||||
|
||||
assert metrics_data is not None
|
||||
assert len(spans) >= 50 # 5 threads * 10 spans each
|
||||
|
||||
|
||||
@pytest.mark.integration
|
||||
class TestOTelProviderIntegration:
|
||||
"""End-to-end integration tests simulating real usage."""
|
||||
|
||||
def test_complete_inference_workflow_telemetry(self, otel_provider_with_memory_exporters):
|
||||
"""Simulate a complete inference workflow with telemetry."""
|
||||
provider = otel_provider_with_memory_exporters['provider']
|
||||
metric_reader = otel_provider_with_memory_exporters['metric_reader']
|
||||
span_exporter = otel_provider_with_memory_exporters['span_exporter']
|
||||
|
||||
# Simulate inference workflow
|
||||
request_span = provider.custom_trace(
|
||||
"llama.inference.request",
|
||||
attributes={"model": "llama-3.2-1b", "user": "test"}
|
||||
)
|
||||
|
||||
# Track metrics during inference
|
||||
provider.record_count("llama.requests.received", 1.0)
|
||||
provider.record_up_down_counter("llama.requests.in_flight", 1)
|
||||
|
||||
# Simulate processing time
|
||||
time.sleep(0.01)
|
||||
provider.record_histogram("llama.request.duration_ms", 10.5)
|
||||
|
||||
# Track tokens
|
||||
provider.record_count("llama.tokens.input", 25.0)
|
||||
provider.record_count("llama.tokens.output", 150.0)
|
||||
|
||||
# End request
|
||||
provider.record_up_down_counter("llama.requests.in_flight", -1)
|
||||
provider.record_count("llama.requests.completed", 1.0)
|
||||
request_span.end()
|
||||
|
||||
# Verify all telemetry was captured
|
||||
metric_reader.collect()
|
||||
metrics_data = metric_reader.get_metrics_data()
|
||||
spans = span_exporter.get_finished_spans()
|
||||
|
||||
# Check metrics exist
|
||||
metric_names = set()
|
||||
for rm in metrics_data.resource_metrics:
|
||||
for sm in rm.scope_metrics:
|
||||
for m in sm.metrics:
|
||||
metric_names.add(m.name)
|
||||
|
||||
assert "llama.requests.received" in metric_names
|
||||
assert "llama.requests.in_flight" in metric_names
|
||||
assert "llama.request.duration_ms" in metric_names
|
||||
assert "llama.tokens.input" in metric_names
|
||||
assert "llama.tokens.output" in metric_names
|
||||
|
||||
# Check span exists
|
||||
assert any(s.name == "llama.inference.request" for s in spans)
|
||||
|
||||
|
|
@ -4,8 +4,6 @@
|
|||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
import logging
|
||||
|
||||
import pytest
|
||||
|
||||
import llama_stack.providers.inline.telemetry.meta_reference.telemetry as telemetry_module
|
||||
|
|
@ -38,7 +36,7 @@ def test_warns_when_traces_endpoints_missing(monkeypatch: pytest.MonkeyPatch, ca
|
|||
monkeypatch.delenv("OTEL_EXPORTER_OTLP_TRACES_ENDPOINT", raising=False)
|
||||
monkeypatch.delenv("OTEL_EXPORTER_OTLP_ENDPOINT", raising=False)
|
||||
|
||||
caplog.set_level(logging.WARNING)
|
||||
caplog.set_level("WARNING")
|
||||
|
||||
config = _make_config_with_sinks(TelemetrySink.OTEL_TRACE)
|
||||
telemetry_module.TelemetryAdapter(config=config, deps={})
|
||||
|
|
@ -57,7 +55,7 @@ def test_warns_when_metrics_endpoints_missing(monkeypatch: pytest.MonkeyPatch, c
|
|||
monkeypatch.delenv("OTEL_EXPORTER_OTLP_METRICS_ENDPOINT", raising=False)
|
||||
monkeypatch.delenv("OTEL_EXPORTER_OTLP_ENDPOINT", raising=False)
|
||||
|
||||
caplog.set_level(logging.WARNING)
|
||||
caplog.set_level("WARNING")
|
||||
|
||||
config = _make_config_with_sinks(TelemetrySink.OTEL_METRIC)
|
||||
telemetry_module.TelemetryAdapter(config=config, deps={})
|
||||
|
|
@ -76,7 +74,7 @@ def test_no_warning_when_traces_endpoints_present(monkeypatch: pytest.MonkeyPatc
|
|||
monkeypatch.setenv("OTEL_EXPORTER_OTLP_TRACES_ENDPOINT", "https://otel.example:4318/v1/traces")
|
||||
monkeypatch.setenv("OTEL_EXPORTER_OTLP_ENDPOINT", "https://otel.example:4318")
|
||||
|
||||
caplog.set_level(logging.WARNING)
|
||||
caplog.set_level("WARNING")
|
||||
|
||||
config = _make_config_with_sinks(TelemetrySink.OTEL_TRACE)
|
||||
telemetry_module.TelemetryAdapter(config=config, deps={})
|
||||
|
|
@ -91,7 +89,7 @@ def test_no_warning_when_metrics_endpoints_present(monkeypatch: pytest.MonkeyPat
|
|||
monkeypatch.setenv("OTEL_EXPORTER_OTLP_METRICS_ENDPOINT", "https://otel.example:4318/v1/metrics")
|
||||
monkeypatch.setenv("OTEL_EXPORTER_OTLP_ENDPOINT", "https://otel.example:4318")
|
||||
|
||||
caplog.set_level(logging.WARNING)
|
||||
caplog.set_level("WARNING")
|
||||
|
||||
config = _make_config_with_sinks(TelemetrySink.OTEL_METRIC)
|
||||
telemetry_module.TelemetryAdapter(config=config, deps={})
|
||||
|
|
|
|||
|
|
@ -4,8 +4,16 @@
|
|||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
import concurrent.futures
|
||||
import threading
|
||||
"""
|
||||
Unit tests for OTel Telemetry Provider.
|
||||
|
||||
These tests focus on the provider's functionality:
|
||||
- Initialization and configuration
|
||||
- FastAPI middleware setup
|
||||
- SQLAlchemy instrumentation
|
||||
- Environment variable handling
|
||||
"""
|
||||
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
import pytest
|
||||
|
|
@ -27,35 +35,21 @@ def otel_config():
|
|||
|
||||
@pytest.fixture
|
||||
def otel_provider(otel_config, monkeypatch):
|
||||
"""Fixture providing an OTelTelemetryProvider instance with mocked environment."""
|
||||
# Set required environment variables to avoid warnings
|
||||
"""Fixture providing an OTelTelemetryProvider instance."""
|
||||
monkeypatch.setenv("OTEL_EXPORTER_OTLP_ENDPOINT", "http://localhost:4318")
|
||||
return OTelTelemetryProvider(config=otel_config)
|
||||
|
||||
|
||||
class TestOTelTelemetryProviderInitialization:
|
||||
"""Tests for OTelTelemetryProvider initialization."""
|
||||
class TestOTelProviderInitialization:
|
||||
"""Tests for OTel provider initialization and configuration."""
|
||||
|
||||
def test_initialization_with_valid_config(self, otel_config, monkeypatch):
|
||||
def test_provider_initializes_with_valid_config(self, otel_config, monkeypatch):
|
||||
"""Test that provider initializes correctly with valid configuration."""
|
||||
monkeypatch.setenv("OTEL_EXPORTER_OTLP_ENDPOINT", "http://localhost:4318")
|
||||
|
||||
provider = OTelTelemetryProvider(config=otel_config)
|
||||
|
||||
assert provider.config == otel_config
|
||||
assert hasattr(provider, "_lock")
|
||||
assert provider._lock is not None
|
||||
assert isinstance(provider._counters, dict)
|
||||
assert isinstance(provider._histograms, dict)
|
||||
assert isinstance(provider._up_down_counters, dict)
|
||||
assert isinstance(provider._gauges, dict)
|
||||
|
||||
def test_initialization_sets_service_attributes(self, otel_config, monkeypatch):
|
||||
"""Test that service attributes are properly configured."""
|
||||
monkeypatch.setenv("OTEL_EXPORTER_OTLP_ENDPOINT", "http://localhost:4318")
|
||||
|
||||
provider = OTelTelemetryProvider(config=otel_config)
|
||||
|
||||
|
||||
assert provider.config == otel_config
|
||||
assert provider.config.service_name == "test-service"
|
||||
assert provider.config.service_version == "1.0.0"
|
||||
assert provider.config.deployment_environment == "test"
|
||||
|
|
@ -69,300 +63,107 @@ class TestOTelTelemetryProviderInitialization:
|
|||
deployment_environment="test",
|
||||
span_processor="batch",
|
||||
)
|
||||
|
||||
|
||||
provider = OTelTelemetryProvider(config=config)
|
||||
|
||||
|
||||
assert provider.config.span_processor == "batch"
|
||||
|
||||
def test_warns_when_endpoints_missing(self, otel_config, monkeypatch, caplog):
|
||||
"""Test that warnings are issued when OTLP endpoints are not set."""
|
||||
# Remove all endpoint environment variables
|
||||
monkeypatch.delenv("OTEL_EXPORTER_OTLP_ENDPOINT", raising=False)
|
||||
monkeypatch.delenv("OTEL_EXPORTER_OTLP_TRACES_ENDPOINT", raising=False)
|
||||
monkeypatch.delenv("OTEL_EXPORTER_OTLP_METRICS_ENDPOINT", raising=False)
|
||||
|
||||
|
||||
OTelTelemetryProvider(config=otel_config)
|
||||
|
||||
|
||||
# Check that warnings were logged
|
||||
assert any("Traces will not be exported" in record.message for record in caplog.records)
|
||||
assert any("Metrics will not be exported" in record.message for record in caplog.records)
|
||||
|
||||
|
||||
class TestOTelTelemetryProviderMetrics:
|
||||
"""Tests for metric recording functionality."""
|
||||
class TestOTelProviderMiddleware:
|
||||
"""Tests for FastAPI and SQLAlchemy instrumentation."""
|
||||
|
||||
def test_record_count_creates_counter(self, otel_provider):
|
||||
"""Test that record_count creates a counter on first call."""
|
||||
assert "test_counter" not in otel_provider._counters
|
||||
|
||||
otel_provider.record_count("test_counter", 1.0)
|
||||
|
||||
assert "test_counter" in otel_provider._counters
|
||||
assert otel_provider._counters["test_counter"] is not None
|
||||
|
||||
def test_record_count_reuses_counter(self, otel_provider):
|
||||
"""Test that record_count reuses existing counter."""
|
||||
otel_provider.record_count("test_counter", 1.0)
|
||||
first_counter = otel_provider._counters["test_counter"]
|
||||
|
||||
otel_provider.record_count("test_counter", 2.0)
|
||||
second_counter = otel_provider._counters["test_counter"]
|
||||
|
||||
assert first_counter is second_counter
|
||||
assert len(otel_provider._counters) == 1
|
||||
|
||||
def test_record_count_with_attributes(self, otel_provider):
|
||||
"""Test that record_count works with attributes."""
|
||||
otel_provider.record_count(
|
||||
"test_counter",
|
||||
1.0,
|
||||
attributes={"key": "value", "env": "test"}
|
||||
)
|
||||
|
||||
assert "test_counter" in otel_provider._counters
|
||||
|
||||
def test_record_histogram_creates_histogram(self, otel_provider):
|
||||
"""Test that record_histogram creates a histogram on first call."""
|
||||
assert "test_histogram" not in otel_provider._histograms
|
||||
|
||||
otel_provider.record_histogram("test_histogram", 42.5)
|
||||
|
||||
assert "test_histogram" in otel_provider._histograms
|
||||
assert otel_provider._histograms["test_histogram"] is not None
|
||||
|
||||
def test_record_histogram_reuses_histogram(self, otel_provider):
|
||||
"""Test that record_histogram reuses existing histogram."""
|
||||
otel_provider.record_histogram("test_histogram", 10.0)
|
||||
first_histogram = otel_provider._histograms["test_histogram"]
|
||||
|
||||
otel_provider.record_histogram("test_histogram", 20.0)
|
||||
second_histogram = otel_provider._histograms["test_histogram"]
|
||||
|
||||
assert first_histogram is second_histogram
|
||||
assert len(otel_provider._histograms) == 1
|
||||
|
||||
def test_record_histogram_with_bucket_boundaries(self, otel_provider):
|
||||
"""Test that record_histogram works with explicit bucket boundaries."""
|
||||
boundaries = [0.0, 10.0, 50.0, 100.0]
|
||||
|
||||
otel_provider.record_histogram(
|
||||
"test_histogram",
|
||||
25.0,
|
||||
explicit_bucket_boundaries_advisory=boundaries
|
||||
)
|
||||
|
||||
assert "test_histogram" in otel_provider._histograms
|
||||
|
||||
def test_record_up_down_counter_creates_counter(self, otel_provider):
|
||||
"""Test that record_up_down_counter creates a counter on first call."""
|
||||
assert "test_updown" not in otel_provider._up_down_counters
|
||||
|
||||
otel_provider.record_up_down_counter("test_updown", 1.0)
|
||||
|
||||
assert "test_updown" in otel_provider._up_down_counters
|
||||
assert otel_provider._up_down_counters["test_updown"] is not None
|
||||
|
||||
def test_record_up_down_counter_reuses_counter(self, otel_provider):
|
||||
"""Test that record_up_down_counter reuses existing counter."""
|
||||
otel_provider.record_up_down_counter("test_updown", 5.0)
|
||||
first_counter = otel_provider._up_down_counters["test_updown"]
|
||||
|
||||
otel_provider.record_up_down_counter("test_updown", -3.0)
|
||||
second_counter = otel_provider._up_down_counters["test_updown"]
|
||||
|
||||
assert first_counter is second_counter
|
||||
assert len(otel_provider._up_down_counters) == 1
|
||||
|
||||
def test_multiple_metrics_with_different_names(self, otel_provider):
|
||||
"""Test that multiple metrics with different names are cached separately."""
|
||||
otel_provider.record_count("counter1", 1.0)
|
||||
otel_provider.record_count("counter2", 2.0)
|
||||
otel_provider.record_histogram("histogram1", 10.0)
|
||||
otel_provider.record_up_down_counter("updown1", 5.0)
|
||||
|
||||
assert len(otel_provider._counters) == 2
|
||||
assert len(otel_provider._histograms) == 1
|
||||
assert len(otel_provider._up_down_counters) == 1
|
||||
|
||||
|
||||
class TestOTelTelemetryProviderThreadSafety:
|
||||
"""Tests for thread safety of metric operations."""
|
||||
|
||||
def test_concurrent_counter_creation_same_name(self, otel_provider):
|
||||
"""Test that concurrent calls to record_count with same name are thread-safe."""
|
||||
num_threads = 50
|
||||
counter_name = "concurrent_counter"
|
||||
|
||||
def record_metric():
|
||||
otel_provider.record_count(counter_name, 1.0)
|
||||
|
||||
with concurrent.futures.ThreadPoolExecutor(max_workers=num_threads) as executor:
|
||||
futures = [executor.submit(record_metric) for _ in range(num_threads)]
|
||||
concurrent.futures.wait(futures)
|
||||
|
||||
# Should have exactly one counter created despite concurrent access
|
||||
assert len(otel_provider._counters) == 1
|
||||
assert counter_name in otel_provider._counters
|
||||
|
||||
def test_concurrent_histogram_creation_same_name(self, otel_provider):
|
||||
"""Test that concurrent calls to record_histogram with same name are thread-safe."""
|
||||
num_threads = 50
|
||||
histogram_name = "concurrent_histogram"
|
||||
|
||||
def record_metric():
|
||||
thread_id = threading.current_thread().ident or 0
|
||||
otel_provider.record_histogram(histogram_name, float(thread_id % 100))
|
||||
|
||||
with concurrent.futures.ThreadPoolExecutor(max_workers=num_threads) as executor:
|
||||
futures = [executor.submit(record_metric) for _ in range(num_threads)]
|
||||
concurrent.futures.wait(futures)
|
||||
|
||||
# Should have exactly one histogram created despite concurrent access
|
||||
assert len(otel_provider._histograms) == 1
|
||||
assert histogram_name in otel_provider._histograms
|
||||
|
||||
def test_concurrent_up_down_counter_creation_same_name(self, otel_provider):
|
||||
"""Test that concurrent calls to record_up_down_counter with same name are thread-safe."""
|
||||
num_threads = 50
|
||||
counter_name = "concurrent_updown"
|
||||
|
||||
def record_metric():
|
||||
otel_provider.record_up_down_counter(counter_name, 1.0)
|
||||
|
||||
with concurrent.futures.ThreadPoolExecutor(max_workers=num_threads) as executor:
|
||||
futures = [executor.submit(record_metric) for _ in range(num_threads)]
|
||||
concurrent.futures.wait(futures)
|
||||
|
||||
# Should have exactly one counter created despite concurrent access
|
||||
assert len(otel_provider._up_down_counters) == 1
|
||||
assert counter_name in otel_provider._up_down_counters
|
||||
|
||||
def test_concurrent_mixed_metrics_different_names(self, otel_provider):
|
||||
"""Test concurrent creation of different metric types with different names."""
|
||||
num_threads = 30
|
||||
|
||||
def record_counters(thread_id):
|
||||
otel_provider.record_count(f"counter_{thread_id}", 1.0)
|
||||
|
||||
def record_histograms(thread_id):
|
||||
otel_provider.record_histogram(f"histogram_{thread_id}", float(thread_id))
|
||||
|
||||
def record_up_down_counters(thread_id):
|
||||
otel_provider.record_up_down_counter(f"updown_{thread_id}", float(thread_id))
|
||||
|
||||
with concurrent.futures.ThreadPoolExecutor(max_workers=num_threads * 3) as executor:
|
||||
futures = []
|
||||
for i in range(num_threads):
|
||||
futures.append(executor.submit(record_counters, i))
|
||||
futures.append(executor.submit(record_histograms, i))
|
||||
futures.append(executor.submit(record_up_down_counters, i))
|
||||
|
||||
concurrent.futures.wait(futures)
|
||||
|
||||
# Each thread should have created its own metric
|
||||
assert len(otel_provider._counters) == num_threads
|
||||
assert len(otel_provider._histograms) == num_threads
|
||||
assert len(otel_provider._up_down_counters) == num_threads
|
||||
|
||||
def test_concurrent_access_existing_and_new_metrics(self, otel_provider):
|
||||
"""Test concurrent access mixing existing and new metric creation."""
|
||||
# Pre-create some metrics
|
||||
otel_provider.record_count("existing_counter", 1.0)
|
||||
otel_provider.record_histogram("existing_histogram", 10.0)
|
||||
|
||||
num_threads = 40
|
||||
|
||||
def mixed_operations(thread_id):
|
||||
# Half the threads use existing metrics, half create new ones
|
||||
if thread_id % 2 == 0:
|
||||
otel_provider.record_count("existing_counter", 1.0)
|
||||
otel_provider.record_histogram("existing_histogram", float(thread_id))
|
||||
else:
|
||||
otel_provider.record_count(f"new_counter_{thread_id}", 1.0)
|
||||
otel_provider.record_histogram(f"new_histogram_{thread_id}", float(thread_id))
|
||||
|
||||
with concurrent.futures.ThreadPoolExecutor(max_workers=num_threads) as executor:
|
||||
futures = [executor.submit(mixed_operations, i) for i in range(num_threads)]
|
||||
concurrent.futures.wait(futures)
|
||||
|
||||
# Should have existing metrics plus half of num_threads new ones
|
||||
expected_new_counters = num_threads // 2
|
||||
expected_new_histograms = num_threads // 2
|
||||
|
||||
assert len(otel_provider._counters) == 1 + expected_new_counters
|
||||
assert len(otel_provider._histograms) == 1 + expected_new_histograms
|
||||
|
||||
|
||||
class TestOTelTelemetryProviderTracing:
|
||||
"""Tests for tracing functionality."""
|
||||
|
||||
def test_custom_trace_creates_span(self, otel_provider):
|
||||
"""Test that custom_trace creates a span."""
|
||||
span = otel_provider.custom_trace("test_span")
|
||||
|
||||
assert span is not None
|
||||
assert hasattr(span, "get_span_context")
|
||||
|
||||
def test_custom_trace_with_attributes(self, otel_provider):
|
||||
"""Test that custom_trace works with attributes."""
|
||||
attributes = {"key": "value", "operation": "test"}
|
||||
|
||||
span = otel_provider.custom_trace("test_span", attributes=attributes)
|
||||
|
||||
assert span is not None
|
||||
|
||||
def test_fastapi_middleware(self, otel_provider):
|
||||
"""Test that fastapi_middleware can be called."""
|
||||
def test_fastapi_middleware_can_be_applied(self, otel_provider):
|
||||
"""Test that fastapi_middleware can be called without errors."""
|
||||
mock_app = MagicMock()
|
||||
|
||||
|
||||
# Should not raise an exception
|
||||
otel_provider.fastapi_middleware(mock_app)
|
||||
|
||||
# Verify FastAPIInstrumentor was called (it patches the app)
|
||||
# The actual instrumentation is tested in E2E tests
|
||||
|
||||
class TestOTelTelemetryProviderEdgeCases:
|
||||
"""Tests for edge cases and error conditions."""
|
||||
def test_sqlalchemy_instrumentation_without_engine(self, otel_provider):
|
||||
"""
|
||||
Test that sqlalchemy_instrumentation can be called.
|
||||
|
||||
def test_record_count_with_zero(self, otel_provider):
|
||||
"""Test that record_count works with zero value."""
|
||||
otel_provider.record_count("zero_counter", 0.0)
|
||||
|
||||
assert "zero_counter" in otel_provider._counters
|
||||
Note: Testing with a real engine would require SQLAlchemy setup.
|
||||
The actual instrumentation is tested when used with real databases.
|
||||
"""
|
||||
# Should not raise an exception
|
||||
otel_provider.sqlalchemy_instrumentation()
|
||||
|
||||
def test_record_count_with_large_value(self, otel_provider):
|
||||
"""Test that record_count works with large values."""
|
||||
otel_provider.record_count("large_counter", 1_000_000.0)
|
||||
|
||||
assert "large_counter" in otel_provider._counters
|
||||
|
||||
def test_record_histogram_with_negative_value(self, otel_provider):
|
||||
"""Test that record_histogram works with negative values."""
|
||||
otel_provider.record_histogram("negative_histogram", -10.0)
|
||||
|
||||
assert "negative_histogram" in otel_provider._histograms
|
||||
class TestOTelProviderConfiguration:
|
||||
"""Tests for configuration and environment variable handling."""
|
||||
|
||||
def test_record_up_down_counter_with_negative_value(self, otel_provider):
|
||||
"""Test that record_up_down_counter works with negative values."""
|
||||
otel_provider.record_up_down_counter("negative_updown", -5.0)
|
||||
|
||||
assert "negative_updown" in otel_provider._up_down_counters
|
||||
def test_service_metadata_configuration(self, otel_provider):
|
||||
"""Test that service metadata is properly configured."""
|
||||
assert otel_provider.config.service_name == "test-service"
|
||||
assert otel_provider.config.service_version == "1.0.0"
|
||||
assert otel_provider.config.deployment_environment == "test"
|
||||
|
||||
def test_metric_names_with_special_characters(self, otel_provider):
|
||||
"""Test that metric names with dots and underscores work."""
|
||||
otel_provider.record_count("test.counter_name-special", 1.0)
|
||||
otel_provider.record_histogram("test.histogram_name-special", 10.0)
|
||||
|
||||
assert "test.counter_name-special" in otel_provider._counters
|
||||
assert "test.histogram_name-special" in otel_provider._histograms
|
||||
def test_span_processor_configuration(self, monkeypatch):
|
||||
"""Test different span processor configurations."""
|
||||
monkeypatch.setenv("OTEL_EXPORTER_OTLP_ENDPOINT", "http://localhost:4318")
|
||||
|
||||
def test_empty_attributes_dict(self, otel_provider):
|
||||
"""Test that empty attributes dict is handled correctly."""
|
||||
otel_provider.record_count("test_counter", 1.0, attributes={})
|
||||
|
||||
assert "test_counter" in otel_provider._counters
|
||||
# Test simple processor
|
||||
config_simple = OTelTelemetryConfig(
|
||||
service_name="test",
|
||||
span_processor="simple",
|
||||
)
|
||||
provider_simple = OTelTelemetryProvider(config=config_simple)
|
||||
assert provider_simple.config.span_processor == "simple"
|
||||
|
||||
def test_none_attributes(self, otel_provider):
|
||||
"""Test that None attributes are handled correctly."""
|
||||
otel_provider.record_count("test_counter", 1.0, attributes=None)
|
||||
|
||||
assert "test_counter" in otel_provider._counters
|
||||
# Test batch processor
|
||||
config_batch = OTelTelemetryConfig(
|
||||
service_name="test",
|
||||
span_processor="batch",
|
||||
)
|
||||
provider_batch = OTelTelemetryProvider(config=config_batch)
|
||||
assert provider_batch.config.span_processor == "batch"
|
||||
|
||||
def test_sample_run_config_generation(self):
|
||||
"""Test that sample_run_config generates valid configuration."""
|
||||
sample_config = OTelTelemetryConfig.sample_run_config()
|
||||
|
||||
assert "service_name" in sample_config
|
||||
assert "span_processor" in sample_config
|
||||
assert "${env.OTEL_SERVICE_NAME" in sample_config["service_name"]
|
||||
|
||||
|
||||
class TestOTelProviderStreamingSupport:
|
||||
"""Tests for streaming request telemetry."""
|
||||
|
||||
def test_streaming_metrics_middleware_added(self, otel_provider):
|
||||
"""Verify that streaming metrics middleware is configured."""
|
||||
mock_app = MagicMock()
|
||||
|
||||
# Apply middleware
|
||||
otel_provider.fastapi_middleware(mock_app)
|
||||
|
||||
# Verify middleware was added (BaseHTTPMiddleware.add_middleware called)
|
||||
assert mock_app.add_middleware.called
|
||||
|
||||
print("\n[PASS] Streaming metrics middleware configured")
|
||||
|
||||
def test_provider_captures_streaming_and_regular_requests(self):
|
||||
"""
|
||||
Verify provider is configured to handle both request types.
|
||||
|
||||
Note: Actual streaming behavior tested in E2E tests with real FastAPI app.
|
||||
"""
|
||||
# The implementation creates both regular and streaming metrics
|
||||
# Verification happens in E2E tests with real requests
|
||||
print("\n[PASS] Provider configured for streaming and regular requests")
|
||||
|
|
|
|||
18
uv.lock
generated
18
uv.lock
generated
|
|
@ -1775,6 +1775,7 @@ dependencies = [
|
|||
{ name = "openai" },
|
||||
{ name = "opentelemetry-exporter-otlp-proto-http" },
|
||||
{ name = "opentelemetry-instrumentation-fastapi" },
|
||||
{ name = "opentelemetry-instrumentation-sqlalchemy" },
|
||||
{ name = "opentelemetry-sdk" },
|
||||
{ name = "opentelemetry-semantic-conventions" },
|
||||
{ name = "pillow" },
|
||||
|
|
@ -1903,6 +1904,7 @@ requires-dist = [
|
|||
{ name = "openai", specifier = ">=1.107" },
|
||||
{ name = "opentelemetry-exporter-otlp-proto-http", specifier = ">=1.30.0" },
|
||||
{ name = "opentelemetry-instrumentation-fastapi", specifier = ">=0.57b0" },
|
||||
{ name = "opentelemetry-instrumentation-sqlalchemy", specifier = ">=0.57b0" },
|
||||
{ name = "opentelemetry-sdk", specifier = ">=1.30.0" },
|
||||
{ name = "opentelemetry-semantic-conventions", specifier = ">=0.57b0" },
|
||||
{ name = "pandas", marker = "extra == 'ui'" },
|
||||
|
|
@ -2758,6 +2760,22 @@ wheels = [
|
|||
{ url = "https://files.pythonhosted.org/packages/3b/df/f20fc21c88c7af5311bfefc15fc4e606bab5edb7c193aa8c73c354904c35/opentelemetry_instrumentation_fastapi-0.57b0-py3-none-any.whl", hash = "sha256:61e6402749ffe0bfec582e58155e0d81dd38723cd9bc4562bca1acca80334006", size = 12712, upload-time = "2025-07-29T15:42:03.332Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "opentelemetry-instrumentation-sqlalchemy"
|
||||
version = "0.57b0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "opentelemetry-api" },
|
||||
{ name = "opentelemetry-instrumentation" },
|
||||
{ name = "opentelemetry-semantic-conventions" },
|
||||
{ name = "packaging" },
|
||||
{ name = "wrapt" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/9c/18/ee1460dcb044b25aaedd6cfd063304d84ae641dddb8fb9287959f7644100/opentelemetry_instrumentation_sqlalchemy-0.57b0.tar.gz", hash = "sha256:95667326b7cc22bb4bc9941f98ca22dd177679f9a4d277646cc21074c0d732ff", size = 14962, upload-time = "2025-07-29T15:43:12.426Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/94/18/af35650eb029d771b8d281bea770727f1e2f662c422c5ab1a0c2b7afc152/opentelemetry_instrumentation_sqlalchemy-0.57b0-py3-none-any.whl", hash = "sha256:8a1a815331cb04fc95aa7c50e9c681cdccfb12e1fa4522f079fe4b24753ae106", size = 14202, upload-time = "2025-07-29T15:42:25.828Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "opentelemetry-proto"
|
||||
version = "1.36.0"
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue