feat(telemetry:major): End to End Testing, Metric Capture, SQL Alchemy Injection

This commit is contained in:
Emilio Garcia 2025-10-03 12:17:41 -04:00
parent e815738936
commit 7e3cf1fb20
26 changed files with 2075 additions and 1006 deletions

View file

@ -1,15 +1,22 @@
from aiohttp import hdrs
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from typing import Any
from aiohttp import hdrs
from llama_stack.apis.datatypes import Api
from llama_stack.core.external import ExternalApiSpec
from llama_stack.core.server.routes import find_matching_route, initialize_route_impls
from llama_stack.log import get_logger
from llama_stack.providers.utils.telemetry.tracing import end_trace, start_trace
logger = get_logger(name=__name__, category="telemetry::meta_reference")
class TracingMiddleware:
def __init__(
self,

View file

@ -10,7 +10,6 @@ import threading
from typing import Any, cast
from fastapi import FastAPI
from opentelemetry import metrics, trace
from opentelemetry.exporter.otlp.proto.http.metric_exporter import OTLPMetricExporter
from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
@ -23,11 +22,6 @@ from opentelemetry.semconv.attributes import service_attributes
from opentelemetry.trace.propagation.tracecontext import TraceContextTextMapPropagator
from opentelemetry.util.types import Attributes
from llama_stack.core.external import ExternalApiSpec
from llama_stack.core.server.tracing import TelemetryProvider
from llama_stack.providers.inline.telemetry.meta_reference.middleware import TracingMiddleware
from llama_stack.apis.telemetry import (
Event,
MetricEvent,
@ -47,10 +41,13 @@ from llama_stack.apis.telemetry import (
UnstructuredLogEvent,
)
from llama_stack.core.datatypes import Api
from llama_stack.core.external import ExternalApiSpec
from llama_stack.core.server.tracing import TelemetryProvider
from llama_stack.log import get_logger
from llama_stack.providers.inline.telemetry.meta_reference.console_span_processor import (
ConsoleSpanProcessor,
)
from llama_stack.providers.inline.telemetry.meta_reference.middleware import TracingMiddleware
from llama_stack.providers.inline.telemetry.meta_reference.sqlite_span_processor import (
SQLiteSpanProcessor,
)
@ -381,7 +378,7 @@ class TelemetryAdapter(TelemetryDatasetMixin, Telemetry, TelemetryProvider):
max_depth=max_depth,
)
)
def fastapi_middleware(
self,
app: FastAPI,

View file

@ -0,0 +1,23 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from .config import OTelTelemetryConfig
__all__ = ["OTelTelemetryConfig"]
async def get_provider_impl(config: OTelTelemetryConfig, deps):
    """Instantiate the OTel telemetry provider for the Llama Stack registry.

    The coroutine signature is required by the registry contract; all real
    setup runs synchronously inside the provider's Pydantic
    ``model_post_init``, so nothing is awaited here.
    """
    from .otel import OTelTelemetryProvider

    provider = OTelTelemetryProvider(config=config)
    return provider

View file

@ -1,8 +1,13 @@
from typing import Literal
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from typing import Any, Literal
from pydantic import BaseModel, Field
type BatchSpanProcessor = Literal["batch"]
type SimpleSpanProcessor = Literal["simple"]
@ -11,22 +16,35 @@ class OTelTelemetryConfig(BaseModel):
"""
The configuration for the OpenTelemetry telemetry provider.
Most configuration is set using environment variables.
See https://opentelemetry.io/docs/specs/otel/configuration/sdk-environment-variables/ for more information.
See https://opentelemetry.io/docs/specs/otel/configuration/sdk-environment-variables/ for more information.
"""
service_name: str = Field(
description="""The name of the service to be monitored.
description="""The name of the service to be monitored.
Is overridden by the OTEL_SERVICE_NAME or OTEL_RESOURCE_ATTRIBUTES environment variables.""",
)
service_version: str | None = Field(
description="""The version of the service to be monitored.
Is overriden by the OTEL_RESOURCE_ATTRIBUTES environment variable."""
default=None,
description="""The version of the service to be monitored.
Is overridden by the OTEL_RESOURCE_ATTRIBUTES environment variable.""",
)
deployment_environment: str | None = Field(
description="""The name of the environment of the service to be monitored.
Is overriden by the OTEL_RESOURCE_ATTRIBUTES environment variable."""
default=None,
description="""The name of the environment of the service to be monitored.
Is overridden by the OTEL_RESOURCE_ATTRIBUTES environment variable.""",
)
span_processor: BatchSpanProcessor | SimpleSpanProcessor | None = Field(
description="""The span processor to use.
description="""The span processor to use.
Is overridden by the OTEL_SPAN_PROCESSOR environment variable.""",
default="batch"
default="batch",
)
@classmethod
def sample_run_config(cls, __distro_dir__: str = "") -> dict[str, Any]:
"""Sample configuration for use in distributions."""
return {
"service_name": "${env.OTEL_SERVICE_NAME:=llama-stack}",
"service_version": "${env.OTEL_SERVICE_VERSION:=}",
"deployment_environment": "${env.OTEL_DEPLOYMENT_ENVIRONMENT:=}",
"span_processor": "${env.OTEL_SPAN_PROCESSOR:=batch}",
}

View file

@ -1,141 +1,301 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import os
import threading
import time
from opentelemetry import trace, metrics
from opentelemetry.context.context import Context
from opentelemetry.sdk.resources import Attributes, Resource
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import BatchSpanProcessor, SimpleSpanProcessor
from fastapi import FastAPI
from opentelemetry import metrics, trace
from opentelemetry.exporter.otlp.proto.http.metric_exporter import OTLPMetricExporter
from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
from opentelemetry.sdk.metrics import MeterProvider
from opentelemetry.metrics import Counter, UpDownCounter, Histogram, ObservableGauge
from opentelemetry.instrumentation.fastapi import FastAPIInstrumentor
from opentelemetry.trace import Span, SpanKind, _Links
from typing import Sequence
from pydantic import PrivateAttr
from opentelemetry.instrumentation.sqlalchemy import SQLAlchemyInstrumentor
from opentelemetry.metrics import Counter, Histogram
from opentelemetry.sdk.metrics import MeterProvider
from opentelemetry.sdk.metrics.export import PeriodicExportingMetricReader
from opentelemetry.sdk.resources import Resource
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import (
BatchSpanProcessor,
SimpleSpanProcessor,
SpanExporter,
SpanExportResult,
)
from sqlalchemy import Engine
from starlette.types import ASGIApp, Message, Receive, Scope, Send
from llama_stack.core.telemetry.tracing import TelemetryProvider
from llama_stack.core.telemetry.telemetry import TelemetryProvider
from llama_stack.log import get_logger
from .config import OTelTelemetryConfig
from fastapi import FastAPI
logger = get_logger(name=__name__, category="telemetry::otel")
class StreamingMetricsMiddleware:
    """
    Pure ASGI middleware that annotates spans for streaming responses.

    Implemented as raw ASGI (rather than BaseHTTPMiddleware) per Starlette
    guidance: the low-level form is cheaper and avoids the response-buffering
    pitfalls of the higher-level base class. It marks server-sent-event
    responses on the active span and stamps their total wall-clock duration
    once the response body completes.
    """

    def __init__(self, app: ASGIApp):
        # Downstream ASGI application this middleware wraps.
        self.app = app

    async def __call__(self, scope: Scope, receive: Receive, send: Send):
        # Pass non-HTTP traffic (lifespan, websocket) straight through.
        if scope["type"] != "http":
            await self.app(scope, receive, send)
            return

        logger.debug(f"StreamingMetricsMiddleware called for {scope.get('method')} {scope.get('path')}")
        started_at = time.time()
        saw_stream = False  # flips once an SSE content-type header is seen

        async def send_wrapper(message: Message):
            nonlocal saw_stream
            if message["type"] == "http.response.start":
                # Sniff response headers for a server-sent-events content type.
                for header_name, header_value in message.get("headers", []):
                    if header_name == b"content-type" and b"text/event-stream" in header_value:
                        saw_stream = True
                        span = trace.get_current_span()
                        if span and span.is_recording():
                            span.set_attribute("http.response.is_streaming", True)
                        break
            elif message["type"] == "http.response.body" and not message.get("more_body", False):
                # Final body chunk: record the full stream duration on the span.
                if saw_stream:
                    span = trace.get_current_span()
                    if span and span.is_recording():
                        elapsed_ms = (time.time() - started_at) * 1000
                        span.set_attribute("http.streaming.total_duration_ms", elapsed_ms)
            await send(message)

        await self.app(scope, receive, send_wrapper)
class MetricsSpanExporter(SpanExporter):
    """Records HTTP metrics from span data."""

    def __init__(
        self,
        request_duration: Histogram,
        streaming_duration: Histogram,
        streaming_requests: Counter,
        request_count: Counter,
    ):
        # Pre-created OTel instruments; this exporter only records into them
        # and never creates meters or instruments itself.
        self.request_duration = request_duration
        self.streaming_duration = streaming_duration
        self.streaming_requests = streaming_requests
        self.request_count = request_count

    def export(self, spans):
        """Derive HTTP request/streaming metrics from a batch of ended spans.

        Spans without an ``http.method`` attribute are skipped, so internal
        (non-HTTP) spans do not pollute the HTTP metrics. Always reports
        success so span delivery to other exporters is unaffected.
        """
        logger.debug(f"MetricsSpanExporter.export called with {len(spans)} spans")
        for span in spans:
            # Only HTTP server spans carry http.method; ignore everything else.
            if not span.attributes or not span.attributes.get("http.method"):
                continue

            logger.debug(f"Processing span: {span.name}")
            # A span missing either timestamp cannot yield a duration.
            if span.end_time is None or span.start_time is None:
                continue

            # Calculate time-to-first-byte duration
            # (OTel span timestamps are in nanoseconds).
            duration_ns = span.end_time - span.start_time
            duration_ms = duration_ns / 1_000_000

            # Check if this was a streaming response
            # (attribute set upstream on SSE responses).
            is_streaming = span.attributes.get("http.response.is_streaming", False)

            attributes = {
                "http.method": str(span.attributes.get("http.method", "UNKNOWN")),
                "http.route": str(span.attributes.get("http.route", span.attributes.get("http.target", "/"))),
                "http.status_code": str(span.attributes.get("http.status_code", 0)),
            }

            # set distributed trace attributes
            if span.attributes.get("trace_id"):
                attributes["trace_id"] = str(span.attributes.get("trace_id"))
            if span.attributes.get("span_id"):
                attributes["span_id"] = str(span.attributes.get("span_id"))

            # Record request count and duration
            logger.debug(f"Recording metrics: duration={duration_ms}ms, attributes={attributes}")
            self.request_count.add(1, attributes)
            self.request_duration.record(duration_ms, attributes)
            logger.debug("Metrics recorded successfully")

            # For streaming, record separately
            if is_streaming:
                logger.debug(f"MetricsSpanExporter: Recording streaming metrics for {span.name}")
                self.streaming_requests.add(1, attributes)

                # If full streaming duration is available
                stream_total_duration = span.attributes.get("http.streaming.total_duration_ms")
                if stream_total_duration and isinstance(stream_total_duration, int | float):
                    logger.debug(f"MetricsSpanExporter: Recording streaming duration: {stream_total_duration}ms")
                    self.streaming_duration.record(float(stream_total_duration), attributes)
                else:
                    logger.warning(
                        "MetricsSpanExporter: Streaming span has no http.streaming.total_duration_ms attribute"
                    )

        return SpanExportResult.SUCCESS

    def shutdown(self):
        # Nothing to release: metric flushing is owned by the metric reader,
        # not by this exporter.
        pass
# NOTE: DO NOT ALLOW LLM TO MODIFY THIS WITHOUT TESTING AND SUPERVISION: it frequently breaks otel integrations
class OTelTelemetryProvider(TelemetryProvider):
"""
A simple Open Telemetry native telemetry provider.
"""
config: OTelTelemetryConfig
_counters: dict[str, Counter] = PrivateAttr(default_factory=dict)
_up_down_counters: dict[str, UpDownCounter] = PrivateAttr(default_factory=dict)
_histograms: dict[str, Histogram] = PrivateAttr(default_factory=dict)
_gauges: dict[str, ObservableGauge] = PrivateAttr(default_factory=dict)
config: OTelTelemetryConfig
def model_post_init(self, __context):
"""Initialize provider after Pydantic validation."""
self._lock = threading.Lock()
attributes: Attributes = {
key: value
for key, value in {
"service.name": self.config.service_name,
"service.version": self.config.service_version,
"deployment.environment": self.config.deployment_environment,
}.items()
if value is not None
}
resource = Resource.create(attributes)
# Configure the tracer provider
tracer_provider = TracerProvider(resource=resource)
trace.set_tracer_provider(tracer_provider)
otlp_span_exporter = OTLPSpanExporter()
# Configure the span processor
# Enable batching of spans to reduce the number of requests to the collector
if self.config.span_processor == "batch":
tracer_provider.add_span_processor(BatchSpanProcessor(otlp_span_exporter))
elif self.config.span_processor == "simple":
tracer_provider.add_span_processor(SimpleSpanProcessor(otlp_span_exporter))
meter_provider = MeterProvider(resource=resource)
metrics.set_meter_provider(meter_provider)
# Do not fail the application, but warn the user if the endpoints are not set properly.
if not os.environ.get("OTEL_EXPORTER_OTLP_ENDPOINT"):
if not os.environ.get("OTEL_EXPORTER_OTLP_TRACES_ENDPOINT"):
logger.warning("OTEL_EXPORTER_OTLP_ENDPOINT or OTEL_EXPORTER_OTLP_TRACES_ENDPOINT is not set. Traces will not be exported.")
logger.warning(
"OTEL_EXPORTER_OTLP_ENDPOINT or OTEL_EXPORTER_OTLP_TRACES_ENDPOINT is not set. Traces will not be exported."
)
if not os.environ.get("OTEL_EXPORTER_OTLP_METRICS_ENDPOINT"):
logger.warning("OTEL_EXPORTER_OTLP_ENDPOINT or OTEL_EXPORTER_OTLP_METRICS_ENDPOINT is not set. Metrics will not be exported.")
logger.warning(
"OTEL_EXPORTER_OTLP_ENDPOINT or OTEL_EXPORTER_OTLP_METRICS_ENDPOINT is not set. Metrics will not be exported."
)
# Respect OTEL design standards where environment variables get highest precedence
service_name = os.environ.get("OTEL_SERVICE_NAME")
if not service_name:
service_name = self.config.service_name
# Create resource with service name
resource = Resource.create({"service.name": service_name})
# Configure the tracer provider (always, since llama stack run spawns subprocess without opentelemetry-instrument)
tracer_provider = TracerProvider(resource=resource)
trace.set_tracer_provider(tracer_provider)
# Configure OTLP span exporter
otlp_span_exporter = OTLPSpanExporter()
# Add span processor (simple for immediate export, batch for performance)
span_processor_type = os.environ.get("OTEL_SPAN_PROCESSOR", "batch")
if span_processor_type == "batch":
tracer_provider.add_span_processor(BatchSpanProcessor(otlp_span_exporter))
else:
tracer_provider.add_span_processor(SimpleSpanProcessor(otlp_span_exporter))
# Configure meter provider with OTLP exporter for metrics
metric_reader = PeriodicExportingMetricReader(OTLPMetricExporter())
meter_provider = MeterProvider(resource=resource, metric_readers=[metric_reader])
metrics.set_meter_provider(meter_provider)
logger.info(
f"Initialized OpenTelemetry provider with service.name={service_name}, span_processor={span_processor_type}"
)
def fastapi_middleware(self, app: FastAPI):
FastAPIInstrumentor.instrument_app(app)
def custom_trace(self,
name: str,
context: Context | None = None,
kind: SpanKind = SpanKind.INTERNAL,
attributes: Attributes = {},
links: _Links = None,
start_time: int | None = None,
record_exception: bool = True,
set_status_on_exception: bool = True) -> Span:
"""
Creates a custom tracing span using the Open Telemetry SDK.
Instrument FastAPI with OTel for automatic tracing and metrics.
Captures telemetry for both regular and streaming HTTP requests:
- Distributed traces (via FastAPIInstrumentor)
- HTTP request metrics (count, duration, status)
- Streaming-specific metrics (time-to-first-byte, total stream duration)
"""
tracer = trace.get_tracer(__name__)
return tracer.start_span(name, context, kind, attributes, links, start_time, record_exception, set_status_on_exception)
# Create meter for HTTP metrics
meter = metrics.get_meter("llama_stack.http.server")
def record_count(self, name: str, amount: int|float, context: Context | None = None, attributes: dict[str, str] | None = None, unit: str = "", description: str = ""):
"""
Increments a counter metric using the Open Telemetry SDK that are indexed by the meter name.
This function is designed to be compatible with other popular telemetry providers design patterns,
like Datadog and New Relic.
"""
meter = metrics.get_meter(__name__)
# HTTP Metrics following OTel semantic conventions
# https://opentelemetry.io/docs/specs/semconv/http/http-metrics/
request_duration = meter.create_histogram(
"http.server.request.duration",
unit="ms",
description="Duration of HTTP requests (time-to-first-byte for streaming)",
)
with self._lock:
if name not in self._counters:
self._counters[name] = meter.create_counter(name, unit=unit, description=description)
counter = self._counters[name]
streaming_duration = meter.create_histogram(
"http.server.streaming.duration",
unit="ms",
description="Total duration of streaming responses (from start to stream completion)",
)
counter.add(amount, attributes=attributes, context=context)
request_count = meter.create_counter(
"http.server.request.count", unit="requests", description="Total number of HTTP requests"
)
streaming_requests = meter.create_counter(
"http.server.streaming.count", unit="requests", description="Number of streaming requests"
)
def record_histogram(self, name: str, value: int|float, context: Context | None = None, attributes: dict[str, str] | None = None, unit: str = "", description: str = "", explicit_bucket_boundaries_advisory: Sequence[float] | None = None):
"""
Records a histogram metric using the Open Telemetry SDK that are indexed by the meter name.
This function is designed to be compatible with other popular telemetry providers design patterns,
like Datadog and New Relic.
"""
meter = metrics.get_meter(__name__)
# Hook to enrich spans and record initial metrics
def server_request_hook(span, scope):
"""
Called by FastAPIInstrumentor for each request.
with self._lock:
if name not in self._histograms:
self._histograms[name] = meter.create_histogram(name, unit=unit, description=description, explicit_bucket_boundaries_advisory=explicit_bucket_boundaries_advisory)
histogram = self._histograms[name]
This only reads from scope (ASGI dict), never touches request body.
Safe to use without interfering with body parsing.
"""
method = scope.get("method", "UNKNOWN")
path = scope.get("path", "/")
histogram.record(value, attributes=attributes, context=context)
# Add custom attributes
span.set_attribute("service.component", "llama-stack-api")
span.set_attribute("http.request", path)
span.set_attribute("http.method", method)
attributes = {
"http.request": path,
"http.method": method,
"trace_id": span.attributes.get("trace_id", ""),
"span_id": span.attributes.get("span_id", ""),
}
def record_up_down_counter(self, name: str, value: int|float, context: Context | None = None, attributes: dict[str, str] | None = None, unit: str = "", description: str = ""):
"""
Records an up/down counter metric using the Open Telemetry SDK that are indexed by the meter name.
This function is designed to be compatible with other popular telemetry providers design patterns,
like Datadog and New Relic.
"""
meter = metrics.get_meter(__name__)
request_count.add(1, attributes)
logger.debug(f"server_request_hook: recorded request_count for {method} {path}, attributes={attributes}")
with self._lock:
if name not in self._up_down_counters:
self._up_down_counters[name] = meter.create_up_down_counter(name, unit=unit, description=description)
up_down_counter = self._up_down_counters[name]
# NOTE: This is called BEFORE routes are added to the app
# FastAPIInstrumentor.instrument_app() patches build_middleware_stack(),
# which will be called on first request (after routes are added), so hooks should work.
logger.debug("Instrumenting FastAPI (routes will be added later)")
FastAPIInstrumentor.instrument_app(
app,
server_request_hook=server_request_hook,
)
logger.debug(f"FastAPI instrumented: {getattr(app, '_is_instrumented_by_opentelemetry', False)}")
up_down_counter.add(value, attributes=attributes, context=context)
# Add pure ASGI middleware for streaming metrics (always add, regardless of instrumentation)
app.add_middleware(StreamingMetricsMiddleware)
# Add metrics span processor
provider = trace.get_tracer_provider()
logger.debug(f"TracerProvider: {provider}")
if isinstance(provider, TracerProvider):
metrics_exporter = MetricsSpanExporter(
request_duration=request_duration,
streaming_duration=streaming_duration,
streaming_requests=streaming_requests,
request_count=request_count,
)
provider.add_span_processor(BatchSpanProcessor(metrics_exporter))
logger.debug("Added MetricsSpanExporter as BatchSpanProcessor")
else:
logger.warning(
f"TracerProvider is not TracerProvider instance, it's {type(provider)}. MetricsSpanExporter not added."
)

View file

@ -26,4 +26,16 @@ def available_providers() -> list[ProviderSpec]:
config_class="llama_stack.providers.inline.telemetry.meta_reference.config.TelemetryConfig",
description="Meta's reference implementation of telemetry and observability using OpenTelemetry.",
),
InlineProviderSpec(
api=Api.telemetry,
provider_type="inline::otel",
pip_packages=[
"opentelemetry-sdk",
"opentelemetry-exporter-otlp-proto-http",
"opentelemetry-instrumentation-fastapi",
],
module="llama_stack.providers.inline.telemetry.otel",
config_class="llama_stack.providers.inline.telemetry.otel.config.OTelTelemetryConfig",
description="Native OpenTelemetry provider with full access to OTel Tracer and Meter APIs for advanced instrumentation.",
),
]