feat(telemetry:major): End to End Testing, Metric Capture, SQL Alchemy Injection

This commit is contained in:
Emilio Garcia 2025-10-03 12:17:41 -04:00
parent e815738936
commit 7e3cf1fb20
26 changed files with 2075 additions and 1006 deletions

View file

@ -1,15 +1,22 @@
from aiohttp import hdrs
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from typing import Any
from aiohttp import hdrs
from llama_stack.apis.datatypes import Api
from llama_stack.core.external import ExternalApiSpec
from llama_stack.core.server.routes import find_matching_route, initialize_route_impls
from llama_stack.log import get_logger
from llama_stack.providers.utils.telemetry.tracing import end_trace, start_trace
logger = get_logger(name=__name__, category="telemetry::meta_reference")
class TracingMiddleware:
def __init__(
self,

View file

@ -10,7 +10,6 @@ import threading
from typing import Any, cast
from fastapi import FastAPI
from opentelemetry import metrics, trace
from opentelemetry.exporter.otlp.proto.http.metric_exporter import OTLPMetricExporter
from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
@ -23,11 +22,6 @@ from opentelemetry.semconv.attributes import service_attributes
from opentelemetry.trace.propagation.tracecontext import TraceContextTextMapPropagator
from opentelemetry.util.types import Attributes
from llama_stack.core.external import ExternalApiSpec
from llama_stack.core.server.tracing import TelemetryProvider
from llama_stack.providers.inline.telemetry.meta_reference.middleware import TracingMiddleware
from llama_stack.apis.telemetry import (
Event,
MetricEvent,
@ -47,10 +41,13 @@ from llama_stack.apis.telemetry import (
UnstructuredLogEvent,
)
from llama_stack.core.datatypes import Api
from llama_stack.core.external import ExternalApiSpec
from llama_stack.core.server.tracing import TelemetryProvider
from llama_stack.log import get_logger
from llama_stack.providers.inline.telemetry.meta_reference.console_span_processor import (
ConsoleSpanProcessor,
)
from llama_stack.providers.inline.telemetry.meta_reference.middleware import TracingMiddleware
from llama_stack.providers.inline.telemetry.meta_reference.sqlite_span_processor import (
SQLiteSpanProcessor,
)
@ -381,7 +378,7 @@ class TelemetryAdapter(TelemetryDatasetMixin, Telemetry, TelemetryProvider):
max_depth=max_depth,
)
)
def fastapi_middleware(
self,
app: FastAPI,

View file

@ -0,0 +1,23 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from .config import OTelTelemetryConfig
__all__ = ["OTelTelemetryConfig"]
async def get_provider_impl(config: OTelTelemetryConfig, deps):
    """Instantiate the OTel telemetry provider for the Llama Stack registry.

    The coroutine signature is required by the registry contract; all real
    setup runs synchronously inside the provider's Pydantic
    ``model_post_init``, so nothing is awaited here.
    """
    from .otel import OTelTelemetryProvider

    provider = OTelTelemetryProvider(config=config)
    return provider

View file

@ -1,8 +1,13 @@
from typing import Literal
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from typing import Any, Literal
from pydantic import BaseModel, Field
type BatchSpanProcessor = Literal["batch"]
type SimpleSpanProcessor = Literal["simple"]
@ -11,22 +16,35 @@ class OTelTelemetryConfig(BaseModel):
"""
The configuration for the OpenTelemetry telemetry provider.
Most configuration is set using environment variables.
See https://opentelemetry.io/docs/specs/otel/configuration/sdk-environment-variables/ for more information.
See https://opentelemetry.io/docs/specs/otel/configuration/sdk-environment-variables/ for more information.
"""
service_name: str = Field(
description="""The name of the service to be monitored.
description="""The name of the service to be monitored.
Is overridden by the OTEL_SERVICE_NAME or OTEL_RESOURCE_ATTRIBUTES environment variables.""",
)
service_version: str | None = Field(
description="""The version of the service to be monitored.
Is overriden by the OTEL_RESOURCE_ATTRIBUTES environment variable."""
default=None,
description="""The version of the service to be monitored.
Is overridden by the OTEL_RESOURCE_ATTRIBUTES environment variable.""",
)
deployment_environment: str | None = Field(
description="""The name of the environment of the service to be monitored.
Is overriden by the OTEL_RESOURCE_ATTRIBUTES environment variable."""
default=None,
description="""The name of the environment of the service to be monitored.
Is overridden by the OTEL_RESOURCE_ATTRIBUTES environment variable.""",
)
span_processor: BatchSpanProcessor | SimpleSpanProcessor | None = Field(
description="""The span processor to use.
description="""The span processor to use.
Is overridden by the OTEL_SPAN_PROCESSOR environment variable.""",
default="batch"
default="batch",
)
@classmethod
def sample_run_config(cls, __distro_dir__: str = "") -> dict[str, Any]:
"""Sample configuration for use in distributions."""
return {
"service_name": "${env.OTEL_SERVICE_NAME:=llama-stack}",
"service_version": "${env.OTEL_SERVICE_VERSION:=}",
"deployment_environment": "${env.OTEL_DEPLOYMENT_ENVIRONMENT:=}",
"span_processor": "${env.OTEL_SPAN_PROCESSOR:=batch}",
}

View file

@ -1,141 +1,301 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import os
import threading
import time
from opentelemetry import trace, metrics
from opentelemetry.context.context import Context
from opentelemetry.sdk.resources import Attributes, Resource
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import BatchSpanProcessor, SimpleSpanProcessor
from fastapi import FastAPI
from opentelemetry import metrics, trace
from opentelemetry.exporter.otlp.proto.http.metric_exporter import OTLPMetricExporter
from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
from opentelemetry.sdk.metrics import MeterProvider
from opentelemetry.metrics import Counter, UpDownCounter, Histogram, ObservableGauge
from opentelemetry.instrumentation.fastapi import FastAPIInstrumentor
from opentelemetry.trace import Span, SpanKind, _Links
from typing import Sequence
from pydantic import PrivateAttr
from opentelemetry.instrumentation.sqlalchemy import SQLAlchemyInstrumentor
from opentelemetry.metrics import Counter, Histogram
from opentelemetry.sdk.metrics import MeterProvider
from opentelemetry.sdk.metrics.export import PeriodicExportingMetricReader
from opentelemetry.sdk.resources import Resource
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import (
BatchSpanProcessor,
SimpleSpanProcessor,
SpanExporter,
SpanExportResult,
)
from sqlalchemy import Engine
from starlette.types import ASGIApp, Message, Receive, Scope, Send
from llama_stack.core.telemetry.tracing import TelemetryProvider
from llama_stack.core.telemetry.telemetry import TelemetryProvider
from llama_stack.log import get_logger
from .config import OTelTelemetryConfig
from fastapi import FastAPI
logger = get_logger(name=__name__, category="telemetry::otel")
class StreamingMetricsMiddleware:
    """
    Pure ASGI middleware that annotates spans for streaming responses.

    Implemented as raw ASGI (rather than BaseHTTPMiddleware) per Starlette
    guidance: the low-level form is cheaper and avoids the response-buffering
    pitfalls of the higher-level base class. It marks server-sent-event
    responses on the active span and stamps their total wall-clock duration
    once the response body completes.
    """

    def __init__(self, app: ASGIApp):
        # Downstream ASGI application this middleware wraps.
        self.app = app

    async def __call__(self, scope: Scope, receive: Receive, send: Send):
        # Pass non-HTTP traffic (lifespan, websocket) straight through.
        if scope["type"] != "http":
            await self.app(scope, receive, send)
            return

        logger.debug(f"StreamingMetricsMiddleware called for {scope.get('method')} {scope.get('path')}")
        started_at = time.time()
        saw_stream = False  # flips once an SSE content-type header is seen

        async def send_wrapper(message: Message):
            nonlocal saw_stream
            if message["type"] == "http.response.start":
                # Sniff response headers for a server-sent-events content type.
                for header_name, header_value in message.get("headers", []):
                    if header_name == b"content-type" and b"text/event-stream" in header_value:
                        saw_stream = True
                        span = trace.get_current_span()
                        if span and span.is_recording():
                            span.set_attribute("http.response.is_streaming", True)
                        break
            elif message["type"] == "http.response.body" and not message.get("more_body", False):
                # Final body chunk: record the full stream duration on the span.
                if saw_stream:
                    span = trace.get_current_span()
                    if span and span.is_recording():
                        elapsed_ms = (time.time() - started_at) * 1000
                        span.set_attribute("http.streaming.total_duration_ms", elapsed_ms)
            await send(message)

        await self.app(scope, receive, send_wrapper)
class MetricsSpanExporter(SpanExporter):
    """Records HTTP metrics from span data."""

    def __init__(
        self,
        request_duration: Histogram,
        streaming_duration: Histogram,
        streaming_requests: Counter,
        request_count: Counter,
    ):
        # Pre-created OTel instruments; this exporter only records into them
        # and never creates meters or instruments itself.
        self.request_duration = request_duration
        self.streaming_duration = streaming_duration
        self.streaming_requests = streaming_requests
        self.request_count = request_count

    def export(self, spans):
        """Derive HTTP request/streaming metrics from a batch of ended spans.

        Spans without an ``http.method`` attribute are skipped, so internal
        (non-HTTP) spans do not pollute the HTTP metrics. Always reports
        success so span delivery to other exporters is unaffected.
        """
        logger.debug(f"MetricsSpanExporter.export called with {len(spans)} spans")
        for span in spans:
            # Only HTTP server spans carry http.method; ignore everything else.
            if not span.attributes or not span.attributes.get("http.method"):
                continue

            logger.debug(f"Processing span: {span.name}")
            # A span missing either timestamp cannot yield a duration.
            if span.end_time is None or span.start_time is None:
                continue

            # Calculate time-to-first-byte duration
            # (OTel span timestamps are in nanoseconds).
            duration_ns = span.end_time - span.start_time
            duration_ms = duration_ns / 1_000_000

            # Check if this was a streaming response
            # (attribute set upstream on SSE responses).
            is_streaming = span.attributes.get("http.response.is_streaming", False)

            attributes = {
                "http.method": str(span.attributes.get("http.method", "UNKNOWN")),
                "http.route": str(span.attributes.get("http.route", span.attributes.get("http.target", "/"))),
                "http.status_code": str(span.attributes.get("http.status_code", 0)),
            }

            # set distributed trace attributes
            if span.attributes.get("trace_id"):
                attributes["trace_id"] = str(span.attributes.get("trace_id"))
            if span.attributes.get("span_id"):
                attributes["span_id"] = str(span.attributes.get("span_id"))

            # Record request count and duration
            logger.debug(f"Recording metrics: duration={duration_ms}ms, attributes={attributes}")
            self.request_count.add(1, attributes)
            self.request_duration.record(duration_ms, attributes)
            logger.debug("Metrics recorded successfully")

            # For streaming, record separately
            if is_streaming:
                logger.debug(f"MetricsSpanExporter: Recording streaming metrics for {span.name}")
                self.streaming_requests.add(1, attributes)

                # If full streaming duration is available
                stream_total_duration = span.attributes.get("http.streaming.total_duration_ms")
                if stream_total_duration and isinstance(stream_total_duration, int | float):
                    logger.debug(f"MetricsSpanExporter: Recording streaming duration: {stream_total_duration}ms")
                    self.streaming_duration.record(float(stream_total_duration), attributes)
                else:
                    logger.warning(
                        "MetricsSpanExporter: Streaming span has no http.streaming.total_duration_ms attribute"
                    )

        return SpanExportResult.SUCCESS

    def shutdown(self):
        # Nothing to release: metric flushing is owned by the metric reader,
        # not by this exporter.
        pass
# NOTE: DO NOT ALLOW LLM TO MODIFY THIS WITHOUT TESTING AND SUPERVISION: it frequently breaks otel integrations
class OTelTelemetryProvider(TelemetryProvider):
"""
A simple Open Telemetry native telemetry provider.
"""
config: OTelTelemetryConfig
_counters: dict[str, Counter] = PrivateAttr(default_factory=dict)
_up_down_counters: dict[str, UpDownCounter] = PrivateAttr(default_factory=dict)
_histograms: dict[str, Histogram] = PrivateAttr(default_factory=dict)
_gauges: dict[str, ObservableGauge] = PrivateAttr(default_factory=dict)
config: OTelTelemetryConfig
def model_post_init(self, __context):
"""Initialize provider after Pydantic validation."""
self._lock = threading.Lock()
attributes: Attributes = {
key: value
for key, value in {
"service.name": self.config.service_name,
"service.version": self.config.service_version,
"deployment.environment": self.config.deployment_environment,
}.items()
if value is not None
}
resource = Resource.create(attributes)
# Configure the tracer provider
tracer_provider = TracerProvider(resource=resource)
trace.set_tracer_provider(tracer_provider)
otlp_span_exporter = OTLPSpanExporter()
# Configure the span processor
# Enable batching of spans to reduce the number of requests to the collector
if self.config.span_processor == "batch":
tracer_provider.add_span_processor(BatchSpanProcessor(otlp_span_exporter))
elif self.config.span_processor == "simple":
tracer_provider.add_span_processor(SimpleSpanProcessor(otlp_span_exporter))
meter_provider = MeterProvider(resource=resource)
metrics.set_meter_provider(meter_provider)
# Do not fail the application, but warn the user if the endpoints are not set properly.
if not os.environ.get("OTEL_EXPORTER_OTLP_ENDPOINT"):
if not os.environ.get("OTEL_EXPORTER_OTLP_TRACES_ENDPOINT"):
logger.warning("OTEL_EXPORTER_OTLP_ENDPOINT or OTEL_EXPORTER_OTLP_TRACES_ENDPOINT is not set. Traces will not be exported.")
logger.warning(
"OTEL_EXPORTER_OTLP_ENDPOINT or OTEL_EXPORTER_OTLP_TRACES_ENDPOINT is not set. Traces will not be exported."
)
if not os.environ.get("OTEL_EXPORTER_OTLP_METRICS_ENDPOINT"):
logger.warning("OTEL_EXPORTER_OTLP_ENDPOINT or OTEL_EXPORTER_OTLP_METRICS_ENDPOINT is not set. Metrics will not be exported.")
logger.warning(
"OTEL_EXPORTER_OTLP_ENDPOINT or OTEL_EXPORTER_OTLP_METRICS_ENDPOINT is not set. Metrics will not be exported."
)
# Respect OTEL design standards where environment variables get highest precedence
service_name = os.environ.get("OTEL_SERVICE_NAME")
if not service_name:
service_name = self.config.service_name
# Create resource with service name
resource = Resource.create({"service.name": service_name})
# Configure the tracer provider (always, since llama stack run spawns subprocess without opentelemetry-instrument)
tracer_provider = TracerProvider(resource=resource)
trace.set_tracer_provider(tracer_provider)
# Configure OTLP span exporter
otlp_span_exporter = OTLPSpanExporter()
# Add span processor (simple for immediate export, batch for performance)
span_processor_type = os.environ.get("OTEL_SPAN_PROCESSOR", "batch")
if span_processor_type == "batch":
tracer_provider.add_span_processor(BatchSpanProcessor(otlp_span_exporter))
else:
tracer_provider.add_span_processor(SimpleSpanProcessor(otlp_span_exporter))
# Configure meter provider with OTLP exporter for metrics
metric_reader = PeriodicExportingMetricReader(OTLPMetricExporter())
meter_provider = MeterProvider(resource=resource, metric_readers=[metric_reader])
metrics.set_meter_provider(meter_provider)
logger.info(
f"Initialized OpenTelemetry provider with service.name={service_name}, span_processor={span_processor_type}"
)
def fastapi_middleware(self, app: FastAPI):
FastAPIInstrumentor.instrument_app(app)
def custom_trace(self,
name: str,
context: Context | None = None,
kind: SpanKind = SpanKind.INTERNAL,
attributes: Attributes = {},
links: _Links = None,
start_time: int | None = None,
record_exception: bool = True,
set_status_on_exception: bool = True) -> Span:
"""
Creates a custom tracing span using the Open Telemetry SDK.
Instrument FastAPI with OTel for automatic tracing and metrics.
Captures telemetry for both regular and streaming HTTP requests:
- Distributed traces (via FastAPIInstrumentor)
- HTTP request metrics (count, duration, status)
- Streaming-specific metrics (time-to-first-byte, total stream duration)
"""
tracer = trace.get_tracer(__name__)
return tracer.start_span(name, context, kind, attributes, links, start_time, record_exception, set_status_on_exception)
# Create meter for HTTP metrics
meter = metrics.get_meter("llama_stack.http.server")
def record_count(self, name: str, amount: int|float, context: Context | None = None, attributes: dict[str, str] | None = None, unit: str = "", description: str = ""):
"""
Increments a counter metric using the Open Telemetry SDK that are indexed by the meter name.
This function is designed to be compatible with other popular telemetry providers design patterns,
like Datadog and New Relic.
"""
meter = metrics.get_meter(__name__)
# HTTP Metrics following OTel semantic conventions
# https://opentelemetry.io/docs/specs/semconv/http/http-metrics/
request_duration = meter.create_histogram(
"http.server.request.duration",
unit="ms",
description="Duration of HTTP requests (time-to-first-byte for streaming)",
)
with self._lock:
if name not in self._counters:
self._counters[name] = meter.create_counter(name, unit=unit, description=description)
counter = self._counters[name]
streaming_duration = meter.create_histogram(
"http.server.streaming.duration",
unit="ms",
description="Total duration of streaming responses (from start to stream completion)",
)
counter.add(amount, attributes=attributes, context=context)
request_count = meter.create_counter(
"http.server.request.count", unit="requests", description="Total number of HTTP requests"
)
streaming_requests = meter.create_counter(
"http.server.streaming.count", unit="requests", description="Number of streaming requests"
)
def record_histogram(self, name: str, value: int|float, context: Context | None = None, attributes: dict[str, str] | None = None, unit: str = "", description: str = "", explicit_bucket_boundaries_advisory: Sequence[float] | None = None):
"""
Records a histogram metric using the Open Telemetry SDK that are indexed by the meter name.
This function is designed to be compatible with other popular telemetry providers design patterns,
like Datadog and New Relic.
"""
meter = metrics.get_meter(__name__)
# Hook to enrich spans and record initial metrics
def server_request_hook(span, scope):
"""
Called by FastAPIInstrumentor for each request.
with self._lock:
if name not in self._histograms:
self._histograms[name] = meter.create_histogram(name, unit=unit, description=description, explicit_bucket_boundaries_advisory=explicit_bucket_boundaries_advisory)
histogram = self._histograms[name]
This only reads from scope (ASGI dict), never touches request body.
Safe to use without interfering with body parsing.
"""
method = scope.get("method", "UNKNOWN")
path = scope.get("path", "/")
histogram.record(value, attributes=attributes, context=context)
# Add custom attributes
span.set_attribute("service.component", "llama-stack-api")
span.set_attribute("http.request", path)
span.set_attribute("http.method", method)
attributes = {
"http.request": path,
"http.method": method,
"trace_id": span.attributes.get("trace_id", ""),
"span_id": span.attributes.get("span_id", ""),
}
def record_up_down_counter(self, name: str, value: int|float, context: Context | None = None, attributes: dict[str, str] | None = None, unit: str = "", description: str = ""):
"""
Records an up/down counter metric using the Open Telemetry SDK that are indexed by the meter name.
This function is designed to be compatible with other popular telemetry providers design patterns,
like Datadog and New Relic.
"""
meter = metrics.get_meter(__name__)
request_count.add(1, attributes)
logger.debug(f"server_request_hook: recorded request_count for {method} {path}, attributes={attributes}")
with self._lock:
if name not in self._up_down_counters:
self._up_down_counters[name] = meter.create_up_down_counter(name, unit=unit, description=description)
up_down_counter = self._up_down_counters[name]
# NOTE: This is called BEFORE routes are added to the app
# FastAPIInstrumentor.instrument_app() patches build_middleware_stack(),
# which will be called on first request (after routes are added), so hooks should work.
logger.debug("Instrumenting FastAPI (routes will be added later)")
FastAPIInstrumentor.instrument_app(
app,
server_request_hook=server_request_hook,
)
logger.debug(f"FastAPI instrumented: {getattr(app, '_is_instrumented_by_opentelemetry', False)}")
up_down_counter.add(value, attributes=attributes, context=context)
# Add pure ASGI middleware for streaming metrics (always add, regardless of instrumentation)
app.add_middleware(StreamingMetricsMiddleware)
# Add metrics span processor
provider = trace.get_tracer_provider()
logger.debug(f"TracerProvider: {provider}")
if isinstance(provider, TracerProvider):
metrics_exporter = MetricsSpanExporter(
request_duration=request_duration,
streaming_duration=streaming_duration,
streaming_requests=streaming_requests,
request_count=request_count,
)
provider.add_span_processor(BatchSpanProcessor(metrics_exporter))
logger.debug("Added MetricsSpanExporter as BatchSpanProcessor")
else:
logger.warning(
f"TracerProvider is not TracerProvider instance, it's {type(provider)}. MetricsSpanExporter not added."
)

View file

@ -26,4 +26,16 @@ def available_providers() -> list[ProviderSpec]:
config_class="llama_stack.providers.inline.telemetry.meta_reference.config.TelemetryConfig",
description="Meta's reference implementation of telemetry and observability using OpenTelemetry.",
),
InlineProviderSpec(
api=Api.telemetry,
provider_type="inline::otel",
pip_packages=[
"opentelemetry-sdk",
"opentelemetry-exporter-otlp-proto-http",
"opentelemetry-instrumentation-fastapi",
],
module="llama_stack.providers.inline.telemetry.otel",
config_class="llama_stack.providers.inline.telemetry.otel.config.OTelTelemetryConfig",
description="Native OpenTelemetry provider with full access to OTel Tracer and Meter APIs for advanced instrumentation.",
),
]