mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-12-15 17:53:06 +00:00
feat(telemetry:major): End to End Testing, Metric Capture, SQL Alchemy Injection
This commit is contained in:
parent
e815738936
commit
7e3cf1fb20
26 changed files with 2075 additions and 1006 deletions
|
|
@ -1,15 +1,22 @@
|
|||
from aiohttp import hdrs
|
||||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from typing import Any
|
||||
|
||||
from aiohttp import hdrs
|
||||
|
||||
from llama_stack.apis.datatypes import Api
|
||||
from llama_stack.core.external import ExternalApiSpec
|
||||
from llama_stack.core.server.routes import find_matching_route, initialize_route_impls
|
||||
from llama_stack.log import get_logger
|
||||
from llama_stack.providers.utils.telemetry.tracing import end_trace, start_trace
|
||||
|
||||
|
||||
logger = get_logger(name=__name__, category="telemetry::meta_reference")
|
||||
|
||||
|
||||
class TracingMiddleware:
|
||||
def __init__(
|
||||
self,
|
||||
|
|
|
|||
|
|
@ -10,7 +10,6 @@ import threading
|
|||
from typing import Any, cast
|
||||
|
||||
from fastapi import FastAPI
|
||||
|
||||
from opentelemetry import metrics, trace
|
||||
from opentelemetry.exporter.otlp.proto.http.metric_exporter import OTLPMetricExporter
|
||||
from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
|
||||
|
|
@ -23,11 +22,6 @@ from opentelemetry.semconv.attributes import service_attributes
|
|||
from opentelemetry.trace.propagation.tracecontext import TraceContextTextMapPropagator
|
||||
from opentelemetry.util.types import Attributes
|
||||
|
||||
from llama_stack.core.external import ExternalApiSpec
|
||||
from llama_stack.core.server.tracing import TelemetryProvider
|
||||
from llama_stack.providers.inline.telemetry.meta_reference.middleware import TracingMiddleware
|
||||
|
||||
|
||||
from llama_stack.apis.telemetry import (
|
||||
Event,
|
||||
MetricEvent,
|
||||
|
|
@ -47,10 +41,13 @@ from llama_stack.apis.telemetry import (
|
|||
UnstructuredLogEvent,
|
||||
)
|
||||
from llama_stack.core.datatypes import Api
|
||||
from llama_stack.core.external import ExternalApiSpec
|
||||
from llama_stack.core.server.tracing import TelemetryProvider
|
||||
from llama_stack.log import get_logger
|
||||
from llama_stack.providers.inline.telemetry.meta_reference.console_span_processor import (
|
||||
ConsoleSpanProcessor,
|
||||
)
|
||||
from llama_stack.providers.inline.telemetry.meta_reference.middleware import TracingMiddleware
|
||||
from llama_stack.providers.inline.telemetry.meta_reference.sqlite_span_processor import (
|
||||
SQLiteSpanProcessor,
|
||||
)
|
||||
|
|
@ -381,7 +378,7 @@ class TelemetryAdapter(TelemetryDatasetMixin, Telemetry, TelemetryProvider):
|
|||
max_depth=max_depth,
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
def fastapi_middleware(
|
||||
self,
|
||||
app: FastAPI,
|
||||
|
|
|
|||
|
|
@ -0,0 +1,23 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from .config import OTelTelemetryConfig
|
||||
|
||||
__all__ = ["OTelTelemetryConfig"]
|
||||
|
||||
|
||||
async def get_provider_impl(config: OTelTelemetryConfig, deps):
    """
    Build the OTel telemetry provider for the given configuration.

    This is the entry point the Llama Stack registry calls to instantiate
    the provider.

    :param config: Provider configuration passed straight to the provider.
    :param deps: Accepted as part of the entry-point signature; not used here.
    :returns: A new ``OTelTelemetryProvider`` built from ``config``.
    """
    # Imported at call time rather than at module import.
    from .otel import OTelTelemetryProvider as _ProviderCls

    # The provider is synchronously initialized via Pydantic model_post_init,
    # so there is nothing to await before handing it back.
    provider = _ProviderCls(config=config)
    return provider
|
||||
|
|
@ -1,8 +1,13 @@
|
|||
from typing import Literal
|
||||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from typing import Any, Literal
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
type BatchSpanProcessor = Literal["batch"]
|
||||
type SimpleSpanProcessor = Literal["simple"]
|
||||
|
||||
|
|
@ -11,22 +16,35 @@ class OTelTelemetryConfig(BaseModel):
|
|||
"""
|
||||
The configuration for the OpenTelemetry telemetry provider.
|
||||
Most configuration is set using environment variables.
|
||||
See https://opentelemetry.io/docs/specs/otel/configuration/sdk-environment-variables/ for more information.
|
||||
See https://opentelemetry.io/docs/specs/otel/configuration/sdk-configuration-variables/ for more information.
|
||||
"""
|
||||
|
||||
service_name: str = Field(
|
||||
description="""The name of the service to be monitored.
|
||||
description="""The name of the service to be monitored.
|
||||
Is overridden by the OTEL_SERVICE_NAME or OTEL_RESOURCE_ATTRIBUTES environment variables.""",
|
||||
)
|
||||
service_version: str | None = Field(
|
||||
description="""The version of the service to be monitored.
|
||||
Is overriden by the OTEL_RESOURCE_ATTRIBUTES environment variable."""
|
||||
default=None,
|
||||
description="""The version of the service to be monitored.
|
||||
Is overriden by the OTEL_RESOURCE_ATTRIBUTES environment variable.""",
|
||||
)
|
||||
deployment_environment: str | None = Field(
|
||||
description="""The name of the environment of the service to be monitored.
|
||||
Is overriden by the OTEL_RESOURCE_ATTRIBUTES environment variable."""
|
||||
default=None,
|
||||
description="""The name of the environment of the service to be monitored.
|
||||
Is overriden by the OTEL_RESOURCE_ATTRIBUTES environment variable.""",
|
||||
)
|
||||
span_processor: BatchSpanProcessor | SimpleSpanProcessor | None = Field(
|
||||
description="""The span processor to use.
|
||||
description="""The span processor to use.
|
||||
Is overriden by the OTEL_SPAN_PROCESSOR environment variable.""",
|
||||
default="batch"
|
||||
default="batch",
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def sample_run_config(cls, __distro_dir__: str = "") -> dict[str, Any]:
|
||||
"""Sample configuration for use in distributions."""
|
||||
return {
|
||||
"service_name": "${env.OTEL_SERVICE_NAME:=llama-stack}",
|
||||
"service_version": "${env.OTEL_SERVICE_VERSION:=}",
|
||||
"deployment_environment": "${env.OTEL_DEPLOYMENT_ENVIRONMENT:=}",
|
||||
"span_processor": "${env.OTEL_SPAN_PROCESSOR:=batch}",
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,141 +1,301 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
import os
|
||||
import threading
|
||||
import time
|
||||
|
||||
from opentelemetry import trace, metrics
|
||||
from opentelemetry.context.context import Context
|
||||
from opentelemetry.sdk.resources import Attributes, Resource
|
||||
from opentelemetry.sdk.trace import TracerProvider
|
||||
from opentelemetry.sdk.trace.export import BatchSpanProcessor, SimpleSpanProcessor
|
||||
from fastapi import FastAPI
|
||||
from opentelemetry import metrics, trace
|
||||
from opentelemetry.exporter.otlp.proto.http.metric_exporter import OTLPMetricExporter
|
||||
from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
|
||||
from opentelemetry.sdk.metrics import MeterProvider
|
||||
from opentelemetry.metrics import Counter, UpDownCounter, Histogram, ObservableGauge
|
||||
from opentelemetry.instrumentation.fastapi import FastAPIInstrumentor
|
||||
from opentelemetry.trace import Span, SpanKind, _Links
|
||||
from typing import Sequence
|
||||
from pydantic import PrivateAttr
|
||||
from opentelemetry.instrumentation.sqlalchemy import SQLAlchemyInstrumentor
|
||||
from opentelemetry.metrics import Counter, Histogram
|
||||
from opentelemetry.sdk.metrics import MeterProvider
|
||||
from opentelemetry.sdk.metrics.export import PeriodicExportingMetricReader
|
||||
from opentelemetry.sdk.resources import Resource
|
||||
from opentelemetry.sdk.trace import TracerProvider
|
||||
from opentelemetry.sdk.trace.export import (
|
||||
BatchSpanProcessor,
|
||||
SimpleSpanProcessor,
|
||||
SpanExporter,
|
||||
SpanExportResult,
|
||||
)
|
||||
from sqlalchemy import Engine
|
||||
from starlette.types import ASGIApp, Message, Receive, Scope, Send
|
||||
|
||||
from llama_stack.core.telemetry.tracing import TelemetryProvider
|
||||
from llama_stack.core.telemetry.telemetry import TelemetryProvider
|
||||
from llama_stack.log import get_logger
|
||||
|
||||
from .config import OTelTelemetryConfig
|
||||
from fastapi import FastAPI
|
||||
|
||||
|
||||
logger = get_logger(name=__name__, category="telemetry::otel")
|
||||
|
||||
|
||||
class StreamingMetricsMiddleware:
    """
    Pure ASGI middleware to track streaming response metrics.

    For HTTP requests whose response declares a ``text/event-stream`` content
    type, the current OpenTelemetry span is annotated with:

    - ``http.response.is_streaming`` (``True``) when the response starts
    - ``http.streaming.total_duration_ms`` when the final body chunk is sent

    Non-HTTP scopes are forwarded to the wrapped app untouched, and every
    outgoing message is forwarded unchanged.

    This follows Starlette best practices by implementing pure ASGI,
    which is more efficient and less prone to bugs than BaseHTTPMiddleware.
    """

    def __init__(self, app: ASGIApp):
        """Store the downstream ASGI application to delegate to."""
        self.app = app

    async def __call__(self, scope: Scope, receive: Receive, send: Send):
        """Run the wrapped app, wrapping ``send`` for HTTP scopes to observe the response."""
        if scope["type"] != "http":
            await self.app(scope, receive, send)
            return

        logger.debug(f"StreamingMetricsMiddleware called for {scope.get('method')} {scope.get('path')}")
        # Use a monotonic clock: wall-clock time can jump (NTP/DST adjustments)
        # and would then produce negative or inflated durations.
        start_time = time.perf_counter()

        # Track if this is a streaming response
        is_streaming = False

        async def send_wrapper(message: Message):
            nonlocal is_streaming

            # Detect streaming responses by headers
            if message["type"] == "http.response.start":
                for name, value in message.get("headers", []):
                    # Header names and media types are case-insensitive, so
                    # normalise both sides before comparing.
                    if name.lower() == b"content-type" and b"text/event-stream" in value.lower():
                        is_streaming = True
                        # Add streaming attribute to current span
                        current_span = trace.get_current_span()
                        if current_span and current_span.is_recording():
                            current_span.set_attribute("http.response.is_streaming", True)
                        break

            # Record total duration when response body completes
            elif message["type"] == "http.response.body" and not message.get("more_body", False):
                if is_streaming:
                    current_span = trace.get_current_span()
                    if current_span and current_span.is_recording():
                        total_duration_ms = (time.perf_counter() - start_time) * 1000
                        current_span.set_attribute("http.streaming.total_duration_ms", total_duration_ms)

            # Always forward the message, whether or not it was inspected.
            await send(message)

        await self.app(scope, receive, send_wrapper)
|
||||
|
||||
|
||||
class MetricsSpanExporter(SpanExporter):
    """Records HTTP metrics from span data.

    Rather than shipping spans anywhere, ``export`` inspects each span that
    carries an ``http.method`` attribute and feeds the supplied counters and
    histograms from the span's timing and attributes.
    """

    def __init__(
        self,
        request_duration: Histogram,
        streaming_duration: Histogram,
        streaming_requests: Counter,
        request_count: Counter,
    ):
        """Keep references to the instruments that ``export`` records into.

        :param request_duration: Histogram receiving the span duration in milliseconds.
        :param streaming_duration: Histogram receiving ``http.streaming.total_duration_ms``.
        :param streaming_requests: Counter incremented once per streaming span.
        :param request_count: Counter incremented once per HTTP span.
        """
        self.request_duration = request_duration
        self.streaming_duration = streaming_duration
        self.streaming_requests = streaming_requests
        self.request_count = request_count

    def export(self, spans):
        """Record request/streaming metrics for every HTTP span in ``spans``.

        Spans without attributes, without ``http.method``, or without both
        start and end timestamps are skipped.

        :param spans: Sized iterable of finished spans.
        :returns: Always ``SpanExportResult.SUCCESS``.
        """
        logger.debug(f"MetricsSpanExporter.export called with {len(spans)} spans")
        for span in spans:
            span_attrs = span.attributes
            if not span_attrs or not span_attrs.get("http.method"):
                continue
            logger.debug(f"Processing span: {span.name}")

            if span.end_time is None or span.start_time is None:
                continue

            # Calculate time-to-first-byte duration (span length, ns -> ms)
            duration_ns = span.end_time - span.start_time
            duration_ms = duration_ns / 1_000_000

            attributes = {
                "http.method": str(span_attrs.get("http.method", "UNKNOWN")),
                "http.route": str(span_attrs.get("http.route", span_attrs.get("http.target", "/"))),
                "http.status_code": str(span_attrs.get("http.status_code", 0)),
            }

            # set distributed trace attributes
            # NOTE(review): trace_id/span_id look like per-request values; using them
            # as metric attributes may create very high cardinality - confirm intent.
            if span_attrs.get("trace_id"):
                attributes["trace_id"] = str(span_attrs.get("trace_id"))
            if span_attrs.get("span_id"):
                attributes["span_id"] = str(span_attrs.get("span_id"))

            # Record request count and duration
            logger.debug(f"Recording metrics: duration={duration_ms}ms, attributes={attributes}")
            self.request_count.add(1, attributes)
            self.request_duration.record(duration_ms, attributes)
            logger.debug("Metrics recorded successfully")

            # Check if this was a streaming response; nothing more to do otherwise
            if not span_attrs.get("http.response.is_streaming", False):
                continue

            # For streaming, record separately
            logger.debug(f"MetricsSpanExporter: Recording streaming metrics for {span.name}")
            self.streaming_requests.add(1, attributes)

            # If full streaming duration is available
            stream_total_duration = span_attrs.get("http.streaming.total_duration_ms")
            # Check the type explicitly instead of truthiness so that a measured
            # duration of 0 is still recorded; bool is excluded because it is an
            # int subclass but never a real duration.
            has_duration = isinstance(stream_total_duration, int | float) and not isinstance(
                stream_total_duration, bool
            )
            if has_duration:
                logger.debug(f"MetricsSpanExporter: Recording streaming duration: {stream_total_duration}ms")
                self.streaming_duration.record(float(stream_total_duration), attributes)
            else:
                logger.warning(
                    "MetricsSpanExporter: Streaming span has no http.streaming.total_duration_ms attribute"
                )

        return SpanExportResult.SUCCESS

    def shutdown(self):
        """No resources are held by this exporter, so shutdown is a no-op."""
        pass
|
||||
|
||||
|
||||
# NOTE: DO NOT ALLOW LLM TO MODIFY THIS WITHOUT TESTING AND SUPERVISION: it frequently breaks otel integrations
|
||||
class OTelTelemetryProvider(TelemetryProvider):
|
||||
"""
|
||||
A simple Open Telemetry native telemetry provider.
|
||||
"""
|
||||
config: OTelTelemetryConfig
|
||||
_counters: dict[str, Counter] = PrivateAttr(default_factory=dict)
|
||||
_up_down_counters: dict[str, UpDownCounter] = PrivateAttr(default_factory=dict)
|
||||
_histograms: dict[str, Histogram] = PrivateAttr(default_factory=dict)
|
||||
_gauges: dict[str, ObservableGauge] = PrivateAttr(default_factory=dict)
|
||||
|
||||
config: OTelTelemetryConfig
|
||||
|
||||
def model_post_init(self, __context):
|
||||
"""Initialize provider after Pydantic validation."""
|
||||
self._lock = threading.Lock()
|
||||
|
||||
attributes: Attributes = {
|
||||
key: value
|
||||
for key, value in {
|
||||
"service.name": self.config.service_name,
|
||||
"service.version": self.config.service_version,
|
||||
"deployment.environment": self.config.deployment_environment,
|
||||
}.items()
|
||||
if value is not None
|
||||
}
|
||||
|
||||
resource = Resource.create(attributes)
|
||||
|
||||
# Configure the tracer provider
|
||||
tracer_provider = TracerProvider(resource=resource)
|
||||
trace.set_tracer_provider(tracer_provider)
|
||||
|
||||
otlp_span_exporter = OTLPSpanExporter()
|
||||
|
||||
# Configure the span processor
|
||||
# Enable batching of spans to reduce the number of requests to the collector
|
||||
if self.config.span_processor == "batch":
|
||||
tracer_provider.add_span_processor(BatchSpanProcessor(otlp_span_exporter))
|
||||
elif self.config.span_processor == "simple":
|
||||
tracer_provider.add_span_processor(SimpleSpanProcessor(otlp_span_exporter))
|
||||
|
||||
meter_provider = MeterProvider(resource=resource)
|
||||
metrics.set_meter_provider(meter_provider)
|
||||
|
||||
# Do not fail the application, but warn the user if the endpoints are not set properly.
|
||||
if not os.environ.get("OTEL_EXPORTER_OTLP_ENDPOINT"):
|
||||
if not os.environ.get("OTEL_EXPORTER_OTLP_TRACES_ENDPOINT"):
|
||||
logger.warning("OTEL_EXPORTER_OTLP_ENDPOINT or OTEL_EXPORTER_OTLP_TRACES_ENDPOINT is not set. Traces will not be exported.")
|
||||
logger.warning(
|
||||
"OTEL_EXPORTER_OTLP_ENDPOINT or OTEL_EXPORTER_OTLP_TRACES_ENDPOINT is not set. Traces will not be exported."
|
||||
)
|
||||
if not os.environ.get("OTEL_EXPORTER_OTLP_METRICS_ENDPOINT"):
|
||||
logger.warning("OTEL_EXPORTER_OTLP_ENDPOINT or OTEL_EXPORTER_OTLP_METRICS_ENDPOINT is not set. Metrics will not be exported.")
|
||||
logger.warning(
|
||||
"OTEL_EXPORTER_OTLP_ENDPOINT or OTEL_EXPORTER_OTLP_METRICS_ENDPOINT is not set. Metrics will not be exported."
|
||||
)
|
||||
|
||||
# Respect OTEL design standards where environment variables get highest precedence
|
||||
service_name = os.environ.get("OTEL_SERVICE_NAME")
|
||||
if not service_name:
|
||||
service_name = self.config.service_name
|
||||
|
||||
# Create resource with service name
|
||||
resource = Resource.create({"service.name": service_name})
|
||||
|
||||
# Configure the tracer provider (always, since llama stack run spawns subprocess without opentelemetry-instrument)
|
||||
tracer_provider = TracerProvider(resource=resource)
|
||||
trace.set_tracer_provider(tracer_provider)
|
||||
|
||||
# Configure OTLP span exporter
|
||||
otlp_span_exporter = OTLPSpanExporter()
|
||||
|
||||
# Add span processor (simple for immediate export, batch for performance)
|
||||
span_processor_type = os.environ.get("OTEL_SPAN_PROCESSOR", "batch")
|
||||
if span_processor_type == "batch":
|
||||
tracer_provider.add_span_processor(BatchSpanProcessor(otlp_span_exporter))
|
||||
else:
|
||||
tracer_provider.add_span_processor(SimpleSpanProcessor(otlp_span_exporter))
|
||||
|
||||
# Configure meter provider with OTLP exporter for metrics
|
||||
metric_reader = PeriodicExportingMetricReader(OTLPMetricExporter())
|
||||
meter_provider = MeterProvider(resource=resource, metric_readers=[metric_reader])
|
||||
metrics.set_meter_provider(meter_provider)
|
||||
|
||||
logger.info(
|
||||
f"Initialized OpenTelemetry provider with service.name={service_name}, span_processor={span_processor_type}"
|
||||
)
|
||||
|
||||
def fastapi_middleware(self, app: FastAPI):
|
||||
FastAPIInstrumentor.instrument_app(app)
|
||||
|
||||
def custom_trace(self,
|
||||
name: str,
|
||||
context: Context | None = None,
|
||||
kind: SpanKind = SpanKind.INTERNAL,
|
||||
attributes: Attributes = {},
|
||||
links: _Links = None,
|
||||
start_time: int | None = None,
|
||||
record_exception: bool = True,
|
||||
set_status_on_exception: bool = True) -> Span:
|
||||
"""
|
||||
Creates a custom tracing span using the Open Telemetry SDK.
|
||||
Instrument FastAPI with OTel for automatic tracing and metrics.
|
||||
|
||||
Captures telemetry for both regular and streaming HTTP requests:
|
||||
- Distributed traces (via FastAPIInstrumentor)
|
||||
- HTTP request metrics (count, duration, status)
|
||||
- Streaming-specific metrics (time-to-first-byte, total stream duration)
|
||||
"""
|
||||
tracer = trace.get_tracer(__name__)
|
||||
return tracer.start_span(name, context, kind, attributes, links, start_time, record_exception, set_status_on_exception)
|
||||
|
||||
# Create meter for HTTP metrics
|
||||
meter = metrics.get_meter("llama_stack.http.server")
|
||||
|
||||
def record_count(self, name: str, amount: int|float, context: Context | None = None, attributes: dict[str, str] | None = None, unit: str = "", description: str = ""):
|
||||
"""
|
||||
Increments a counter metric using the Open Telemetry SDK that are indexed by the meter name.
|
||||
This function is designed to be compatible with other popular telemetry providers design patterns,
|
||||
like Datadog and New Relic.
|
||||
"""
|
||||
meter = metrics.get_meter(__name__)
|
||||
# HTTP Metrics following OTel semantic conventions
|
||||
# https://opentelemetry.io/docs/specs/semconv/http/http-metrics/
|
||||
request_duration = meter.create_histogram(
|
||||
"http.server.request.duration",
|
||||
unit="ms",
|
||||
description="Duration of HTTP requests (time-to-first-byte for streaming)",
|
||||
)
|
||||
|
||||
with self._lock:
|
||||
if name not in self._counters:
|
||||
self._counters[name] = meter.create_counter(name, unit=unit, description=description)
|
||||
counter = self._counters[name]
|
||||
streaming_duration = meter.create_histogram(
|
||||
"http.server.streaming.duration",
|
||||
unit="ms",
|
||||
description="Total duration of streaming responses (from start to stream completion)",
|
||||
)
|
||||
|
||||
counter.add(amount, attributes=attributes, context=context)
|
||||
request_count = meter.create_counter(
|
||||
"http.server.request.count", unit="requests", description="Total number of HTTP requests"
|
||||
)
|
||||
|
||||
streaming_requests = meter.create_counter(
|
||||
"http.server.streaming.count", unit="requests", description="Number of streaming requests"
|
||||
)
|
||||
|
||||
def record_histogram(self, name: str, value: int|float, context: Context | None = None, attributes: dict[str, str] | None = None, unit: str = "", description: str = "", explicit_bucket_boundaries_advisory: Sequence[float] | None = None):
|
||||
"""
|
||||
Records a histogram metric using the Open Telemetry SDK that are indexed by the meter name.
|
||||
This function is designed to be compatible with other popular telemetry providers design patterns,
|
||||
like Datadog and New Relic.
|
||||
"""
|
||||
meter = metrics.get_meter(__name__)
|
||||
# Hook to enrich spans and record initial metrics
|
||||
def server_request_hook(span, scope):
|
||||
"""
|
||||
Called by FastAPIInstrumentor for each request.
|
||||
|
||||
with self._lock:
|
||||
if name not in self._histograms:
|
||||
self._histograms[name] = meter.create_histogram(name, unit=unit, description=description, explicit_bucket_boundaries_advisory=explicit_bucket_boundaries_advisory)
|
||||
histogram = self._histograms[name]
|
||||
This only reads from scope (ASGI dict), never touches request body.
|
||||
Safe to use without interfering with body parsing.
|
||||
"""
|
||||
method = scope.get("method", "UNKNOWN")
|
||||
path = scope.get("path", "/")
|
||||
|
||||
histogram.record(value, attributes=attributes, context=context)
|
||||
# Add custom attributes
|
||||
span.set_attribute("service.component", "llama-stack-api")
|
||||
span.set_attribute("http.request", path)
|
||||
span.set_attribute("http.method", method)
|
||||
|
||||
attributes = {
|
||||
"http.request": path,
|
||||
"http.method": method,
|
||||
"trace_id": span.attributes.get("trace_id", ""),
|
||||
"span_id": span.attributes.get("span_id", ""),
|
||||
}
|
||||
|
||||
def record_up_down_counter(self, name: str, value: int|float, context: Context | None = None, attributes: dict[str, str] | None = None, unit: str = "", description: str = ""):
|
||||
"""
|
||||
Records an up/down counter metric using the Open Telemetry SDK that are indexed by the meter name.
|
||||
This function is designed to be compatible with other popular telemetry providers design patterns,
|
||||
like Datadog and New Relic.
|
||||
"""
|
||||
meter = metrics.get_meter(__name__)
|
||||
request_count.add(1, attributes)
|
||||
logger.debug(f"server_request_hook: recorded request_count for {method} {path}, attributes={attributes}")
|
||||
|
||||
with self._lock:
|
||||
if name not in self._up_down_counters:
|
||||
self._up_down_counters[name] = meter.create_up_down_counter(name, unit=unit, description=description)
|
||||
up_down_counter = self._up_down_counters[name]
|
||||
# NOTE: This is called BEFORE routes are added to the app
|
||||
# FastAPIInstrumentor.instrument_app() patches build_middleware_stack(),
|
||||
# which will be called on first request (after routes are added), so hooks should work.
|
||||
logger.debug("Instrumenting FastAPI (routes will be added later)")
|
||||
FastAPIInstrumentor.instrument_app(
|
||||
app,
|
||||
server_request_hook=server_request_hook,
|
||||
)
|
||||
logger.debug(f"FastAPI instrumented: {getattr(app, '_is_instrumented_by_opentelemetry', False)}")
|
||||
|
||||
up_down_counter.add(value, attributes=attributes, context=context)
|
||||
# Add pure ASGI middleware for streaming metrics (always add, regardless of instrumentation)
|
||||
app.add_middleware(StreamingMetricsMiddleware)
|
||||
|
||||
# Add metrics span processor
|
||||
provider = trace.get_tracer_provider()
|
||||
logger.debug(f"TracerProvider: {provider}")
|
||||
if isinstance(provider, TracerProvider):
|
||||
metrics_exporter = MetricsSpanExporter(
|
||||
request_duration=request_duration,
|
||||
streaming_duration=streaming_duration,
|
||||
streaming_requests=streaming_requests,
|
||||
request_count=request_count,
|
||||
)
|
||||
provider.add_span_processor(BatchSpanProcessor(metrics_exporter))
|
||||
logger.debug("Added MetricsSpanExporter as BatchSpanProcessor")
|
||||
else:
|
||||
logger.warning(
|
||||
f"TracerProvider is not TracerProvider instance, it's {type(provider)}. MetricsSpanExporter not added."
|
||||
)
|
||||
|
|
|
|||
|
|
@ -26,4 +26,16 @@ def available_providers() -> list[ProviderSpec]:
|
|||
config_class="llama_stack.providers.inline.telemetry.meta_reference.config.TelemetryConfig",
|
||||
description="Meta's reference implementation of telemetry and observability using OpenTelemetry.",
|
||||
),
|
||||
InlineProviderSpec(
|
||||
api=Api.telemetry,
|
||||
provider_type="inline::otel",
|
||||
pip_packages=[
|
||||
"opentelemetry-sdk",
|
||||
"opentelemetry-exporter-otlp-proto-http",
|
||||
"opentelemetry-instrumentation-fastapi",
|
||||
],
|
||||
module="llama_stack.providers.inline.telemetry.otel",
|
||||
config_class="llama_stack.providers.inline.telemetry.otel.config.OTelTelemetryConfig",
|
||||
description="Native OpenTelemetry provider with full access to OTel Tracer and Meter APIs for advanced instrumentation.",
|
||||
),
|
||||
]
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue