diff --git a/docs/docs/providers/telemetry/inline_meta-reference.mdx b/docs/docs/providers/telemetry/inline_meta-reference.mdx index ea2a690b3..bed9c98df 100644 --- a/docs/docs/providers/telemetry/inline_meta-reference.mdx +++ b/docs/docs/providers/telemetry/inline_meta-reference.mdx @@ -14,7 +14,7 @@ Meta's reference implementation of telemetry and observability using OpenTelemet | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `otel_exporter_otlp_endpoint` | `str \| None` | No | | The OpenTelemetry collector endpoint URL (base URL for traces, metrics, and logs). If not set, the SDK will use OTEL_EXPORTER_OTLP_ENDPOINT environment variable. | +| `otel_exporter_otlp_endpoint` | `str \| None` | No | | Deprecated.Please set the exporter using open telemetry environment variables instead. See https://opentelemetry.io/docs/languages/sdk-configuration/otlp-exporter/. | | `service_name` | `` | No | ​ | The service name to use for telemetry | | `sinks` | `list[inline.telemetry.meta_reference.config.TelemetrySink` | No | [<TelemetrySink.SQLITE: 'sqlite'>] | List of telemetry sinks to enable (possible values: otel_trace, otel_metric, sqlite, console) | | `sqlite_db_path` | `` | No | ~/.llama/runtime/trace_store.db | The path to the SQLite database to use for storing traces | diff --git a/docs/docs/providers/telemetry/inline_otel.mdx b/docs/docs/providers/telemetry/inline_otel.mdx new file mode 100644 index 000000000..0c0491e8a --- /dev/null +++ b/docs/docs/providers/telemetry/inline_otel.mdx @@ -0,0 +1,33 @@ +--- +description: "Native OpenTelemetry provider with full access to OTel Tracer and Meter APIs for advanced instrumentation." +sidebar_label: Otel +title: inline::otel +--- + +# inline::otel + +## Description + +Native OpenTelemetry provider with full access to OTel Tracer and Meter APIs for advanced instrumentation. 
+ +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `service_name` | `` | No | | The name of the service to be monitored. + Is overridden by the OTEL_SERVICE_NAME or OTEL_RESOURCE_ATTRIBUTES environment variables. | +| `service_version` | `str \| None` | No | | The version of the service to be monitored. + Is overriden by the OTEL_RESOURCE_ATTRIBUTES environment variable. | +| `deployment_environment` | `str \| None` | No | | The name of the environment of the service to be monitored. + Is overriden by the OTEL_RESOURCE_ATTRIBUTES environment variable. | +| `span_processor` | `BatchSpanProcessor \| SimpleSpanProcessor \| None` | No | batch | The span processor to use. + Is overriden by the OTEL_SPAN_PROCESSOR environment variable. | + +## Sample Configuration + +```yaml +service_name: ${env.OTEL_SERVICE_NAME:=llama-stack} +service_version: ${env.OTEL_SERVICE_VERSION:=} +deployment_environment: ${env.OTEL_DEPLOYMENT_ENVIRONMENT:=} +span_processor: ${env.OTEL_SPAN_PROCESSOR:=batch} +``` diff --git a/llama_stack/core/library_client.py b/llama_stack/core/library_client.py index e722e4de6..a422bc9d0 100644 --- a/llama_stack/core/library_client.py +++ b/llama_stack/core/library_client.py @@ -32,7 +32,7 @@ from termcolor import cprint from llama_stack.core.build import print_pip_install_help from llama_stack.core.configure import parse_and_maybe_upgrade_config -from llama_stack.core.datatypes import Api, BuildConfig, BuildProvider, DistributionSpec +from llama_stack.core.datatypes import BuildConfig, BuildProvider, DistributionSpec from llama_stack.core.request_headers import ( PROVIDER_DATA_VAR, request_provider_data_context, @@ -48,12 +48,6 @@ from llama_stack.core.utils.config import redact_sensitive_fields from llama_stack.core.utils.context import preserve_contexts_async_generator from llama_stack.core.utils.exec import in_notebook from llama_stack.log import get_logger -from 
llama_stack.providers.utils.telemetry.tracing import ( - CURRENT_TRACE_CONTEXT, - end_trace, - setup_logger, - start_trace, -) logger = get_logger(name=__name__, category="core") @@ -293,8 +287,6 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient): raise _e assert self.impls is not None - if Api.telemetry in self.impls: - setup_logger(self.impls[Api.telemetry]) if not os.environ.get("PYTEST_CURRENT_TEST"): console = Console() @@ -380,13 +372,7 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient): body, field_names = self._handle_file_uploads(options, body) body = self._convert_body(path, options.method, body, exclude_params=set(field_names)) - - trace_path = webmethod.descriptive_name or route_path - await start_trace(trace_path, {"__location__": "library_client"}) - try: - result = await matched_func(**body) - finally: - await end_trace() + result = await matched_func(**body) # Handle FastAPI Response objects (e.g., from file content retrieval) if isinstance(result, FastAPIResponse): @@ -444,9 +430,6 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient): body = self._convert_body(path, options.method, body) - trace_path = webmethod.descriptive_name or route_path - await start_trace(trace_path, {"__location__": "library_client"}) - async def gen(): try: async for chunk in await func(**body): @@ -454,9 +437,9 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient): sse_event = f"data: {data}\n\n" yield sse_event.encode("utf-8") finally: - await end_trace() + pass - wrapped_gen = preserve_contexts_async_generator(gen(), [CURRENT_TRACE_CONTEXT, PROVIDER_DATA_VAR]) + wrapped_gen = preserve_contexts_async_generator(gen(), [PROVIDER_DATA_VAR]) mock_response = httpx.Response( status_code=httpx.codes.OK, diff --git a/llama_stack/core/server/server.py b/llama_stack/core/server/server.py index 32be57880..a1702ff13 100644 --- a/llama_stack/core/server/server.py +++ b/llama_stack/core/server/server.py @@ -62,18 +62,9 @@ from 
llama_stack.core.utils.config_resolution import Mode, resolve_config_or_dis from llama_stack.core.utils.context import preserve_contexts_async_generator from llama_stack.log import get_logger from llama_stack.providers.datatypes import Api -from llama_stack.providers.inline.telemetry.meta_reference.config import TelemetryConfig -from llama_stack.providers.inline.telemetry.meta_reference.telemetry import ( - TelemetryAdapter, -) -from llama_stack.providers.utils.telemetry.tracing import ( - CURRENT_TRACE_CONTEXT, - setup_logger, -) from .auth import AuthenticationMiddleware from .quota import QuotaMiddleware -from .tracing import TracingMiddleware REPO_ROOT = Path(__file__).parent.parent.parent.parent @@ -82,6 +73,8 @@ logger = get_logger(name=__name__, category="core::server") def warn_with_traceback(message, category, filename, lineno, file=None, line=None): log = file if hasattr(file, "write") else sys.stderr + if log is None: + return traceback.print_stack(file=log) log.write(warnings.formatwarning(message, category, filename, lineno, line)) @@ -242,9 +235,7 @@ def create_dynamic_typed_route(func: Any, method: str, route: str) -> Callable: try: if is_streaming: - gen = preserve_contexts_async_generator( - sse_generator(func(**kwargs)), [CURRENT_TRACE_CONTEXT, PROVIDER_DATA_VAR] - ) + gen = preserve_contexts_async_generator(sse_generator(func(**kwargs)), [PROVIDER_DATA_VAR]) return StreamingResponse(gen, media_type="text/event-stream") else: value = func(**kwargs) @@ -289,7 +280,6 @@ def create_dynamic_typed_route(func: Any, method: str, route: str) -> Callable: ) route_handler.__signature__ = sig.replace(parameters=new_params) - return route_handler @@ -351,11 +341,12 @@ def create_app( if config_file is None: raise ValueError("No config file provided and LLAMA_STACK_CONFIG env var is not set") - config_file = resolve_config_or_distro(config_file, Mode.RUN) + config_path = resolve_config_or_distro(config_file, Mode.RUN) # Load and process configuration 
logger_config = None - with open(config_file) as fp: + + with open(config_path) as fp: config_contents = yaml.safe_load(fp) if isinstance(config_contents, dict) and (cfg := config_contents.get("logging_config")): logger_config = LoggingConfig(**cfg) @@ -387,7 +378,7 @@ def create_app( if not os.environ.get("LLAMA_STACK_DISABLE_VERSION_CHECK"): app.add_middleware(ClientVersionMiddleware) - impls = app.stack.impls + impls = app.stack.get_impls() if config.server.auth: logger.info(f"Enabling authentication with provider: {config.server.auth.provider_config.type.value}") @@ -429,9 +420,8 @@ def create_app( app.add_middleware(CORSMiddleware, **cors_config.model_dump()) if Api.telemetry in impls: - setup_logger(impls[Api.telemetry]) - else: - setup_logger(TelemetryAdapter(TelemetryConfig(), {})) + impls[Api.telemetry].fastapi_middleware(app) + impls[Api.telemetry].sqlalchemy_instrumentation() # Load external APIs if configured external_apis = load_external_apis(config) @@ -440,7 +430,7 @@ def create_app( if config.apis: apis_to_serve = set(config.apis) else: - apis_to_serve = set(impls.keys()) + apis_to_serve = {api.value for api in impls.keys()} for inf in builtin_automatically_routed_apis(): # if we do not serve the corresponding router API, we should not serve the routing table API @@ -468,7 +458,8 @@ def create_app( impl_method = getattr(impl, route.name) # Filter out HEAD method since it's automatically handled by FastAPI for GET routes - available_methods = [m for m in route.methods if m != "HEAD"] + route_methods = route.methods or [] + available_methods = [m for m in route_methods if m != "HEAD"] if not available_methods: raise ValueError(f"No methods found for {route.name} on {impl}") method = available_methods[0] @@ -489,8 +480,6 @@ def create_app( app.exception_handler(RequestValidationError)(global_exception_handler) app.exception_handler(Exception)(global_exception_handler) - app.add_middleware(TracingMiddleware, impls=impls, external_apis=external_apis) - 
return app @@ -528,8 +517,8 @@ def main(args: argparse.Namespace | None = None): logger.error(f"Error creating app: {str(e)}") sys.exit(1) - config_file = resolve_config_or_distro(config_or_distro, Mode.RUN) - with open(config_file) as fp: + config_path = resolve_config_or_distro(config_or_distro, Mode.RUN) + with open(config_path) as fp: config_contents = yaml.safe_load(fp) if isinstance(config_contents, dict) and (cfg := config_contents.get("logging_config")): logger_config = LoggingConfig(**cfg) diff --git a/llama_stack/core/stack.py b/llama_stack/core/stack.py index d5d55319a..0413e47c5 100644 --- a/llama_stack/core/stack.py +++ b/llama_stack/core/stack.py @@ -359,6 +359,12 @@ class Stack: await refresh_registry_once(impls) self.impls = impls + # safely access impls without raising an exception + def get_impls(self) -> dict[Api, Any]: + if self.impls is None: + return {} + return self.impls + def create_registry_refresh_task(self): assert self.impls is not None, "Must call initialize() before starting" diff --git a/llama_stack/core/telemetry/__init__.py b/llama_stack/core/telemetry/__init__.py new file mode 100644 index 000000000..b5e7174df --- /dev/null +++ b/llama_stack/core/telemetry/__init__.py @@ -0,0 +1,10 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. diff --git a/llama_stack/core/telemetry/telemetry.py b/llama_stack/core/telemetry/telemetry.py new file mode 100644 index 000000000..3e833786f --- /dev/null +++ b/llama_stack/core/telemetry/telemetry.py @@ -0,0 +1,30 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. 
+# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. +from abc import abstractmethod + +from fastapi import FastAPI +from pydantic import BaseModel +from sqlalchemy import Engine + + +class TelemetryProvider(BaseModel): + """ + TelemetryProvider standardizes how telemetry is provided to the application. + """ + + @abstractmethod + def fastapi_middleware(self, app: FastAPI, *args, **kwargs): + """ + Injects FastAPI middleware that instruments the application for telemetry. + """ + ... + + @abstractmethod + def sqlalchemy_instrumentation(self, engine: Engine | None = None): + """ + Injects SQLAlchemy instrumentation that instruments the application for telemetry. + """ + ... diff --git a/llama_stack/providers/inline/telemetry/meta_reference/config.py b/llama_stack/providers/inline/telemetry/meta_reference/config.py index 06420c671..bec46324b 100644 --- a/llama_stack/providers/inline/telemetry/meta_reference/config.py +++ b/llama_stack/providers/inline/telemetry/meta_reference/config.py @@ -22,7 +22,8 @@ class TelemetrySink(StrEnum): class TelemetryConfig(BaseModel): otel_exporter_otlp_endpoint: str | None = Field( default=None, - description="The OpenTelemetry collector endpoint URL (base URL for traces, metrics, and logs). If not set, the SDK will use OTEL_EXPORTER_OTLP_ENDPOINT environment variable.", + deprecated=True, + description="Deprecated.Please set the exporter using open telemetry environment variables instead. 
See https://opentelemetry.io/docs/languages/sdk-configuration/otlp-exporter/.", ) service_name: str = Field( # service name is always the same, use zero-width space to avoid clutter diff --git a/llama_stack/core/server/tracing.py b/llama_stack/providers/inline/telemetry/meta_reference/middleware.py similarity index 89% rename from llama_stack/core/server/tracing.py rename to llama_stack/providers/inline/telemetry/meta_reference/middleware.py index 4c6df5b42..219c344ef 100644 --- a/llama_stack/core/server/tracing.py +++ b/llama_stack/providers/inline/telemetry/meta_reference/middleware.py @@ -3,18 +3,27 @@ # # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. + +from typing import Any + from aiohttp import hdrs +from llama_stack.apis.datatypes import Api from llama_stack.core.external import ExternalApiSpec from llama_stack.core.server.routes import find_matching_route, initialize_route_impls from llama_stack.log import get_logger from llama_stack.providers.utils.telemetry.tracing import end_trace, start_trace -logger = get_logger(name=__name__, category="core::server") +logger = get_logger(name=__name__, category="telemetry::meta_reference") class TracingMiddleware: - def __init__(self, app, impls, external_apis: dict[str, ExternalApiSpec]): + def __init__( + self, + app, + impls: dict[Api, Any], + external_apis: dict[str, ExternalApiSpec], + ): self.app = app self.impls = impls self.external_apis = external_apis @@ -34,7 +43,8 @@ class TracingMiddleware: return await self.app(scope, receive, send) if not hasattr(self, "route_impls"): - self.route_impls = initialize_route_impls(self.impls, self.external_apis) + external_api_map = {Api(api_name): spec for api_name, spec in self.external_apis.items()} + self.route_impls = initialize_route_impls(self.impls, external_api_map) try: _, _, route_path, webmethod = find_matching_route( diff --git 
a/llama_stack/providers/inline/telemetry/meta_reference/telemetry.py b/llama_stack/providers/inline/telemetry/meta_reference/telemetry.py index 4d30cbba3..596b93551 100644 --- a/llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +++ b/llama_stack/providers/inline/telemetry/meta_reference/telemetry.py @@ -5,9 +5,11 @@ # the root directory of this source tree. import datetime +import os import threading -from typing import Any +from typing import Any, cast +from fastapi import FastAPI from opentelemetry import metrics, trace from opentelemetry.exporter.otlp.proto.http.metric_exporter import OTLPMetricExporter from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter @@ -16,8 +18,9 @@ from opentelemetry.sdk.metrics.export import PeriodicExportingMetricReader from opentelemetry.sdk.resources import Resource from opentelemetry.sdk.trace import TracerProvider from opentelemetry.sdk.trace.export import BatchSpanProcessor -from opentelemetry.semconv.resource import ResourceAttributes +from opentelemetry.semconv.attributes import service_attributes from opentelemetry.trace.propagation.tracecontext import TraceContextTextMapPropagator +from opentelemetry.util.types import Attributes from llama_stack.apis.telemetry import ( Event, @@ -38,10 +41,13 @@ from llama_stack.apis.telemetry import ( UnstructuredLogEvent, ) from llama_stack.core.datatypes import Api +from llama_stack.core.external import ExternalApiSpec +from llama_stack.core.server.tracing import TelemetryProvider from llama_stack.log import get_logger from llama_stack.providers.inline.telemetry.meta_reference.console_span_processor import ( ConsoleSpanProcessor, ) +from llama_stack.providers.inline.telemetry.meta_reference.middleware import TracingMiddleware from llama_stack.providers.inline.telemetry.meta_reference.sqlite_span_processor import ( SQLiteSpanProcessor, ) @@ -68,15 +74,15 @@ def is_tracing_enabled(tracer): return span.is_recording() -class 
TelemetryAdapter(TelemetryDatasetMixin, Telemetry): +class TelemetryAdapter(TelemetryDatasetMixin, Telemetry, TelemetryProvider): def __init__(self, config: TelemetryConfig, deps: dict[Api, Any]) -> None: self.config = config self.datasetio_api = deps.get(Api.datasetio) self.meter = None resource = Resource.create( - { - ResourceAttributes.SERVICE_NAME: self.config.service_name, + attributes={ + service_attributes.SERVICE_NAME: self.config.service_name, } ) @@ -93,28 +99,36 @@ class TelemetryAdapter(TelemetryDatasetMixin, Telemetry): # Use single OTLP endpoint for all telemetry signals if TelemetrySink.OTEL_TRACE in self.config.sinks or TelemetrySink.OTEL_METRIC in self.config.sinks: - if self.config.otel_exporter_otlp_endpoint is None: - raise ValueError( - "otel_exporter_otlp_endpoint is required when OTEL_TRACE or OTEL_METRIC is enabled" - ) - # Let OpenTelemetry SDK handle endpoint construction automatically # The SDK will read OTEL_EXPORTER_OTLP_ENDPOINT and construct appropriate URLs # https://opentelemetry.io/docs/languages/sdk-configuration/otlp-exporter if TelemetrySink.OTEL_TRACE in self.config.sinks: + if not os.environ.get("OTEL_EXPORTER_OTLP_TRACES_ENDPOINT") and not os.environ.get( + "OTEL_EXPORTER_OTLP_ENDPOINT" + ): + logger.warning( + "OTEL_EXPORTER_OTLP_TRACES_ENDPOINT or OTEL_EXPORTER_OTLP_ENDPOINT is not set. Traces will not be exported." + ) span_exporter = OTLPSpanExporter() span_processor = BatchSpanProcessor(span_exporter) - trace.get_tracer_provider().add_span_processor(span_processor) + provider.add_span_processor(span_processor) if TelemetrySink.OTEL_METRIC in self.config.sinks: - metric_reader = PeriodicExportingMetricReader(OTLPMetricExporter()) + if not os.environ.get("OTEL_EXPORTER_OTLP_METRICS_ENDPOINT") and not os.environ.get( + "OTEL_EXPORTER_OTLP_ENDPOINT" + ): + logger.warning( + "OTEL_EXPORTER_OTLP_METRICS_ENDPOINT or OTEL_EXPORTER_OTLP_ENDPOINT is not set. Metrics will not be exported." 
+ ) + metric_exporter = OTLPMetricExporter() + metric_reader = PeriodicExportingMetricReader(metric_exporter) metric_provider = MeterProvider(resource=resource, metric_readers=[metric_reader]) metrics.set_meter_provider(metric_provider) if TelemetrySink.SQLITE in self.config.sinks: - trace.get_tracer_provider().add_span_processor(SQLiteSpanProcessor(self.config.sqlite_db_path)) + provider.add_span_processor(SQLiteSpanProcessor(self.config.sqlite_db_path)) if TelemetrySink.CONSOLE in self.config.sinks: - trace.get_tracer_provider().add_span_processor(ConsoleSpanProcessor(print_attributes=True)) + provider.add_span_processor(ConsoleSpanProcessor(print_attributes=True)) if TelemetrySink.OTEL_METRIC in self.config.sinks: self.meter = metrics.get_meter(__name__) @@ -127,7 +141,8 @@ class TelemetryAdapter(TelemetryDatasetMixin, Telemetry): pass async def shutdown(self) -> None: - trace.get_tracer_provider().force_flush() + if isinstance(_TRACER_PROVIDER, TracerProvider): + _TRACER_PROVIDER.force_flush() async def log_event(self, event: Event, ttl_seconds: int = 604800) -> None: if isinstance(event, UnstructuredLogEvent): @@ -252,12 +267,13 @@ class TelemetryAdapter(TelemetryDatasetMixin, Telemetry): # Log to OpenTelemetry meter if available if self.meter is None: return + normalized_attributes = self._normalize_attributes(event.attributes) if isinstance(event.value, int): counter = self._get_or_create_counter(event.metric, event.unit) - counter.add(event.value, attributes=event.attributes) + counter.add(event.value, attributes=normalized_attributes) elif isinstance(event.value, float): up_down_counter = self._get_or_create_up_down_counter(event.metric, event.unit) - up_down_counter.add(event.value, attributes=event.attributes) + up_down_counter.add(event.value, attributes=normalized_attributes) def _get_or_create_up_down_counter(self, name: str, unit: str) -> metrics.UpDownCounter: assert self.meter is not None @@ -273,18 +289,17 @@ class 
TelemetryAdapter(TelemetryDatasetMixin, Telemetry): with self._lock: span_id = int(event.span_id, 16) tracer = trace.get_tracer(__name__) - if event.attributes is None: - event.attributes = {} - event.attributes["__ttl__"] = ttl_seconds + event_attributes = dict(event.attributes or {}) + event_attributes["__ttl__"] = ttl_seconds # Extract these W3C trace context attributes so they are not written to # underlying storage, as we just need them to propagate the trace context. - traceparent = event.attributes.pop("traceparent", None) - tracestate = event.attributes.pop("tracestate", None) + traceparent = event_attributes.pop("traceparent", None) + tracestate = event_attributes.pop("tracestate", None) if traceparent: # If we have a traceparent header value, we're not the root span. for root_attribute in ROOT_SPAN_MARKERS: - event.attributes.pop(root_attribute, None) + event_attributes.pop(root_attribute, None) if isinstance(event.payload, SpanStartPayload): # Check if span already exists to prevent duplicates @@ -295,7 +310,8 @@ class TelemetryAdapter(TelemetryDatasetMixin, Telemetry): if event.payload.parent_span_id: parent_span_id = int(event.payload.parent_span_id, 16) parent_span = _GLOBAL_STORAGE["active_spans"].get(parent_span_id) - context = trace.set_span_in_context(parent_span) + if parent_span: + context = trace.set_span_in_context(parent_span) elif traceparent: carrier = { "traceparent": traceparent, @@ -306,15 +322,15 @@ class TelemetryAdapter(TelemetryDatasetMixin, Telemetry): span = tracer.start_span( name=event.payload.name, context=context, - attributes=event.attributes or {}, + attributes=self._normalize_attributes(event_attributes), ) _GLOBAL_STORAGE["active_spans"][span_id] = span elif isinstance(event.payload, SpanEndPayload): span = _GLOBAL_STORAGE["active_spans"].get(span_id) if span: - if event.attributes: - span.set_attributes(event.attributes) + if event_attributes: + span.set_attributes(self._normalize_attributes(event_attributes)) status = ( 
trace.Status(status_code=trace.StatusCode.OK) @@ -362,3 +378,15 @@ class TelemetryAdapter(TelemetryDatasetMixin, Telemetry): max_depth=max_depth, ) ) + + def fastapi_middleware( + self, + app: FastAPI, + impls: dict[Api, Any], + external_apis: dict[str, ExternalApiSpec], + ): + TracingMiddleware(app, impls, external_apis) + + @staticmethod + def _normalize_attributes(attributes: dict[str, Any] | None) -> Attributes: + return cast(Attributes, dict(attributes) if attributes else {}) diff --git a/llama_stack/providers/inline/telemetry/otel/README.md b/llama_stack/providers/inline/telemetry/otel/README.md new file mode 100644 index 000000000..73089dd04 --- /dev/null +++ b/llama_stack/providers/inline/telemetry/otel/README.md @@ -0,0 +1,32 @@ +# Open Telemetry Native Instrumentation + +This instrumentation package is simple, and follows expected open telemetry standards. It injects middleware for distributed tracing into all ingress and egress points into the application, and can be tuned and configured with OTEL environment variables. 
+ +## Set Up + +First, bootstrap and install all necessary libraries for open telemetry: + +``` +uv run opentelemetry-bootstrap -a requirements | uv pip install --requirement - +``` + +Make sure you export required environment variables for open telemetry: +``` +export OTEL_EXPORTER_OTLP_PROTOCOL=http/protobuf +export OTEL_EXPORTER_OTLP_ENDPOINT="http://localhost:4318" +``` + +If you want certain endpoints to be ignored from the FastAPI telemetry, set the following environment variable: + +``` +export OTEL_PYTHON_FASTAPI_EXCLUDED_URLS="client/.*/info,healthcheck" +``` + +Finally, run Llama Stack with automatic code injection: + +``` +uv run opentelemetry-instrument llama stack run --config myconfig.yaml +``` + +#### Open Telemetry Configuration Environment Variables +Environment Variables: https://opentelemetry.io/docs/specs/otel/configuration/sdk-environment-variables/ diff --git a/llama_stack/providers/inline/telemetry/otel/__init__.py b/llama_stack/providers/inline/telemetry/otel/__init__.py new file mode 100644 index 000000000..2370b0752 --- /dev/null +++ b/llama_stack/providers/inline/telemetry/otel/__init__.py @@ -0,0 +1,23 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from .config import OTelTelemetryConfig + +__all__ = ["OTelTelemetryConfig"] + + +async def get_provider_impl(config: OTelTelemetryConfig, deps): + """ + Get the OTel telemetry provider implementation. + + This function is called by the Llama Stack registry to instantiate + the provider. 
+ """ + from .otel import OTelTelemetryProvider + + # The provider is synchronously initialized via Pydantic model_post_init + # No async initialization needed + return OTelTelemetryProvider(config=config) diff --git a/llama_stack/providers/inline/telemetry/otel/config.py b/llama_stack/providers/inline/telemetry/otel/config.py new file mode 100644 index 000000000..709944cd4 --- /dev/null +++ b/llama_stack/providers/inline/telemetry/otel/config.py @@ -0,0 +1,50 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from typing import Any, Literal + +from pydantic import BaseModel, Field + +type BatchSpanProcessor = Literal["batch"] +type SimpleSpanProcessor = Literal["simple"] + + +class OTelTelemetryConfig(BaseModel): + """ + The configuration for the OpenTelemetry telemetry provider. + Most configuration is set using environment variables. + See https://opentelemetry.io/docs/specs/otel/configuration/sdk-configuration-variables/ for more information. + """ + + service_name: str = Field( + description="""The name of the service to be monitored. + Is overridden by the OTEL_SERVICE_NAME or OTEL_RESOURCE_ATTRIBUTES environment variables.""", + ) + service_version: str | None = Field( + default=None, + description="""The version of the service to be monitored. + Is overriden by the OTEL_RESOURCE_ATTRIBUTES environment variable.""", + ) + deployment_environment: str | None = Field( + default=None, + description="""The name of the environment of the service to be monitored. + Is overriden by the OTEL_RESOURCE_ATTRIBUTES environment variable.""", + ) + span_processor: BatchSpanProcessor | SimpleSpanProcessor | None = Field( + description="""The span processor to use. 
+ Is overriden by the OTEL_SPAN_PROCESSOR environment variable.""", + default="batch", + ) + + @classmethod + def sample_run_config(cls, __distro_dir__: str = "") -> dict[str, Any]: + """Sample configuration for use in distributions.""" + return { + "service_name": "${env.OTEL_SERVICE_NAME:=llama-stack}", + "service_version": "${env.OTEL_SERVICE_VERSION:=}", + "deployment_environment": "${env.OTEL_DEPLOYMENT_ENVIRONMENT:=}", + "span_processor": "${env.OTEL_SPAN_PROCESSOR:=batch}", + } diff --git a/llama_stack/providers/inline/telemetry/otel/otel.py b/llama_stack/providers/inline/telemetry/otel/otel.py new file mode 100644 index 000000000..ad3dd77cb --- /dev/null +++ b/llama_stack/providers/inline/telemetry/otel/otel.py @@ -0,0 +1,153 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +import os + +from fastapi import FastAPI +from opentelemetry import metrics, trace +from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter +from opentelemetry.instrumentation.fastapi import FastAPIInstrumentor +from opentelemetry.instrumentation.sqlalchemy import SQLAlchemyInstrumentor +from opentelemetry.sdk.metrics import MeterProvider +from opentelemetry.sdk.resources import Attributes, Resource +from opentelemetry.sdk.trace import TracerProvider +from opentelemetry.sdk.trace.export import BatchSpanProcessor, SimpleSpanProcessor +from sqlalchemy import Engine + +from llama_stack.core.telemetry.telemetry import TelemetryProvider +from llama_stack.log import get_logger + +from .config import OTelTelemetryConfig + +logger = get_logger(name=__name__, category="telemetry::otel") + + +class OTelTelemetryProvider(TelemetryProvider): + """ + A simple Open Telemetry native telemetry provider. 
+ """ + + config: OTelTelemetryConfig + + def model_post_init(self, __context): + """Initialize provider after Pydantic validation.""" + + attributes: Attributes = { + key: value + for key, value in { + "service.name": self.config.service_name, + "service.version": self.config.service_version, + "deployment.environment": self.config.deployment_environment, + }.items() + if value is not None + } + + resource = Resource.create(attributes) + + # Configure the tracer provider + tracer_provider = TracerProvider(resource=resource) + trace.set_tracer_provider(tracer_provider) + + otlp_span_exporter = OTLPSpanExporter() + + # Configure the span processor + # Enable batching of spans to reduce the number of requests to the collector + if self.config.span_processor == "batch": + tracer_provider.add_span_processor(BatchSpanProcessor(otlp_span_exporter)) + elif self.config.span_processor == "simple": + tracer_provider.add_span_processor(SimpleSpanProcessor(otlp_span_exporter)) + + meter_provider = MeterProvider(resource=resource) + metrics.set_meter_provider(meter_provider) + + # Do not fail the application, but warn the user if the endpoints are not set properly. + if not os.environ.get("OTEL_EXPORTER_OTLP_ENDPOINT"): + if not os.environ.get("OTEL_EXPORTER_OTLP_TRACES_ENDPOINT"): + logger.warning( + "OTEL_EXPORTER_OTLP_ENDPOINT or OTEL_EXPORTER_OTLP_TRACES_ENDPOINT is not set. Traces will not be exported." + ) + if not os.environ.get("OTEL_EXPORTER_OTLP_METRICS_ENDPOINT"): + logger.warning( + "OTEL_EXPORTER_OTLP_ENDPOINT or OTEL_EXPORTER_OTLP_METRICS_ENDPOINT is not set. Metrics will not be exported." + ) + + def fastapi_middleware(self, app: FastAPI): + """ + Instrument FastAPI with OTel for automatic tracing and metrics. 
+ + Captures: + - Distributed traces for all HTTP requests (via FastAPIInstrumentor) + - HTTP metrics following semantic conventions (custom middleware) + """ + # Enable automatic tracing + FastAPIInstrumentor.instrument_app(app) + + # Add custom middleware for HTTP metrics + meter = metrics.get_meter("llama_stack.http.server") + + # Create HTTP metrics following semantic conventions + # https://opentelemetry.io/docs/specs/semconv/http/http-metrics/ + request_duration = meter.create_histogram( + "http.server.request.duration", unit="ms", description="Duration of HTTP server requests" + ) + + active_requests = meter.create_up_down_counter( + "http.server.active_requests", unit="requests", description="Number of active HTTP server requests" + ) + + request_count = meter.create_counter( + "http.server.request.count", unit="requests", description="Total number of HTTP server requests" + ) + + # Add middleware to record metrics + @app.middleware("http") # type: ignore[misc] + async def http_metrics_middleware(request, call_next): + import time + + # Record active request + active_requests.add( + 1, + { + "http.method": request.method, + "http.route": request.url.path, + }, + ) + + start_time = time.time() + status_code = 500 # Default to error + + try: + response = await call_next(request) + status_code = response.status_code + except Exception: + raise + finally: + # Record metrics + duration_ms = (time.time() - start_time) * 1000 + + attributes = { + "http.method": request.method, + "http.route": request.url.path, + "http.status_code": status_code, + } + + request_duration.record(duration_ms, attributes) + request_count.add(1, attributes) + active_requests.add( + -1, + { + "http.method": request.method, + "http.route": request.url.path, + }, + ) + + return response + + def sqlalchemy_instrumentation(self, engine: Engine | None = None): + kwargs = {} + if engine: + kwargs["engine"] = engine + SQLAlchemyInstrumentor().instrument(**kwargs) diff --git 
a/llama_stack/providers/registry/telemetry.py b/llama_stack/providers/registry/telemetry.py index b50b422c1..50f73ce5f 100644 --- a/llama_stack/providers/registry/telemetry.py +++ b/llama_stack/providers/registry/telemetry.py @@ -26,4 +26,16 @@ def available_providers() -> list[ProviderSpec]: config_class="llama_stack.providers.inline.telemetry.meta_reference.config.TelemetryConfig", description="Meta's reference implementation of telemetry and observability using OpenTelemetry.", ), + InlineProviderSpec( + api=Api.telemetry, + provider_type="inline::otel", + pip_packages=[ + "opentelemetry-sdk", + "opentelemetry-exporter-otlp-proto-http", + "opentelemetry-instrumentation-fastapi", + ], + module="llama_stack.providers.inline.telemetry.otel", + config_class="llama_stack.providers.inline.telemetry.otel.config.OTelTelemetryConfig", + description="Native OpenTelemetry provider with full access to OTel Tracer and Meter APIs for advanced instrumentation.", + ), ] diff --git a/llama_stack/providers/utils/telemetry/tracing.py b/llama_stack/providers/utils/telemetry/tracing.py index 62cceb13e..8f47c6b44 100644 --- a/llama_stack/providers/utils/telemetry/tracing.py +++ b/llama_stack/providers/utils/telemetry/tracing.py @@ -3,6 +3,8 @@ # # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. +# +# Deprecated. Use the Open Telemetry SDK instead. 
import asyncio import contextvars diff --git a/pyproject.toml b/pyproject.toml index 52eb8f7c8..b842092cc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,8 +25,8 @@ classifiers = [ ] dependencies = [ "aiohttp", - "fastapi>=0.115.0,<1.0", # server - "fire", # for MCP in LLS client + "fastapi>=0.115.0,<1.0", # server + "fire", # for MCP in LLS client "httpx", "huggingface-hub>=0.34.0,<1.0", "jinja2>=3.1.6", @@ -43,13 +43,16 @@ dependencies = [ "tiktoken", "pillow", "h11>=0.16.0", - "python-multipart>=0.0.20", # For fastapi Form - "uvicorn>=0.34.0", # server - "opentelemetry-sdk>=1.30.0", # server + "python-multipart>=0.0.20", # For fastapi Form + "uvicorn>=0.34.0", # server + "opentelemetry-sdk>=1.30.0", # server "opentelemetry-exporter-otlp-proto-http>=1.30.0", # server - "aiosqlite>=0.21.0", # server - for metadata store - "asyncpg", # for metadata store + "aiosqlite>=0.21.0", # server - for metadata store + "asyncpg", # for metadata store "sqlalchemy[asyncio]>=2.0.41", # server - for conversations + "opentelemetry-semantic-conventions>=0.57b0", + "opentelemetry-instrumentation-fastapi>=0.57b0", + "opentelemetry-instrumentation-sqlalchemy>=0.57b0", ] [project.optional-dependencies] diff --git a/scripts/setup_telemetry.sh b/scripts/setup_telemetry.sh index cf235ab9d..7577ff44e 100755 --- a/scripts/setup_telemetry.sh +++ b/scripts/setup_telemetry.sh @@ -53,7 +53,7 @@ $CONTAINER_RUNTIME run -d --name otel-collector \ -p 4317:4317 \ -p 9464:9464 \ -p 13133:13133 \ - -v $(pwd)/otel-collector-config.yaml:/etc/otel-collector-config.yaml:Z \ + -v $(pwd)/otel-collector-config.yaml:/etc/otel-collector-config.yaml:${OTEL_COLLECTOR_CONFIG_PATH}:Z \ docker.io/otel/opentelemetry-collector-contrib:latest \ --config /etc/otel-collector-config.yaml @@ -62,7 +62,7 @@ echo "📈 Starting Prometheus..." 
$CONTAINER_RUNTIME run -d --name prometheus \ --network llama-telemetry \ -p 9090:9090 \ - -v $(pwd)/prometheus.yml:/etc/prometheus/prometheus.yml:Z \ + -v $(pwd)/prometheus.yml:/etc/prometheus/prometheus.yml:${PROMETHEUS_CONFIG_PATH}:Z \ docker.io/prom/prometheus:latest \ --config.file=/etc/prometheus/prometheus.yml \ --storage.tsdb.path=/prometheus \ diff --git a/tests/integration/telemetry/__init__.py b/tests/integration/telemetry/__init__.py new file mode 100644 index 000000000..756f351d8 --- /dev/null +++ b/tests/integration/telemetry/__init__.py @@ -0,0 +1,5 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. diff --git a/tests/integration/telemetry/mocking/README.md b/tests/integration/telemetry/mocking/README.md new file mode 100644 index 000000000..3fedea75d --- /dev/null +++ b/tests/integration/telemetry/mocking/README.md @@ -0,0 +1,149 @@ +# Mock Server Infrastructure + +This directory contains mock servers for E2E telemetry testing. + +## Structure + +``` +mocking/ +├── README.md ← You are here +├── __init__.py ← Module exports +├── mock_base.py ← Pydantic base class for all mocks +├── servers.py ← Mock server implementations +└── harness.py ← Async startup harness +``` + +## Files + +### `mock_base.py` - Base Class +Pydantic base model that all mock servers must inherit from. + +**Contract:** +```python +class MockServerBase(BaseModel): + async def await_start(self): + # Start server and wait until ready + ... + + def stop(self): + # Stop server and cleanup + ... 
+``` + +### `servers.py` - Mock Implementations +Contains: +- **MockOTLPCollector** - Receives OTLP telemetry (port 4318) +- **MockVLLMServer** - Simulates vLLM inference API (port 8000) + +### `harness.py` - Startup Orchestration +Provides: +- **MockServerConfig** - Pydantic config for server registration +- **start_mock_servers_async()** - Starts servers in parallel +- **stop_mock_servers()** - Stops all servers + +## Creating a New Mock Server + +### Step 1: Implement the Server + +Add to `servers.py`: +```python +class MockRedisServer(MockServerBase): + """Mock Redis server.""" + + port: int = Field(default=6379) + + # Non-Pydantic fields + server: Any = Field(default=None, exclude=True) + + def model_post_init(self, __context): + self.server = None + + async def await_start(self): + """Start Redis mock and wait until ready.""" + # Start your server + self.server = create_redis_server(self.port) + self.server.start() + + # Wait for port to be listening + for _ in range(10): + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + if sock.connect_ex(("localhost", self.port)) == 0: + sock.close() + return # Ready! + await asyncio.sleep(0.1) + + def stop(self): + if self.server: + self.server.stop() +``` + +### Step 2: Register in Test + +In `test_otel_e2e.py`, add to MOCK_SERVERS list: +```python +MOCK_SERVERS = [ + # ... existing servers ... + MockServerConfig( + name="Mock Redis", + server_class=MockRedisServer, + init_kwargs={"port": 6379}, + ), +] +``` + +### Step 3: Done! 
+ +The harness automatically: +- Creates the server instance +- Calls `await_start()` in parallel with other servers +- Returns when all are ready +- Stops all servers on teardown + +## Benefits + +✅ **Parallel Startup** - All servers start simultaneously +✅ **Type-Safe** - Pydantic validation +✅ **Simple** - Just implement 2 methods +✅ **Fast** - No HTTP polling, direct port checking +✅ **Clean** - Async/await pattern + +## Usage in Tests + +```python +@pytest.fixture(scope="module") +def mock_servers(): + servers = asyncio.run(start_mock_servers_async(MOCK_SERVERS)) + yield servers + stop_mock_servers(servers) + + +# Access specific servers +@pytest.fixture(scope="module") +def mock_redis(mock_servers): + return mock_servers["Mock Redis"] +``` + +## Key Design Decisions + +### Why Pydantic? +- Type safety for server configuration +- Built-in validation +- Clear interface contract + +### Why `await_start()` instead of HTTP `/ready`? +- Faster (no HTTP round-trip) +- Simpler (direct port checking) +- More reliable (internal state, not external endpoint) + +### Why separate harness? +- Reusable across different test files +- Easy to add new servers +- Centralized error handling + +## Examples + +See `test_otel_e2e.py` for real-world usage: +- Line ~200: MOCK_SERVERS configuration +- Line ~230: Convenience fixtures +- Line ~240: Using servers in tests + diff --git a/tests/integration/telemetry/mocking/__init__.py b/tests/integration/telemetry/mocking/__init__.py new file mode 100644 index 000000000..3a934a002 --- /dev/null +++ b/tests/integration/telemetry/mocking/__init__.py @@ -0,0 +1,28 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +""" +Mock server infrastructure for telemetry E2E testing. 
+ +This module provides: +- MockServerBase: Pydantic base class for all mock servers +- MockOTLPCollector: Mock OTLP telemetry collector +- MockVLLMServer: Mock vLLM inference server +- Mock server harness for parallel async startup +""" + +from .harness import MockServerConfig, start_mock_servers_async, stop_mock_servers +from .mock_base import MockServerBase +from .servers import MockOTLPCollector, MockVLLMServer + +__all__ = [ + "MockServerBase", + "MockOTLPCollector", + "MockVLLMServer", + "MockServerConfig", + "start_mock_servers_async", + "stop_mock_servers", +] diff --git a/tests/integration/telemetry/mocking/harness.py b/tests/integration/telemetry/mocking/harness.py new file mode 100644 index 000000000..d877abbf9 --- /dev/null +++ b/tests/integration/telemetry/mocking/harness.py @@ -0,0 +1,106 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +""" +Mock server startup harness for parallel initialization. + +HOW TO ADD A NEW MOCK SERVER: +1. Import your mock server class +2. Add it to MOCK_SERVERS list with configuration +3. Done! It will start in parallel with others. +""" + +import asyncio +from typing import Any + +from pydantic import BaseModel, Field + +from .mock_base import MockServerBase + + +class MockServerConfig(BaseModel): + """ + Configuration for a mock server to start. + + **TO ADD A NEW MOCK SERVER:** + Just create a MockServerConfig instance with your server class. 
+
+    Example:
+        MockServerConfig(
+            name="Mock MyService",
+            server_class=MockMyService,
+            init_kwargs={"port": 9000, "config_param": "value"},
+        )
+    """
+
+    model_config = {"arbitrary_types_allowed": True}
+
+    name: str = Field(description="Display name for logging")
+    server_class: type = Field(description="Mock server class (must inherit from MockServerBase)")
+    init_kwargs: dict[str, Any] = Field(default_factory=dict, description="Kwargs to pass to server constructor")
+
+
+async def start_mock_servers_async(mock_servers_config: list[MockServerConfig]) -> dict[str, MockServerBase]:
+    """
+    Start all mock servers in parallel and wait for them to be ready.
+
+    **HOW IT WORKS:**
+    1. Creates all server instances
+    2. Calls await_start() on all servers in parallel
+    3. Returns when all are ready (raises RuntimeError if any server fails to start)
+
+    **SIMPLE TO USE:**
+        servers = await start_mock_servers_async([config1, config2, ...])
+
+    Args:
+        mock_servers_config: List of mock server configurations
+
+    Returns:
+        Dict mapping server name to server instance
+    """
+    servers = {}
+    start_tasks = []
+
+    # Create all servers and prepare start tasks
+    for config in mock_servers_config:
+        server = config.server_class(**config.init_kwargs)
+        servers[config.name] = server
+        start_tasks.append(server.await_start())
+
+    # Start all servers in parallel
+    try:
+        await asyncio.gather(*start_tasks)
+
+        # Print readiness confirmation
+        for name in servers:
+            print(f"[INFO] {name} ready")
+
+    except Exception as e:
+        # If any server fails, stop all servers (best effort) before reporting
+        for server in servers.values():
+            try:
+                server.stop()
+            except Exception:
+                pass  # cleanup errors must not mask the start-up failure
+        raise RuntimeError(f"Failed to start mock servers: {e}") from e
+
+    return servers
+
+
+def stop_mock_servers(servers: dict[str, Any]):
+    """
+    Stop all mock servers.
+ + Args: + servers: Dict of server instances from start_mock_servers_async() + """ + for name, server in servers.items(): + try: + if hasattr(server, "get_request_count"): + print(f"\n[INFO] {name} received {server.get_request_count()} requests") + server.stop() + except Exception as e: + print(f"[WARN] Error stopping {name}: {e}") diff --git a/tests/integration/telemetry/mocking/mock_base.py b/tests/integration/telemetry/mocking/mock_base.py new file mode 100644 index 000000000..5eebcab7a --- /dev/null +++ b/tests/integration/telemetry/mocking/mock_base.py @@ -0,0 +1,68 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +""" +Base class for mock servers with async startup support. + +All mock servers should inherit from MockServerBase and implement await_start(). +""" + +from abc import abstractmethod + +from pydantic import BaseModel + + +class MockServerBase(BaseModel): + """ + Pydantic base model for mock servers. + + **TO CREATE A NEW MOCK SERVER:** + 1. Inherit from this class + 2. Implement async def await_start(self) + 3. Implement def stop(self) + 4. Done! + + Example: + class MyMockServer(MockServerBase): + port: int = 8080 + + async def await_start(self): + # Start your server + self.server = create_server() + self.server.start() + # Wait until ready (can check internal state, no HTTP needed) + while not self.server.is_listening(): + await asyncio.sleep(0.1) + + def stop(self): + if self.server: + self.server.stop() + """ + + model_config = {"arbitrary_types_allowed": True} + + @abstractmethod + async def await_start(self): + """ + Start the server and wait until it's ready. + + This method should: + 1. Start the server (synchronous or async) + 2. Wait until the server is fully ready to accept requests + 3. 
Return when ready + + Subclasses can check internal state directly - no HTTP polling needed! + """ + ... + + @abstractmethod + def stop(self): + """ + Stop the server and clean up resources. + + This method should gracefully shut down the server. + """ + ... diff --git a/tests/integration/telemetry/mocking/servers.py b/tests/integration/telemetry/mocking/servers.py new file mode 100644 index 000000000..fd63f9baf --- /dev/null +++ b/tests/integration/telemetry/mocking/servers.py @@ -0,0 +1,397 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +""" +Mock servers for OpenTelemetry E2E testing. + +This module provides mock servers for testing telemetry: +- MockOTLPCollector: Receives and stores OTLP telemetry exports +- MockVLLMServer: Simulates vLLM inference API with valid OpenAI responses + +These mocks allow E2E testing without external dependencies. +""" + +import asyncio +import http.server +import json +import socket +import threading +import time +from typing import Any + +from pydantic import Field + +from .mock_base import MockServerBase + + +class MockOTLPCollector(MockServerBase): + """ + Mock OTLP collector HTTP server. + + Receives real OTLP exports from Llama Stack and stores them for verification. + Runs on localhost:4318 (standard OTLP HTTP port). + + Usage: + collector = MockOTLPCollector() + await collector.await_start() + # ... run tests ... 
+ print(f"Received {collector.get_trace_count()} traces") + collector.stop() + """ + + port: int = Field(default=4318, description="Port to run collector on") + + # Non-Pydantic fields (set after initialization) + traces: list[dict] = Field(default_factory=list, exclude=True) + metrics: list[dict] = Field(default_factory=list, exclude=True) + server: Any = Field(default=None, exclude=True) + server_thread: Any = Field(default=None, exclude=True) + + def model_post_init(self, __context): + """Initialize after Pydantic validation.""" + self.traces = [] + self.metrics = [] + self.server = None + self.server_thread = None + + def _create_handler_class(self): + """Create the HTTP handler class for this collector instance.""" + collector_self = self + + class OTLPHandler(http.server.BaseHTTPRequestHandler): + """HTTP request handler for OTLP requests.""" + + def log_message(self, format, *args): + """Suppress HTTP server logs.""" + pass + + def do_GET(self): # noqa: N802 + """Handle GET requests.""" + # No readiness endpoint needed - using await_start() instead + self.send_response(404) + self.end_headers() + + def do_POST(self): # noqa: N802 + """Handle OTLP POST requests.""" + content_length = int(self.headers.get("Content-Length", 0)) + body = self.rfile.read(content_length) if content_length > 0 else b"" + + # Store the export request + if "/v1/traces" in self.path: + collector_self.traces.append( + { + "body": body, + "timestamp": time.time(), + } + ) + elif "/v1/metrics" in self.path: + collector_self.metrics.append( + { + "body": body, + "timestamp": time.time(), + } + ) + + # Always return success (200 OK) + self.send_response(200) + self.send_header("Content-Type", "application/json") + self.end_headers() + self.wfile.write(b"{}") + + return OTLPHandler + + async def await_start(self): + """ + Start the OTLP collector and wait until ready. + + This method is async and can be awaited to ensure the server is ready. 
+        """
+        # Create handler and start the HTTP server
+        handler_class = self._create_handler_class()
+        self.server = http.server.HTTPServer(("localhost", self.port), handler_class)
+        self.server_thread = threading.Thread(target=self.server.serve_forever, daemon=True)
+        self.server_thread.start()
+
+        # Wait for server to be listening on the port
+        for _ in range(10):
+            try:
+                # The context manager closes the probe socket even if connect_ex raises
+                with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
+                    result = sock.connect_ex(("localhost", self.port))
+                if result == 0:
+                    # Port is listening
+                    return
+            except OSError:
+                pass  # transient socket error; retry after the sleep below
+            await asyncio.sleep(0.1)
+
+        raise RuntimeError(f"OTLP collector failed to start on port {self.port}")
+
+    def stop(self):
+        """Stop the OTLP collector server."""
+        if self.server:
+            self.server.shutdown()
+            self.server.server_close()
+
+    def clear(self):
+        """Clear all captured telemetry data."""
+        self.traces = []
+        self.metrics = []
+
+    def get_trace_count(self) -> int:
+        """Get number of trace export requests received."""
+        return len(self.traces)
+
+    def get_metric_count(self) -> int:
+        """Get number of metric export requests received."""
+        return len(self.metrics)
+
+    def get_all_traces(self) -> list[dict]:
+        """Get all captured trace exports."""
+        return self.traces
+
+    def get_all_metrics(self) -> list[dict]:
+        """Get all captured metric exports."""
+        return self.metrics
+
+
+class MockVLLMServer(MockServerBase):
+    """
+    Mock vLLM inference server with OpenAI-compatible API.
+
+    Returns valid OpenAI Python client response objects for:
+    - Chat completions (/v1/chat/completions)
+    - Text completions (/v1/completions)
+    - Model listing (/v1/models)
+
+    Runs on localhost:8000 (standard vLLM port).
+
+    Usage:
+        server = MockVLLMServer(models=["my-model"])
+        await server.await_start()
+        # ... make inference calls ...
+ print(f"Handled {server.get_request_count()} requests") + server.stop() + """ + + port: int = Field(default=8000, description="Port to run server on") + models: list[str] = Field( + default_factory=lambda: ["meta-llama/Llama-3.2-1B-Instruct"], description="List of model IDs to serve" + ) + + # Non-Pydantic fields + requests_received: list[dict] = Field(default_factory=list, exclude=True) + server: Any = Field(default=None, exclude=True) + server_thread: Any = Field(default=None, exclude=True) + + def model_post_init(self, __context): + """Initialize after Pydantic validation.""" + self.requests_received = [] + self.server = None + self.server_thread = None + + def _create_handler_class(self): + """Create the HTTP handler class for this vLLM instance.""" + server_self = self + + class VLLMHandler(http.server.BaseHTTPRequestHandler): + """HTTP request handler for vLLM API.""" + + def log_message(self, format, *args): + """Suppress HTTP server logs.""" + pass + + def log_request(self, code="-", size="-"): + """Log incoming requests for debugging.""" + print(f"[DEBUG] Mock vLLM received: {self.command} {self.path} -> {code}") + + def do_GET(self): # noqa: N802 + """Handle GET requests (models list, health check).""" + # Log GET requests too + server_self.requests_received.append( + { + "path": self.path, + "method": "GET", + "timestamp": time.time(), + } + ) + + if self.path == "/v1/models": + response = self._create_models_list_response() + self._send_json_response(200, response) + + elif self.path == "/health" or self.path == "/v1/health": + self._send_json_response(200, {"status": "healthy"}) + + else: + self.send_response(404) + self.end_headers() + + def do_POST(self): # noqa: N802 + """Handle POST requests (chat/text completions).""" + content_length = int(self.headers.get("Content-Length", 0)) + body = self.rfile.read(content_length) if content_length > 0 else b"{}" + + try: + request_data = json.loads(body) + except Exception: + request_data = {} + + # Log 
the request + server_self.requests_received.append( + { + "path": self.path, + "body": request_data, + "timestamp": time.time(), + } + ) + + # Route to appropriate handler + if "/chat/completions" in self.path: + response = self._create_chat_completion_response(request_data) + self._send_json_response(200, response) + + elif "/completions" in self.path: + response = self._create_text_completion_response(request_data) + self._send_json_response(200, response) + + else: + self._send_json_response(200, {"status": "ok"}) + + # ---------------------------------------------------------------- + # Response Generators + # **TO MODIFY RESPONSES:** Edit these methods + # ---------------------------------------------------------------- + + def _create_models_list_response(self) -> dict: + """Create OpenAI models list response with configured models.""" + return { + "object": "list", + "data": [ + { + "id": model_id, + "object": "model", + "created": int(time.time()), + "owned_by": "meta", + } + for model_id in server_self.models + ], + } + + def _create_chat_completion_response(self, request_data: dict) -> dict: + """ + Create OpenAI ChatCompletion response. + + Returns a valid response matching openai.types.ChatCompletion + """ + return { + "id": "chatcmpl-test123", + "object": "chat.completion", + "created": int(time.time()), + "model": request_data.get("model", "meta-llama/Llama-3.2-1B-Instruct"), + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": "This is a test response from mock vLLM server.", + "tool_calls": None, + }, + "logprobs": None, + "finish_reason": "stop", + } + ], + "usage": { + "prompt_tokens": 25, + "completion_tokens": 15, + "total_tokens": 40, + "completion_tokens_details": None, + }, + "system_fingerprint": None, + "service_tier": None, + } + + def _create_text_completion_response(self, request_data: dict) -> dict: + """ + Create OpenAI Completion response. 
+
+                Returns a valid response matching openai.types.Completion
+                """
+                return {
+                    "id": "cmpl-test123",
+                    "object": "text_completion",
+                    "created": int(time.time()),
+                    "model": request_data.get("model", "meta-llama/Llama-3.2-1B-Instruct"),
+                    "choices": [
+                        {
+                            "text": "This is a test completion.",
+                            "index": 0,
+                            "logprobs": None,
+                            "finish_reason": "stop",
+                        }
+                    ],
+                    "usage": {
+                        "prompt_tokens": 10,
+                        "completion_tokens": 8,
+                        "total_tokens": 18,
+                        "completion_tokens_details": None,
+                    },
+                    "system_fingerprint": None,
+                }
+
+            def _send_json_response(self, status_code: int, data: dict):
+                """Helper to send JSON response."""
+                self.send_response(status_code)
+                self.send_header("Content-Type", "application/json")
+                self.end_headers()
+                self.wfile.write(json.dumps(data).encode())
+
+        return VLLMHandler
+
+    async def await_start(self):
+        """
+        Start the vLLM server and wait until ready.
+
+        This method is async and can be awaited to ensure the server is ready.
+        """
+        # Create handler and start the HTTP server
+        handler_class = self._create_handler_class()
+        self.server = http.server.HTTPServer(("localhost", self.port), handler_class)
+        self.server_thread = threading.Thread(target=self.server.serve_forever, daemon=True)
+        self.server_thread.start()
+
+        # Wait for server to be listening on the port
+        for _ in range(10):
+            try:
+                # The context manager closes the probe socket even if connect_ex raises
+                with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
+                    result = sock.connect_ex(("localhost", self.port))
+                if result == 0:
+                    # Port is listening
+                    return
+            except OSError:
+                pass  # transient socket error; retry after the sleep below
+            await asyncio.sleep(0.1)
+
+        raise RuntimeError(f"vLLM server failed to start on port {self.port}")
+
+    def stop(self):
+        """Stop the vLLM server."""
+        if self.server:
+            self.server.shutdown()
+            self.server.server_close()
+
+    def clear(self):
+        """Clear request history."""
+        self.requests_received = []
+
+    def get_request_count(self) -> int:
+        """Get number of requests received."""
+        return len(self.requests_received)
+
+    def 
get_all_requests(self) -> list[dict]: + """Get all received requests with their bodies.""" + return self.requests_received diff --git a/tests/integration/telemetry/test_otel_e2e.py b/tests/integration/telemetry/test_otel_e2e.py new file mode 100644 index 000000000..3df36db30 --- /dev/null +++ b/tests/integration/telemetry/test_otel_e2e.py @@ -0,0 +1,464 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +""" +End-to-end integration tests for OpenTelemetry with automatic instrumentation. + +HOW THIS WORKS: +1. Starts a mock OTLP collector (HTTP server) to receive telemetry +2. Starts a mock vLLM server to handle inference requests +3. Starts REAL Llama Stack with: opentelemetry-instrument llama stack run +4. Makes REAL API calls to the stack +5. Verifies telemetry was exported to the mock collector + +WHERE TO MAKE CHANGES: +- Add test cases → See TEST_CASES list below (line ~70) +- Add mock servers → See MOCK_SERVERS list in mock_servers fixture (line ~200) +- Modify mock behavior → See mocking/servers.py +- Change stack config → See llama_stack_server fixture (line ~250) +- Add assertions → See TestOTelE2EWithRealServer class (line ~370) + +RUNNING THE TESTS: +- Quick (mock servers only): pytest test_otel_e2e.py::TestMockServers -v +- Full E2E (slow): pytest test_otel_e2e.py::TestOTelE2EWithRealServer -v -m slow +""" + +# ============================================================================ +# IMPORTS +# ============================================================================ + +import os +import socket +import subprocess +import time +from typing import Any + +import pytest +import requests +import yaml +from pydantic import BaseModel, Field + +# Mock servers are in the mocking/ subdirectory +from .mocking import ( + MockOTLPCollector, + MockServerConfig, + MockVLLMServer, + 
start_mock_servers_async, + stop_mock_servers, +) + +# ============================================================================ +# DATA MODELS +# ============================================================================ + + +class TelemetryTestCase(BaseModel): + """ + Pydantic model defining expected telemetry for an API call. + + **TO ADD A NEW TEST CASE:** Add to TEST_CASES list below. + """ + + name: str = Field(description="Unique test case identifier") + http_method: str = Field(description="HTTP method (GET, POST, etc.)") + api_path: str = Field(description="API path (e.g., '/v1/models')") + request_body: dict[str, Any] | None = Field(default=None) + expected_http_status: int = Field(default=200) + expected_trace_exports: int = Field(default=1, description="Minimum number of trace exports expected") + expected_metric_exports: int = Field(default=0, description="Minimum number of metric exports expected") + should_have_error_span: bool = Field(default=False) + + +# ============================================================================ +# TEST CONFIGURATION +# **TO ADD NEW TESTS:** Add TelemetryTestCase instances here +# ============================================================================ + +TEST_CASES = [ + TelemetryTestCase( + name="models_list", + http_method="GET", + api_path="/v1/models", + expected_trace_exports=1, + expected_metric_exports=1, # HTTP metrics from OTel provider middleware + ), + TelemetryTestCase( + name="chat_completion", + http_method="POST", + api_path="/v1/inference/chat_completion", + request_body={ + "model": "meta-llama/Llama-3.2-1B-Instruct", + "messages": [{"role": "user", "content": "Hello!"}], + }, + expected_trace_exports=2, # Stack request + vLLM backend call + expected_metric_exports=1, # HTTP metrics (duration, count, active_requests) + ), +] + + +# ============================================================================ +# TEST INFRASTRUCTURE +# 
============================================================================ + + +class TelemetryTestRunner: + """ + Executes TelemetryTestCase instances against real Llama Stack. + + **HOW IT WORKS:** + 1. Makes real HTTP request to the stack + 2. Waits for telemetry export + 3. Verifies exports were sent to mock collector + """ + + def __init__(self, base_url: str, collector: MockOTLPCollector): + self.base_url = base_url + self.collector = collector + + def run_test_case(self, test_case: TelemetryTestCase, verbose: bool = False) -> bool: + """Execute a single test case and verify telemetry.""" + initial_traces = self.collector.get_trace_count() + initial_metrics = self.collector.get_metric_count() + + if verbose: + print(f"\n--- {test_case.name} ---") + print(f" {test_case.http_method} {test_case.api_path}") + + # Make real HTTP request to Llama Stack + try: + url = f"{self.base_url}{test_case.api_path}" + + if test_case.http_method == "GET": + response = requests.get(url, timeout=5) + elif test_case.http_method == "POST": + response = requests.post(url, json=test_case.request_body or {}, timeout=5) + else: + response = requests.request(test_case.http_method, url, timeout=5) + + if verbose: + print(f" HTTP Response: {response.status_code}") + + status_match = response.status_code == test_case.expected_http_status + + except requests.exceptions.RequestException as e: + if verbose: + print(f" Request failed: {e}") + status_match = False + + # Wait for automatic instrumentation to export telemetry + # Traces export immediately, metrics export every 1 second (configured via env var) + time.sleep(2.0) # Wait for both traces and metrics to export + + # Verify traces were exported to mock collector + new_traces = self.collector.get_trace_count() - initial_traces + traces_exported = new_traces >= test_case.expected_trace_exports + + # Verify metrics were exported (if expected) + new_metrics = self.collector.get_metric_count() - initial_metrics + metrics_exported = 
new_metrics >= test_case.expected_metric_exports + + if verbose: + print( + f" Expected: >={test_case.expected_trace_exports} trace exports, >={test_case.expected_metric_exports} metric exports" + ) + print(f" Actual: {new_traces} trace exports, {new_metrics} metric exports") + result = status_match and traces_exported and metrics_exported + print(f" Result: {'PASS' if result else 'FAIL'}") + + return status_match and traces_exported and metrics_exported + + def run_all_test_cases(self, test_cases: list[TelemetryTestCase], verbose: bool = True) -> dict[str, bool]: + """Run all test cases and return results.""" + results = {} + for test_case in test_cases: + results[test_case.name] = self.run_test_case(test_case, verbose=verbose) + return results + + +# ============================================================================ +# HELPER FUNCTIONS +# ============================================================================ + + +def is_port_available(port: int) -> bool: + """Check if a TCP port is available for binding.""" + try: + with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock: + sock.bind(("localhost", port)) + return True + except OSError: + return False + + +# ============================================================================ +# PYTEST FIXTURES +# ============================================================================ + + +@pytest.fixture(scope="module") +def mock_servers(): + """ + Fixture: Start all mock servers in parallel using async harness. + + **TO ADD A NEW MOCK SERVER:** + Just add a MockServerConfig to the MOCK_SERVERS list below. 
+ """ + import asyncio + + # ======================================================================== + # MOCK SERVER CONFIGURATION + # **TO ADD A NEW MOCK:** Just add a MockServerConfig instance below + # + # Example: + # MockServerConfig( + # name="Mock MyService", + # server_class=MockMyService, # Must inherit from MockServerBase + # init_kwargs={"port": 9000, "param": "value"}, + # ), + # ======================================================================== + mock_servers_config = [ + MockServerConfig( + name="Mock OTLP Collector", + server_class=MockOTLPCollector, + init_kwargs={"port": 4318}, + ), + MockServerConfig( + name="Mock vLLM Server", + server_class=MockVLLMServer, + init_kwargs={ + "port": 8000, + "models": ["meta-llama/Llama-3.2-1B-Instruct"], + }, + ), + # Add more mock servers here - they will start in parallel automatically! + ] + + # Start all servers in parallel + servers = asyncio.run(start_mock_servers_async(mock_servers_config)) + + # Verify vLLM models + models_response = requests.get("http://localhost:8000/v1/models", timeout=1) + models_data = models_response.json() + print(f"[INFO] Mock vLLM serving {len(models_data['data'])} models: {[m['id'] for m in models_data['data']]}") + + yield servers + + # Stop all servers + stop_mock_servers(servers) + + +@pytest.fixture(scope="module") +def mock_otlp_collector(mock_servers): + """Convenience fixture to get OTLP collector from mock_servers.""" + return mock_servers["Mock OTLP Collector"] + + +@pytest.fixture(scope="module") +def mock_vllm_server(mock_servers): + """Convenience fixture to get vLLM server from mock_servers.""" + return mock_servers["Mock vLLM Server"] + + +@pytest.fixture(scope="module") +def llama_stack_server(tmp_path_factory, mock_otlp_collector, mock_vllm_server): + """ + Fixture: Start real Llama Stack server with automatic OTel instrumentation. 
+ + **THIS IS THE MAIN FIXTURE** - it runs: + opentelemetry-instrument llama stack run --config run.yaml + + **TO MODIFY STACK CONFIG:** Edit run_config dict below + """ + config_dir = tmp_path_factory.mktemp("otel-stack-config") + + # Ensure mock vLLM is ready and accessible before starting Llama Stack + print("\n[INFO] Verifying mock vLLM is accessible at http://localhost:8000...") + try: + vllm_models = requests.get("http://localhost:8000/v1/models", timeout=2) + print(f"[INFO] Mock vLLM models endpoint response: {vllm_models.status_code}") + except Exception as e: + pytest.fail(f"Mock vLLM not accessible before starting Llama Stack: {e}") + + # Create run.yaml with inference provider + # **TO ADD MORE PROVIDERS:** Add to providers dict + run_config = { + "image_name": "test-otel-e2e", + "apis": ["inference"], + "providers": { + "inference": [ + { + "provider_id": "vllm", + "provider_type": "remote::vllm", + "config": { + "url": "http://localhost:8000/v1", + }, + }, + ], + }, + "models": [ + { + "model_id": "meta-llama/Llama-3.2-1B-Instruct", + "provider_id": "vllm", + } + ], + } + + config_file = config_dir / "run.yaml" + with open(config_file, "w") as f: + yaml.dump(run_config, f) + + # Find available port for Llama Stack + port = 5555 + while not is_port_available(port) and port < 5600: + port += 1 + + if port >= 5600: + pytest.skip("No available ports for test server") + + # Set environment variables for OTel instrumentation + # NOTE: These only affect the subprocess, not other tests + env = os.environ.copy() + env["OTEL_EXPORTER_OTLP_ENDPOINT"] = "http://localhost:4318" + env["OTEL_EXPORTER_OTLP_PROTOCOL"] = "http/protobuf" # Ensure correct protocol + env["OTEL_SERVICE_NAME"] = "llama-stack-e2e-test" + env["LLAMA_STACK_PORT"] = str(port) + env["OTEL_PYTHON_LOGGING_AUTO_INSTRUMENTATION_ENABLED"] = "true" + + # Configure fast metric export for testing (default is 60 seconds) + # This makes metrics export every 500ms instead of every 60 seconds + 
env["OTEL_METRIC_EXPORT_INTERVAL"] = "500" # milliseconds + env["OTEL_METRIC_EXPORT_TIMEOUT"] = "1000" # milliseconds + + # Disable inference recording to ensure real requests to our mock vLLM + # This is critical - without this, Llama Stack replays cached responses + # Safe to remove here as it only affects the subprocess environment + if "LLAMA_STACK_TEST_INFERENCE_MODE" in env: + del env["LLAMA_STACK_TEST_INFERENCE_MODE"] + + # Start server with automatic instrumentation + cmd = [ + "opentelemetry-instrument", # ← Automatic instrumentation wrapper + "llama", + "stack", + "run", + str(config_file), + "--port", + str(port), + ] + + print(f"\n[INFO] Starting Llama Stack with OTel instrumentation on port {port}") + print(f"[INFO] Command: {' '.join(cmd)}") + + process = subprocess.Popen( + cmd, + env=env, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + ) + + # Wait for server to start + max_wait = 30 + base_url = f"http://localhost:{port}" + + for i in range(max_wait): + try: + response = requests.get(f"{base_url}/v1/health", timeout=1) + if response.status_code == 200: + print(f"[INFO] Server ready at {base_url}") + break + except requests.exceptions.RequestException: + if i == max_wait - 1: + process.terminate() + stdout, stderr = process.communicate(timeout=5) + pytest.fail(f"Server failed to start.\nStdout: {stdout}\nStderr: {stderr}") + time.sleep(1) + + yield { + "base_url": base_url, + "port": port, + "collector": mock_otlp_collector, + "vllm_server": mock_vllm_server, + } + + # Cleanup + print("\n[INFO] Stopping Llama Stack server") + process.terminate() + try: + process.wait(timeout=5) + except subprocess.TimeoutExpired: + process.kill() + + +# ============================================================================ +# TESTS: End-to-End with Real Stack +# **THESE RUN SLOW** - marked with @pytest.mark.slow +# **TO ADD NEW E2E TESTS:** Add methods to this class +# 
============================================================================ + + +@pytest.mark.slow +class TestOTelE2E: + """ + End-to-end tests with real Llama Stack server. + + These tests verify the complete flow: + - Real Llama Stack with opentelemetry-instrument + - Real API calls + - Real automatic instrumentation + - Mock OTLP collector captures exports + """ + + def test_server_starts_with_auto_instrumentation(self, llama_stack_server): + """Verify server starts successfully with opentelemetry-instrument.""" + base_url = llama_stack_server["base_url"] + + # Try different health check endpoints + health_endpoints = ["/health", "/v1/health", "/"] + server_responding = False + + for endpoint in health_endpoints: + try: + response = requests.get(f"{base_url}{endpoint}", timeout=5) + print(f"\n[DEBUG] {endpoint} -> {response.status_code}") + if response.status_code == 200: + server_responding = True + break + except Exception as e: + print(f"[DEBUG] {endpoint} failed: {e}") + + assert server_responding, f"Server not responding on any endpoint at {base_url}" + + print(f"\n[PASS] Llama Stack running with OTel at {base_url}") + + def test_all_test_cases_via_runner(self, llama_stack_server): + """ + **MAIN TEST:** Run all TelemetryTestCase instances. + + This executes all test cases defined in TEST_CASES list. 
+ **TO ADD MORE TESTS:** Add to TEST_CASES at top of file + """ + base_url = llama_stack_server["base_url"] + collector = llama_stack_server["collector"] + + # Create test runner + runner = TelemetryTestRunner(base_url, collector) + + # Execute all test cases + results = runner.run_all_test_cases(TEST_CASES, verbose=True) + + # Print summary + print(f"\n{'=' * 50}") + print("TEST CASE SUMMARY") + print(f"{'=' * 50}") + passed = sum(1 for p in results.values() if p) + total = len(results) + print(f"Passed: {passed}/{total}\n") + + for name, result in results.items(): + status = "[PASS]" if result else "[FAIL]" + print(f" {status} {name}") + print(f"{'=' * 50}\n") diff --git a/tests/unit/providers/telemetry/__init__.py b/tests/unit/providers/telemetry/__init__.py new file mode 100644 index 000000000..756f351d8 --- /dev/null +++ b/tests/unit/providers/telemetry/__init__.py @@ -0,0 +1,5 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. diff --git a/tests/unit/providers/telemetry/meta_reference.py b/tests/unit/providers/telemetry/meta_reference.py new file mode 100644 index 000000000..c7c81f01f --- /dev/null +++ b/tests/unit/providers/telemetry/meta_reference.py @@ -0,0 +1,98 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
+ +import pytest + +import llama_stack.providers.inline.telemetry.meta_reference.telemetry as telemetry_module +from llama_stack.log import get_logger +from llama_stack.providers.inline.telemetry.meta_reference.config import ( + TelemetryConfig, + TelemetrySink, +) + +logger = get_logger(name=__name__, category="telemetry_test_meta_reference") + + +def _reset_provider(monkeypatch: pytest.MonkeyPatch) -> None: + # Ensure the telemetry module re-runs initialization code that emits warnings + monkeypatch.setattr(telemetry_module, "_TRACER_PROVIDER", None, raising=False) + + +def _make_config_with_sinks(*sinks: TelemetrySink) -> TelemetryConfig: + return TelemetryConfig(sinks=list(sinks)) + + +def _otel_logger_records(caplog: pytest.LogCaptureFixture): + module_logger_name = "llama_stack.providers.inline.telemetry.meta_reference.telemetry" + return [r for r in caplog.records if r.name == module_logger_name] + + +def test_warns_when_traces_endpoints_missing(monkeypatch: pytest.MonkeyPatch, caplog: pytest.LogCaptureFixture): + _reset_provider(monkeypatch) + # Remove both endpoints to simulate incorrect configuration + monkeypatch.delenv("OTEL_EXPORTER_OTLP_TRACES_ENDPOINT", raising=False) + monkeypatch.delenv("OTEL_EXPORTER_OTLP_ENDPOINT", raising=False) + + caplog.set_level("WARNING") + + config = _make_config_with_sinks(TelemetrySink.OTEL_TRACE) + telemetry_module.TelemetryAdapter(config=config, deps={}) + + messages = [r.getMessage() for r in _otel_logger_records(caplog)] + assert any( + "OTEL_EXPORTER_OTLP_TRACES_ENDPOINT or OTEL_EXPORTER_OTLP_ENDPOINT is not set. Traces will not be exported." 
+ in m + for m in messages + ) + + +def test_warns_when_metrics_endpoints_missing(monkeypatch: pytest.MonkeyPatch, caplog: pytest.LogCaptureFixture): + _reset_provider(monkeypatch) + # Remove both endpoints to simulate incorrect configuration + monkeypatch.delenv("OTEL_EXPORTER_OTLP_METRICS_ENDPOINT", raising=False) + monkeypatch.delenv("OTEL_EXPORTER_OTLP_ENDPOINT", raising=False) + + caplog.set_level("WARNING") + + config = _make_config_with_sinks(TelemetrySink.OTEL_METRIC) + telemetry_module.TelemetryAdapter(config=config, deps={}) + + messages = [r.getMessage() for r in _otel_logger_records(caplog)] + assert any( + "OTEL_EXPORTER_OTLP_METRICS_ENDPOINT or OTEL_EXPORTER_OTLP_ENDPOINT is not set. Metrics will not be exported." + in m + for m in messages + ) + + +def test_no_warning_when_traces_endpoints_present(monkeypatch: pytest.MonkeyPatch, caplog: pytest.LogCaptureFixture): + _reset_provider(monkeypatch) + # Both must be present per current implementation to avoid warnings + monkeypatch.setenv("OTEL_EXPORTER_OTLP_TRACES_ENDPOINT", "https://otel.example:4318/v1/traces") + monkeypatch.setenv("OTEL_EXPORTER_OTLP_ENDPOINT", "https://otel.example:4318") + + caplog.set_level("WARNING") + + config = _make_config_with_sinks(TelemetrySink.OTEL_TRACE) + telemetry_module.TelemetryAdapter(config=config, deps={}) + + messages = [r.getMessage() for r in _otel_logger_records(caplog)] + assert not any("Traces will not be exported." 
in m for m in messages) + + +def test_no_warning_when_metrics_endpoints_present(monkeypatch: pytest.MonkeyPatch, caplog: pytest.LogCaptureFixture): + _reset_provider(monkeypatch) + # Both must be present per current implementation to avoid warnings + monkeypatch.setenv("OTEL_EXPORTER_OTLP_METRICS_ENDPOINT", "https://otel.example:4318/v1/metrics") + monkeypatch.setenv("OTEL_EXPORTER_OTLP_ENDPOINT", "https://otel.example:4318") + + caplog.set_level("WARNING") + + config = _make_config_with_sinks(TelemetrySink.OTEL_METRIC) + telemetry_module.TelemetryAdapter(config=config, deps={}) + + messages = [r.getMessage() for r in _otel_logger_records(caplog)] + assert not any("Metrics will not be exported." in m for m in messages) diff --git a/tests/unit/providers/telemetry/test_otel.py b/tests/unit/providers/telemetry/test_otel.py new file mode 100644 index 000000000..94ad2425f --- /dev/null +++ b/tests/unit/providers/telemetry/test_otel.py @@ -0,0 +1,143 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +""" +Unit tests for OTel Telemetry Provider. 
+ +These tests focus on the provider's functionality: +- Initialization and configuration +- FastAPI middleware setup +- SQLAlchemy instrumentation +- Environment variable handling +""" + +from unittest.mock import MagicMock + +import pytest + +from llama_stack.providers.inline.telemetry.otel.config import OTelTelemetryConfig +from llama_stack.providers.inline.telemetry.otel.otel import OTelTelemetryProvider + + +@pytest.fixture +def otel_config(): + """Fixture providing a basic OTelTelemetryConfig.""" + return OTelTelemetryConfig( + service_name="test-service", + service_version="1.0.0", + deployment_environment="test", + span_processor="simple", + ) + + +@pytest.fixture +def otel_provider(otel_config, monkeypatch): + """Fixture providing an OTelTelemetryProvider instance.""" + monkeypatch.setenv("OTEL_EXPORTER_OTLP_ENDPOINT", "http://localhost:4318") + return OTelTelemetryProvider(config=otel_config) + + +class TestOTelProviderInitialization: + """Tests for OTel provider initialization and configuration.""" + + def test_provider_initializes_with_valid_config(self, otel_config, monkeypatch): + """Test that provider initializes correctly with valid configuration.""" + monkeypatch.setenv("OTEL_EXPORTER_OTLP_ENDPOINT", "http://localhost:4318") + + provider = OTelTelemetryProvider(config=otel_config) + + assert provider.config == otel_config + assert provider.config.service_name == "test-service" + assert provider.config.service_version == "1.0.0" + assert provider.config.deployment_environment == "test" + + def test_initialization_with_batch_processor(self, monkeypatch): + """Test initialization with batch span processor.""" + monkeypatch.setenv("OTEL_EXPORTER_OTLP_ENDPOINT", "http://localhost:4318") + config = OTelTelemetryConfig( + service_name="test-service", + service_version="1.0.0", + deployment_environment="test", + span_processor="batch", + ) + + provider = OTelTelemetryProvider(config=config) + + assert provider.config.span_processor == "batch" + + def 
test_warns_when_endpoints_missing(self, otel_config, monkeypatch, caplog): + """Test that warnings are issued when OTLP endpoints are not set.""" + monkeypatch.delenv("OTEL_EXPORTER_OTLP_ENDPOINT", raising=False) + monkeypatch.delenv("OTEL_EXPORTER_OTLP_TRACES_ENDPOINT", raising=False) + monkeypatch.delenv("OTEL_EXPORTER_OTLP_METRICS_ENDPOINT", raising=False) + + OTelTelemetryProvider(config=otel_config) + + # Check that warnings were logged + assert any("Traces will not be exported" in record.message for record in caplog.records) + assert any("Metrics will not be exported" in record.message for record in caplog.records) + + +class TestOTelProviderMiddleware: + """Tests for FastAPI and SQLAlchemy instrumentation.""" + + def test_fastapi_middleware_can_be_applied(self, otel_provider): + """Test that fastapi_middleware can be called without errors.""" + mock_app = MagicMock() + + # Should not raise an exception + otel_provider.fastapi_middleware(mock_app) + + # Verify FastAPIInstrumentor was called (it patches the app) + # The actual instrumentation is tested in E2E tests + + def test_sqlalchemy_instrumentation_without_engine(self, otel_provider): + """ + Test that sqlalchemy_instrumentation can be called. + + Note: Testing with a real engine would require SQLAlchemy setup. + The actual instrumentation is tested when used with real databases. 
+ """ + # Should not raise an exception + otel_provider.sqlalchemy_instrumentation() + + +class TestOTelProviderConfiguration: + """Tests for configuration and environment variable handling.""" + + def test_service_metadata_configuration(self, otel_provider): + """Test that service metadata is properly configured.""" + assert otel_provider.config.service_name == "test-service" + assert otel_provider.config.service_version == "1.0.0" + assert otel_provider.config.deployment_environment == "test" + + def test_span_processor_configuration(self, monkeypatch): + """Test different span processor configurations.""" + monkeypatch.setenv("OTEL_EXPORTER_OTLP_ENDPOINT", "http://localhost:4318") + + # Test simple processor + config_simple = OTelTelemetryConfig( + service_name="test", + span_processor="simple", + ) + provider_simple = OTelTelemetryProvider(config=config_simple) + assert provider_simple.config.span_processor == "simple" + + # Test batch processor + config_batch = OTelTelemetryConfig( + service_name="test", + span_processor="batch", + ) + provider_batch = OTelTelemetryProvider(config=config_batch) + assert provider_batch.config.span_processor == "batch" + + def test_sample_run_config_generation(self): + """Test that sample_run_config generates valid configuration.""" + sample_config = OTelTelemetryConfig.sample_run_config() + + assert "service_name" in sample_config + assert "span_processor" in sample_config + assert "${env.OTEL_SERVICE_NAME" in sample_config["service_name"] diff --git a/uv.lock b/uv.lock index c1cd7e71c..f25bba4a0 100644 --- a/uv.lock +++ b/uv.lock @@ -152,6 +152,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/81/29/5ecc3a15d5a33e31b26c11426c45c501e439cb865d0bff96315d86443b78/appnope-0.1.4-py2.py3-none-any.whl", hash = "sha256:502575ee11cd7a28c0205f379b525beefebab9d161b7c964670864014ed7213c", size = 4321, upload-time = "2024-02-06T09:43:09.663Z" }, ] +[[package]] +name = "asgiref" +version = "3.9.2" +source = { registry = 
"https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/7f/bf/0f3ecda32f1cb3bf1dca480aca08a7a8a3bdc4bed2343a103f30731565c9/asgiref-3.9.2.tar.gz", hash = "sha256:a0249afacb66688ef258ffe503528360443e2b9a8d8c4581b6ebefa58c841ef1", size = 36894, upload-time = "2025-09-23T15:00:55.136Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c7/d1/69d02ce34caddb0a7ae088b84c356a625a93cd4ff57b2f97644c03fad905/asgiref-3.9.2-py3-none-any.whl", hash = "sha256:0b61526596219d70396548fc003635056856dba5d0d086f86476f10b33c75960", size = 23788, upload-time = "2025-09-23T15:00:53.627Z" }, +] + [[package]] name = "asttokens" version = "3.0.0" @@ -1765,7 +1774,10 @@ dependencies = [ { name = "llama-stack-client" }, { name = "openai" }, { name = "opentelemetry-exporter-otlp-proto-http" }, + { name = "opentelemetry-instrumentation-fastapi" }, + { name = "opentelemetry-instrumentation-sqlalchemy" }, { name = "opentelemetry-sdk" }, + { name = "opentelemetry-semantic-conventions" }, { name = "pillow" }, { name = "prompt-toolkit" }, { name = "pydantic" }, @@ -1890,7 +1902,10 @@ requires-dist = [ { name = "llama-stack-client", marker = "extra == 'ui'", specifier = ">=0.2.23" }, { name = "openai", specifier = ">=1.107" }, { name = "opentelemetry-exporter-otlp-proto-http", specifier = ">=1.30.0" }, + { name = "opentelemetry-instrumentation-fastapi", specifier = ">=0.57b0" }, + { name = "opentelemetry-instrumentation-sqlalchemy", specifier = ">=0.57b0" }, { name = "opentelemetry-sdk", specifier = ">=1.30.0" }, + { name = "opentelemetry-semantic-conventions", specifier = ">=0.57b0" }, { name = "pandas", marker = "extra == 'ui'" }, { name = "pillow" }, { name = "prompt-toolkit" }, @@ -2696,6 +2711,69 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/7f/41/a680d38b34f8f5ddbd78ed9f0042e1cc712d58ec7531924d71cb1e6c629d/opentelemetry_exporter_otlp_proto_http-1.36.0-py3-none-any.whl", hash = 
"sha256:3d769f68e2267e7abe4527f70deb6f598f40be3ea34c6adc35789bea94a32902", size = 18752, upload-time = "2025-07-29T15:11:53.164Z" }, ] +[[package]] +name = "opentelemetry-instrumentation" +version = "0.57b0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "opentelemetry-api" }, + { name = "opentelemetry-semantic-conventions" }, + { name = "packaging" }, + { name = "wrapt" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/12/37/cf17cf28f945a3aca5a038cfbb45ee01317d4f7f3a0e5209920883fe9b08/opentelemetry_instrumentation-0.57b0.tar.gz", hash = "sha256:f2a30135ba77cdea2b0e1df272f4163c154e978f57214795d72f40befd4fcf05", size = 30807, upload-time = "2025-07-29T15:42:44.746Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d0/6f/f20cd1542959f43fb26a5bf9bb18cd81a1ea0700e8870c8f369bd07f5c65/opentelemetry_instrumentation-0.57b0-py3-none-any.whl", hash = "sha256:9109280f44882e07cec2850db28210b90600ae9110b42824d196de357cbddf7e", size = 32460, upload-time = "2025-07-29T15:41:40.883Z" }, +] + +[[package]] +name = "opentelemetry-instrumentation-asgi" +version = "0.57b0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "asgiref" }, + { name = "opentelemetry-api" }, + { name = "opentelemetry-instrumentation" }, + { name = "opentelemetry-semantic-conventions" }, + { name = "opentelemetry-util-http" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/97/10/7ba59b586eb099fa0155521b387d857de476687c670096597f618d889323/opentelemetry_instrumentation_asgi-0.57b0.tar.gz", hash = "sha256:a6f880b5d1838f65688fc992c65fbb1d3571f319d370990c32e759d3160e510b", size = 24654, upload-time = "2025-07-29T15:42:48.199Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9e/07/ab97dd7e8bc680b479203f7d3b2771b7a097468135a669a38da3208f96cb/opentelemetry_instrumentation_asgi-0.57b0-py3-none-any.whl", hash = "sha256:47debbde6af066a7e8e911f7193730d5e40d62effc1ac2e1119908347790a3ea", size = 
16599, upload-time = "2025-07-29T15:41:48.332Z" }, +] + +[[package]] +name = "opentelemetry-instrumentation-fastapi" +version = "0.57b0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "opentelemetry-api" }, + { name = "opentelemetry-instrumentation" }, + { name = "opentelemetry-instrumentation-asgi" }, + { name = "opentelemetry-semantic-conventions" }, + { name = "opentelemetry-util-http" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/47/a8/7c22a33ff5986523a7f9afcb5f4d749533842c3cc77ef55b46727580edd0/opentelemetry_instrumentation_fastapi-0.57b0.tar.gz", hash = "sha256:73ac22f3c472a8f9cb21d1fbe5a4bf2797690c295fff4a1c040e9b1b1688a105", size = 20277, upload-time = "2025-07-29T15:42:58.68Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3b/df/f20fc21c88c7af5311bfefc15fc4e606bab5edb7c193aa8c73c354904c35/opentelemetry_instrumentation_fastapi-0.57b0-py3-none-any.whl", hash = "sha256:61e6402749ffe0bfec582e58155e0d81dd38723cd9bc4562bca1acca80334006", size = 12712, upload-time = "2025-07-29T15:42:03.332Z" }, +] + +[[package]] +name = "opentelemetry-instrumentation-sqlalchemy" +version = "0.57b0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "opentelemetry-api" }, + { name = "opentelemetry-instrumentation" }, + { name = "opentelemetry-semantic-conventions" }, + { name = "packaging" }, + { name = "wrapt" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/9c/18/ee1460dcb044b25aaedd6cfd063304d84ae641dddb8fb9287959f7644100/opentelemetry_instrumentation_sqlalchemy-0.57b0.tar.gz", hash = "sha256:95667326b7cc22bb4bc9941f98ca22dd177679f9a4d277646cc21074c0d732ff", size = 14962, upload-time = "2025-07-29T15:43:12.426Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/94/18/af35650eb029d771b8d281bea770727f1e2f662c422c5ab1a0c2b7afc152/opentelemetry_instrumentation_sqlalchemy-0.57b0-py3-none-any.whl", hash = 
"sha256:8a1a815331cb04fc95aa7c50e9c681cdccfb12e1fa4522f079fe4b24753ae106", size = 14202, upload-time = "2025-07-29T15:42:25.828Z" }, +] + [[package]] name = "opentelemetry-proto" version = "1.36.0" @@ -2735,6 +2813,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/05/75/7d591371c6c39c73de5ce5da5a2cc7b72d1d1cd3f8f4638f553c01c37b11/opentelemetry_semantic_conventions-0.57b0-py3-none-any.whl", hash = "sha256:757f7e76293294f124c827e514c2a3144f191ef175b069ce8d1211e1e38e9e78", size = 201627, upload-time = "2025-07-29T15:12:04.174Z" }, ] +[[package]] +name = "opentelemetry-util-http" +version = "0.57b0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/9b/1b/6229c45445e08e798fa825f5376f6d6a4211d29052a4088eed6d577fa653/opentelemetry_util_http-0.57b0.tar.gz", hash = "sha256:f7417595ead0eb42ed1863ec9b2f839fc740368cd7bbbfc1d0a47bc1ab0aba11", size = 9405, upload-time = "2025-07-29T15:43:19.916Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0b/a6/b98d508d189b9c208f5978d0906141747d7e6df7c7cafec03657ed1ed559/opentelemetry_util_http-0.57b0-py3-none-any.whl", hash = "sha256:e54c0df5543951e471c3d694f85474977cd5765a3b7654398c83bab3d2ffb8e9", size = 7643, upload-time = "2025-07-29T15:42:41.744Z" }, +] + [[package]] name = "orjson" version = "3.11.1" @@ -4752,9 +4839,9 @@ dependencies = [ { name = "typing-extensions", marker = "sys_platform == 'darwin'" }, ] wheels = [ - { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0-cp312-none-macosx_11_0_arm64.whl" }, - { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0-cp313-cp313t-macosx_14_0_arm64.whl" }, - { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0-cp313-none-macosx_11_0_arm64.whl" }, + { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:a47b7986bee3f61ad217d8a8ce24605809ab425baf349f97de758815edd2ef54" }, + { url = 
"https://download.pytorch.org/whl/cpu/torch-2.8.0-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:fbe2e149c5174ef90d29a5f84a554dfaf28e003cb4f61fa2c8c024c17ec7ca58" }, + { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0-cp313-none-macosx_11_0_arm64.whl", hash = "sha256:057efd30a6778d2ee5e2374cd63a63f63311aa6f33321e627c655df60abdd390" }, ] [[package]] @@ -4777,19 +4864,19 @@ dependencies = [ { name = "typing-extensions", marker = "sys_platform != 'darwin'" }, ] wheels = [ - { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0%2Bcpu-cp312-cp312-linux_s390x.whl" }, - { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0%2Bcpu-cp312-cp312-manylinux_2_28_aarch64.whl" }, - { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0%2Bcpu-cp312-cp312-manylinux_2_28_x86_64.whl" }, - { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0%2Bcpu-cp312-cp312-win_amd64.whl" }, - { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0%2Bcpu-cp312-cp312-win_arm64.whl" }, - { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0%2Bcpu-cp313-cp313-linux_s390x.whl" }, - { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0%2Bcpu-cp313-cp313-manylinux_2_28_aarch64.whl" }, - { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0%2Bcpu-cp313-cp313-manylinux_2_28_x86_64.whl" }, - { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0%2Bcpu-cp313-cp313-win_amd64.whl" }, - { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0%2Bcpu-cp313-cp313-win_arm64.whl" }, - { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0%2Bcpu-cp313-cp313t-manylinux_2_28_aarch64.whl" }, - { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0%2Bcpu-cp313-cp313t-manylinux_2_28_x86_64.whl" }, - { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0%2Bcpu-cp313-cp313t-win_amd64.whl" }, + { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0%2Bcpu-cp312-cp312-linux_s390x.whl", hash = "sha256:0e34e276722ab7dd0dffa9e12fe2135a9b34a0e300c456ed7ad6430229404eb5" }, + 
{ url = "https://download.pytorch.org/whl/cpu/torch-2.8.0%2Bcpu-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:610f600c102386e581327d5efc18c0d6edecb9820b4140d26163354a99cd800d" }, + { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0%2Bcpu-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:cb9a8ba8137ab24e36bf1742cb79a1294bd374db570f09fc15a5e1318160db4e" }, + { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0%2Bcpu-cp312-cp312-win_amd64.whl", hash = "sha256:2be20b2c05a0cce10430cc25f32b689259640d273232b2de357c35729132256d" }, + { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0%2Bcpu-cp312-cp312-win_arm64.whl", hash = "sha256:99fc421a5d234580e45957a7b02effbf3e1c884a5dd077afc85352c77bf41434" }, + { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0%2Bcpu-cp313-cp313-linux_s390x.whl", hash = "sha256:8b5882276633cf91fe3d2d7246c743b94d44a7e660b27f1308007fdb1bb89f7d" }, + { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0%2Bcpu-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:a5064b5e23772c8d164068cc7c12e01a75faf7b948ecd95a0d4007d7487e5f25" }, + { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0%2Bcpu-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:8f81dedb4c6076ec325acc3b47525f9c550e5284a18eae1d9061c543f7b6e7de" }, + { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0%2Bcpu-cp313-cp313-win_amd64.whl", hash = "sha256:e1ee1b2346ade3ea90306dfbec7e8ff17bc220d344109d189ae09078333b0856" }, + { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0%2Bcpu-cp313-cp313-win_arm64.whl", hash = "sha256:64c187345509f2b1bb334feed4666e2c781ca381874bde589182f81247e61f88" }, + { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0%2Bcpu-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:af81283ac671f434b1b25c95ba295f270e72db1fad48831eb5e4748ff9840041" }, + { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0%2Bcpu-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = 
"sha256:a9dbb6f64f63258bc811e2c0c99640a81e5af93c531ad96e95c5ec777ea46dab" }, + { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0%2Bcpu-cp313-cp313t-win_amd64.whl", hash = "sha256:6d93a7165419bc4b2b907e859ccab0dea5deeab261448ae9a5ec5431f14c0e64" }, ] [[package]] @@ -5234,6 +5321,55 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/52/24/ab44c871b0f07f491e5d2ad12c9bd7358e527510618cb1b803a88e986db1/werkzeug-3.1.3-py3-none-any.whl", hash = "sha256:54b78bf3716d19a65be4fceccc0d1d7b89e608834989dfae50ea87564639213e", size = 224498, upload-time = "2024-11-08T15:52:16.132Z" }, ] +[[package]] +name = "wrapt" +version = "1.17.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/95/8f/aeb76c5b46e273670962298c23e7ddde79916cb74db802131d49a85e4b7d/wrapt-1.17.3.tar.gz", hash = "sha256:f66eb08feaa410fe4eebd17f2a2c8e2e46d3476e9f8c783daa8e09e0faa666d0", size = 55547, upload-time = "2025-08-12T05:53:21.714Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9f/41/cad1aba93e752f1f9268c77270da3c469883d56e2798e7df6240dcb2287b/wrapt-1.17.3-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:ab232e7fdb44cdfbf55fc3afa31bcdb0d8980b9b95c38b6405df2acb672af0e0", size = 53998, upload-time = "2025-08-12T05:51:47.138Z" }, + { url = "https://files.pythonhosted.org/packages/60/f8/096a7cc13097a1869fe44efe68dace40d2a16ecb853141394047f0780b96/wrapt-1.17.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:9baa544e6acc91130e926e8c802a17f3b16fbea0fd441b5a60f5cf2cc5c3deba", size = 39020, upload-time = "2025-08-12T05:51:35.906Z" }, + { url = "https://files.pythonhosted.org/packages/33/df/bdf864b8997aab4febb96a9ae5c124f700a5abd9b5e13d2a3214ec4be705/wrapt-1.17.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6b538e31eca1a7ea4605e44f81a48aa24c4632a277431a6ed3f328835901f4fd", size = 39098, upload-time = "2025-08-12T05:51:57.474Z" }, + { url = 
"https://files.pythonhosted.org/packages/9f/81/5d931d78d0eb732b95dc3ddaeeb71c8bb572fb01356e9133916cd729ecdd/wrapt-1.17.3-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:042ec3bb8f319c147b1301f2393bc19dba6e176b7da446853406d041c36c7828", size = 88036, upload-time = "2025-08-12T05:52:34.784Z" }, + { url = "https://files.pythonhosted.org/packages/ca/38/2e1785df03b3d72d34fc6252d91d9d12dc27a5c89caef3335a1bbb8908ca/wrapt-1.17.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3af60380ba0b7b5aeb329bc4e402acd25bd877e98b3727b0135cb5c2efdaefe9", size = 88156, upload-time = "2025-08-12T05:52:13.599Z" }, + { url = "https://files.pythonhosted.org/packages/b3/8b/48cdb60fe0603e34e05cffda0b2a4adab81fd43718e11111a4b0100fd7c1/wrapt-1.17.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:0b02e424deef65c9f7326d8c19220a2c9040c51dc165cddb732f16198c168396", size = 87102, upload-time = "2025-08-12T05:52:14.56Z" }, + { url = "https://files.pythonhosted.org/packages/3c/51/d81abca783b58f40a154f1b2c56db1d2d9e0d04fa2d4224e357529f57a57/wrapt-1.17.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:74afa28374a3c3a11b3b5e5fca0ae03bef8450d6aa3ab3a1e2c30e3a75d023dc", size = 87732, upload-time = "2025-08-12T05:52:36.165Z" }, + { url = "https://files.pythonhosted.org/packages/9e/b1/43b286ca1392a006d5336412d41663eeef1ad57485f3e52c767376ba7e5a/wrapt-1.17.3-cp312-cp312-win32.whl", hash = "sha256:4da9f45279fff3543c371d5ababc57a0384f70be244de7759c85a7f989cb4ebe", size = 36705, upload-time = "2025-08-12T05:53:07.123Z" }, + { url = "https://files.pythonhosted.org/packages/28/de/49493f962bd3c586ab4b88066e967aa2e0703d6ef2c43aa28cb83bf7b507/wrapt-1.17.3-cp312-cp312-win_amd64.whl", hash = "sha256:e71d5c6ebac14875668a1e90baf2ea0ef5b7ac7918355850c0908ae82bcb297c", size = 38877, upload-time = "2025-08-12T05:53:05.436Z" }, + { url = 
"https://files.pythonhosted.org/packages/f1/48/0f7102fe9cb1e8a5a77f80d4f0956d62d97034bbe88d33e94699f99d181d/wrapt-1.17.3-cp312-cp312-win_arm64.whl", hash = "sha256:604d076c55e2fdd4c1c03d06dc1a31b95130010517b5019db15365ec4a405fc6", size = 36885, upload-time = "2025-08-12T05:52:54.367Z" }, + { url = "https://files.pythonhosted.org/packages/fc/f6/759ece88472157acb55fc195e5b116e06730f1b651b5b314c66291729193/wrapt-1.17.3-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:a47681378a0439215912ef542c45a783484d4dd82bac412b71e59cf9c0e1cea0", size = 54003, upload-time = "2025-08-12T05:51:48.627Z" }, + { url = "https://files.pythonhosted.org/packages/4f/a9/49940b9dc6d47027dc850c116d79b4155f15c08547d04db0f07121499347/wrapt-1.17.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:54a30837587c6ee3cd1a4d1c2ec5d24e77984d44e2f34547e2323ddb4e22eb77", size = 39025, upload-time = "2025-08-12T05:51:37.156Z" }, + { url = "https://files.pythonhosted.org/packages/45/35/6a08de0f2c96dcdd7fe464d7420ddb9a7655a6561150e5fc4da9356aeaab/wrapt-1.17.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:16ecf15d6af39246fe33e507105d67e4b81d8f8d2c6598ff7e3ca1b8a37213f7", size = 39108, upload-time = "2025-08-12T05:51:58.425Z" }, + { url = "https://files.pythonhosted.org/packages/0c/37/6faf15cfa41bf1f3dba80cd3f5ccc6622dfccb660ab26ed79f0178c7497f/wrapt-1.17.3-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:6fd1ad24dc235e4ab88cda009e19bf347aabb975e44fd5c2fb22a3f6e4141277", size = 88072, upload-time = "2025-08-12T05:52:37.53Z" }, + { url = "https://files.pythonhosted.org/packages/78/f2/efe19ada4a38e4e15b6dff39c3e3f3f73f5decf901f66e6f72fe79623a06/wrapt-1.17.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0ed61b7c2d49cee3c027372df5809a59d60cf1b6c2f81ee980a091f3afed6a2d", size = 88214, upload-time = "2025-08-12T05:52:15.886Z" }, + { url = 
"https://files.pythonhosted.org/packages/40/90/ca86701e9de1622b16e09689fc24b76f69b06bb0150990f6f4e8b0eeb576/wrapt-1.17.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:423ed5420ad5f5529db9ce89eac09c8a2f97da18eb1c870237e84c5a5c2d60aa", size = 87105, upload-time = "2025-08-12T05:52:17.914Z" }, + { url = "https://files.pythonhosted.org/packages/fd/e0/d10bd257c9a3e15cbf5523025252cc14d77468e8ed644aafb2d6f54cb95d/wrapt-1.17.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:e01375f275f010fcbf7f643b4279896d04e571889b8a5b3f848423d91bf07050", size = 87766, upload-time = "2025-08-12T05:52:39.243Z" }, + { url = "https://files.pythonhosted.org/packages/e8/cf/7d848740203c7b4b27eb55dbfede11aca974a51c3d894f6cc4b865f42f58/wrapt-1.17.3-cp313-cp313-win32.whl", hash = "sha256:53e5e39ff71b3fc484df8a522c933ea2b7cdd0d5d15ae82e5b23fde87d44cbd8", size = 36711, upload-time = "2025-08-12T05:53:10.074Z" }, + { url = "https://files.pythonhosted.org/packages/57/54/35a84d0a4d23ea675994104e667ceff49227ce473ba6a59ba2c84f250b74/wrapt-1.17.3-cp313-cp313-win_amd64.whl", hash = "sha256:1f0b2f40cf341ee8cc1a97d51ff50dddb9fcc73241b9143ec74b30fc4f44f6cb", size = 38885, upload-time = "2025-08-12T05:53:08.695Z" }, + { url = "https://files.pythonhosted.org/packages/01/77/66e54407c59d7b02a3c4e0af3783168fff8e5d61def52cda8728439d86bc/wrapt-1.17.3-cp313-cp313-win_arm64.whl", hash = "sha256:7425ac3c54430f5fc5e7b6f41d41e704db073309acfc09305816bc6a0b26bb16", size = 36896, upload-time = "2025-08-12T05:52:55.34Z" }, + { url = "https://files.pythonhosted.org/packages/02/a2/cd864b2a14f20d14f4c496fab97802001560f9f41554eef6df201cd7f76c/wrapt-1.17.3-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:cf30f6e3c077c8e6a9a7809c94551203c8843e74ba0c960f4a98cd80d4665d39", size = 54132, upload-time = "2025-08-12T05:51:49.864Z" }, + { url = "https://files.pythonhosted.org/packages/d5/46/d011725b0c89e853dc44cceb738a307cde5d240d023d6d40a82d1b4e1182/wrapt-1.17.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = 
"sha256:e228514a06843cae89621384cfe3a80418f3c04aadf8a3b14e46a7be704e4235", size = 39091, upload-time = "2025-08-12T05:51:38.935Z" }, + { url = "https://files.pythonhosted.org/packages/2e/9e/3ad852d77c35aae7ddebdbc3b6d35ec8013af7d7dddad0ad911f3d891dae/wrapt-1.17.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:5ea5eb3c0c071862997d6f3e02af1d055f381b1d25b286b9d6644b79db77657c", size = 39172, upload-time = "2025-08-12T05:51:59.365Z" }, + { url = "https://files.pythonhosted.org/packages/c3/f7/c983d2762bcce2326c317c26a6a1e7016f7eb039c27cdf5c4e30f4160f31/wrapt-1.17.3-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:281262213373b6d5e4bb4353bc36d1ba4084e6d6b5d242863721ef2bf2c2930b", size = 87163, upload-time = "2025-08-12T05:52:40.965Z" }, + { url = "https://files.pythonhosted.org/packages/e4/0f/f673f75d489c7f22d17fe0193e84b41540d962f75fce579cf6873167c29b/wrapt-1.17.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:dc4a8d2b25efb6681ecacad42fca8859f88092d8732b170de6a5dddd80a1c8fa", size = 87963, upload-time = "2025-08-12T05:52:20.326Z" }, + { url = "https://files.pythonhosted.org/packages/df/61/515ad6caca68995da2fac7a6af97faab8f78ebe3bf4f761e1b77efbc47b5/wrapt-1.17.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:373342dd05b1d07d752cecbec0c41817231f29f3a89aa8b8843f7b95992ed0c7", size = 86945, upload-time = "2025-08-12T05:52:21.581Z" }, + { url = "https://files.pythonhosted.org/packages/d3/bd/4e70162ce398462a467bc09e768bee112f1412e563620adc353de9055d33/wrapt-1.17.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:d40770d7c0fd5cbed9d84b2c3f2e156431a12c9a37dc6284060fb4bec0b7ffd4", size = 86857, upload-time = "2025-08-12T05:52:43.043Z" }, + { url = "https://files.pythonhosted.org/packages/2b/b8/da8560695e9284810b8d3df8a19396a6e40e7518059584a1a394a2b35e0a/wrapt-1.17.3-cp314-cp314-win32.whl", hash = 
"sha256:fbd3c8319de8e1dc79d346929cd71d523622da527cca14e0c1d257e31c2b8b10", size = 37178, upload-time = "2025-08-12T05:53:12.605Z" }, + { url = "https://files.pythonhosted.org/packages/db/c8/b71eeb192c440d67a5a0449aaee2310a1a1e8eca41676046f99ed2487e9f/wrapt-1.17.3-cp314-cp314-win_amd64.whl", hash = "sha256:e1a4120ae5705f673727d3253de3ed0e016f7cd78dc463db1b31e2463e1f3cf6", size = 39310, upload-time = "2025-08-12T05:53:11.106Z" }, + { url = "https://files.pythonhosted.org/packages/45/20/2cda20fd4865fa40f86f6c46ed37a2a8356a7a2fde0773269311f2af56c7/wrapt-1.17.3-cp314-cp314-win_arm64.whl", hash = "sha256:507553480670cab08a800b9463bdb881b2edeed77dc677b0a5915e6106e91a58", size = 37266, upload-time = "2025-08-12T05:52:56.531Z" }, + { url = "https://files.pythonhosted.org/packages/77/ed/dd5cf21aec36c80443c6f900449260b80e2a65cf963668eaef3b9accce36/wrapt-1.17.3-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:ed7c635ae45cfbc1a7371f708727bf74690daedc49b4dba310590ca0bd28aa8a", size = 56544, upload-time = "2025-08-12T05:51:51.109Z" }, + { url = "https://files.pythonhosted.org/packages/8d/96/450c651cc753877ad100c7949ab4d2e2ecc4d97157e00fa8f45df682456a/wrapt-1.17.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:249f88ed15503f6492a71f01442abddd73856a0032ae860de6d75ca62eed8067", size = 40283, upload-time = "2025-08-12T05:51:39.912Z" }, + { url = "https://files.pythonhosted.org/packages/d1/86/2fcad95994d9b572db57632acb6f900695a648c3e063f2cd344b3f5c5a37/wrapt-1.17.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:5a03a38adec8066d5a37bea22f2ba6bbf39fcdefbe2d91419ab864c3fb515454", size = 40366, upload-time = "2025-08-12T05:52:00.693Z" }, + { url = "https://files.pythonhosted.org/packages/64/0e/f4472f2fdde2d4617975144311f8800ef73677a159be7fe61fa50997d6c0/wrapt-1.17.3-cp314-cp314t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:5d4478d72eb61c36e5b446e375bbc49ed002430d17cdec3cecb36993398e1a9e", size = 108571, upload-time = 
"2025-08-12T05:52:44.521Z" }, + { url = "https://files.pythonhosted.org/packages/cc/01/9b85a99996b0a97c8a17484684f206cbb6ba73c1ce6890ac668bcf3838fb/wrapt-1.17.3-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:223db574bb38637e8230eb14b185565023ab624474df94d2af18f1cdb625216f", size = 113094, upload-time = "2025-08-12T05:52:22.618Z" }, + { url = "https://files.pythonhosted.org/packages/25/02/78926c1efddcc7b3aa0bc3d6b33a822f7d898059f7cd9ace8c8318e559ef/wrapt-1.17.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:e405adefb53a435f01efa7ccdec012c016b5a1d3f35459990afc39b6be4d5056", size = 110659, upload-time = "2025-08-12T05:52:24.057Z" }, + { url = "https://files.pythonhosted.org/packages/dc/ee/c414501ad518ac3e6fe184753632fe5e5ecacdcf0effc23f31c1e4f7bfcf/wrapt-1.17.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:88547535b787a6c9ce4086917b6e1d291aa8ed914fdd3a838b3539dc95c12804", size = 106946, upload-time = "2025-08-12T05:52:45.976Z" }, + { url = "https://files.pythonhosted.org/packages/be/44/a1bd64b723d13bb151d6cc91b986146a1952385e0392a78567e12149c7b4/wrapt-1.17.3-cp314-cp314t-win32.whl", hash = "sha256:41b1d2bc74c2cac6f9074df52b2efbef2b30bdfe5f40cb78f8ca22963bc62977", size = 38717, upload-time = "2025-08-12T05:53:15.214Z" }, + { url = "https://files.pythonhosted.org/packages/79/d9/7cfd5a312760ac4dd8bf0184a6ee9e43c33e47f3dadc303032ce012b8fa3/wrapt-1.17.3-cp314-cp314t-win_amd64.whl", hash = "sha256:73d496de46cd2cdbdbcce4ae4bcdb4afb6a11234a1df9c085249d55166b95116", size = 41334, upload-time = "2025-08-12T05:53:14.178Z" }, + { url = "https://files.pythonhosted.org/packages/46/78/10ad9781128ed2f99dbc474f43283b13fea8ba58723e98844367531c18e9/wrapt-1.17.3-cp314-cp314t-win_arm64.whl", hash = "sha256:f38e60678850c42461d4202739f9bf1e3a737c7ad283638251e79cc49effb6b6", size = 38471, upload-time = "2025-08-12T05:52:57.784Z" }, + { url = 
"https://files.pythonhosted.org/packages/1f/f6/a933bd70f98e9cf3e08167fc5cd7aaaca49147e48411c0bd5ae701bb2194/wrapt-1.17.3-py3-none-any.whl", hash = "sha256:7171ae35d2c33d326ac19dd8facb1e82e5fd04ef8c6c0e394d7af55a55051c22", size = 23591, upload-time = "2025-08-12T05:53:20.674Z" }, +] + [[package]] name = "wsproto" version = "1.2.0"