From 47d33a355561f6e9ca319c0f08ecfe460f8e4141 Mon Sep 17 00:00:00 2001 From: Emilio Garcia Date: Wed, 17 Sep 2025 17:21:16 -0400 Subject: [PATCH] fix(telemetry): otel config fix and tests --- .../telemetry/inline_meta-reference.mdx | 2 +- .../inline/telemetry/meta_reference/config.py | 3 +- .../telemetry/meta_reference/telemetry.py | 30 ++++-- tests/unit/providers/telemetry/__init__.py | 5 + .../providers/telemetry/meta_reference.py | 100 ++++++++++++++++++ 5 files changed, 128 insertions(+), 12 deletions(-) create mode 100644 tests/unit/providers/telemetry/__init__.py create mode 100644 tests/unit/providers/telemetry/meta_reference.py diff --git a/docs/docs/providers/telemetry/inline_meta-reference.mdx b/docs/docs/providers/telemetry/inline_meta-reference.mdx index ea2a690b3..bed9c98df 100644 --- a/docs/docs/providers/telemetry/inline_meta-reference.mdx +++ b/docs/docs/providers/telemetry/inline_meta-reference.mdx @@ -14,7 +14,7 @@ Meta's reference implementation of telemetry and observability using OpenTelemet | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `otel_exporter_otlp_endpoint` | `str \| None` | No | | The OpenTelemetry collector endpoint URL (base URL for traces, metrics, and logs). If not set, the SDK will use OTEL_EXPORTER_OTLP_ENDPOINT environment variable. | +| `otel_exporter_otlp_endpoint` | `str \| None` | No | | Deprecated.Please set the exporter using open telemetry environment variables instead. See https://opentelemetry.io/docs/languages/sdk-configuration/otlp-exporter/. | | `service_name` | `` | No | ​ | The service name to use for telemetry | | `sinks` | `list[inline.telemetry.meta_reference.config.TelemetrySink` | No | [<TelemetrySink.SQLITE: 'sqlite'>] | List of telemetry sinks to enable (possible values: otel_trace, otel_metric, sqlite, console) | | `sqlite_db_path` | `` | No | ~/.llama/runtime/trace_store.db | The path to the SQLite database to use for storing traces | diff --git a/llama_stack/providers/inline/telemetry/meta_reference/config.py b/llama_stack/providers/inline/telemetry/meta_reference/config.py index 06420c671..bec46324b 100644 --- a/llama_stack/providers/inline/telemetry/meta_reference/config.py +++ b/llama_stack/providers/inline/telemetry/meta_reference/config.py @@ -22,7 +22,8 @@ class TelemetrySink(StrEnum): class TelemetryConfig(BaseModel): otel_exporter_otlp_endpoint: str | None = Field( default=None, - description="The OpenTelemetry collector endpoint URL (base URL for traces, metrics, and logs). If not set, the SDK will use OTEL_EXPORTER_OTLP_ENDPOINT environment variable.", + deprecated=True, + description="Deprecated.Please set the exporter using open telemetry environment variables instead. See https://opentelemetry.io/docs/languages/sdk-configuration/otlp-exporter/.", ) service_name: str = Field( # service name is always the same, use zero-width space to avoid clutter diff --git a/llama_stack/providers/inline/telemetry/meta_reference/telemetry.py b/llama_stack/providers/inline/telemetry/meta_reference/telemetry.py index 4d30cbba3..e6428755c 100644 --- a/llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +++ b/llama_stack/providers/inline/telemetry/meta_reference/telemetry.py @@ -5,6 +5,7 @@ # the root directory of this source tree. import datetime +import os import threading from typing import Any @@ -93,28 +94,36 @@ class TelemetryAdapter(TelemetryDatasetMixin, Telemetry): # Use single OTLP endpoint for all telemetry signals if TelemetrySink.OTEL_TRACE in self.config.sinks or TelemetrySink.OTEL_METRIC in self.config.sinks: - if self.config.otel_exporter_otlp_endpoint is None: - raise ValueError( - "otel_exporter_otlp_endpoint is required when OTEL_TRACE or OTEL_METRIC is enabled" - ) - # Let OpenTelemetry SDK handle endpoint construction automatically # The SDK will read OTEL_EXPORTER_OTLP_ENDPOINT and construct appropriate URLs # https://opentelemetry.io/docs/languages/sdk-configuration/otlp-exporter if TelemetrySink.OTEL_TRACE in self.config.sinks: + if not os.environ.get("OTEL_EXPORTER_OTLP_TRACES_ENDPOINT") or not os.environ.get( + "OTEL_EXPORTER_OTLP_ENDPOINT" + ): + logger.warning( + "OTEL_EXPORTER_OTLP_TRACES_ENDPOINT or OTEL_EXPORTER_OTLP_ENDPOINT is not set. Traces will not be exported." + ) span_exporter = OTLPSpanExporter() span_processor = BatchSpanProcessor(span_exporter) - trace.get_tracer_provider().add_span_processor(span_processor) + provider.add_span_processor(span_processor) if TelemetrySink.OTEL_METRIC in self.config.sinks: - metric_reader = PeriodicExportingMetricReader(OTLPMetricExporter()) + if not os.environ.get("OTEL_EXPORTER_OTLP_METRICS_ENDPOINT") or not os.environ.get( + "OTEL_EXPORTER_OTLP_ENDPOINT" + ): + logger.warning( + "OTEL_EXPORTER_OTLP_METRICS_ENDPOINT or OTEL_EXPORTER_OTLP_ENDPOINT is not set. Metrics will not be exported." + ) + metric_exporter = OTLPMetricExporter() + metric_reader = PeriodicExportingMetricReader(metric_exporter) metric_provider = MeterProvider(resource=resource, metric_readers=[metric_reader]) metrics.set_meter_provider(metric_provider) if TelemetrySink.SQLITE in self.config.sinks: - trace.get_tracer_provider().add_span_processor(SQLiteSpanProcessor(self.config.sqlite_db_path)) + provider.add_span_processor(SQLiteSpanProcessor(self.config.sqlite_db_path)) if TelemetrySink.CONSOLE in self.config.sinks: - trace.get_tracer_provider().add_span_processor(ConsoleSpanProcessor(print_attributes=True)) + provider.add_span_processor(ConsoleSpanProcessor(print_attributes=True)) if TelemetrySink.OTEL_METRIC in self.config.sinks: self.meter = metrics.get_meter(__name__) @@ -127,7 +136,8 @@ class TelemetryAdapter(TelemetryDatasetMixin, Telemetry): pass async def shutdown(self) -> None: - trace.get_tracer_provider().force_flush() + if isinstance(_TRACER_PROVIDER, TracerProvider): + _TRACER_PROVIDER.force_flush() async def log_event(self, event: Event, ttl_seconds: int = 604800) -> None: if isinstance(event, UnstructuredLogEvent): diff --git a/tests/unit/providers/telemetry/__init__.py b/tests/unit/providers/telemetry/__init__.py new file mode 100644 index 000000000..756f351d8 --- /dev/null +++ b/tests/unit/providers/telemetry/__init__.py @@ -0,0 +1,5 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. diff --git a/tests/unit/providers/telemetry/meta_reference.py b/tests/unit/providers/telemetry/meta_reference.py new file mode 100644 index 000000000..26146e133 --- /dev/null +++ b/tests/unit/providers/telemetry/meta_reference.py @@ -0,0 +1,100 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +import logging + +import pytest + +import llama_stack.providers.inline.telemetry.meta_reference.telemetry as telemetry_module +from llama_stack.log import get_logger +from llama_stack.providers.inline.telemetry.meta_reference.config import ( + TelemetryConfig, + TelemetrySink, +) + +logger = get_logger(name=__name__, category="telemetry_test_meta_reference") + + +def _reset_provider(monkeypatch: pytest.MonkeyPatch) -> None: + # Ensure the telemetry module re-runs initialization code that emits warnings + monkeypatch.setattr(telemetry_module, "_TRACER_PROVIDER", None, raising=False) + + +def _make_config_with_sinks(*sinks: TelemetrySink) -> TelemetryConfig: + return TelemetryConfig(sinks=list(sinks)) + + +def _otel_logger_records(caplog: pytest.LogCaptureFixture): + module_logger_name = "llama_stack.providers.inline.telemetry.meta_reference.telemetry" + return [r for r in caplog.records if r.name == module_logger_name] + + +def test_warns_when_traces_endpoints_missing(monkeypatch: pytest.MonkeyPatch, caplog: pytest.LogCaptureFixture): + _reset_provider(monkeypatch) + # Remove both endpoints to simulate incorrect configuration + monkeypatch.delenv("OTEL_EXPORTER_OTLP_TRACES_ENDPOINT", raising=False) + monkeypatch.delenv("OTEL_EXPORTER_OTLP_ENDPOINT", raising=False) + + caplog.set_level(logging.WARNING) + + config = _make_config_with_sinks(TelemetrySink.OTEL_TRACE) + telemetry_module.TelemetryAdapter(config=config, deps={}) + + messages = [r.getMessage() for r in _otel_logger_records(caplog)] + assert any( + "OTEL_EXPORTER_OTLP_TRACES_ENDPOINT or OTEL_EXPORTER_OTLP_ENDPOINT is not set. Traces will not be exported." + in m + for m in messages + ) + + +def test_warns_when_metrics_endpoints_missing(monkeypatch: pytest.MonkeyPatch, caplog: pytest.LogCaptureFixture): + _reset_provider(monkeypatch) + # Remove both endpoints to simulate incorrect configuration + monkeypatch.delenv("OTEL_EXPORTER_OTLP_METRICS_ENDPOINT", raising=False) + monkeypatch.delenv("OTEL_EXPORTER_OTLP_ENDPOINT", raising=False) + + caplog.set_level(logging.WARNING) + + config = _make_config_with_sinks(TelemetrySink.OTEL_METRIC) + telemetry_module.TelemetryAdapter(config=config, deps={}) + + messages = [r.getMessage() for r in _otel_logger_records(caplog)] + assert any( + "OTEL_EXPORTER_OTLP_METRICS_ENDPOINT or OTEL_EXPORTER_OTLP_ENDPOINT is not set. Metrics will not be exported." + in m + for m in messages + ) + + +def test_no_warning_when_traces_endpoints_present(monkeypatch: pytest.MonkeyPatch, caplog: pytest.LogCaptureFixture): + _reset_provider(monkeypatch) + # Both must be present per current implementation to avoid warnings + monkeypatch.setenv("OTEL_EXPORTER_OTLP_TRACES_ENDPOINT", "https://otel.example:4318/v1/traces") + monkeypatch.setenv("OTEL_EXPORTER_OTLP_ENDPOINT", "https://otel.example:4318") + + caplog.set_level(logging.WARNING) + + config = _make_config_with_sinks(TelemetrySink.OTEL_TRACE) + telemetry_module.TelemetryAdapter(config=config, deps={}) + + messages = [r.getMessage() for r in _otel_logger_records(caplog)] + assert not any("Traces will not be exported." in m for m in messages) + + +def test_no_warning_when_metrics_endpoints_present(monkeypatch: pytest.MonkeyPatch, caplog: pytest.LogCaptureFixture): + _reset_provider(monkeypatch) + # Both must be present per current implementation to avoid warnings + monkeypatch.setenv("OTEL_EXPORTER_OTLP_METRICS_ENDPOINT", "https://otel.example:4318/v1/metrics") + monkeypatch.setenv("OTEL_EXPORTER_OTLP_ENDPOINT", "https://otel.example:4318") + + caplog.set_level(logging.WARNING) + + config = _make_config_with_sinks(TelemetrySink.OTEL_METRIC) + telemetry_module.TelemetryAdapter(config=config, deps={}) + + messages = [r.getMessage() for r in _otel_logger_records(caplog)] + assert not any("Metrics will not be exported." in m for m in messages)