fix(telemetry): otel config fix and tests

This commit is contained in:
Emilio Garcia 2025-09-17 17:21:16 -04:00
parent ce77c27ff8
commit 47d33a3555
5 changed files with 128 additions and 12 deletions

View file

@ -14,7 +14,7 @@ Meta's reference implementation of telemetry and observability using OpenTelemet
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `otel_exporter_otlp_endpoint` | `str \| None` | No | | The OpenTelemetry collector endpoint URL (base URL for traces, metrics, and logs). If not set, the SDK will use OTEL_EXPORTER_OTLP_ENDPOINT environment variable. |
| `otel_exporter_otlp_endpoint` | `str \| None` | No | | Deprecated.Please set the exporter using open telemetry environment variables instead. See https://opentelemetry.io/docs/languages/sdk-configuration/otlp-exporter/. |
| `service_name` | `<class 'str'>` | No | | The service name to use for telemetry |
| `sinks` | `list[inline.telemetry.meta_reference.config.TelemetrySink` | No | [&lt;TelemetrySink.SQLITE: 'sqlite'&gt;] | List of telemetry sinks to enable (possible values: otel_trace, otel_metric, sqlite, console) |
| `sqlite_db_path` | `<class 'str'>` | No | ~/.llama/runtime/trace_store.db | The path to the SQLite database to use for storing traces |

View file

@ -22,7 +22,8 @@ class TelemetrySink(StrEnum):
class TelemetryConfig(BaseModel):
otel_exporter_otlp_endpoint: str | None = Field(
default=None,
description="The OpenTelemetry collector endpoint URL (base URL for traces, metrics, and logs). If not set, the SDK will use OTEL_EXPORTER_OTLP_ENDPOINT environment variable.",
deprecated=True,
description="Deprecated.Please set the exporter using open telemetry environment variables instead. See https://opentelemetry.io/docs/languages/sdk-configuration/otlp-exporter/.",
)
service_name: str = Field(
# service name is always the same, use zero-width space to avoid clutter

View file

@ -5,6 +5,7 @@
# the root directory of this source tree.
import datetime
import os
import threading
from typing import Any
@ -93,28 +94,36 @@ class TelemetryAdapter(TelemetryDatasetMixin, Telemetry):
# Use single OTLP endpoint for all telemetry signals
if TelemetrySink.OTEL_TRACE in self.config.sinks or TelemetrySink.OTEL_METRIC in self.config.sinks:
if self.config.otel_exporter_otlp_endpoint is None:
raise ValueError(
"otel_exporter_otlp_endpoint is required when OTEL_TRACE or OTEL_METRIC is enabled"
)
# Let OpenTelemetry SDK handle endpoint construction automatically
# The SDK will read OTEL_EXPORTER_OTLP_ENDPOINT and construct appropriate URLs
# https://opentelemetry.io/docs/languages/sdk-configuration/otlp-exporter
if TelemetrySink.OTEL_TRACE in self.config.sinks:
if not os.environ.get("OTEL_EXPORTER_OTLP_TRACES_ENDPOINT") or not os.environ.get(
"OTEL_EXPORTER_OTLP_ENDPOINT"
):
logger.warning(
"OTEL_EXPORTER_OTLP_TRACES_ENDPOINT or OTEL_EXPORTER_OTLP_ENDPOINT is not set. Traces will not be exported."
)
span_exporter = OTLPSpanExporter()
span_processor = BatchSpanProcessor(span_exporter)
trace.get_tracer_provider().add_span_processor(span_processor)
provider.add_span_processor(span_processor)
if TelemetrySink.OTEL_METRIC in self.config.sinks:
metric_reader = PeriodicExportingMetricReader(OTLPMetricExporter())
if not os.environ.get("OTEL_EXPORTER_OTLP_METRICS_ENDPOINT") or not os.environ.get(
"OTEL_EXPORTER_OTLP_ENDPOINT"
):
logger.warning(
"OTEL_EXPORTER_OTLP_METRICS_ENDPOINT or OTEL_EXPORTER_OTLP_ENDPOINT is not set. Metrics will not be exported."
)
metric_exporter = OTLPMetricExporter()
metric_reader = PeriodicExportingMetricReader(metric_exporter)
metric_provider = MeterProvider(resource=resource, metric_readers=[metric_reader])
metrics.set_meter_provider(metric_provider)
if TelemetrySink.SQLITE in self.config.sinks:
trace.get_tracer_provider().add_span_processor(SQLiteSpanProcessor(self.config.sqlite_db_path))
provider.add_span_processor(SQLiteSpanProcessor(self.config.sqlite_db_path))
if TelemetrySink.CONSOLE in self.config.sinks:
trace.get_tracer_provider().add_span_processor(ConsoleSpanProcessor(print_attributes=True))
provider.add_span_processor(ConsoleSpanProcessor(print_attributes=True))
if TelemetrySink.OTEL_METRIC in self.config.sinks:
self.meter = metrics.get_meter(__name__)
@ -127,7 +136,8 @@ class TelemetryAdapter(TelemetryDatasetMixin, Telemetry):
pass
async def shutdown(self) -> None:
trace.get_tracer_provider().force_flush()
if isinstance(_TRACER_PROVIDER, TracerProvider):
_TRACER_PROVIDER.force_flush()
async def log_event(self, event: Event, ttl_seconds: int = 604800) -> None:
if isinstance(event, UnstructuredLogEvent):

View file

@ -0,0 +1,5 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

View file

@ -0,0 +1,100 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import logging
import pytest
import llama_stack.providers.inline.telemetry.meta_reference.telemetry as telemetry_module
from llama_stack.log import get_logger
from llama_stack.providers.inline.telemetry.meta_reference.config import (
TelemetryConfig,
TelemetrySink,
)
logger = get_logger(name=__name__, category="telemetry_test_meta_reference")
def _reset_provider(monkeypatch: pytest.MonkeyPatch) -> None:
# Ensure the telemetry module re-runs initialization code that emits warnings
monkeypatch.setattr(telemetry_module, "_TRACER_PROVIDER", None, raising=False)
def _make_config_with_sinks(*sinks: TelemetrySink) -> TelemetryConfig:
return TelemetryConfig(sinks=list(sinks))
def _otel_logger_records(caplog: pytest.LogCaptureFixture):
module_logger_name = "llama_stack.providers.inline.telemetry.meta_reference.telemetry"
return [r for r in caplog.records if r.name == module_logger_name]
def test_warns_when_traces_endpoints_missing(monkeypatch: pytest.MonkeyPatch, caplog: pytest.LogCaptureFixture):
_reset_provider(monkeypatch)
# Remove both endpoints to simulate incorrect configuration
monkeypatch.delenv("OTEL_EXPORTER_OTLP_TRACES_ENDPOINT", raising=False)
monkeypatch.delenv("OTEL_EXPORTER_OTLP_ENDPOINT", raising=False)
caplog.set_level(logging.WARNING)
config = _make_config_with_sinks(TelemetrySink.OTEL_TRACE)
telemetry_module.TelemetryAdapter(config=config, deps={})
messages = [r.getMessage() for r in _otel_logger_records(caplog)]
assert any(
"OTEL_EXPORTER_OTLP_TRACES_ENDPOINT or OTEL_EXPORTER_OTLP_ENDPOINT is not set. Traces will not be exported."
in m
for m in messages
)
def test_warns_when_metrics_endpoints_missing(monkeypatch: pytest.MonkeyPatch, caplog: pytest.LogCaptureFixture):
_reset_provider(monkeypatch)
# Remove both endpoints to simulate incorrect configuration
monkeypatch.delenv("OTEL_EXPORTER_OTLP_METRICS_ENDPOINT", raising=False)
monkeypatch.delenv("OTEL_EXPORTER_OTLP_ENDPOINT", raising=False)
caplog.set_level(logging.WARNING)
config = _make_config_with_sinks(TelemetrySink.OTEL_METRIC)
telemetry_module.TelemetryAdapter(config=config, deps={})
messages = [r.getMessage() for r in _otel_logger_records(caplog)]
assert any(
"OTEL_EXPORTER_OTLP_METRICS_ENDPOINT or OTEL_EXPORTER_OTLP_ENDPOINT is not set. Metrics will not be exported."
in m
for m in messages
)
def test_no_warning_when_traces_endpoints_present(monkeypatch: pytest.MonkeyPatch, caplog: pytest.LogCaptureFixture):
_reset_provider(monkeypatch)
# Both must be present per current implementation to avoid warnings
monkeypatch.setenv("OTEL_EXPORTER_OTLP_TRACES_ENDPOINT", "https://otel.example:4318/v1/traces")
monkeypatch.setenv("OTEL_EXPORTER_OTLP_ENDPOINT", "https://otel.example:4318")
caplog.set_level(logging.WARNING)
config = _make_config_with_sinks(TelemetrySink.OTEL_TRACE)
telemetry_module.TelemetryAdapter(config=config, deps={})
messages = [r.getMessage() for r in _otel_logger_records(caplog)]
assert not any("Traces will not be exported." in m for m in messages)
def test_no_warning_when_metrics_endpoints_present(monkeypatch: pytest.MonkeyPatch, caplog: pytest.LogCaptureFixture):
_reset_provider(monkeypatch)
# Both must be present per current implementation to avoid warnings
monkeypatch.setenv("OTEL_EXPORTER_OTLP_METRICS_ENDPOINT", "https://otel.example:4318/v1/metrics")
monkeypatch.setenv("OTEL_EXPORTER_OTLP_ENDPOINT", "https://otel.example:4318")
caplog.set_level(logging.WARNING)
config = _make_config_with_sinks(TelemetrySink.OTEL_METRIC)
telemetry_module.TelemetryAdapter(config=config, deps={})
messages = [r.getMessage() for r in _otel_logger_records(caplog)]
assert not any("Metrics will not be exported." in m for m in messages)