mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-10-03 19:57:35 +00:00
fix(telemetry): otel config fix and tests
This commit is contained in:
parent
ce77c27ff8
commit
47d33a3555
5 changed files with 128 additions and 12 deletions
|
@ -14,7 +14,7 @@ Meta's reference implementation of telemetry and observability using OpenTelemet
|
|||
|
||||
| Field | Type | Required | Default | Description |
|
||||
|-------|------|----------|---------|-------------|
|
||||
| `otel_exporter_otlp_endpoint` | `str \| None` | No | | The OpenTelemetry collector endpoint URL (base URL for traces, metrics, and logs). If not set, the SDK will use OTEL_EXPORTER_OTLP_ENDPOINT environment variable. |
|
||||
| `otel_exporter_otlp_endpoint` | `str \| None` | No | | Deprecated.Please set the exporter using open telemetry environment variables instead. See https://opentelemetry.io/docs/languages/sdk-configuration/otlp-exporter/. |
|
||||
| `service_name` | `<class 'str'>` | No | | The service name to use for telemetry |
|
||||
| `sinks` | `list[inline.telemetry.meta_reference.config.TelemetrySink` | No | [<TelemetrySink.SQLITE: 'sqlite'>] | List of telemetry sinks to enable (possible values: otel_trace, otel_metric, sqlite, console) |
|
||||
| `sqlite_db_path` | `<class 'str'>` | No | ~/.llama/runtime/trace_store.db | The path to the SQLite database to use for storing traces |
|
||||
|
|
|
@ -22,7 +22,8 @@ class TelemetrySink(StrEnum):
|
|||
class TelemetryConfig(BaseModel):
|
||||
otel_exporter_otlp_endpoint: str | None = Field(
|
||||
default=None,
|
||||
description="The OpenTelemetry collector endpoint URL (base URL for traces, metrics, and logs). If not set, the SDK will use OTEL_EXPORTER_OTLP_ENDPOINT environment variable.",
|
||||
deprecated=True,
|
||||
description="Deprecated.Please set the exporter using open telemetry environment variables instead. See https://opentelemetry.io/docs/languages/sdk-configuration/otlp-exporter/.",
|
||||
)
|
||||
service_name: str = Field(
|
||||
# service name is always the same, use zero-width space to avoid clutter
|
||||
|
|
|
@ -5,6 +5,7 @@
|
|||
# the root directory of this source tree.
|
||||
|
||||
import datetime
|
||||
import os
|
||||
import threading
|
||||
from typing import Any
|
||||
|
||||
|
@ -93,28 +94,36 @@ class TelemetryAdapter(TelemetryDatasetMixin, Telemetry):
|
|||
|
||||
# Use single OTLP endpoint for all telemetry signals
|
||||
if TelemetrySink.OTEL_TRACE in self.config.sinks or TelemetrySink.OTEL_METRIC in self.config.sinks:
|
||||
if self.config.otel_exporter_otlp_endpoint is None:
|
||||
raise ValueError(
|
||||
"otel_exporter_otlp_endpoint is required when OTEL_TRACE or OTEL_METRIC is enabled"
|
||||
)
|
||||
|
||||
# Let OpenTelemetry SDK handle endpoint construction automatically
|
||||
# The SDK will read OTEL_EXPORTER_OTLP_ENDPOINT and construct appropriate URLs
|
||||
# https://opentelemetry.io/docs/languages/sdk-configuration/otlp-exporter
|
||||
if TelemetrySink.OTEL_TRACE in self.config.sinks:
|
||||
if not os.environ.get("OTEL_EXPORTER_OTLP_TRACES_ENDPOINT") or not os.environ.get(
|
||||
"OTEL_EXPORTER_OTLP_ENDPOINT"
|
||||
):
|
||||
logger.warning(
|
||||
"OTEL_EXPORTER_OTLP_TRACES_ENDPOINT or OTEL_EXPORTER_OTLP_ENDPOINT is not set. Traces will not be exported."
|
||||
)
|
||||
span_exporter = OTLPSpanExporter()
|
||||
span_processor = BatchSpanProcessor(span_exporter)
|
||||
trace.get_tracer_provider().add_span_processor(span_processor)
|
||||
provider.add_span_processor(span_processor)
|
||||
|
||||
if TelemetrySink.OTEL_METRIC in self.config.sinks:
|
||||
metric_reader = PeriodicExportingMetricReader(OTLPMetricExporter())
|
||||
if not os.environ.get("OTEL_EXPORTER_OTLP_METRICS_ENDPOINT") or not os.environ.get(
|
||||
"OTEL_EXPORTER_OTLP_ENDPOINT"
|
||||
):
|
||||
logger.warning(
|
||||
"OTEL_EXPORTER_OTLP_METRICS_ENDPOINT or OTEL_EXPORTER_OTLP_ENDPOINT is not set. Metrics will not be exported."
|
||||
)
|
||||
metric_exporter = OTLPMetricExporter()
|
||||
metric_reader = PeriodicExportingMetricReader(metric_exporter)
|
||||
metric_provider = MeterProvider(resource=resource, metric_readers=[metric_reader])
|
||||
metrics.set_meter_provider(metric_provider)
|
||||
|
||||
if TelemetrySink.SQLITE in self.config.sinks:
|
||||
trace.get_tracer_provider().add_span_processor(SQLiteSpanProcessor(self.config.sqlite_db_path))
|
||||
provider.add_span_processor(SQLiteSpanProcessor(self.config.sqlite_db_path))
|
||||
if TelemetrySink.CONSOLE in self.config.sinks:
|
||||
trace.get_tracer_provider().add_span_processor(ConsoleSpanProcessor(print_attributes=True))
|
||||
provider.add_span_processor(ConsoleSpanProcessor(print_attributes=True))
|
||||
|
||||
if TelemetrySink.OTEL_METRIC in self.config.sinks:
|
||||
self.meter = metrics.get_meter(__name__)
|
||||
|
@ -127,7 +136,8 @@ class TelemetryAdapter(TelemetryDatasetMixin, Telemetry):
|
|||
pass
|
||||
|
||||
async def shutdown(self) -> None:
|
||||
trace.get_tracer_provider().force_flush()
|
||||
if isinstance(_TRACER_PROVIDER, TracerProvider):
|
||||
_TRACER_PROVIDER.force_flush()
|
||||
|
||||
async def log_event(self, event: Event, ttl_seconds: int = 604800) -> None:
|
||||
if isinstance(event, UnstructuredLogEvent):
|
||||
|
|
5
tests/unit/providers/telemetry/__init__.py
Normal file
5
tests/unit/providers/telemetry/__init__.py
Normal file
|
@ -0,0 +1,5 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
100
tests/unit/providers/telemetry/meta_reference.py
Normal file
100
tests/unit/providers/telemetry/meta_reference.py
Normal file
|
@ -0,0 +1,100 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
import logging
|
||||
|
||||
import pytest
|
||||
|
||||
import llama_stack.providers.inline.telemetry.meta_reference.telemetry as telemetry_module
|
||||
from llama_stack.log import get_logger
|
||||
from llama_stack.providers.inline.telemetry.meta_reference.config import (
|
||||
TelemetryConfig,
|
||||
TelemetrySink,
|
||||
)
|
||||
|
||||
logger = get_logger(name=__name__, category="telemetry_test_meta_reference")
|
||||
|
||||
|
||||
def _reset_provider(monkeypatch: pytest.MonkeyPatch) -> None:
|
||||
# Ensure the telemetry module re-runs initialization code that emits warnings
|
||||
monkeypatch.setattr(telemetry_module, "_TRACER_PROVIDER", None, raising=False)
|
||||
|
||||
|
||||
def _make_config_with_sinks(*sinks: TelemetrySink) -> TelemetryConfig:
|
||||
return TelemetryConfig(sinks=list(sinks))
|
||||
|
||||
|
||||
def _otel_logger_records(caplog: pytest.LogCaptureFixture):
|
||||
module_logger_name = "llama_stack.providers.inline.telemetry.meta_reference.telemetry"
|
||||
return [r for r in caplog.records if r.name == module_logger_name]
|
||||
|
||||
|
||||
def test_warns_when_traces_endpoints_missing(monkeypatch: pytest.MonkeyPatch, caplog: pytest.LogCaptureFixture):
|
||||
_reset_provider(monkeypatch)
|
||||
# Remove both endpoints to simulate incorrect configuration
|
||||
monkeypatch.delenv("OTEL_EXPORTER_OTLP_TRACES_ENDPOINT", raising=False)
|
||||
monkeypatch.delenv("OTEL_EXPORTER_OTLP_ENDPOINT", raising=False)
|
||||
|
||||
caplog.set_level(logging.WARNING)
|
||||
|
||||
config = _make_config_with_sinks(TelemetrySink.OTEL_TRACE)
|
||||
telemetry_module.TelemetryAdapter(config=config, deps={})
|
||||
|
||||
messages = [r.getMessage() for r in _otel_logger_records(caplog)]
|
||||
assert any(
|
||||
"OTEL_EXPORTER_OTLP_TRACES_ENDPOINT or OTEL_EXPORTER_OTLP_ENDPOINT is not set. Traces will not be exported."
|
||||
in m
|
||||
for m in messages
|
||||
)
|
||||
|
||||
|
||||
def test_warns_when_metrics_endpoints_missing(monkeypatch: pytest.MonkeyPatch, caplog: pytest.LogCaptureFixture):
|
||||
_reset_provider(monkeypatch)
|
||||
# Remove both endpoints to simulate incorrect configuration
|
||||
monkeypatch.delenv("OTEL_EXPORTER_OTLP_METRICS_ENDPOINT", raising=False)
|
||||
monkeypatch.delenv("OTEL_EXPORTER_OTLP_ENDPOINT", raising=False)
|
||||
|
||||
caplog.set_level(logging.WARNING)
|
||||
|
||||
config = _make_config_with_sinks(TelemetrySink.OTEL_METRIC)
|
||||
telemetry_module.TelemetryAdapter(config=config, deps={})
|
||||
|
||||
messages = [r.getMessage() for r in _otel_logger_records(caplog)]
|
||||
assert any(
|
||||
"OTEL_EXPORTER_OTLP_METRICS_ENDPOINT or OTEL_EXPORTER_OTLP_ENDPOINT is not set. Metrics will not be exported."
|
||||
in m
|
||||
for m in messages
|
||||
)
|
||||
|
||||
|
||||
def test_no_warning_when_traces_endpoints_present(monkeypatch: pytest.MonkeyPatch, caplog: pytest.LogCaptureFixture):
|
||||
_reset_provider(monkeypatch)
|
||||
# Both must be present per current implementation to avoid warnings
|
||||
monkeypatch.setenv("OTEL_EXPORTER_OTLP_TRACES_ENDPOINT", "https://otel.example:4318/v1/traces")
|
||||
monkeypatch.setenv("OTEL_EXPORTER_OTLP_ENDPOINT", "https://otel.example:4318")
|
||||
|
||||
caplog.set_level(logging.WARNING)
|
||||
|
||||
config = _make_config_with_sinks(TelemetrySink.OTEL_TRACE)
|
||||
telemetry_module.TelemetryAdapter(config=config, deps={})
|
||||
|
||||
messages = [r.getMessage() for r in _otel_logger_records(caplog)]
|
||||
assert not any("Traces will not be exported." in m for m in messages)
|
||||
|
||||
|
||||
def test_no_warning_when_metrics_endpoints_present(monkeypatch: pytest.MonkeyPatch, caplog: pytest.LogCaptureFixture):
|
||||
_reset_provider(monkeypatch)
|
||||
# Both must be present per current implementation to avoid warnings
|
||||
monkeypatch.setenv("OTEL_EXPORTER_OTLP_METRICS_ENDPOINT", "https://otel.example:4318/v1/metrics")
|
||||
monkeypatch.setenv("OTEL_EXPORTER_OTLP_ENDPOINT", "https://otel.example:4318")
|
||||
|
||||
caplog.set_level(logging.WARNING)
|
||||
|
||||
config = _make_config_with_sinks(TelemetrySink.OTEL_METRIC)
|
||||
telemetry_module.TelemetryAdapter(config=config, deps={})
|
||||
|
||||
messages = [r.getMessage() for r in _otel_logger_records(caplog)]
|
||||
assert not any("Metrics will not be exported." in m for m in messages)
|
Loading…
Add table
Add a link
Reference in a new issue