From 6104bd06a06c0308f83e51425dba565078557f48 Mon Sep 17 00:00:00 2001 From: Dinesh Yeduguru Date: Thu, 20 Mar 2025 15:51:41 -0700 Subject: [PATCH] feat: add different sinks for otel traces and metrics (#1731) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # What does this PR do? Since we now start recording and exporting metrics, we can no longer use a single OTEL endpoint to export both traces and metrics. This PR adds two sinks: OTEL_TRACE and OTEL_METRIC to be able to selectively enable the exporters. ## Test Plan Start the server with OTEL_TRACE as sink and verify traces show up in Jaeger ![Screenshot 2025-03-20 at 3 12 25 PM](https://github.com/user-attachments/assets/51007f28-b5ed-4853-912a-965a5cfe83af) --- .../inline/telemetry/meta_reference/config.py | 11 ++++++++--- .../inline/telemetry/meta_reference/telemetry.py | 13 +++++++------ 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/llama_stack/providers/inline/telemetry/meta_reference/config.py b/llama_stack/providers/inline/telemetry/meta_reference/config.py index 67f8cc6ee..12777fa31 100644 --- a/llama_stack/providers/inline/telemetry/meta_reference/config.py +++ b/llama_stack/providers/inline/telemetry/meta_reference/config.py @@ -13,15 +13,20 @@ from llama_stack.distribution.utils.config_dirs import RUNTIME_BASE_DIR class TelemetrySink(str, Enum): - OTEL = "otel" + OTEL_TRACE = "otel_trace" + OTEL_METRIC = "otel_metric" SQLITE = "sqlite" CONSOLE = "console" class TelemetryConfig(BaseModel): - otel_endpoint: str = Field( + otel_trace_endpoint: str = Field( default="http://localhost:4318/v1/traces", - description="The OpenTelemetry collector endpoint URL", + description="The OpenTelemetry collector endpoint URL for traces", + ) + otel_metric_endpoint: str = Field( + default="http://localhost:4318/v1/metrics", + description="The OpenTelemetry collector endpoint URL for metrics", ) service_name: str = Field( default="llama-stack", diff --git 
a/llama_stack/providers/inline/telemetry/meta_reference/telemetry.py b/llama_stack/providers/inline/telemetry/meta_reference/telemetry.py index 766bc0fc0..cf2f0c82e 100644 --- a/llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +++ b/llama_stack/providers/inline/telemetry/meta_reference/telemetry.py @@ -91,15 +91,16 @@ class TelemetryAdapter(TelemetryDatasetMixin, Telemetry): provider = TracerProvider(resource=resource) trace.set_tracer_provider(provider) _TRACER_PROVIDER = provider - if TelemetrySink.OTEL in self.config.sinks: - otlp_exporter = OTLPSpanExporter( - endpoint=self.config.otel_endpoint, + if TelemetrySink.OTEL_TRACE in self.config.sinks: + span_exporter = OTLPSpanExporter( + endpoint=self.config.otel_trace_endpoint, ) - span_processor = BatchSpanProcessor(otlp_exporter) + span_processor = BatchSpanProcessor(span_exporter) trace.get_tracer_provider().add_span_processor(span_processor) + if TelemetrySink.OTEL_METRIC in self.config.sinks: metric_reader = PeriodicExportingMetricReader( OTLPMetricExporter( - endpoint=self.config.otel_endpoint, + endpoint=self.config.otel_metric_endpoint, ) ) metric_provider = MeterProvider(resource=resource, metric_readers=[metric_reader]) @@ -109,7 +110,7 @@ class TelemetryAdapter(TelemetryDatasetMixin, Telemetry): if TelemetrySink.CONSOLE in self.config.sinks: trace.get_tracer_provider().add_span_processor(ConsoleSpanProcessor()) - if TelemetrySink.OTEL in self.config.sinks: + if TelemetrySink.OTEL_METRIC in self.config.sinks: self.meter = metrics.get_meter(__name__) if TelemetrySink.SQLITE in self.config.sinks: self.trace_store = SQLiteTraceStore(self.config.sqlite_db_path)