From 07ff15d91797c23e23ea7fc9e8716c0bf5b55bd8 Mon Sep 17 00:00:00 2001 From: ehhuang Date: Thu, 16 Oct 2025 11:29:51 -0700 Subject: [PATCH 1/2] chore: distrogen enables telemetry by default (#3828) # What does this PR do? Follow-up to #3815: distrogen now emits `telemetry: enabled: true` in the generated run configs, so telemetry is enabled by default. ## Test Plan CI --- [//]: # (BEGIN SAPLING FOOTER) Stack created with [Sapling](https://sapling-scm.com). Best reviewed with [ReviewStack](https://reviewstack.dev/llamastack/llama-stack/pull/3828). * #3830 * __->__ #3828 --- llama_stack/core/resolver.py | 2 ++ llama_stack/distributions/ci-tests/run.yaml | 2 ++ llama_stack/distributions/dell/run-with-safety.yaml | 2 ++ llama_stack/distributions/dell/run.yaml | 2 ++ .../distributions/meta-reference-gpu/run-with-safety.yaml | 2 ++ llama_stack/distributions/meta-reference-gpu/run.yaml | 2 ++ llama_stack/distributions/nvidia/run-with-safety.yaml | 2 ++ llama_stack/distributions/nvidia/run.yaml | 2 ++ llama_stack/distributions/open-benchmark/run.yaml | 2 ++ llama_stack/distributions/postgres-demo/run.yaml | 2 ++ llama_stack/distributions/starter-gpu/run.yaml | 2 ++ llama_stack/distributions/starter/run.yaml | 2 ++ llama_stack/distributions/template.py | 3 +++ llama_stack/distributions/watsonx/run.yaml | 2 ++ 14 files changed, 29 insertions(+) diff --git a/llama_stack/core/resolver.py b/llama_stack/core/resolver.py index f2d7089a6..73c047979 100644 --- a/llama_stack/core/resolver.py +++ b/llama_stack/core/resolver.py @@ -26,6 +26,7 @@ from llama_stack.apis.safety import Safety from llama_stack.apis.scoring import Scoring from llama_stack.apis.scoring_functions import ScoringFunctions from llama_stack.apis.shields import Shields +from llama_stack.apis.telemetry import Telemetry from llama_stack.apis.tools import ToolGroups, ToolRuntime from llama_stack.apis.vector_io import VectorIO from llama_stack.apis.version import LLAMA_STACK_API_V1ALPHA @@ -94,6 +95,7 @@ def api_protocol_map(external_apis: dict[Api, ExternalApiSpec] | None = None) -> Api.files: Files, Api.prompts: Prompts, 
Api.conversations: Conversations, + Api.telemetry: Telemetry, } if external_apis: diff --git a/llama_stack/distributions/ci-tests/run.yaml b/llama_stack/distributions/ci-tests/run.yaml index e964c044c..a6a6b7c0d 100644 --- a/llama_stack/distributions/ci-tests/run.yaml +++ b/llama_stack/distributions/ci-tests/run.yaml @@ -237,3 +237,5 @@ tool_groups: provider_id: rag-runtime server: port: 8321 +telemetry: + enabled: true diff --git a/llama_stack/distributions/dell/run-with-safety.yaml b/llama_stack/distributions/dell/run-with-safety.yaml index fa8e63107..5da3cf511 100644 --- a/llama_stack/distributions/dell/run-with-safety.yaml +++ b/llama_stack/distributions/dell/run-with-safety.yaml @@ -122,3 +122,5 @@ tool_groups: provider_id: rag-runtime server: port: 8321 +telemetry: + enabled: true diff --git a/llama_stack/distributions/dell/run.yaml b/llama_stack/distributions/dell/run.yaml index ac6ce22b8..ac0fdc0fa 100644 --- a/llama_stack/distributions/dell/run.yaml +++ b/llama_stack/distributions/dell/run.yaml @@ -113,3 +113,5 @@ tool_groups: provider_id: rag-runtime server: port: 8321 +telemetry: + enabled: true diff --git a/llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml b/llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml index 3467fffa4..874c5050f 100644 --- a/llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +++ b/llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml @@ -135,3 +135,5 @@ tool_groups: provider_id: rag-runtime server: port: 8321 +telemetry: + enabled: true diff --git a/llama_stack/distributions/meta-reference-gpu/run.yaml b/llama_stack/distributions/meta-reference-gpu/run.yaml index f8546205a..50553d2c7 100644 --- a/llama_stack/distributions/meta-reference-gpu/run.yaml +++ b/llama_stack/distributions/meta-reference-gpu/run.yaml @@ -120,3 +120,5 @@ tool_groups: provider_id: rag-runtime server: port: 8321 +telemetry: + enabled: true diff --git 
a/llama_stack/distributions/nvidia/run-with-safety.yaml b/llama_stack/distributions/nvidia/run-with-safety.yaml index c56d9a7c1..e0482f67d 100644 --- a/llama_stack/distributions/nvidia/run-with-safety.yaml +++ b/llama_stack/distributions/nvidia/run-with-safety.yaml @@ -118,3 +118,5 @@ tool_groups: provider_id: rag-runtime server: port: 8321 +telemetry: + enabled: true diff --git a/llama_stack/distributions/nvidia/run.yaml b/llama_stack/distributions/nvidia/run.yaml index 8608ca425..950782eed 100644 --- a/llama_stack/distributions/nvidia/run.yaml +++ b/llama_stack/distributions/nvidia/run.yaml @@ -97,3 +97,5 @@ tool_groups: provider_id: rag-runtime server: port: 8321 +telemetry: + enabled: true diff --git a/llama_stack/distributions/open-benchmark/run.yaml b/llama_stack/distributions/open-benchmark/run.yaml index 067584649..a738887b4 100644 --- a/llama_stack/distributions/open-benchmark/run.yaml +++ b/llama_stack/distributions/open-benchmark/run.yaml @@ -233,3 +233,5 @@ tool_groups: provider_id: rag-runtime server: port: 8321 +telemetry: + enabled: true diff --git a/llama_stack/distributions/postgres-demo/run.yaml b/llama_stack/distributions/postgres-demo/run.yaml index 69032becf..62faf3f62 100644 --- a/llama_stack/distributions/postgres-demo/run.yaml +++ b/llama_stack/distributions/postgres-demo/run.yaml @@ -104,3 +104,5 @@ tool_groups: provider_id: rag-runtime server: port: 8321 +telemetry: + enabled: true diff --git a/llama_stack/distributions/starter-gpu/run.yaml b/llama_stack/distributions/starter-gpu/run.yaml index d55e5e4be..370d4b516 100644 --- a/llama_stack/distributions/starter-gpu/run.yaml +++ b/llama_stack/distributions/starter-gpu/run.yaml @@ -240,3 +240,5 @@ tool_groups: provider_id: rag-runtime server: port: 8321 +telemetry: + enabled: true diff --git a/llama_stack/distributions/starter/run.yaml b/llama_stack/distributions/starter/run.yaml index f15be3cc5..2f4e7f350 100644 --- a/llama_stack/distributions/starter/run.yaml +++ 
b/llama_stack/distributions/starter/run.yaml @@ -237,3 +237,5 @@ tool_groups: provider_id: rag-runtime server: port: 8321 +telemetry: + enabled: true diff --git a/llama_stack/distributions/template.py b/llama_stack/distributions/template.py index 59beb8a8a..807829999 100644 --- a/llama_stack/distributions/template.py +++ b/llama_stack/distributions/template.py @@ -25,6 +25,7 @@ from llama_stack.core.datatypes import ( ModelInput, Provider, ShieldInput, + TelemetryConfig, ToolGroupInput, ) from llama_stack.core.distribution import get_provider_registry @@ -182,6 +183,7 @@ class RunConfigSettings(BaseModel): metadata_store: dict | None = None inference_store: dict | None = None conversations_store: dict | None = None + telemetry: TelemetryConfig = Field(default_factory=lambda: TelemetryConfig(enabled=True)) def run_config( self, @@ -256,6 +258,7 @@ class RunConfigSettings(BaseModel): "server": { "port": 8321, }, + "telemetry": self.telemetry.model_dump(exclude_none=True) if self.telemetry else None, } diff --git a/llama_stack/distributions/watsonx/run.yaml b/llama_stack/distributions/watsonx/run.yaml index 6b925e180..c3db4eeb8 100644 --- a/llama_stack/distributions/watsonx/run.yaml +++ b/llama_stack/distributions/watsonx/run.yaml @@ -114,3 +114,5 @@ tool_groups: provider_id: rag-runtime server: port: 8321 +telemetry: + enabled: true From 6fbbb3e78bcf87a1b21ea0f836dfb40420f7061b Mon Sep 17 00:00:00 2001 From: Eric Huang Date: Thu, 16 Oct 2025 11:33:23 -0700 Subject: [PATCH 2/2] fix(telemetry): remove dependency on old telemetry config MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # What does this PR do? 
Removes the telemetry provider's remaining dependency on the old telemetry config, which was removed in #3815. ## Test Plan ❯ OTEL_SERVICE_NAME=aloha OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318 uv run llama stack run starter --- llama_stack/core/datatypes.py | 12 +++++ .../telemetry/meta_reference/telemetry.py | 49 +++++-------------- 2 files changed, 25 insertions(+), 36 deletions(-) diff --git a/llama_stack/core/datatypes.py b/llama_stack/core/datatypes.py index b376901fd..94222d49e 100644 --- a/llama_stack/core/datatypes.py +++ b/llama_stack/core/datatypes.py @@ -177,6 +177,18 @@ class DistributionSpec(BaseModel): class TelemetryConfig(BaseModel): + """ + Configuration for telemetry. + + Llama Stack uses OpenTelemetry for telemetry. Please refer to https://opentelemetry.io/docs/languages/sdk-configuration/ + for env variables to configure the OpenTelemetry SDK. + + Example: + ```bash + OTEL_SERVICE_NAME=llama-stack OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318 uv run llama stack run starter + ``` + """ + enabled: bool = Field(default=False, description="enable or disable telemetry") diff --git a/llama_stack/providers/inline/telemetry/meta_reference/telemetry.py b/llama_stack/providers/inline/telemetry/meta_reference/telemetry.py index f56609cab..7a993b891 100644 --- a/llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +++ b/llama_stack/providers/inline/telemetry/meta_reference/telemetry.py @@ -12,10 +12,8 @@ from opentelemetry.exporter.otlp.proto.http.metric_exporter import OTLPMetricExp from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter from opentelemetry.sdk.metrics import MeterProvider from opentelemetry.sdk.metrics.export import PeriodicExportingMetricReader -from opentelemetry.sdk.resources import Resource from opentelemetry.sdk.trace import TracerProvider from opentelemetry.sdk.trace.export import BatchSpanProcessor -from opentelemetry.semconv.resource import ResourceAttributes from opentelemetry.trace.propagation.tracecontext import TraceContextTextMapPropagator 
from llama_stack.apis.telemetry import ( @@ -30,12 +28,9 @@ from llama_stack.apis.telemetry import ( ) from llama_stack.core.datatypes import Api from llama_stack.log import get_logger -from llama_stack.providers.inline.telemetry.meta_reference.console_span_processor import ( - ConsoleSpanProcessor, -) from llama_stack.providers.utils.telemetry.tracing import ROOT_SPAN_MARKERS -from .config import TelemetryConfig, TelemetrySink +from .config import TelemetryConfig _GLOBAL_STORAGE: dict[str, dict[str | int, Any]] = { "active_spans": {}, @@ -55,17 +50,10 @@ def is_tracing_enabled(tracer): class TelemetryAdapter(Telemetry): - def __init__(self, config: TelemetryConfig, deps: dict[Api, Any]) -> None: - self.config = config + def __init__(self, _config: TelemetryConfig, deps: dict[Api, Any]) -> None: self.datasetio_api = deps.get(Api.datasetio) self.meter = None - resource = Resource.create( - { - ResourceAttributes.SERVICE_NAME: self.config.service_name, - } - ) - global _TRACER_PROVIDER # Initialize the correct span processor based on the provider state. # This is needed since once the span processor is set, it cannot be unset. @@ -73,35 +61,24 @@ class TelemetryAdapter(Telemetry): # Since the library client can be recreated multiple times in a notebook, # the kernel will hold on to the span processor and cause duplicate spans to be written. 
if _TRACER_PROVIDER is None: - provider = TracerProvider(resource=resource) + provider = TracerProvider() trace.set_tracer_provider(provider) _TRACER_PROVIDER = provider # Use single OTLP endpoint for all telemetry signals - if TelemetrySink.OTEL_TRACE in self.config.sinks or TelemetrySink.OTEL_METRIC in self.config.sinks: - if self.config.otel_exporter_otlp_endpoint is None: - raise ValueError( - "otel_exporter_otlp_endpoint is required when OTEL_TRACE or OTEL_METRIC is enabled" - ) - # Let OpenTelemetry SDK handle endpoint construction automatically - # The SDK will read OTEL_EXPORTER_OTLP_ENDPOINT and construct appropriate URLs - # https://opentelemetry.io/docs/languages/sdk-configuration/otlp-exporter - if TelemetrySink.OTEL_TRACE in self.config.sinks: - span_exporter = OTLPSpanExporter() - span_processor = BatchSpanProcessor(span_exporter) - trace.get_tracer_provider().add_span_processor(span_processor) + # Let OpenTelemetry SDK handle endpoint construction automatically + # The SDK will read OTEL_EXPORTER_OTLP_ENDPOINT and construct appropriate URLs + # https://opentelemetry.io/docs/languages/sdk-configuration/otlp-exporter + span_exporter = OTLPSpanExporter() + span_processor = BatchSpanProcessor(span_exporter) + trace.get_tracer_provider().add_span_processor(span_processor) - if TelemetrySink.OTEL_METRIC in self.config.sinks: - metric_reader = PeriodicExportingMetricReader(OTLPMetricExporter()) - metric_provider = MeterProvider(resource=resource, metric_readers=[metric_reader]) - metrics.set_meter_provider(metric_provider) + metric_reader = PeriodicExportingMetricReader(OTLPMetricExporter()) + metric_provider = MeterProvider(metric_readers=[metric_reader]) + metrics.set_meter_provider(metric_provider) - if TelemetrySink.CONSOLE in self.config.sinks: - trace.get_tracer_provider().add_span_processor(ConsoleSpanProcessor(print_attributes=True)) - - if TelemetrySink.OTEL_METRIC in self.config.sinks: - self.meter = metrics.get_meter(__name__) + self.meter = 
metrics.get_meter(__name__) self._lock = _global_lock