diff --git a/benchmarking/k8s-benchmark/stack-configmap.yaml b/benchmarking/k8s-benchmark/stack-configmap.yaml
index aed3b97c2..58518ec18 100644
--- a/benchmarking/k8s-benchmark/stack-configmap.yaml
+++ b/benchmarking/k8s-benchmark/stack-configmap.yaml
@@ -9,6 +9,7 @@ data:
     - inference
     - files
     - safety
+    - telemetry
     - tool_runtime
     - vector_io
     providers:
@@ -66,6 +67,12 @@ data:
           db: ${env.POSTGRES_DB:=llamastack}
           user: ${env.POSTGRES_USER:=llamastack}
           password: ${env.POSTGRES_PASSWORD:=llamastack}
+      telemetry:
+      - provider_id: meta-reference
+        provider_type: inline::meta-reference
+        config:
+          service_name: "${env.OTEL_SERVICE_NAME:=\u200B}"
+          sinks: ${env.TELEMETRY_SINKS:=console}
       tool_runtime:
       - provider_id: brave-search
         provider_type: remote::brave-search
diff --git a/docs/docs/distributions/k8s/stack-configmap.yaml b/docs/docs/distributions/k8s/stack-configmap.yaml
index d0e083d29..255e39ac2 100644
--- a/docs/docs/distributions/k8s/stack-configmap.yaml
+++ b/docs/docs/distributions/k8s/stack-configmap.yaml
@@ -8,6 +8,7 @@ data:
     - inference
     - files
     - safety
+    - telemetry
     - tool_runtime
     - vector_io
     providers:
@@ -72,6 +73,12 @@ data:
           db: ${env.POSTGRES_DB:=llamastack}
           user: ${env.POSTGRES_USER:=llamastack}
           password: ${env.POSTGRES_PASSWORD:=llamastack}
+      telemetry:
+      - provider_id: meta-reference
+        provider_type: inline::meta-reference
+        config:
+          service_name: "${env.OTEL_SERVICE_NAME:=\u200B}"
+          sinks: ${env.TELEMETRY_SINKS:=console}
       tool_runtime:
       - provider_id: brave-search
         provider_type: remote::brave-search
diff --git a/docs/docs/distributions/self_hosted_distro/starter.md b/docs/docs/distributions/self_hosted_distro/starter.md
index 3d4c1fc1a..84c35f3d3 100644
--- a/docs/docs/distributions/self_hosted_distro/starter.md
+++ b/docs/docs/distributions/self_hosted_distro/starter.md
@@ -116,6 +116,10 @@ The following environment variables can be configured:
 - `BRAVE_SEARCH_API_KEY`: Brave Search API key
 - `TAVILY_SEARCH_API_KEY`: Tavily Search API key
 
+### Telemetry Configuration
+- `OTEL_SERVICE_NAME`: OpenTelemetry service name
+- `OTEL_EXPORTER_OTLP_ENDPOINT`: OpenTelemetry collector endpoint URL
+
 ## Enabling Providers
 
 You can enable specific providers by setting appropriate environment variables. For example,
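A note on the substitutions above: the `${env.VAR:=default}` form falls back to the default when the variable is unset, so sinks default to `console` and the service name to a zero-width space (`\u200B`, a stand-in for an empty default). A minimal sketch of those semantics, with an invented helper that is not the stack's actual parser:

```python
import os
import re

# Simplified stand-in for the server's ${env.VAR:=default} substitution;
# illustrative only -- the real parser lives in llama_stack and handles
# additional operators beyond :=.
_ENV_PATTERN = re.compile(r"\$\{env\.(\w+):=([^}]*)\}")

def substitute_env_defaults(value: str) -> str:
    # Use the environment value when set, otherwise the := default.
    return _ENV_PATTERN.sub(lambda m: os.environ.get(m.group(1)) or m.group(2), value)

print(substitute_env_defaults("sinks: ${env.TELEMETRY_SINKS:=console}"))
# -> "sinks: console" unless TELEMETRY_SINKS is set
```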
diff --git a/docs/docs/references/python_sdk_reference/index.md b/docs/docs/references/python_sdk_reference/index.md
index 532341a4d..686567458 100644
--- a/docs/docs/references/python_sdk_reference/index.md
+++ b/docs/docs/references/python_sdk_reference/index.md
@@ -360,6 +360,32 @@ Methods:
 
 - client.synthetic_data_generation.generate(\*\*params) -> SyntheticDataGenerationResponse
 
+## Telemetry
+
+Types:
+
+```python
+from llama_stack_client.types import (
+    QuerySpansResponse,
+    SpanWithStatus,
+    Trace,
+    TelemetryGetSpanResponse,
+    TelemetryGetSpanTreeResponse,
+    TelemetryQuerySpansResponse,
+    TelemetryQueryTracesResponse,
+)
+```
+
+Methods:
+
+- client.telemetry.get_span(span_id, \*, trace_id) -> TelemetryGetSpanResponse
+- client.telemetry.get_span_tree(span_id, \*\*params) -> TelemetryGetSpanTreeResponse
+- client.telemetry.get_trace(trace_id) -> Trace
+- client.telemetry.log_event(\*\*params) -> None
+- client.telemetry.query_spans(\*\*params) -> TelemetryQuerySpansResponse
+- client.telemetry.query_traces(\*\*params) -> TelemetryQueryTracesResponse
+- client.telemetry.save_spans_to_dataset(\*\*params) -> None
+
 ## Datasetio
 
 Types:
diff --git a/pyproject.toml b/pyproject.toml
index 654819e1c..3e16dc08f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -346,6 +346,7 @@ exclude = [
     "^src/llama_stack/providers/utils/scoring/aggregation_utils\\.py$",
     "^src/llama_stack/providers/utils/scoring/base_scoring_fn\\.py$",
     "^src/llama_stack/providers/utils/telemetry/dataset_mixin\\.py$",
+    "^src/llama_stack/providers/utils/telemetry/trace_protocol\\.py$",
     "^src/llama_stack/providers/utils/telemetry/tracing\\.py$",
     "^src/llama_stack/distributions/template\\.py$",
 ]
diff --git a/scripts/openapi_generator/schema_collection.py b/scripts/openapi_generator/schema_collection.py
index 127f6da9c..51a70c62a 100644
--- a/scripts/openapi_generator/schema_collection.py
+++ b/scripts/openapi_generator/schema_collection.py
@@ -8,6 +8,7 @@
 Schema discovery and collection for OpenAPI generation.
 """
 
+import importlib
 from typing import Any
 
 
@@ -19,6 +20,23 @@ def _ensure_components_schemas(openapi_schema: dict[str, Any]) -> None:
         openapi_schema["components"]["schemas"] = {}
 
 
+def _load_extra_schema_modules() -> None:
+    """
+    Import modules outside llama_stack_api that use schema_utils to register schemas.
+
+    The API package already imports its submodules via __init__, but server-side modules
+    like telemetry need to be imported explicitly so their decorator side effects run.
+    """
+    extra_modules = [
+        "llama_stack.core.telemetry.telemetry",
+    ]
+    for module_name in extra_modules:
+        try:
+            importlib.import_module(module_name)
+        except ImportError:
+            continue
+
+
 def _extract_and_fix_defs(schema: dict[str, Any], openapi_schema: dict[str, Any]) -> None:
     """
     Extract $defs from a schema, move them to components/schemas, and fix references.
@@ -61,6 +79,9 @@ def _ensure_json_schema_types_included(openapi_schema: dict[str, Any]) -> dict[s
         iter_registered_schema_types,
     )
 
+    # Import extra modules (e.g., telemetry) whose schema registrations live outside llama_stack_api
+    _load_extra_schema_modules()
+
     # Handle explicitly registered schemas first (union types, Annotated structs, etc.)
     for registration_info in iter_registered_schema_types():
         schema_type = registration_info.type
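`_load_extra_schema_modules` works purely through import side effects: the schema decorators populate a registry at import time, so the generator only has to import the module. A hedged sketch of the pattern (registry and decorator names are invented here; `llama_stack_api.schema_utils` implements the real one):

```python
# Invented names for illustration. The point: decorating a class registers it
# when its module is imported, so importlib.import_module(...) alone is enough
# for the generator to collect schemas it never references directly.
SCHEMA_REGISTRY: dict[str, type] = {}

def register_schema_type(cls: type) -> type:
    SCHEMA_REGISTRY[cls.__name__] = cls
    return cls

@register_schema_type
class TelemetryEvent:  # registered as a side effect of module import
    pass

# The generator would only do:
# importlib.import_module("llama_stack.core.telemetry.telemetry")
assert "TelemetryEvent" in SCHEMA_REGISTRY
```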
diff --git a/src/llama_stack/core/datatypes.py b/src/llama_stack/core/datatypes.py
index f64286ef5..3a0f891b0 100644
--- a/src/llama_stack/core/datatypes.py
+++ b/src/llama_stack/core/datatypes.py
@@ -371,6 +371,12 @@ class SafetyConfig(BaseModel):
     )
 
 
+class TelemetryConfig(BaseModel):
+    """Configuration for telemetry collection."""
+
+    enabled: bool = Field(default=False, description="Whether telemetry collection is enabled")
+
+
 class QuotaPeriod(StrEnum):
     DAY = "day"
 
@@ -536,6 +542,11 @@ can be instantiated multiple times (with different configs) if necessary.
         description="Configuration for default moderations model",
     )
 
+    telemetry: TelemetryConfig | None = Field(
+        default=None,
+        description="Configuration for telemetry collection",
+    )
+
     @field_validator("external_providers_dir")
     @classmethod
     def validate_external_providers_dir(cls, v):
diff --git a/src/llama_stack/distributions/ci-tests/run-with-postgres-store.yaml b/src/llama_stack/distributions/ci-tests/run-with-postgres-store.yaml
index 8d270aea6..d942c23a4 100644
--- a/src/llama_stack/distributions/ci-tests/run-with-postgres-store.yaml
+++ b/src/llama_stack/distributions/ci-tests/run-with-postgres-store.yaml
@@ -281,6 +281,8 @@ registered_resources:
     provider_id: rag-runtime
 server:
   port: 8321
+telemetry:
+  enabled: true
 vector_stores:
   default_provider_id: faiss
   default_embedding_model:
diff --git a/src/llama_stack/distributions/ci-tests/run.yaml b/src/llama_stack/distributions/ci-tests/run.yaml
index 2c628fbf9..8b1cd2bb2 100644
--- a/src/llama_stack/distributions/ci-tests/run.yaml
+++ b/src/llama_stack/distributions/ci-tests/run.yaml
@@ -272,6 +272,8 @@ registered_resources:
     provider_id: rag-runtime
 server:
   port: 8321
+telemetry:
+  enabled: true
 vector_stores:
   default_provider_id: faiss
   default_embedding_model:
diff --git a/src/llama_stack/distributions/dell/run-with-safety.yaml b/src/llama_stack/distributions/dell/run-with-safety.yaml
index 63bd95168..e0da8060d 100644
--- a/src/llama_stack/distributions/dell/run-with-safety.yaml
+++ b/src/llama_stack/distributions/dell/run-with-safety.yaml
@@ -140,3 +140,5 @@ registered_resources:
     provider_id: rag-runtime
 server:
   port: 8321
+telemetry:
+  enabled: true
diff --git a/src/llama_stack/distributions/dell/run.yaml b/src/llama_stack/distributions/dell/run.yaml
index 93f0c35bc..bc3117d88 100644
--- a/src/llama_stack/distributions/dell/run.yaml
+++ b/src/llama_stack/distributions/dell/run.yaml
@@ -131,3 +131,5 @@ registered_resources:
     provider_id: rag-runtime
 server:
   port: 8321
+telemetry:
+  enabled: true
diff --git a/src/llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml b/src/llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml
index 63fc3b1d2..2fa9d198b 100644
--- a/src/llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml
+++ b/src/llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml
@@ -153,3 +153,5 @@ registered_resources:
     provider_id: rag-runtime
 server:
   port: 8321
+telemetry:
+  enabled: true
diff --git a/src/llama_stack/distributions/meta-reference-gpu/run.yaml b/src/llama_stack/distributions/meta-reference-gpu/run.yaml
index ba8235398..5c7f75ca8 100644
--- a/src/llama_stack/distributions/meta-reference-gpu/run.yaml
+++ b/src/llama_stack/distributions/meta-reference-gpu/run.yaml
@@ -138,3 +138,5 @@ registered_resources:
     provider_id: rag-runtime
 server:
   port: 8321
+telemetry:
+  enabled: true
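The `telemetry: enabled: true` blocks added to the run configs above parse into the new `TelemetryConfig` model. A self-contained sketch (the model is re-declared here so the snippet runs standalone; pydantic v2 assumed, as the repo uses):

```python
import yaml
from pydantic import BaseModel, Field

class TelemetryConfig(BaseModel):
    """Mirror of the model added to src/llama_stack/core/datatypes.py."""
    enabled: bool = Field(default=False, description="Whether telemetry collection is enabled")

doc = yaml.safe_load("telemetry:\n  enabled: true\n")
config = TelemetryConfig(**doc["telemetry"])
assert config.enabled  # an omitted block would leave the default of False
```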
diff --git a/src/llama_stack/distributions/nvidia/run-with-safety.yaml b/src/llama_stack/distributions/nvidia/run-with-safety.yaml
index 7d95565e5..d2c7dd090 100644
--- a/src/llama_stack/distributions/nvidia/run-with-safety.yaml
+++ b/src/llama_stack/distributions/nvidia/run-with-safety.yaml
@@ -135,3 +135,5 @@ registered_resources:
     provider_id: rag-runtime
 server:
   port: 8321
+telemetry:
+  enabled: true
diff --git a/src/llama_stack/distributions/nvidia/run.yaml b/src/llama_stack/distributions/nvidia/run.yaml
index 8c80b8303..c267587c7 100644
--- a/src/llama_stack/distributions/nvidia/run.yaml
+++ b/src/llama_stack/distributions/nvidia/run.yaml
@@ -114,3 +114,5 @@ registered_resources:
     provider_id: rag-runtime
 server:
   port: 8321
+telemetry:
+  enabled: true
diff --git a/src/llama_stack/distributions/oci/run.yaml b/src/llama_stack/distributions/oci/run.yaml
index ff0c818be..e385ec606 100644
--- a/src/llama_stack/distributions/oci/run.yaml
+++ b/src/llama_stack/distributions/oci/run.yaml
@@ -132,3 +132,5 @@ registered_resources:
     provider_id: tavily-search
 server:
   port: 8321
+telemetry:
+  enabled: true
diff --git a/src/llama_stack/distributions/open-benchmark/run.yaml b/src/llama_stack/distributions/open-benchmark/run.yaml
index 43aa45b51..7ebc58841 100644
--- a/src/llama_stack/distributions/open-benchmark/run.yaml
+++ b/src/llama_stack/distributions/open-benchmark/run.yaml
@@ -251,3 +251,5 @@ registered_resources:
     provider_id: rag-runtime
 server:
   port: 8321
+telemetry:
+  enabled: true
diff --git a/src/llama_stack/distributions/postgres-demo/run.yaml b/src/llama_stack/distributions/postgres-demo/run.yaml
index c9316f923..049f519cd 100644
--- a/src/llama_stack/distributions/postgres-demo/run.yaml
+++ b/src/llama_stack/distributions/postgres-demo/run.yaml
@@ -114,3 +114,5 @@ registered_resources:
     provider_id: rag-runtime
 server:
   port: 8321
+telemetry:
+  enabled: true
diff --git a/src/llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml b/src/llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml
index 17ef2ad22..75cc9d188 100644
--- a/src/llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml
+++ b/src/llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml
@@ -284,6 +284,8 @@ registered_resources:
     provider_id: rag-runtime
 server:
   port: 8321
+telemetry:
+  enabled: true
 vector_stores:
   default_provider_id: faiss
   default_embedding_model:
diff --git a/src/llama_stack/distributions/starter-gpu/run.yaml b/src/llama_stack/distributions/starter-gpu/run.yaml
index 58b619972..09c7be5a1 100644
--- a/src/llama_stack/distributions/starter-gpu/run.yaml
+++ b/src/llama_stack/distributions/starter-gpu/run.yaml
@@ -275,6 +275,8 @@ registered_resources:
     provider_id: rag-runtime
 server:
   port: 8321
+telemetry:
+  enabled: true
 vector_stores:
   default_provider_id: faiss
   default_embedding_model:
diff --git a/src/llama_stack/distributions/starter/run-with-postgres-store.yaml b/src/llama_stack/distributions/starter/run-with-postgres-store.yaml
index 8c0362864..f59c809d2 100644
--- a/src/llama_stack/distributions/starter/run-with-postgres-store.yaml
+++ b/src/llama_stack/distributions/starter/run-with-postgres-store.yaml
@@ -281,6 +281,8 @@ registered_resources:
     provider_id: rag-runtime
 server:
   port: 8321
+telemetry:
+  enabled: true
 vector_stores:
   default_provider_id: faiss
   default_embedding_model:
diff --git a/src/llama_stack/distributions/starter/run.yaml b/src/llama_stack/distributions/starter/run.yaml
index b5bd5f18c..435bb22a7 100644
--- a/src/llama_stack/distributions/starter/run.yaml
+++ b/src/llama_stack/distributions/starter/run.yaml
@@ -272,6 +272,8 @@ registered_resources:
     provider_id: rag-runtime
 server:
   port: 8321
+telemetry:
+  enabled: true
 vector_stores:
   default_provider_id: faiss
   default_embedding_model:
diff --git a/src/llama_stack/distributions/template.py b/src/llama_stack/distributions/template.py
index bab3211e9..90b458805 100644
--- a/src/llama_stack/distributions/template.py
+++ b/src/llama_stack/distributions/template.py
@@ -24,6 +24,7 @@ from llama_stack.core.datatypes import (
     Provider,
     SafetyConfig,
     ShieldInput,
+    TelemetryConfig,
     ToolGroupInput,
     VectorStoresConfig,
 )
@@ -188,6 +189,7 @@ class RunConfigSettings(BaseModel):
     default_benchmarks: list[BenchmarkInput] | None = None
     vector_stores_config: VectorStoresConfig | None = None
     safety_config: SafetyConfig | None = None
+    telemetry: TelemetryConfig = Field(default_factory=lambda: TelemetryConfig(enabled=True))
     storage_backends: dict[str, Any] | None = None
     storage_stores: dict[str, Any] | None = None
 
@@ -287,6 +289,7 @@ class RunConfigSettings(BaseModel):
             "server": {
                 "port": 8321,
             },
+            "telemetry": self.telemetry.model_dump(exclude_none=True) if self.telemetry else None,
         }
 
         if self.vector_stores_config:
diff --git a/src/llama_stack/distributions/watsonx/run.yaml b/src/llama_stack/distributions/watsonx/run.yaml
index 55ea34cb6..f8c489fe3 100644
--- a/src/llama_stack/distributions/watsonx/run.yaml
+++ b/src/llama_stack/distributions/watsonx/run.yaml
@@ -132,3 +132,5 @@ registered_resources:
     provider_id: rag-runtime
 server:
   port: 8321
+telemetry:
+  enabled: true
diff --git a/src/llama_stack_api/common/tracing.py b/src/llama_stack_api/common/tracing.py
new file mode 100644
index 000000000..830c2945a
--- /dev/null
+++ b/src/llama_stack_api/common/tracing.py
@@ -0,0 +1,22 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+
+def telemetry_traceable(cls):
+    """
+    Mark a protocol for automatic tracing when telemetry is enabled.
+
+    This is a metadata-only decorator with no dependencies on core.
+    Actual tracing is applied by core routers at runtime if telemetry is enabled.
+
+    Usage:
+        @runtime_checkable
+        @telemetry_traceable
+        class MyProtocol(Protocol):
+            ...
+    """
+    cls.__marked_for_tracing__ = True
+    return cls
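Since `telemetry_traceable` only stamps metadata, a consumer checks the marker before wrapping anything. A hedged sketch (the `should_trace` helper is invented; the real wrapping lives in core routers):

```python
from typing import Protocol, runtime_checkable

from llama_stack_api.common.tracing import telemetry_traceable

@runtime_checkable
@telemetry_traceable
class MyProtocol(Protocol):
    async def do_work(self) -> None: ...

def should_trace(protocol_cls: type) -> bool:
    # The decorator sets __marked_for_tracing__ = True and nothing else.
    return getattr(protocol_cls, "__marked_for_tracing__", False)

assert should_trace(MyProtocol)
```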
+ """ + cls.__marked_for_tracing__ = True + return cls diff --git a/src/llama_stack_api/conversations.py b/src/llama_stack_api/conversations.py index 81b5ab2c4..4854181d1 100644 --- a/src/llama_stack_api/conversations.py +++ b/src/llama_stack_api/conversations.py @@ -9,6 +9,7 @@ from typing import Annotated, Literal, Protocol, runtime_checkable from pydantic import BaseModel, Field +from llama_stack_api.common.tracing import telemetry_traceable from llama_stack_api.openai_responses import ( OpenAIResponseInputFunctionToolCallOutput, OpenAIResponseMCPApprovalRequest, @@ -156,6 +157,7 @@ class ConversationItemDeletedResource(BaseModel): @runtime_checkable +@telemetry_traceable class Conversations(Protocol): """Conversations diff --git a/src/llama_stack_api/datatypes.py b/src/llama_stack_api/datatypes.py index c12fbc778..f024068f3 100644 --- a/src/llama_stack_api/datatypes.py +++ b/src/llama_stack_api/datatypes.py @@ -102,6 +102,7 @@ class Api(Enum, metaclass=DynamicApiMeta): :cvar eval: Model evaluation and benchmarking framework :cvar post_training: Fine-tuning and model training :cvar tool_runtime: Tool execution and management + :cvar telemetry: Observability and system monitoring :cvar models: Model metadata and management :cvar shields: Safety shield implementations :cvar datasets: Dataset creation and management diff --git a/src/llama_stack_api/files.py b/src/llama_stack_api/files.py index e515fe0ae..8a75a1c39 100644 --- a/src/llama_stack_api/files.py +++ b/src/llama_stack_api/files.py @@ -11,6 +11,7 @@ from fastapi import File, Form, Response, UploadFile from pydantic import BaseModel, Field from llama_stack_api.common.responses import Order +from llama_stack_api.common.tracing import telemetry_traceable from llama_stack_api.schema_utils import json_schema_type, webmethod from llama_stack_api.version import LLAMA_STACK_API_V1 @@ -101,6 +102,7 @@ class OpenAIFileDeleteResponse(BaseModel): @runtime_checkable +@telemetry_traceable class Files(Protocol): """Files diff --git a/src/llama_stack_api/inference.py b/src/llama_stack_api/inference.py index 4a169486a..b42de95be 100644 --- a/src/llama_stack_api/inference.py +++ b/src/llama_stack_api/inference.py @@ -22,6 +22,7 @@ from llama_stack_api.common.content_types import InterleavedContent from llama_stack_api.common.responses import ( Order, ) +from llama_stack_api.common.tracing import telemetry_traceable from llama_stack_api.models import Model from llama_stack_api.schema_utils import json_schema_type, register_schema, webmethod from llama_stack_api.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1ALPHA @@ -988,6 +989,7 @@ class OpenAIEmbeddingsRequestWithExtraBody(BaseModel, extra="allow"): @runtime_checkable +@telemetry_traceable class InferenceProvider(Protocol): """ This protocol defines the interface that should be implemented by all inference providers. 
diff --git a/src/llama_stack_api/models.py b/src/llama_stack_api/models.py
index 3efdfe66b..98c16b6c2 100644
--- a/src/llama_stack_api/models.py
+++ b/src/llama_stack_api/models.py
@@ -9,6 +9,7 @@ from typing import Any, Literal, Protocol, runtime_checkable
 
 from pydantic import BaseModel, ConfigDict, Field, field_validator
 
+from llama_stack_api.common.tracing import telemetry_traceable
 from llama_stack_api.resource import Resource, ResourceType
 from llama_stack_api.schema_utils import json_schema_type, webmethod
 from llama_stack_api.version import LLAMA_STACK_API_V1
@@ -105,6 +106,7 @@ class OpenAIListModelsResponse(BaseModel):
 
 
 @runtime_checkable
+@telemetry_traceable
 class Models(Protocol):
     async def list_models(self) -> ListModelsResponse:
         """List all models.
diff --git a/src/llama_stack_api/prompts.py b/src/llama_stack_api/prompts.py
index 2054ccd30..8562e4704 100644
--- a/src/llama_stack_api/prompts.py
+++ b/src/llama_stack_api/prompts.py
@@ -10,6 +10,7 @@ from typing import Protocol, runtime_checkable
 
 from pydantic import BaseModel, Field, field_validator, model_validator
 
+from llama_stack_api.common.tracing import telemetry_traceable
 from llama_stack_api.schema_utils import json_schema_type, webmethod
 from llama_stack_api.version import LLAMA_STACK_API_V1
 
@@ -92,6 +93,7 @@ class ListPromptsResponse(BaseModel):
 
 
 @runtime_checkable
+@telemetry_traceable
 class Prompts(Protocol):
     """Prompts
 
diff --git a/src/llama_stack_api/safety.py b/src/llama_stack_api/safety.py
index 7b4f2af5c..ef84be2ea 100644
--- a/src/llama_stack_api/safety.py
+++ b/src/llama_stack_api/safety.py
@@ -9,6 +9,7 @@ from typing import Any, Protocol, runtime_checkable
 
 from pydantic import BaseModel, Field
 
+from llama_stack_api.common.tracing import telemetry_traceable
 from llama_stack_api.inference import OpenAIMessageParam
 from llama_stack_api.schema_utils import json_schema_type, webmethod
 from llama_stack_api.shields import Shield
@@ -93,6 +94,7 @@ class ShieldStore(Protocol):
 
 
 @runtime_checkable
+@telemetry_traceable
 class Safety(Protocol):
     """Safety
 
diff --git a/src/llama_stack_api/shields.py b/src/llama_stack_api/shields.py
index 36ad2351b..19e412a5a 100644
--- a/src/llama_stack_api/shields.py
+++ b/src/llama_stack_api/shields.py
@@ -8,6 +8,7 @@ from typing import Any, Literal, Protocol, runtime_checkable
 
 from pydantic import BaseModel
 
+from llama_stack_api.common.tracing import telemetry_traceable
 from llama_stack_api.resource import Resource, ResourceType
 from llama_stack_api.schema_utils import json_schema_type, webmethod
 from llama_stack_api.version import LLAMA_STACK_API_V1
 
@@ -48,6 +49,7 @@ class ListShieldsResponse(BaseModel):
 
 
 @runtime_checkable
+@telemetry_traceable
 class Shields(Protocol):
     @webmethod(route="/shields", method="GET", level=LLAMA_STACK_API_V1)
     async def list_shields(self) -> ListShieldsResponse:
diff --git a/src/llama_stack_api/tools.py b/src/llama_stack_api/tools.py
index 94f2251b0..4dd5d55d2 100644
--- a/src/llama_stack_api/tools.py
+++ b/src/llama_stack_api/tools.py
@@ -11,6 +11,7 @@ from pydantic import BaseModel
 from typing_extensions import runtime_checkable
 
 from llama_stack_api.common.content_types import URL, InterleavedContent
+from llama_stack_api.common.tracing import telemetry_traceable
 from llama_stack_api.resource import Resource, ResourceType
 from llama_stack_api.schema_utils import json_schema_type, webmethod
 from llama_stack_api.version import LLAMA_STACK_API_V1
 
@@ -108,6 +109,7 @@ class ListToolDefsResponse(BaseModel):
 
 
 @runtime_checkable
+@telemetry_traceable
 class ToolGroups(Protocol):
     @webmethod(route="/toolgroups", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
     async def register_tool_group(
@@ -189,6 +191,7 @@ class SpecialToolGroup(Enum):
 
 
 @runtime_checkable
+@telemetry_traceable
 class ToolRuntime(Protocol):
     tool_store: ToolStore | None = None
 
diff --git a/src/llama_stack_api/vector_io.py b/src/llama_stack_api/vector_io.py
index 188ea3307..135468d19 100644
--- a/src/llama_stack_api/vector_io.py
+++ b/src/llama_stack_api/vector_io.py
@@ -13,6 +13,7 @@ from typing import Annotated, Any, Literal, Protocol, runtime_checkable
 from fastapi import Body, Query
 from pydantic import BaseModel, Field, field_validator
 
+from llama_stack_api.common.tracing import telemetry_traceable
 from llama_stack_api.inference import InterleavedContent
 from llama_stack_api.schema_utils import json_schema_type, register_schema, webmethod
 from llama_stack_api.vector_stores import VectorStore
@@ -571,6 +572,7 @@ class VectorStoreTable(Protocol):
 
 
 @runtime_checkable
+@telemetry_traceable
 class VectorIO(Protocol):
     vector_store_table: VectorStoreTable | None = None
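Putting the pieces together: a core router can combine the run.yaml `telemetry.enabled` flag with the `__marked_for_tracing__` marker to decide whether to wrap an implementation's methods in spans. A hedged end-to-end sketch (the helper name is invented; a real router would open OpenTelemetry spans):

```python
import functools
import inspect

def maybe_wrap_with_tracing(impl: object, protocol_cls: type, telemetry_enabled: bool) -> object:
    # Trace only when telemetry is on AND the protocol opted in via the decorator.
    if not (telemetry_enabled and getattr(protocol_cls, "__marked_for_tracing__", False)):
        return impl

    def traced(fn):
        @functools.wraps(fn)
        async def wrapper(*args, **kwargs):
            # A real implementation would start a span here and record status on exit.
            return await fn(*args, **kwargs)
        return wrapper

    for name, method in inspect.getmembers(impl, predicate=inspect.iscoroutinefunction):
        if not name.startswith("_"):
            setattr(impl, name, traced(method))
    return impl
```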