Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-12-03 09:53:45 +00:00)

Commit 08408a2585 (parent f3f4b2a6b5): remove telemetry API completely

29 changed files with 4 additions and 124 deletions
@@ -9,7 +9,6 @@ data:
     - inference
     - files
     - safety
-    - telemetry
     - tool_runtime
     - vector_io
     providers:
@@ -67,12 +66,6 @@ data:
         db: ${env.POSTGRES_DB:=llamastack}
         user: ${env.POSTGRES_USER:=llamastack}
         password: ${env.POSTGRES_PASSWORD:=llamastack}
-      telemetry:
-      - provider_id: meta-reference
-        provider_type: inline::meta-reference
-        config:
-          service_name: "${env.OTEL_SERVICE_NAME:=\u200B}"
-          sinks: ${env.TELEMETRY_SINKS:=console}
       tool_runtime:
       - provider_id: brave-search
         provider_type: remote::brave-search
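The `${env.VAR:=default}` references in this hunk are resolved when the stack loads its config. As a rough illustration of the substitution semantics (a hypothetical resolver, not the project's actual implementation):

```python
import os
import re

# Hypothetical re-implementation of the ${env.NAME:=default} syntax used in
# the config above; llama-stack's real resolver may differ in edge cases.
_ENV_REF = re.compile(r"\$\{env\.(?P<name>\w+)(?::=(?P<default>[^}]*))?\}")

def substitute_env(text: str) -> str:
    def repl(match: re.Match) -> str:
        return os.environ.get(match.group("name"), match.group("default") or "")
    return _ENV_REF.sub(repl, text)

print(substitute_env("db: ${env.POSTGRES_DB:=llamastack}"))  # -> db: llamastack
```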
@@ -126,8 +126,6 @@ registered_resources:
     provider_id: rag-runtime
 server:
   port: 8323
-telemetry:
-  enabled: true
 vector_stores:
   default_provider_id: chromadb
   default_embedding_model:
@@ -17,7 +17,6 @@ A Llama Stack API is described as a collection of REST endpoints following OpenA
 - **Eval**: generate outputs (via Inference or Agents) and perform scoring
 - **VectorIO**: perform operations on vector stores, such as adding documents, searching, and deleting documents
 - **Files**: manage file uploads, storage, and retrieval
-- **Telemetry**: collect telemetry data from the system
 - **Post Training**: fine-tune a model
 - **Tool Runtime**: interact with various tools and protocols
 - **Responses**: generate responses from an LLM
@@ -8,7 +8,6 @@ data:
     - inference
     - files
     - safety
-    - telemetry
     - tool_runtime
     - vector_io
     providers:
@@ -73,12 +72,6 @@ data:
         db: ${env.POSTGRES_DB:=llamastack}
         user: ${env.POSTGRES_USER:=llamastack}
         password: ${env.POSTGRES_PASSWORD:=llamastack}
-      telemetry:
-      - provider_id: meta-reference
-        provider_type: inline::meta-reference
-        config:
-          service_name: "${env.OTEL_SERVICE_NAME:=\u200B}"
-          sinks: ${env.TELEMETRY_SINKS:=console}
       tool_runtime:
       - provider_id: brave-search
         provider_type: remote::brave-search
@@ -140,8 +140,6 @@ server:
   auth:
     provider_config:
       type: github_token
-telemetry:
-  enabled: true
 vector_stores:
   default_provider_id: chromadb
   default_embedding_model:
@@ -116,10 +116,6 @@ The following environment variables can be configured:
 - `BRAVE_SEARCH_API_KEY`: Brave Search API key
 - `TAVILY_SEARCH_API_KEY`: Tavily Search API key
 
-### Telemetry Configuration
-- `OTEL_SERVICE_NAME`: OpenTelemetry service name
-- `OTEL_EXPORTER_OTLP_ENDPOINT`: OpenTelemetry collector endpoint URL
-
 ## Enabling Providers
 
 You can enable specific providers by setting appropriate environment variables. For example,
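`OTEL_SERVICE_NAME` and `OTEL_EXPORTER_OTLP_ENDPOINT` are standard OpenTelemetry SDK variables rather than llama-stack inventions, so applications that still want traces after this removal can wire up the vanilla SDK themselves. A minimal sketch, assuming the `opentelemetry-sdk` and OTLP-exporter packages are installed (they are not part of this diff):

```python
# The stack no longer consumes these variables after this commit, but the
# stock OpenTelemetry SDK honors the same conventions.
from opentelemetry import trace
from opentelemetry.sdk.resources import Resource
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import BatchSpanProcessor
from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter

# Resource.create() reads OTEL_SERVICE_NAME; OTLPSpanExporter() reads
# OTEL_EXPORTER_OTLP_ENDPOINT from the environment.
provider = TracerProvider(resource=Resource.create())
provider.add_span_processor(BatchSpanProcessor(OTLPSpanExporter()))
trace.set_tracer_provider(provider)

with trace.get_tracer(__name__).start_as_current_span("example-span"):
    pass  # exported to the collector configured via the env vars above
```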
@@ -265,7 +261,7 @@ The starter distribution uses SQLite for local storage of various components:
 2. **Flexible Configuration**: Easy to enable/disable providers based on your needs
 3. **No Local GPU Required**: Most providers are cloud-based, making it accessible to developers without high-end hardware
 4. **Easy Migration**: Start with hosted providers and gradually move to local ones as needed
-5. **Production Ready**: Includes safety, evaluation, and telemetry components
+5. **Production Ready**: Includes safety and evaluation
 6. **Tool Integration**: Comes with web search, RAG, and model context protocol tools
 
 The starter distribution is ideal for developers who want to experiment with different AI providers, build prototypes quickly, or create applications that can work with multiple AI backends.
@@ -360,32 +360,6 @@ Methods:
 
 - <code title="post /v1/synthetic-data-generation/generate">client.synthetic_data_generation.<a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/resources/synthetic_data_generation.py">generate</a>(\*\*<a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/types/synthetic_data_generation_generate_params.py">params</a>) -> <a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/types/synthetic_data_generation_response.py">SyntheticDataGenerationResponse</a></code>
 
-## Telemetry
-
-Types:
-
-```python
-from llama_stack_client.types import (
-    QuerySpansResponse,
-    SpanWithStatus,
-    Trace,
-    TelemetryGetSpanResponse,
-    TelemetryGetSpanTreeResponse,
-    TelemetryQuerySpansResponse,
-    TelemetryQueryTracesResponse,
-)
-```
-
-Methods:
-
-- <code title="get /v1/telemetry/traces/{trace_id}/spans/{span_id}">client.telemetry.<a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/resources/telemetry.py">get_span</a>(span_id, \*, trace_id) -> <a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/types/telemetry_get_span_response.py">TelemetryGetSpanResponse</a></code>
-- <code title="get /v1/telemetry/spans/{span_id}/tree">client.telemetry.<a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/resources/telemetry.py">get_span_tree</a>(span_id, \*\*<a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/types/telemetry_get_span_tree_params.py">params</a>) -> <a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/types/telemetry_get_span_tree_response.py">TelemetryGetSpanTreeResponse</a></code>
-- <code title="get /v1/telemetry/traces/{trace_id}">client.telemetry.<a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/resources/telemetry.py">get_trace</a>(trace_id) -> <a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/types/trace.py">Trace</a></code>
-- <code title="post /v1/telemetry/events">client.telemetry.<a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/resources/telemetry.py">log_event</a>(\*\*<a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/types/telemetry_log_event_params.py">params</a>) -> None</code>
-- <code title="get /v1/telemetry/spans">client.telemetry.<a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/resources/telemetry.py">query_spans</a>(\*\*<a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/types/telemetry_query_spans_params.py">params</a>) -> <a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/types/telemetry_query_spans_response.py">TelemetryQuerySpansResponse</a></code>
-- <code title="get /v1/telemetry/traces">client.telemetry.<a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/resources/telemetry.py">query_traces</a>(\*\*<a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/types/telemetry_query_traces_params.py">params</a>) -> <a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/types/telemetry_query_traces_response.py">TelemetryQueryTracesResponse</a></code>
-- <code title="post /v1/telemetry/spans/export">client.telemetry.<a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/resources/telemetry.py">save_spans_to_dataset</a>(\*\*<a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/types/telemetry_save_spans_to_dataset_params.py">params</a>) -> None</code>
-
 ## Datasetio
 
 Types:
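For downstream users this is the breaking part of the commit: the entire `client.telemetry` resource disappears. A hypothetical pre-removal call pattern, using only method names from the deleted reference (the arguments are illustrative):

```python
# After this commit these attributes no longer exist on the client, so code
# like the following must be removed or migrated.
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")

trace = client.telemetry.get_trace("trace-123")                   # GET /v1/telemetry/traces/{trace_id}
span = client.telemetry.get_span("span-1", trace_id="trace-123")  # GET .../spans/{span_id}
traces = client.telemetry.query_traces()                          # GET /v1/telemetry/traces
```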
@@ -13,7 +13,7 @@ function HomepageHeader() {
       <div className={styles.heroContent}>
         <h1 className={styles.heroTitle}>Build AI Applications with Llama Stack</h1>
         <p className={styles.heroSubtitle}>
-          Unified APIs for Inference, RAG, Agents, Tools, Safety, and Telemetry
+          Unified APIs for Inference, RAG, Agents, Tools, and Safety
         </p>
         <div className={styles.buttons}>
           <Link
@@ -206,7 +206,7 @@ export default function Home() {
   return (
     <Layout
       title="Build AI Applications"
-      description="The open-source framework for building generative AI applications with unified APIs for Inference, RAG, Agents, Tools, Safety, and Telemetry.">
+      description="The open-source framework for building generative AI applications with unified APIs for Inference, RAG, Agents, Tools, Safety, and Evals.">
       <HomepageHeader />
       <main>
         <QuickStart />
@@ -8,7 +8,6 @@
 Schema discovery and collection for OpenAPI generation.
 """
 
-import importlib
 from typing import Any
 
 
@@ -20,23 +19,6 @@ def _ensure_components_schemas(openapi_schema: dict[str, Any]) -> None:
         openapi_schema["components"]["schemas"] = {}
 
 
-def _load_extra_schema_modules() -> None:
-    """
-    Import modules outside llama_stack_api that use schema_utils to register schemas.
-
-    The API package already imports its submodules via __init__, but server-side modules
-    like telemetry need to be imported explicitly so their decorator side effects run.
-    """
-    extra_modules = [
-        "llama_stack.core.telemetry.telemetry",
-    ]
-    for module_name in extra_modules:
-        try:
-            importlib.import_module(module_name)
-        except ImportError:
-            continue
-
-
 def _extract_and_fix_defs(schema: dict[str, Any], openapi_schema: dict[str, Any]) -> None:
     """
     Extract $defs from a schema, move them to components/schemas, and fix references.
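The deleted helper existed because schema registration is an import-time side effect: a decorator records the class in a registry when its module loads, so modules outside the API package had to be imported explicitly. A stripped-down illustration of that pattern (the registry and decorator here are stand-ins, not the project's real `schema_utils`):

```python
# Toy registry showing why an explicit import was needed: the decorator only
# runs when the defining module is actually loaded.
REGISTERED_SCHEMAS: dict[str, type] = {}

def json_schema_type(cls: type) -> type:
    REGISTERED_SCHEMAS[cls.__name__] = cls  # side effect at import time
    return cls

@json_schema_type
class ExampleEvent:  # stand-in for a telemetry event type
    pass

# Without an import of the defining module (importlib.import_module in the
# removed helper), ExampleEvent would never appear in the registry.
assert "ExampleEvent" in REGISTERED_SCHEMAS
```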
@@ -79,9 +61,6 @@ def _ensure_json_schema_types_included(openapi_schema: dict[str, Any]) -> dict[s
         iter_registered_schema_types,
     )
 
-    # Import extra modules (e.g., telemetry) whose schema registrations live outside llama_stack_api
-    _load_extra_schema_modules()
-
     # Handle explicitly registered schemas first (union types, Annotated structs, etc.)
     for registration_info in iter_registered_schema_types():
         schema_type = registration_info.type
@@ -371,12 +371,6 @@ class SafetyConfig(BaseModel):
     )
 
 
-class TelemetryConfig(BaseModel):
-    """Configuration for telemetry collection."""
-
-    enabled: bool = Field(default=False, description="Whether telemetry collection is enabled")
-
-
 class QuotaPeriod(StrEnum):
     DAY = "day"
 
@@ -542,11 +536,6 @@ can be instantiated multiple times (with different configs) if necessary.
         description="Configuration for default moderations model",
     )
 
-    telemetry: TelemetryConfig | None = Field(
-        default=None,
-        description="Configuration for telemetry collection",
-    )
-
     @field_validator("external_providers_dir")
     @classmethod
     def validate_external_providers_dir(cls, v):
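Before this change, the `telemetry: enabled: true` blocks being deleted from the run.yaml files below were parsed into this optional field. A self-contained sketch of that round trip, with class names mirroring the deleted code (the PyYAML loader is an assumption, not necessarily what the stack uses):

```python
import yaml  # assumption: plain PyYAML for illustration
from pydantic import BaseModel, Field

class TelemetryConfig(BaseModel):  # mirrors the class deleted above
    enabled: bool = Field(default=False)

class RunConfig(BaseModel):  # stand-in for the full stack run config model
    telemetry: TelemetryConfig | None = None

cfg = RunConfig.model_validate(yaml.safe_load("telemetry:\n  enabled: true"))
assert cfg.telemetry is not None and cfg.telemetry.enabled is True
```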
@@ -281,8 +281,6 @@ registered_resources:
     provider_id: rag-runtime
 server:
   port: 8321
-telemetry:
-  enabled: true
 vector_stores:
   default_provider_id: faiss
   default_embedding_model:
@@ -272,8 +272,6 @@ registered_resources:
     provider_id: rag-runtime
 server:
   port: 8321
-telemetry:
-  enabled: true
 vector_stores:
   default_provider_id: faiss
   default_embedding_model:
@@ -140,5 +140,3 @@ registered_resources:
     provider_id: rag-runtime
 server:
   port: 8321
-telemetry:
-  enabled: true
@@ -131,5 +131,3 @@ registered_resources:
     provider_id: rag-runtime
 server:
   port: 8321
-telemetry:
-  enabled: true
@@ -153,5 +153,3 @@ registered_resources:
     provider_id: rag-runtime
 server:
   port: 8321
-telemetry:
-  enabled: true
@@ -138,5 +138,3 @@ registered_resources:
     provider_id: rag-runtime
 server:
   port: 8321
-telemetry:
-  enabled: true
@@ -135,5 +135,3 @@ registered_resources:
     provider_id: rag-runtime
 server:
   port: 8321
-telemetry:
-  enabled: true
@@ -114,5 +114,3 @@ registered_resources:
     provider_id: rag-runtime
 server:
   port: 8321
-telemetry:
-  enabled: true
@@ -132,5 +132,3 @@ registered_resources:
     provider_id: tavily-search
 server:
   port: 8321
-telemetry:
-  enabled: true
@@ -251,5 +251,3 @@ registered_resources:
     provider_id: rag-runtime
 server:
   port: 8321
-telemetry:
-  enabled: true
@@ -114,5 +114,3 @@ registered_resources:
     provider_id: rag-runtime
 server:
   port: 8321
-telemetry:
-  enabled: true
@@ -284,8 +284,6 @@ registered_resources:
     provider_id: rag-runtime
 server:
   port: 8321
-telemetry:
-  enabled: true
 vector_stores:
   default_provider_id: faiss
   default_embedding_model:
@@ -275,8 +275,6 @@ registered_resources:
     provider_id: rag-runtime
 server:
   port: 8321
-telemetry:
-  enabled: true
 vector_stores:
   default_provider_id: faiss
   default_embedding_model:
@@ -281,8 +281,6 @@ registered_resources:
     provider_id: rag-runtime
 server:
   port: 8321
-telemetry:
-  enabled: true
 vector_stores:
   default_provider_id: faiss
   default_embedding_model:
@@ -272,8 +272,6 @@ registered_resources:
     provider_id: rag-runtime
 server:
   port: 8321
-telemetry:
-  enabled: true
 vector_stores:
   default_provider_id: faiss
   default_embedding_model:
@@ -24,7 +24,6 @@ from llama_stack.core.datatypes import (
     Provider,
     SafetyConfig,
     ShieldInput,
-    TelemetryConfig,
     ToolGroupInput,
     VectorStoresConfig,
 )
@@ -189,7 +188,6 @@ class RunConfigSettings(BaseModel):
     default_benchmarks: list[BenchmarkInput] | None = None
     vector_stores_config: VectorStoresConfig | None = None
     safety_config: SafetyConfig | None = None
-    telemetry: TelemetryConfig = Field(default_factory=lambda: TelemetryConfig(enabled=True))
     storage_backends: dict[str, Any] | None = None
     storage_stores: dict[str, Any] | None = None
 
@@ -289,7 +287,6 @@ class RunConfigSettings(BaseModel):
             "server": {
                 "port": 8321,
             },
-            "telemetry": self.telemetry.model_dump(exclude_none=True) if self.telemetry else None,
         }
 
         if self.vector_stores_config:
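The removed default meant every generated run.yaml carried a `telemetry: enabled: true` block unless a distribution overrode it, which is why the hunks above and below all delete that block. Two pydantic behaviors do the work here; a minimal sketch with illustrative model names:

```python
from pydantic import BaseModel, Field

class Telemetry(BaseModel):  # illustrative stand-in
    enabled: bool = False
    sink: str | None = None

class Settings(BaseModel):
    # default_factory builds a fresh default per instance (no shared state),
    # matching the removed Field(default_factory=lambda: TelemetryConfig(enabled=True))
    telemetry: Telemetry = Field(default_factory=lambda: Telemetry(enabled=True))

s = Settings()
# exclude_none drops unset optional keys, mirroring the removed model_dump call
assert s.telemetry.model_dump(exclude_none=True) == {"enabled": True}
```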
@@ -132,5 +132,3 @@ registered_resources:
     provider_id: rag-runtime
 server:
   port: 8321
-telemetry:
-  enabled: true
@@ -37,7 +37,6 @@ CATEGORIES = [
     "eval",
     "tools",
     "client",
-    "telemetry",
     "openai",
     "openai_responses",
     "openai_conversations",
@@ -155,9 +155,6 @@ def old_config():
           provider_type: inline::meta-reference
           config: {{}}
         api_providers:
-          telemetry:
-            provider_type: noop
-            config: {{}}
         """
     )
 
@@ -181,7 +178,7 @@ def test_parse_and_maybe_upgrade_config_up_to_date(up_to_date_config):
 def test_parse_and_maybe_upgrade_config_old_format(old_config):
     result = parse_and_maybe_upgrade_config(old_config)
     assert result.version == LLAMA_STACK_RUN_CONFIG_VERSION
-    assert all(api in result.providers for api in ["inference", "safety", "memory", "telemetry"])
+    assert all(api in result.providers for api in ["inference", "safety", "memory"])
     safety_provider = result.providers["safety"][0]
     assert safety_provider.provider_type == "inline::meta-reference"
     assert "llama_guard_shield" in safety_provider.config