From b6e9f41041f5d64e4500e3d0b4112b4a9d0ebe8d Mon Sep 17 00:00:00 2001 From: ehhuang Date: Tue, 7 Oct 2025 19:16:51 -0700 Subject: [PATCH 1/9] chore: Revert "fix: fix nvidia provider (#3716)" (#3730) This reverts commit c940fe79389a930923bd7f30787c467ed6f132c3. @wukaixingxp I stamped to fast. Let's wait for @mattf's review. --- .../remote/inference/nvidia/nvidia.py | 28 +++++-------------- 1 file changed, 7 insertions(+), 21 deletions(-) diff --git a/llama_stack/providers/remote/inference/nvidia/nvidia.py b/llama_stack/providers/remote/inference/nvidia/nvidia.py index b2ad060fb..7a2697327 100644 --- a/llama_stack/providers/remote/inference/nvidia/nvidia.py +++ b/llama_stack/providers/remote/inference/nvidia/nvidia.py @@ -13,7 +13,6 @@ from llama_stack.apis.inference import ( OpenAIEmbeddingUsage, ) from llama_stack.log import get_logger -from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin from . import NVIDIAConfig @@ -22,7 +21,9 @@ from .utils import _is_nvidia_hosted logger = get_logger(name=__name__, category="inference::nvidia") -class NVIDIAInferenceAdapter(OpenAIMixin, ModelRegistryHelper): +class NVIDIAInferenceAdapter(OpenAIMixin): + config: NVIDIAConfig + """ NVIDIA Inference Adapter for Llama Stack. @@ -36,27 +37,12 @@ class NVIDIAInferenceAdapter(OpenAIMixin, ModelRegistryHelper): - ModelRegistryHelper.check_model_availability() just returns False and shows a warning """ - def __init__(self, config: NVIDIAConfig) -> None: - """Initialize the NVIDIA inference adapter with configuration.""" - # Initialize ModelRegistryHelper with empty model entries since NVIDIA uses dynamic model discovery - ModelRegistryHelper.__init__(self, model_entries=[], allowed_models=config.allowed_models) - self.config = config - # source: https://docs.nvidia.com/nim/nemo-retriever/text-embedding/latest/support-matrix.html embedding_model_metadata: dict[str, dict[str, int]] = { - "nvidia/llama-3.2-nv-embedqa-1b-v2": { - "embedding_dimension": 2048, - "context_length": 8192, - }, + "nvidia/llama-3.2-nv-embedqa-1b-v2": {"embedding_dimension": 2048, "context_length": 8192}, "nvidia/nv-embedqa-e5-v5": {"embedding_dimension": 512, "context_length": 1024}, - "nvidia/nv-embedqa-mistral-7b-v2": { - "embedding_dimension": 512, - "context_length": 4096, - }, - "snowflake/arctic-embed-l": { - "embedding_dimension": 512, - "context_length": 1024, - }, + "nvidia/nv-embedqa-mistral-7b-v2": {"embedding_dimension": 512, "context_length": 4096}, + "snowflake/arctic-embed-l": {"embedding_dimension": 512, "context_length": 1024}, } async def initialize(self) -> None: @@ -109,7 +95,7 @@ class NVIDIAInferenceAdapter(OpenAIMixin, ModelRegistryHelper): response = await self.client.embeddings.create( model=await self._get_provider_model_id(model), input=input, - encoding_format=(encoding_format if encoding_format is not None else NOT_GIVEN), + encoding_format=encoding_format if encoding_format is not None else NOT_GIVEN, dimensions=dimensions if dimensions is not None else NOT_GIVEN, user=user if user is not None else NOT_GIVEN, extra_body=extra_body, From 1ac320b7e6a44e7156c87e41fb8b98a110a1a5d1 Mon Sep 17 00:00:00 2001 From: slekkala1 Date: Tue, 7 Oct 2025 20:26:02 -0700 Subject: [PATCH 2/9] chore: remove dead code (#3729) # What does this PR do? 
Removing some dead code, found by vulture and checked by claude that there are no references or imports for these ## Test Plan CI --- llama_stack/models/llama/prompt_format.py | 54 ----- .../agents/meta_reference/agent_instance.py | 7 - .../inline/tool_runtime/rag/memory.py | 6 - .../remote/inference/nvidia/openai_utils.py | 217 ------------------ .../remote/inference/nvidia/utils.py | 45 ---- .../utils/memory/openai_vector_store_mixin.py | 17 +- .../providers/utils/memory/vector_store.py | 21 -- 7 files changed, 8 insertions(+), 359 deletions(-) delete mode 100644 llama_stack/providers/remote/inference/nvidia/openai_utils.py diff --git a/llama_stack/models/llama/prompt_format.py b/llama_stack/models/llama/prompt_format.py index 6191df61a..16e4068d7 100644 --- a/llama_stack/models/llama/prompt_format.py +++ b/llama_stack/models/llama/prompt_format.py @@ -11,19 +11,13 @@ # top-level folder for each specific model found within the models/ directory at # the top-level of this source tree. -import json import textwrap -from pathlib import Path from pydantic import BaseModel, Field from llama_stack.models.llama.datatypes import ( RawContent, - RawMediaItem, RawMessage, - RawTextItem, - StopReason, - ToolCall, ToolPromptFormat, ) from llama_stack.models.llama.llama4.tokenizer import Tokenizer @@ -175,25 +169,6 @@ def llama3_1_builtin_code_interpreter_dialog(tool_prompt_format=ToolPromptFormat return messages -def llama3_1_builtin_tool_call_with_image_dialog( - tool_prompt_format=ToolPromptFormat.json, -): - this_dir = Path(__file__).parent - with open(this_dir / "llama3/dog.jpg", "rb") as f: - img = f.read() - - interface = LLama31Interface(tool_prompt_format) - - messages = interface.system_messages(**system_message_builtin_tools_only()) - messages += interface.user_message(content=[RawMediaItem(data=img), RawTextItem(text="What is this dog breed?")]) - messages += interface.assistant_response_messages( - "Based on the description of the dog in the image, it appears to be a small breed dog, possibly a terrier mix", - StopReason.end_of_turn, - ) - messages += interface.user_message("Search the web for some food recommendations for the indentified breed") - return messages - - def llama3_1_custom_tool_call_dialog(tool_prompt_format=ToolPromptFormat.json): interface = LLama31Interface(tool_prompt_format) @@ -202,35 +177,6 @@ def llama3_1_custom_tool_call_dialog(tool_prompt_format=ToolPromptFormat.json): return messages -def llama3_1_e2e_tool_call_dialog(tool_prompt_format=ToolPromptFormat.json): - tool_response = json.dumps(["great song1", "awesome song2", "cool song3"]) - interface = LLama31Interface(tool_prompt_format) - - messages = interface.system_messages(**system_message_custom_tools_only()) - messages += interface.user_message(content="Use tools to get latest trending songs") - messages.append( - RawMessage( - role="assistant", - content="", - stop_reason=StopReason.end_of_message, - tool_calls=[ - ToolCall( - call_id="call_id", - tool_name="trending_songs", - arguments={"n": "10", "genre": "latest"}, - ) - ], - ), - ) - messages.append( - RawMessage( - role="assistant", - content=tool_response, - ) - ) - return messages - - def llama3_2_user_assistant_conversation(): return UseCase( title="User and assistant conversation", diff --git a/llama_stack/providers/inline/agents/meta_reference/agent_instance.py b/llama_stack/providers/inline/agents/meta_reference/agent_instance.py index c2ce9aa7b..b17c720e9 100644 --- a/llama_stack/providers/inline/agents/meta_reference/agent_instance.py +++ 
b/llama_stack/providers/inline/agents/meta_reference/agent_instance.py @@ -7,8 +7,6 @@ import copy import json import re -import secrets -import string import uuid import warnings from collections.abc import AsyncGenerator @@ -84,11 +82,6 @@ from llama_stack.providers.utils.telemetry import tracing from .persistence import AgentPersistence from .safety import SafetyException, ShieldRunnerMixin - -def make_random_string(length: int = 8): - return "".join(secrets.choice(string.ascii_letters + string.digits) for _ in range(length)) - - TOOLS_ATTACHMENT_KEY_REGEX = re.compile(r"__tools_attachment__=(\{.*?\})") MEMORY_QUERY_TOOL = "knowledge_search" WEB_SEARCH_TOOL = "web_search" diff --git a/llama_stack/providers/inline/tool_runtime/rag/memory.py b/llama_stack/providers/inline/tool_runtime/rag/memory.py index aac86a056..3ccfd0bcb 100644 --- a/llama_stack/providers/inline/tool_runtime/rag/memory.py +++ b/llama_stack/providers/inline/tool_runtime/rag/memory.py @@ -8,8 +8,6 @@ import asyncio import base64 import io import mimetypes -import secrets -import string from typing import Any import httpx @@ -52,10 +50,6 @@ from .context_retriever import generate_rag_query log = get_logger(name=__name__, category="tool_runtime") -def make_random_string(length: int = 8): - return "".join(secrets.choice(string.ascii_letters + string.digits) for _ in range(length)) - - async def raw_data_from_doc(doc: RAGDocument) -> tuple[bytes, str]: """Get raw binary data and mime type from a RAGDocument for file upload.""" if isinstance(doc.content, URL): diff --git a/llama_stack/providers/remote/inference/nvidia/openai_utils.py b/llama_stack/providers/remote/inference/nvidia/openai_utils.py deleted file mode 100644 index 0b0d7fcf3..000000000 --- a/llama_stack/providers/remote/inference/nvidia/openai_utils.py +++ /dev/null @@ -1,217 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -import warnings -from collections.abc import AsyncGenerator -from typing import Any - -from openai import AsyncStream -from openai.types.chat.chat_completion import ( - Choice as OpenAIChoice, -) -from openai.types.completion import Completion as OpenAICompletion -from openai.types.completion_choice import Logprobs as OpenAICompletionLogprobs - -from llama_stack.apis.inference import ( - ChatCompletionRequest, - CompletionRequest, - CompletionResponse, - CompletionResponseStreamChunk, - GreedySamplingStrategy, - JsonSchemaResponseFormat, - TokenLogProbs, - TopKSamplingStrategy, - TopPSamplingStrategy, -) -from llama_stack.providers.utils.inference.openai_compat import ( - _convert_openai_finish_reason, - convert_message_to_openai_dict_new, - convert_tooldef_to_openai_tool, -) - - -async def convert_chat_completion_request( - request: ChatCompletionRequest, - n: int = 1, -) -> dict: - """ - Convert a ChatCompletionRequest to an OpenAI API-compatible dictionary. 
- """ - # model -> model - # messages -> messages - # sampling_params TODO(mattf): review strategy - # strategy=greedy -> nvext.top_k = -1, temperature = temperature - # strategy=top_p -> nvext.top_k = -1, top_p = top_p - # strategy=top_k -> nvext.top_k = top_k - # temperature -> temperature - # top_p -> top_p - # top_k -> nvext.top_k - # max_tokens -> max_tokens - # repetition_penalty -> nvext.repetition_penalty - # response_format -> GrammarResponseFormat TODO(mf) - # response_format -> JsonSchemaResponseFormat: response_format = "json_object" & nvext["guided_json"] = json_schema - # tools -> tools - # tool_choice ("auto", "required") -> tool_choice - # tool_prompt_format -> TBD - # stream -> stream - # logprobs -> logprobs - - if request.response_format and not isinstance(request.response_format, JsonSchemaResponseFormat): - raise ValueError( - f"Unsupported response format: {request.response_format}. Only JsonSchemaResponseFormat is supported." - ) - - nvext = {} - payload: dict[str, Any] = dict( - model=request.model, - messages=[await convert_message_to_openai_dict_new(message) for message in request.messages], - stream=request.stream, - n=n, - extra_body=dict(nvext=nvext), - extra_headers={ - b"User-Agent": b"llama-stack: nvidia-inference-adapter", - }, - ) - - if request.response_format: - # server bug - setting guided_json changes the behavior of response_format resulting in an error - # payload.update(response_format="json_object") - nvext.update(guided_json=request.response_format.json_schema) - - if request.tools: - payload.update(tools=[convert_tooldef_to_openai_tool(tool) for tool in request.tools]) - if request.tool_config.tool_choice: - payload.update( - tool_choice=request.tool_config.tool_choice.value - ) # we cannot include tool_choice w/o tools, server will complain - - if request.logprobs: - payload.update(logprobs=True) - payload.update(top_logprobs=request.logprobs.top_k) - - if request.sampling_params: - nvext.update(repetition_penalty=request.sampling_params.repetition_penalty) - - if request.sampling_params.max_tokens: - payload.update(max_tokens=request.sampling_params.max_tokens) - - strategy = request.sampling_params.strategy - if isinstance(strategy, TopPSamplingStrategy): - nvext.update(top_k=-1) - payload.update(top_p=strategy.top_p) - payload.update(temperature=strategy.temperature) - elif isinstance(strategy, TopKSamplingStrategy): - if strategy.top_k != -1 and strategy.top_k < 1: - warnings.warn("top_k must be -1 or >= 1", stacklevel=2) - nvext.update(top_k=strategy.top_k) - elif isinstance(strategy, GreedySamplingStrategy): - nvext.update(top_k=-1) - else: - raise ValueError(f"Unsupported sampling strategy: {strategy}") - - return payload - - -def convert_completion_request( - request: CompletionRequest, - n: int = 1, -) -> dict: - """ - Convert a ChatCompletionRequest to an OpenAI API-compatible dictionary. 
- """ - # model -> model - # prompt -> prompt - # sampling_params TODO(mattf): review strategy - # strategy=greedy -> nvext.top_k = -1, temperature = temperature - # strategy=top_p -> nvext.top_k = -1, top_p = top_p - # strategy=top_k -> nvext.top_k = top_k - # temperature -> temperature - # top_p -> top_p - # top_k -> nvext.top_k - # max_tokens -> max_tokens - # repetition_penalty -> nvext.repetition_penalty - # response_format -> nvext.guided_json - # stream -> stream - # logprobs.top_k -> logprobs - - nvext = {} - payload: dict[str, Any] = dict( - model=request.model, - prompt=request.content, - stream=request.stream, - extra_body=dict(nvext=nvext), - extra_headers={ - b"User-Agent": b"llama-stack: nvidia-inference-adapter", - }, - n=n, - ) - - if request.response_format: - # this is not openai compliant, it is a nim extension - nvext.update(guided_json=request.response_format.json_schema) - - if request.logprobs: - payload.update(logprobs=request.logprobs.top_k) - - if request.sampling_params: - nvext.update(repetition_penalty=request.sampling_params.repetition_penalty) - - if request.sampling_params.max_tokens: - payload.update(max_tokens=request.sampling_params.max_tokens) - - if request.sampling_params.strategy == "top_p": - nvext.update(top_k=-1) - payload.update(top_p=request.sampling_params.top_p) - elif request.sampling_params.strategy == "top_k": - if request.sampling_params.top_k != -1 and request.sampling_params.top_k < 1: - warnings.warn("top_k must be -1 or >= 1", stacklevel=2) - nvext.update(top_k=request.sampling_params.top_k) - elif request.sampling_params.strategy == "greedy": - nvext.update(top_k=-1) - payload.update(temperature=request.sampling_params.temperature) - - return payload - - -def _convert_openai_completion_logprobs( - logprobs: OpenAICompletionLogprobs | None, -) -> list[TokenLogProbs] | None: - """ - Convert an OpenAI CompletionLogprobs into a list of TokenLogProbs. - """ - if not logprobs: - return None - - return [TokenLogProbs(logprobs_by_token=logprobs) for logprobs in logprobs.top_logprobs] - - -def convert_openai_completion_choice( - choice: OpenAIChoice, -) -> CompletionResponse: - """ - Convert an OpenAI Completion Choice into a CompletionResponse. - """ - return CompletionResponse( - content=choice.text, - stop_reason=_convert_openai_finish_reason(choice.finish_reason), - logprobs=_convert_openai_completion_logprobs(choice.logprobs), - ) - - -async def convert_openai_completion_stream( - stream: AsyncStream[OpenAICompletion], -) -> AsyncGenerator[CompletionResponse, None]: - """ - Convert a stream of OpenAI Completions into a stream - of ChatCompletionResponseStreamChunks. - """ - async for chunk in stream: - choice = chunk.choices[0] - yield CompletionResponseStreamChunk( - delta=choice.text, - stop_reason=_convert_openai_finish_reason(choice.finish_reason), - logprobs=_convert_openai_completion_logprobs(choice.logprobs), - ) diff --git a/llama_stack/providers/remote/inference/nvidia/utils.py b/llama_stack/providers/remote/inference/nvidia/utils.py index b8431e859..46ee939d9 100644 --- a/llama_stack/providers/remote/inference/nvidia/utils.py +++ b/llama_stack/providers/remote/inference/nvidia/utils.py @@ -4,53 +4,8 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -import httpx - -from llama_stack.log import get_logger - from . 
import NVIDIAConfig -logger = get_logger(name=__name__, category="inference::nvidia") - def _is_nvidia_hosted(config: NVIDIAConfig) -> bool: return "integrate.api.nvidia.com" in config.url - - -async def _get_health(url: str) -> tuple[bool, bool]: - """ - Query {url}/v1/health/{live,ready} to check if the server is running and ready - - Args: - url (str): URL of the server - - Returns: - Tuple[bool, bool]: (is_live, is_ready) - """ - async with httpx.AsyncClient() as client: - live = await client.get(f"{url}/v1/health/live") - ready = await client.get(f"{url}/v1/health/ready") - return live.status_code == 200, ready.status_code == 200 - - -async def check_health(config: NVIDIAConfig) -> None: - """ - Check if the server is running and ready - - Args: - url (str): URL of the server - - Raises: - RuntimeError: If the server is not running or ready - """ - if not _is_nvidia_hosted(config): - logger.info("Checking NVIDIA NIM health...") - try: - is_live, is_ready = await _get_health(config.url) - if not is_live: - raise ConnectionError("NVIDIA NIM is not running") - if not is_ready: - raise ConnectionError("NVIDIA NIM is not ready") - # TODO(mf): should we wait for the server to be ready? - except httpx.ConnectError as e: - raise ConnectionError(f"Failed to connect to NVIDIA NIM: {e}") from e diff --git a/llama_stack/providers/utils/memory/openai_vector_store_mixin.py b/llama_stack/providers/utils/memory/openai_vector_store_mixin.py index 2a5177f93..c179eba6c 100644 --- a/llama_stack/providers/utils/memory/openai_vector_store_mixin.py +++ b/llama_stack/providers/utils/memory/openai_vector_store_mixin.py @@ -296,15 +296,14 @@ class OpenAIVectorStoreMixin(ABC): async def shutdown(self) -> None: """Clean up mixin resources including background tasks.""" # Cancel any running file batch tasks gracefully - if hasattr(self, "_file_batch_tasks"): - tasks_to_cancel = list(self._file_batch_tasks.items()) - for _, task in tasks_to_cancel: - if not task.done(): - task.cancel() - try: - await task - except asyncio.CancelledError: - pass + tasks_to_cancel = list(self._file_batch_tasks.items()) + for _, task in tasks_to_cancel: + if not task.done(): + task.cancel() + try: + await task + except asyncio.CancelledError: + pass @abstractmethod async def delete_chunks(self, store_id: str, chunks_for_deletion: list[ChunkForDeletion]) -> None: diff --git a/llama_stack/providers/utils/memory/vector_store.py b/llama_stack/providers/utils/memory/vector_store.py index 857fbe910..c0534a875 100644 --- a/llama_stack/providers/utils/memory/vector_store.py +++ b/llama_stack/providers/utils/memory/vector_store.py @@ -20,7 +20,6 @@ from pydantic import BaseModel from llama_stack.apis.common.content_types import ( URL, InterleavedContent, - TextContentItem, ) from llama_stack.apis.tools import RAGDocument from llama_stack.apis.vector_dbs import VectorDB @@ -129,26 +128,6 @@ def content_from_data_and_mime_type(data: bytes | str, mime_type: str | None, en return "" -def concat_interleaved_content(content: list[InterleavedContent]) -> InterleavedContent: - """concatenate interleaved content into a single list. 
ensure that 'str's are converted to TextContentItem when in a list""" - - ret = [] - - def _process(c): - if isinstance(c, str): - ret.append(TextContentItem(text=c)) - elif isinstance(c, list): - for item in c: - _process(item) - else: - ret.append(c) - - for c in content: - _process(c) - - return ret - - async def content_from_doc(doc: RAGDocument) -> str: if isinstance(doc.content, URL): if doc.content.uri.startswith("data:"): From a3f50727769f4a2060a0378522f7620d5d29c9c9 Mon Sep 17 00:00:00 2001 From: ehhuang Date: Tue, 7 Oct 2025 20:58:15 -0700 Subject: [PATCH 3/9] chore!: remove --env from `llama stack run` (#3711) # What does this PR do? user can simply set env vars in the beginning of the command.`FOO=BAR llama stack run ...` ## Test Plan Run TELEMETRY_SINKS=coneol uv run --with llama-stack llama stack build --distro=starter --image-type=venv --run --- [//]: # (BEGIN SAPLING FOOTER) Stack created with [Sapling](https://sapling-scm.com). Best reviewed with [ReviewStack](https://reviewstack.dev/llamastack/llama-stack/pull/3711). * #3714 * __->__ #3711 --- .github/workflows/integration-auth-tests.yml | 2 +- .../test-external-provider-module.yml | 2 +- .github/workflows/test-external.yml | 2 +- docs/docs/advanced_apis/post_training.mdx | 2 +- docs/docs/building_applications/tools.mdx | 9 +-- docs/docs/contributing/new_api_provider.mdx | 2 +- docs/docs/distributions/building_distro.mdx | 28 ++++---- docs/docs/distributions/configuration.mdx | 9 +-- .../remote_hosted_distro/watsonx.md | 8 +-- .../distributions/self_hosted_distro/dell.md | 44 ++++++------- .../self_hosted_distro/meta-reference-gpu.md | 20 +++--- .../self_hosted_distro/nvidia.md | 10 +-- .../getting_started/detailed_tutorial.mdx | 8 +-- docs/getting_started.ipynb | 4 +- docs/getting_started_llama4.ipynb | 4 +- docs/getting_started_llama_api.ipynb | 4 +- docs/quick_start.ipynb | 4 +- docs/zero_to_hero_guide/README.md | 11 ++-- llama_stack/cli/stack/_build.py | 24 +++++-- llama_stack/cli/stack/run.py | 66 ++++--------------- llama_stack/core/stack.py | 16 ----- llama_stack/core/start_stack.sh | 13 +--- .../distributions/dell/doc_template.md | 42 ++++++------ .../meta-reference-gpu/doc_template.md | 20 +++--- .../distributions/nvidia/doc_template.md | 10 +-- scripts/install.sh | 4 +- scripts/integration-tests.sh | 2 +- 27 files changed, 152 insertions(+), 218 deletions(-) diff --git a/.github/workflows/integration-auth-tests.yml b/.github/workflows/integration-auth-tests.yml index 238fed683..f9c42ef8a 100644 --- a/.github/workflows/integration-auth-tests.yml +++ b/.github/workflows/integration-auth-tests.yml @@ -86,7 +86,7 @@ jobs: # avoid line breaks in the server log, especially because we grep it below. 
export COLUMNS=1984 - nohup uv run llama stack run $run_dir/run.yaml --image-type venv > server.log 2>&1 & + nohup uv run llama stack run $run_dir/run.yaml > server.log 2>&1 & - name: Wait for Llama Stack server to be ready run: | diff --git a/.github/workflows/test-external-provider-module.yml b/.github/workflows/test-external-provider-module.yml index 8a757b068..b43cefb27 100644 --- a/.github/workflows/test-external-provider-module.yml +++ b/.github/workflows/test-external-provider-module.yml @@ -59,7 +59,7 @@ jobs: # Use the virtual environment created by the build step (name comes from build config) source ramalama-stack-test/bin/activate uv pip list - nohup llama stack run tests/external/ramalama-stack/run.yaml --image-type ${{ matrix.image-type }} > server.log 2>&1 & + nohup llama stack run tests/external/ramalama-stack/run.yaml > server.log 2>&1 & - name: Wait for Llama Stack server to be ready run: | diff --git a/.github/workflows/test-external.yml b/.github/workflows/test-external.yml index 7ee467451..a008b17af 100644 --- a/.github/workflows/test-external.yml +++ b/.github/workflows/test-external.yml @@ -59,7 +59,7 @@ jobs: # Use the virtual environment created by the build step (name comes from build config) source ci-test/bin/activate uv pip list - nohup llama stack run tests/external/run-byoa.yaml --image-type ${{ matrix.image-type }} > server.log 2>&1 & + nohup llama stack run tests/external/run-byoa.yaml > server.log 2>&1 & - name: Wait for Llama Stack server to be ready run: | diff --git a/docs/docs/advanced_apis/post_training.mdx b/docs/docs/advanced_apis/post_training.mdx index 516ac07e1..43bfaea91 100644 --- a/docs/docs/advanced_apis/post_training.mdx +++ b/docs/docs/advanced_apis/post_training.mdx @@ -52,7 +52,7 @@ You can access the HuggingFace trainer via the `starter` distribution: ```bash llama stack build --distro starter --image-type venv -llama stack run --image-type venv ~/.llama/distributions/starter/starter-run.yaml +llama stack run ~/.llama/distributions/starter/starter-run.yaml ``` ### Usage Example diff --git a/docs/docs/building_applications/tools.mdx b/docs/docs/building_applications/tools.mdx index e5d9c46f9..3b78ec57b 100644 --- a/docs/docs/building_applications/tools.mdx +++ b/docs/docs/building_applications/tools.mdx @@ -219,13 +219,10 @@ group_tools = client.tools.list_tools(toolgroup_id="search_tools") 1. Start by registering a Tavily API key at [Tavily](https://tavily.com/). -2. [Optional] Provide the API key directly to the Llama Stack server +2. [Optional] Set the API key in your environment before starting the Llama Stack server ```bash export TAVILY_SEARCH_API_KEY="your key" ``` -```bash ---env TAVILY_SEARCH_API_KEY=${TAVILY_SEARCH_API_KEY} -``` @@ -273,9 +270,9 @@ for log in EventLogger().log(response): 1. Start by registering for a WolframAlpha API key at [WolframAlpha Developer Portal](https://developer.wolframalpha.com/access). -2. Provide the API key either when starting the Llama Stack server: +2. 
Provide the API key either by setting it in your environment before starting the Llama Stack server: ```bash - --env WOLFRAM_ALPHA_API_KEY=${WOLFRAM_ALPHA_API_KEY} + export WOLFRAM_ALPHA_API_KEY="your key" ``` or from the client side: ```python diff --git a/docs/docs/contributing/new_api_provider.mdx b/docs/docs/contributing/new_api_provider.mdx index 4ae6d5e72..6f9744771 100644 --- a/docs/docs/contributing/new_api_provider.mdx +++ b/docs/docs/contributing/new_api_provider.mdx @@ -76,7 +76,7 @@ Integration tests are located in [tests/integration](https://github.com/meta-lla Consult [tests/integration/README.md](https://github.com/meta-llama/llama-stack/blob/main/tests/integration/README.md) for more details on how to run the tests. Note that each provider's `sample_run_config()` method (in the configuration class for that provider) - typically references some environment variables for specifying API keys and the like. You can set these in the environment or pass these via the `--env` flag to the test command. + typically references some environment variables for specifying API keys and the like. You can set these in the environment before running the test command. ### 2. Unit Testing diff --git a/docs/docs/distributions/building_distro.mdx b/docs/docs/distributions/building_distro.mdx index 5b65b7f16..a4f7e1f60 100644 --- a/docs/docs/distributions/building_distro.mdx +++ b/docs/docs/distributions/building_distro.mdx @@ -289,10 +289,10 @@ After this step is successful, you should be able to find the built container im docker run -d \ -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ -v ~/.llama:/root/.llama \ + -e INFERENCE_MODEL=$INFERENCE_MODEL \ + -e OLLAMA_URL=http://host.docker.internal:11434 \ localhost/distribution-ollama:dev \ - --port $LLAMA_STACK_PORT \ - --env INFERENCE_MODEL=$INFERENCE_MODEL \ - --env OLLAMA_URL=http://host.docker.internal:11434 + --port $LLAMA_STACK_PORT ``` Here are the docker flags and their uses: @@ -305,12 +305,12 @@ Here are the docker flags and their uses: * `localhost/distribution-ollama:dev`: The name and tag of the container image to run +* `-e INFERENCE_MODEL=$INFERENCE_MODEL`: Sets the INFERENCE_MODEL environment variable in the container + +* `-e OLLAMA_URL=http://host.docker.internal:11434`: Sets the OLLAMA_URL environment variable in the container + * `--port $LLAMA_STACK_PORT`: Port number for the server to listen on -* `--env INFERENCE_MODEL=$INFERENCE_MODEL`: Sets the model to use for inference - -* `--env OLLAMA_URL=http://host.docker.internal:11434`: Configures the URL for the Ollama service - @@ -320,23 +320,22 @@ Now, let's start the Llama Stack Distribution Server. You will need the YAML con ``` llama stack run -h -usage: llama stack run [-h] [--port PORT] [--image-name IMAGE_NAME] [--env KEY=VALUE] +usage: llama stack run [-h] [--port PORT] [--image-name IMAGE_NAME] [--image-type {venv}] [--enable-ui] - [config | template] + [config | distro] Start the server for a Llama Stack Distribution. You should have already built (or downloaded) and configured the distribution. positional arguments: - config | template Path to config file to use for the run or name of known template (`llama stack list` for a list). (default: None) + config | distro Path to config file to use for the run or name of known distro (`llama stack list` for a list). (default: None) options: -h, --help show this help message and exit --port PORT Port to run the server on. It can also be passed via the env var LLAMA_STACK_PORT. 
(default: 8321) --image-name IMAGE_NAME - Name of the image to run. Defaults to the current environment (default: None) - --env KEY=VALUE Environment variables to pass to the server in KEY=VALUE format. Can be specified multiple times. (default: None) + [DEPRECATED] This flag is no longer supported. Please activate your virtual environment before running. (default: None) --image-type {venv} - Image Type used during the build. This should be venv. (default: None) + [DEPRECATED] This flag is no longer supported. Please activate your virtual environment before running. (default: None) --enable-ui Start the UI server (default: False) ``` @@ -348,9 +347,6 @@ llama stack run tgi # Start using config file llama stack run ~/.llama/distributions/llamastack-my-local-stack/my-local-stack-run.yaml - -# Start using a venv -llama stack run --image-type venv ~/.llama/distributions/llamastack-my-local-stack/my-local-stack-run.yaml ``` ``` diff --git a/docs/docs/distributions/configuration.mdx b/docs/docs/distributions/configuration.mdx index dbf879024..81243c97b 100644 --- a/docs/docs/distributions/configuration.mdx +++ b/docs/docs/distributions/configuration.mdx @@ -101,7 +101,7 @@ A few things to note: - The id is a string you can choose freely. - You can instantiate any number of provider instances of the same type. - The configuration dictionary is provider-specific. -- Notice that configuration can reference environment variables (with default values), which are expanded at runtime. When you run a stack server (via docker or via `llama stack run`), you can specify `--env OLLAMA_URL=http://my-server:11434` to override the default value. +- Notice that configuration can reference environment variables (with default values), which are expanded at runtime. When you run a stack server, you can set environment variables in your shell before running `llama stack run` to override the default values. 
### Environment Variable Substitution @@ -173,13 +173,10 @@ optional_token: ${env.OPTIONAL_TOKEN:+} #### Runtime Override -You can override environment variables at runtime when starting the server: +You can override environment variables at runtime by setting them in your shell before starting the server: ```bash -# Override specific environment variables -llama stack run --config run.yaml --env API_KEY=sk-123 --env BASE_URL=https://custom-api.com - -# Or set them in your shell +# Set environment variables in your shell export API_KEY=sk-123 export BASE_URL=https://custom-api.com llama stack run --config run.yaml diff --git a/docs/docs/distributions/remote_hosted_distro/watsonx.md b/docs/docs/distributions/remote_hosted_distro/watsonx.md index 977af90dd..5add678f3 100644 --- a/docs/docs/distributions/remote_hosted_distro/watsonx.md +++ b/docs/docs/distributions/remote_hosted_distro/watsonx.md @@ -69,10 +69,10 @@ docker run \ -it \ -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ -v ./run.yaml:/root/my-run.yaml \ + -e WATSONX_API_KEY=$WATSONX_API_KEY \ + -e WATSONX_PROJECT_ID=$WATSONX_PROJECT_ID \ + -e WATSONX_BASE_URL=$WATSONX_BASE_URL \ llamastack/distribution-watsonx \ --config /root/my-run.yaml \ - --port $LLAMA_STACK_PORT \ - --env WATSONX_API_KEY=$WATSONX_API_KEY \ - --env WATSONX_PROJECT_ID=$WATSONX_PROJECT_ID \ - --env WATSONX_BASE_URL=$WATSONX_BASE_URL + --port $LLAMA_STACK_PORT ``` diff --git a/docs/docs/distributions/self_hosted_distro/dell.md b/docs/docs/distributions/self_hosted_distro/dell.md index 52d40cf9d..851eac3bf 100644 --- a/docs/docs/distributions/self_hosted_distro/dell.md +++ b/docs/docs/distributions/self_hosted_distro/dell.md @@ -129,11 +129,11 @@ docker run -it \ # NOTE: mount the llama-stack / llama-model directories if testing local changes else not needed -v $HOME/git/llama-stack:/app/llama-stack-source -v $HOME/git/llama-models:/app/llama-models-source \ # localhost/distribution-dell:dev if building / testing locally - llamastack/distribution-dell\ - --port $LLAMA_STACK_PORT \ - --env INFERENCE_MODEL=$INFERENCE_MODEL \ - --env DEH_URL=$DEH_URL \ - --env CHROMA_URL=$CHROMA_URL + -e INFERENCE_MODEL=$INFERENCE_MODEL \ + -e DEH_URL=$DEH_URL \ + -e CHROMA_URL=$CHROMA_URL \ + llamastack/distribution-dell \ + --port $LLAMA_STACK_PORT ``` @@ -154,14 +154,14 @@ docker run \ -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ -v $HOME/.llama:/root/.llama \ -v ./llama_stack/distributions/tgi/run-with-safety.yaml:/root/my-run.yaml \ + -e INFERENCE_MODEL=$INFERENCE_MODEL \ + -e DEH_URL=$DEH_URL \ + -e SAFETY_MODEL=$SAFETY_MODEL \ + -e DEH_SAFETY_URL=$DEH_SAFETY_URL \ + -e CHROMA_URL=$CHROMA_URL \ llamastack/distribution-dell \ --config /root/my-run.yaml \ - --port $LLAMA_STACK_PORT \ - --env INFERENCE_MODEL=$INFERENCE_MODEL \ - --env DEH_URL=$DEH_URL \ - --env SAFETY_MODEL=$SAFETY_MODEL \ - --env DEH_SAFETY_URL=$DEH_SAFETY_URL \ - --env CHROMA_URL=$CHROMA_URL + --port $LLAMA_STACK_PORT ``` ### Via venv @@ -170,21 +170,21 @@ Make sure you have done `pip install llama-stack` and have the Llama Stack CLI a ```bash llama stack build --distro dell --image-type venv -llama stack run dell - --port $LLAMA_STACK_PORT \ - --env INFERENCE_MODEL=$INFERENCE_MODEL \ - --env DEH_URL=$DEH_URL \ - --env CHROMA_URL=$CHROMA_URL +INFERENCE_MODEL=$INFERENCE_MODEL \ +DEH_URL=$DEH_URL \ +CHROMA_URL=$CHROMA_URL \ +llama stack run dell \ + --port $LLAMA_STACK_PORT ``` If you are using Llama Stack Safety / Shield APIs, use: ```bash +INFERENCE_MODEL=$INFERENCE_MODEL \ +DEH_URL=$DEH_URL \ +SAFETY_MODEL=$SAFETY_MODEL \ 
+DEH_SAFETY_URL=$DEH_SAFETY_URL \ +CHROMA_URL=$CHROMA_URL \ llama stack run ./run-with-safety.yaml \ - --port $LLAMA_STACK_PORT \ - --env INFERENCE_MODEL=$INFERENCE_MODEL \ - --env DEH_URL=$DEH_URL \ - --env SAFETY_MODEL=$SAFETY_MODEL \ - --env DEH_SAFETY_URL=$DEH_SAFETY_URL \ - --env CHROMA_URL=$CHROMA_URL + --port $LLAMA_STACK_PORT ``` diff --git a/docs/docs/distributions/self_hosted_distro/meta-reference-gpu.md b/docs/docs/distributions/self_hosted_distro/meta-reference-gpu.md index 84b85b91c..1c0ef5f6e 100644 --- a/docs/docs/distributions/self_hosted_distro/meta-reference-gpu.md +++ b/docs/docs/distributions/self_hosted_distro/meta-reference-gpu.md @@ -84,9 +84,9 @@ docker run \ --gpu all \ -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ -v ~/.llama:/root/.llama \ + -e INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \ llamastack/distribution-meta-reference-gpu \ - --port $LLAMA_STACK_PORT \ - --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct + --port $LLAMA_STACK_PORT ``` If you are using Llama Stack Safety / Shield APIs, use: @@ -98,10 +98,10 @@ docker run \ --gpu all \ -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ -v ~/.llama:/root/.llama \ + -e INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \ + -e SAFETY_MODEL=meta-llama/Llama-Guard-3-1B \ llamastack/distribution-meta-reference-gpu \ - --port $LLAMA_STACK_PORT \ - --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \ - --env SAFETY_MODEL=meta-llama/Llama-Guard-3-1B + --port $LLAMA_STACK_PORT ``` ### Via venv @@ -110,16 +110,16 @@ Make sure you have done `uv pip install llama-stack` and have the Llama Stack CL ```bash llama stack build --distro meta-reference-gpu --image-type venv +INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \ llama stack run distributions/meta-reference-gpu/run.yaml \ - --port 8321 \ - --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct + --port 8321 ``` If you are using Llama Stack Safety / Shield APIs, use: ```bash +INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \ +SAFETY_MODEL=meta-llama/Llama-Guard-3-1B \ llama stack run distributions/meta-reference-gpu/run-with-safety.yaml \ - --port 8321 \ - --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \ - --env SAFETY_MODEL=meta-llama/Llama-Guard-3-1B + --port 8321 ``` diff --git a/docs/docs/distributions/self_hosted_distro/nvidia.md b/docs/docs/distributions/self_hosted_distro/nvidia.md index 1e52797db..a6e185442 100644 --- a/docs/docs/distributions/self_hosted_distro/nvidia.md +++ b/docs/docs/distributions/self_hosted_distro/nvidia.md @@ -129,10 +129,10 @@ docker run \ --pull always \ -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ -v ./run.yaml:/root/my-run.yaml \ + -e NVIDIA_API_KEY=$NVIDIA_API_KEY \ llamastack/distribution-nvidia \ --config /root/my-run.yaml \ - --port $LLAMA_STACK_PORT \ - --env NVIDIA_API_KEY=$NVIDIA_API_KEY + --port $LLAMA_STACK_PORT ``` ### Via venv @@ -142,10 +142,10 @@ If you've set up your local development environment, you can also build the imag ```bash INFERENCE_MODEL=meta-llama/Llama-3.1-8B-Instruct llama stack build --distro nvidia --image-type venv +NVIDIA_API_KEY=$NVIDIA_API_KEY \ +INFERENCE_MODEL=$INFERENCE_MODEL \ llama stack run ./run.yaml \ - --port 8321 \ - --env NVIDIA_API_KEY=$NVIDIA_API_KEY \ - --env INFERENCE_MODEL=$INFERENCE_MODEL + --port 8321 ``` ## Example Notebooks diff --git a/docs/docs/getting_started/detailed_tutorial.mdx b/docs/docs/getting_started/detailed_tutorial.mdx index 33786ac0e..e6c22224d 100644 --- a/docs/docs/getting_started/detailed_tutorial.mdx +++ b/docs/docs/getting_started/detailed_tutorial.mdx 
@@ -86,9 +86,9 @@ docker run -it \ --pull always \ -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ -v ~/.llama:/root/.llama \ + -e OLLAMA_URL=http://host.docker.internal:11434 \ llamastack/distribution-starter \ - --port $LLAMA_STACK_PORT \ - --env OLLAMA_URL=http://host.docker.internal:11434 + --port $LLAMA_STACK_PORT ``` Note to start the container with Podman, you can do the same but replace `docker` at the start of the command with `podman`. If you are using `podman` older than `4.7.0`, please also replace `host.docker.internal` in the `OLLAMA_URL` @@ -106,9 +106,9 @@ docker run -it \ -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ -v ~/.llama:/root/.llama \ --network=host \ + -e OLLAMA_URL=http://localhost:11434 \ llamastack/distribution-starter \ - --port $LLAMA_STACK_PORT \ - --env OLLAMA_URL=http://localhost:11434 + --port $LLAMA_STACK_PORT ``` ::: You will see output like below: diff --git a/docs/getting_started.ipynb b/docs/getting_started.ipynb index d7d544ad5..3dcedfed6 100644 --- a/docs/getting_started.ipynb +++ b/docs/getting_started.ipynb @@ -123,12 +123,12 @@ " del os.environ[\"UV_SYSTEM_PYTHON\"]\n", "\n", "# this command installs all the dependencies needed for the llama stack server with the together inference provider\n", - "!uv run --with llama-stack llama stack build --distro together --image-type venv\n", + "!uv run --with llama-stack llama stack build --distro together\n", "\n", "def run_llama_stack_server_background():\n", " log_file = open(\"llama_stack_server.log\", \"w\")\n", " process = subprocess.Popen(\n", - " \"uv run --with llama-stack llama stack run together --image-type venv\",\n", + " \"uv run --with llama-stack llama stack run together\",\n", " shell=True,\n", " stdout=log_file,\n", " stderr=log_file,\n", diff --git a/docs/getting_started_llama4.ipynb b/docs/getting_started_llama4.ipynb index cd5f83517..bca505b5e 100644 --- a/docs/getting_started_llama4.ipynb +++ b/docs/getting_started_llama4.ipynb @@ -233,12 +233,12 @@ " del os.environ[\"UV_SYSTEM_PYTHON\"]\n", "\n", "# this command installs all the dependencies needed for the llama stack server\n", - "!uv run --with llama-stack llama stack build --distro meta-reference-gpu --image-type venv\n", + "!uv run --with llama-stack llama stack build --distro meta-reference-gpu\n", "\n", "def run_llama_stack_server_background():\n", " log_file = open(\"llama_stack_server.log\", \"w\")\n", " process = subprocess.Popen(\n", - " f\"uv run --with llama-stack llama stack run meta-reference-gpu --image-type venv --env INFERENCE_MODEL={model_id}\",\n", + " f\"INFERENCE_MODEL={model_id} uv run --with llama-stack llama stack run meta-reference-gpu\",\n", " shell=True,\n", " stdout=log_file,\n", " stderr=log_file,\n", diff --git a/docs/getting_started_llama_api.ipynb b/docs/getting_started_llama_api.ipynb index f65566205..7680c4a0c 100644 --- a/docs/getting_started_llama_api.ipynb +++ b/docs/getting_started_llama_api.ipynb @@ -223,12 +223,12 @@ " del os.environ[\"UV_SYSTEM_PYTHON\"]\n", "\n", "# this command installs all the dependencies needed for the llama stack server\n", - "!uv run --with llama-stack llama stack build --distro llama_api --image-type venv\n", + "!uv run --with llama-stack llama stack build --distro llama_api\n", "\n", "def run_llama_stack_server_background():\n", " log_file = open(\"llama_stack_server.log\", \"w\")\n", " process = subprocess.Popen(\n", - " \"uv run --with llama-stack llama stack run llama_api --image-type venv\",\n", + " \"uv run --with llama-stack llama stack run llama_api\",\n", " 
shell=True,\n", " stdout=log_file,\n", " stderr=log_file,\n", diff --git a/docs/quick_start.ipynb b/docs/quick_start.ipynb index c194a901d..eebfd6686 100644 --- a/docs/quick_start.ipynb +++ b/docs/quick_start.ipynb @@ -145,12 +145,12 @@ " del os.environ[\"UV_SYSTEM_PYTHON\"]\n", "\n", "# this command installs all the dependencies needed for the llama stack server with the ollama inference provider\n", - "!uv run --with llama-stack llama stack build --distro starter --image-type venv\n", + "!uv run --with llama-stack llama stack build --distro starter\n", "\n", "def run_llama_stack_server_background():\n", " log_file = open(\"llama_stack_server.log\", \"w\")\n", " process = subprocess.Popen(\n", - " f\"OLLAMA_URL=http://localhost:11434 uv run --with llama-stack llama stack run starter --image-type venv\n", + " f\"OLLAMA_URL=http://localhost:11434 uv run --with llama-stack llama stack run starter\n", " shell=True,\n", " stdout=log_file,\n", " stderr=log_file,\n", diff --git a/docs/zero_to_hero_guide/README.md b/docs/zero_to_hero_guide/README.md index 183038a88..1b643d692 100644 --- a/docs/zero_to_hero_guide/README.md +++ b/docs/zero_to_hero_guide/README.md @@ -88,7 +88,7 @@ If you're looking for more specific topics, we have a [Zero to Hero Guide](#next ... Build Successful! You can find the newly-built template here: ~/.llama/distributions/starter/starter-run.yaml - You can run the new Llama Stack Distro via: uv run --with llama-stack llama stack run starter --image-type venv + You can run the new Llama Stack Distro via: uv run --with llama-stack llama stack run starter ``` 3. **Set the ENV variables by exporting them to the terminal**: @@ -102,12 +102,11 @@ If you're looking for more specific topics, we have a [Zero to Hero Guide](#next 3. **Run the Llama Stack**: Run the stack using uv: ```bash + INFERENCE_MODEL=$INFERENCE_MODEL \ + SAFETY_MODEL=$SAFETY_MODEL \ + OLLAMA_URL=$OLLAMA_URL \ uv run --with llama-stack llama stack run starter \ - --image-type venv \ - --port $LLAMA_STACK_PORT \ - --env INFERENCE_MODEL=$INFERENCE_MODEL \ - --env SAFETY_MODEL=$SAFETY_MODEL \ - --env OLLAMA_URL=$OLLAMA_URL + --port $LLAMA_STACK_PORT ``` Note: Every time you run a new model with `ollama run`, you will need to restart the llama stack. Otherwise it won't see the new model. 
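The migration for any remaining `--env` usage follows the same pattern as the doc changes above: set the variable in the shell instead of passing a flag. A minimal sketch, using `INFERENCE_MODEL` and a `run.yaml` path as placeholder values:

```bash
# Before (flag removed by this PR):
#   llama stack run run.yaml --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct

# After: export the variable first ...
export INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct
llama stack run run.yaml

# ... or prefix the command for a one-off run:
INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct llama stack run run.yaml
```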
diff --git a/llama_stack/cli/stack/_build.py b/llama_stack/cli/stack/_build.py index b14e6fe55..471d5cb66 100644 --- a/llama_stack/cli/stack/_build.py +++ b/llama_stack/cli/stack/_build.py @@ -444,12 +444,24 @@ def _run_stack_build_command_from_build_config( cprint("Build Successful!", color="green", file=sys.stderr) cprint(f"You can find the newly-built distribution here: {run_config_file}", color="blue", file=sys.stderr) - cprint( - "You can run the new Llama Stack distro via: " - + colored(f"llama stack run {run_config_file} --image-type {build_config.image_type}", "blue"), - color="green", - file=sys.stderr, - ) + if build_config.image_type == LlamaStackImageType.VENV: + cprint( + "You can run the new Llama Stack distro (after activating " + + colored(image_name, "cyan") + + ") via: " + + colored(f"llama stack run {run_config_file}", "blue"), + color="green", + file=sys.stderr, + ) + elif build_config.image_type == LlamaStackImageType.CONTAINER: + cprint( + "You can run the container with: " + + colored( + f"docker run -p 8321:8321 -v ~/.llama:/root/.llama localhost/{image_name} --port 8321", "blue" + ), + color="green", + file=sys.stderr, + ) return distro_path else: return _generate_run_config(build_config, build_dir, image_name) diff --git a/llama_stack/cli/stack/run.py b/llama_stack/cli/stack/run.py index 19930a27b..06dae7318 100644 --- a/llama_stack/cli/stack/run.py +++ b/llama_stack/cli/stack/run.py @@ -16,7 +16,7 @@ import yaml from llama_stack.cli.stack.utils import ImageType from llama_stack.cli.subcommand import Subcommand from llama_stack.core.datatypes import LoggingConfig, StackRunConfig -from llama_stack.core.stack import cast_image_name_to_string, replace_env_vars, validate_env_pair +from llama_stack.core.stack import cast_image_name_to_string, replace_env_vars from llama_stack.core.utils.config_resolution import Mode, resolve_config_or_distro from llama_stack.log import get_logger @@ -55,18 +55,12 @@ class StackRun(Subcommand): "--image-name", type=str, default=None, - help="Name of the image to run. Defaults to the current environment", - ) - self.parser.add_argument( - "--env", - action="append", - help="Environment variables to pass to the server in KEY=VALUE format. Can be specified multiple times.", - metavar="KEY=VALUE", + help="[DEPRECATED] This flag is no longer supported. Please activate your virtual environment before running.", ) self.parser.add_argument( "--image-type", type=str, - help="Image Type used during the build. This can be only venv.", + help="[DEPRECATED] This flag is no longer supported. 
Please activate your virtual environment before running.", choices=[e.value for e in ImageType if e.value != ImageType.CONTAINER.value], ) self.parser.add_argument( @@ -79,11 +73,18 @@ class StackRun(Subcommand): import yaml from llama_stack.core.configure import parse_and_maybe_upgrade_config - from llama_stack.core.utils.exec import formulate_run_args, run_command + + if args.image_type or args.image_name: + self.parser.error( + "The --image-type and --image-name flags are no longer supported.\n\n" + "Please activate your virtual environment manually before running `llama stack run`.\n\n" + "For example:\n" + " source /path/to/venv/bin/activate\n" + " llama stack run \n" + ) if args.enable_ui: self._start_ui_development_server(args.port) - image_type, image_name = args.image_type, args.image_name if args.config: try: @@ -95,10 +96,6 @@ class StackRun(Subcommand): else: config_file = None - # Check if config is required based on image type - if image_type == ImageType.VENV.value and not config_file: - self.parser.error("Config file is required for venv environment") - if config_file: logger.info(f"Using run configuration: {config_file}") @@ -113,50 +110,13 @@ class StackRun(Subcommand): os.makedirs(str(config.external_providers_dir), exist_ok=True) except AttributeError as e: self.parser.error(f"failed to parse config file '{config_file}':\n {e}") - else: - config = None - # If neither image type nor image name is provided, assume the server should be run directly - # using the current environment packages. - if not image_type and not image_name: - logger.info("No image type or image name provided. Assuming environment packages.") - self._uvicorn_run(config_file, args) - else: - run_args = formulate_run_args(image_type, image_name) - - run_args.extend([str(args.port)]) - - if config_file: - run_args.extend(["--config", str(config_file)]) - - if args.env: - for env_var in args.env: - if "=" not in env_var: - self.parser.error(f"Environment variable '{env_var}' must be in KEY=VALUE format") - return - key, value = env_var.split("=", 1) # split on first = only - if not key: - self.parser.error(f"Environment variable '{env_var}' has empty key") - return - run_args.extend(["--env", f"{key}={value}"]) - - run_command(run_args) + self._uvicorn_run(config_file, args) def _uvicorn_run(self, config_file: Path | None, args: argparse.Namespace) -> None: if not config_file: self.parser.error("Config file is required") - # Set environment variables if provided - if args.env: - for env_pair in args.env: - try: - key, value = validate_env_pair(env_pair) - logger.info(f"Setting environment variable {key} => {value}") - os.environ[key] = value - except ValueError as e: - logger.error(f"Error: {str(e)}") - self.parser.error(f"Invalid environment variable format: {env_pair}") - config_file = resolve_config_or_distro(str(config_file), Mode.RUN) with open(config_file) as fp: config_contents = yaml.safe_load(fp) diff --git a/llama_stack/core/stack.py b/llama_stack/core/stack.py index d5d55319a..acc02eeff 100644 --- a/llama_stack/core/stack.py +++ b/llama_stack/core/stack.py @@ -274,22 +274,6 @@ def cast_image_name_to_string(config_dict: dict[str, Any]) -> dict[str, Any]: return config_dict -def validate_env_pair(env_pair: str) -> tuple[str, str]: - """Validate and split an environment variable key-value pair.""" - try: - key, value = env_pair.split("=", 1) - key = key.strip() - if not key: - raise ValueError(f"Empty key in environment variable pair: {env_pair}") - if not all(c.isalnum() or c == "_" for c in 
key): - raise ValueError(f"Key must contain only alphanumeric characters and underscores: {key}") - return key, value - except ValueError as e: - raise ValueError( - f"Invalid environment variable format '{env_pair}': {str(e)}. Expected format: KEY=value" - ) from e - - def add_internal_implementations(impls: dict[Api, Any], run_config: StackRunConfig) -> None: """Add internal implementations (inspect and providers) to the implementations dictionary. diff --git a/llama_stack/core/start_stack.sh b/llama_stack/core/start_stack.sh index 02b1cd408..cc0ae68d8 100755 --- a/llama_stack/core/start_stack.sh +++ b/llama_stack/core/start_stack.sh @@ -25,7 +25,7 @@ error_handler() { trap 'error_handler ${LINENO}' ERR if [ $# -lt 3 ]; then - echo "Usage: $0 [--config ] [--env KEY=VALUE]..." + echo "Usage: $0 [--config ]" exit 1 fi @@ -43,7 +43,6 @@ SCRIPT_DIR=$(dirname "$(readlink -f "$0")") # Initialize variables yaml_config="" -env_vars="" other_args="" # Process remaining arguments @@ -58,15 +57,6 @@ while [[ $# -gt 0 ]]; do exit 1 fi ;; - --env) - if [[ -n "$2" ]]; then - env_vars="$env_vars --env $2" - shift 2 - else - echo -e "${RED}Error: --env requires a KEY=VALUE argument${NC}" >&2 - exit 1 - fi - ;; *) other_args="$other_args $1" shift @@ -119,7 +109,6 @@ if [[ "$env_type" == "venv" ]]; then llama stack run \ $yaml_config_arg \ --port "$port" \ - $env_vars \ $other_args elif [[ "$env_type" == "container" ]]; then echo -e "${RED}Warning: Llama Stack no longer supports running Containers via the 'llama stack run' command.${NC}" diff --git a/llama_stack/distributions/dell/doc_template.md b/llama_stack/distributions/dell/doc_template.md index fcec3ea14..852e78d0e 100644 --- a/llama_stack/distributions/dell/doc_template.md +++ b/llama_stack/distributions/dell/doc_template.md @@ -117,11 +117,11 @@ docker run -it \ # NOTE: mount the llama-stack directory if testing local changes else not needed -v $HOME/git/llama-stack:/app/llama-stack-source \ # localhost/distribution-dell:dev if building / testing locally + -e INFERENCE_MODEL=$INFERENCE_MODEL \ + -e DEH_URL=$DEH_URL \ + -e CHROMA_URL=$CHROMA_URL \ llamastack/distribution-{{ name }}\ - --port $LLAMA_STACK_PORT \ - --env INFERENCE_MODEL=$INFERENCE_MODEL \ - --env DEH_URL=$DEH_URL \ - --env CHROMA_URL=$CHROMA_URL + --port $LLAMA_STACK_PORT ``` @@ -142,14 +142,14 @@ docker run \ -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ -v $HOME/.llama:/root/.llama \ -v ./llama_stack/distributions/tgi/run-with-safety.yaml:/root/my-run.yaml \ + -e INFERENCE_MODEL=$INFERENCE_MODEL \ + -e DEH_URL=$DEH_URL \ + -e SAFETY_MODEL=$SAFETY_MODEL \ + -e DEH_SAFETY_URL=$DEH_SAFETY_URL \ + -e CHROMA_URL=$CHROMA_URL \ llamastack/distribution-{{ name }} \ --config /root/my-run.yaml \ - --port $LLAMA_STACK_PORT \ - --env INFERENCE_MODEL=$INFERENCE_MODEL \ - --env DEH_URL=$DEH_URL \ - --env SAFETY_MODEL=$SAFETY_MODEL \ - --env DEH_SAFETY_URL=$DEH_SAFETY_URL \ - --env CHROMA_URL=$CHROMA_URL + --port $LLAMA_STACK_PORT ``` ### Via Conda @@ -158,21 +158,21 @@ Make sure you have done `pip install llama-stack` and have the Llama Stack CLI a ```bash llama stack build --distro {{ name }} --image-type conda -llama stack run {{ name }} - --port $LLAMA_STACK_PORT \ - --env INFERENCE_MODEL=$INFERENCE_MODEL \ - --env DEH_URL=$DEH_URL \ - --env CHROMA_URL=$CHROMA_URL +INFERENCE_MODEL=$INFERENCE_MODEL \ +DEH_URL=$DEH_URL \ +CHROMA_URL=$CHROMA_URL \ +llama stack run {{ name }} \ + --port $LLAMA_STACK_PORT ``` If you are using Llama Stack Safety / Shield APIs, use: ```bash 
+INFERENCE_MODEL=$INFERENCE_MODEL \ +DEH_URL=$DEH_URL \ +SAFETY_MODEL=$SAFETY_MODEL \ +DEH_SAFETY_URL=$DEH_SAFETY_URL \ +CHROMA_URL=$CHROMA_URL \ llama stack run ./run-with-safety.yaml \ - --port $LLAMA_STACK_PORT \ - --env INFERENCE_MODEL=$INFERENCE_MODEL \ - --env DEH_URL=$DEH_URL \ - --env SAFETY_MODEL=$SAFETY_MODEL \ - --env DEH_SAFETY_URL=$DEH_SAFETY_URL \ - --env CHROMA_URL=$CHROMA_URL + --port $LLAMA_STACK_PORT ``` diff --git a/llama_stack/distributions/meta-reference-gpu/doc_template.md b/llama_stack/distributions/meta-reference-gpu/doc_template.md index 602d053c4..92dcc6102 100644 --- a/llama_stack/distributions/meta-reference-gpu/doc_template.md +++ b/llama_stack/distributions/meta-reference-gpu/doc_template.md @@ -72,9 +72,9 @@ docker run \ --gpu all \ -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ -v ~/.llama:/root/.llama \ + -e INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \ llamastack/distribution-{{ name }} \ - --port $LLAMA_STACK_PORT \ - --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct + --port $LLAMA_STACK_PORT ``` If you are using Llama Stack Safety / Shield APIs, use: @@ -86,10 +86,10 @@ docker run \ --gpu all \ -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ -v ~/.llama:/root/.llama \ + -e INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \ + -e SAFETY_MODEL=meta-llama/Llama-Guard-3-1B \ llamastack/distribution-{{ name }} \ - --port $LLAMA_STACK_PORT \ - --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \ - --env SAFETY_MODEL=meta-llama/Llama-Guard-3-1B + --port $LLAMA_STACK_PORT ``` ### Via venv @@ -98,16 +98,16 @@ Make sure you have done `uv pip install llama-stack` and have the Llama Stack CL ```bash llama stack build --distro {{ name }} --image-type venv +INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \ llama stack run distributions/{{ name }}/run.yaml \ - --port 8321 \ - --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct + --port 8321 ``` If you are using Llama Stack Safety / Shield APIs, use: ```bash +INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \ +SAFETY_MODEL=meta-llama/Llama-Guard-3-1B \ llama stack run distributions/{{ name }}/run-with-safety.yaml \ - --port 8321 \ - --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \ - --env SAFETY_MODEL=meta-llama/Llama-Guard-3-1B + --port 8321 ``` diff --git a/llama_stack/distributions/nvidia/doc_template.md b/llama_stack/distributions/nvidia/doc_template.md index fbee17ef8..df2b68ef7 100644 --- a/llama_stack/distributions/nvidia/doc_template.md +++ b/llama_stack/distributions/nvidia/doc_template.md @@ -118,10 +118,10 @@ docker run \ --pull always \ -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ -v ./run.yaml:/root/my-run.yaml \ + -e NVIDIA_API_KEY=$NVIDIA_API_KEY \ llamastack/distribution-{{ name }} \ --config /root/my-run.yaml \ - --port $LLAMA_STACK_PORT \ - --env NVIDIA_API_KEY=$NVIDIA_API_KEY + --port $LLAMA_STACK_PORT ``` ### Via venv @@ -131,10 +131,10 @@ If you've set up your local development environment, you can also build the imag ```bash INFERENCE_MODEL=meta-llama/Llama-3.1-8B-Instruct llama stack build --distro nvidia --image-type venv +NVIDIA_API_KEY=$NVIDIA_API_KEY \ +INFERENCE_MODEL=$INFERENCE_MODEL \ llama stack run ./run.yaml \ - --port 8321 \ - --env NVIDIA_API_KEY=$NVIDIA_API_KEY \ - --env INFERENCE_MODEL=$INFERENCE_MODEL + --port 8321 ``` ## Example Notebooks diff --git a/scripts/install.sh b/scripts/install.sh index f6fbc259c..571468dc5 100755 --- a/scripts/install.sh +++ b/scripts/install.sh @@ -221,8 +221,8 @@ fi cmd=( run -d "${PLATFORM_OPTS[@]}" --name llama-stack \ --network llama-net \ 
-p "${PORT}:${PORT}" \ - "${SERVER_IMAGE}" --port "${PORT}" \ - --env OLLAMA_URL="http://ollama-server:${OLLAMA_PORT}") + -e OLLAMA_URL="http://ollama-server:${OLLAMA_PORT}" \ + "${SERVER_IMAGE}" --port "${PORT}") log "🦙 Starting Llama Stack..." if ! execute_with_log $ENGINE "${cmd[@]}"; then diff --git a/scripts/integration-tests.sh b/scripts/integration-tests.sh index eee60951d..af7f5cb74 100755 --- a/scripts/integration-tests.sh +++ b/scripts/integration-tests.sh @@ -186,7 +186,7 @@ if [[ "$STACK_CONFIG" == *"server:"* ]]; then echo "Llama Stack Server is already running, skipping start" else echo "=== Starting Llama Stack Server ===" - nohup llama stack run ci-tests --image-type venv > server.log 2>&1 & + nohup llama stack run ci-tests > server.log 2>&1 & echo "Waiting for Llama Stack Server to start..." for i in {1..30}; do From 0cde3d956de5e03689c5fe2deb81b00ac5175352 Mon Sep 17 00:00:00 2001 From: ehhuang Date: Wed, 8 Oct 2025 02:10:33 -0700 Subject: [PATCH 4/9] chore: require valid logging category (#3712) # What does this PR do? grep'd and audited all usage of 'get_logger' with help of Claude. ## Test Plan CI --- llama_stack/core/conversations/conversations.py | 2 +- llama_stack/log.py | 12 +++++++++--- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/llama_stack/core/conversations/conversations.py b/llama_stack/core/conversations/conversations.py index bef138e69..612b2f68e 100644 --- a/llama_stack/core/conversations/conversations.py +++ b/llama_stack/core/conversations/conversations.py @@ -32,7 +32,7 @@ from llama_stack.providers.utils.sqlstore.sqlstore import ( sqlstore_impl, ) -logger = get_logger(name=__name__, category="openai::conversations") +logger = get_logger(name=__name__, category="openai_conversations") class ConversationServiceConfig(BaseModel): diff --git a/llama_stack/log.py b/llama_stack/log.py index 6f751b21d..191f158e3 100644 --- a/llama_stack/log.py +++ b/llama_stack/log.py @@ -31,12 +31,17 @@ CATEGORIES = [ "client", "telemetry", "openai_responses", + "openai_conversations", "testing", "providers", "models", "files", "vector_io", "tool_runtime", + "cli", + "post_training", + "scoring", + "tests", ] UNCATEGORIZED = "uncategorized" @@ -261,11 +266,12 @@ def get_logger( if root_category in _category_levels: log_level = _category_levels[root_category] else: - log_level = _category_levels.get("root", DEFAULT_LOG_LEVEL) if category != UNCATEGORIZED: - logging.warning( - f"Unknown logging category: {category}. Falling back to default 'root' level: {log_level}" + raise ValueError( + f"Unknown logging category: {category}. To resolve, choose a valid category from the CATEGORIES list " + f"or add it to the CATEGORIES list. Available categories: {CATEGORIES}" ) + log_level = _category_levels.get("root", DEFAULT_LOG_LEVEL) logger.setLevel(log_level) return logging.LoggerAdapter(logger, {"category": category}) From 702fcd1abfae613a34b0cd955e155099ac1b9247 Mon Sep 17 00:00:00 2001 From: Omar Abdelwahab Date: Wed, 8 Oct 2025 03:09:23 -0700 Subject: [PATCH 5/9] fix: Raising an error message to the user when registering an existing provider. (#3624) When the user wants to change the attributes (which could include model name, dimensions,...etc) of an already registered provider, they will get an error message asking that they first unregister the provider before registering a new one. # What does this PR do? 
This PR updated the register function to raise an error to the user when they attempt to register a provider that was already registered asking them to un-register the existing provider first. #2313 ## Test Plan Tested the change with /tests/unit/registry/test_registry.py --------- Co-authored-by: Omar Abdelwahab --- llama_stack/core/store/registry.py | 5 ++++- tests/unit/registry/test_registry.py | 9 ++++++++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/llama_stack/core/store/registry.py b/llama_stack/core/store/registry.py index 624dbd176..0486553d5 100644 --- a/llama_stack/core/store/registry.py +++ b/llama_stack/core/store/registry.py @@ -98,7 +98,10 @@ class DiskDistributionRegistry(DistributionRegistry): existing_obj = await self.get(obj.type, obj.identifier) # dont register if the object's providerid already exists if existing_obj and existing_obj.provider_id == obj.provider_id: - return False + raise ValueError( + f"Provider '{obj.provider_id}' is already registered." + f"Unregister the existing provider first before registering it again." + ) await self.kvstore.set( KEY_FORMAT.format(type=obj.type, identifier=obj.identifier), diff --git a/tests/unit/registry/test_registry.py b/tests/unit/registry/test_registry.py index 4ea4a20b9..c1f834d5d 100644 --- a/tests/unit/registry/test_registry.py +++ b/tests/unit/registry/test_registry.py @@ -125,8 +125,15 @@ async def test_duplicate_provider_registration(cached_disk_dist_registry): provider_resource_id="test_vector_db_2", provider_id="baz", # Same provider_id ) - await cached_disk_dist_registry.register(duplicate_vector_db) + # Now we expect a ValueError to be raised for duplicate registration + with pytest.raises( + ValueError, + match=r"Provider 'baz' is already registered.*Unregister the existing provider first before registering it again.", + ): + await cached_disk_dist_registry.register(duplicate_vector_db) + + # Verify the original registration is still intact result = await cached_disk_dist_registry.get("vector_db", "test_vector_db_2") assert result is not None assert result.embedding_model == original_vector_db.embedding_model # Original values preserved From 62bac0aad4bac67504005e2e99a048b5698b9389 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 8 Oct 2025 12:16:54 +0200 Subject: [PATCH 6/9] chore(github-deps): bump actions/stale from 10.0.0 to 10.1.0 (#3684) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps [actions/stale](https://github.com/actions/stale) from 10.0.0 to 10.1.0.
Release notes

Sourced from actions/stale's releases.

v10.1.0

What's Changed

New Contributors

Full Changelog: https://github.com/actions/stale/compare/v10...v10.1.0

Commits
[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=actions/stale&package-manager=github_actions&previous-version=10.0.0&new-version=10.1.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

Dependabot commands and options

You can trigger Dependabot actions by commenting on this PR:
- `@dependabot rebase` will rebase this PR
- `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
- `@dependabot merge` will merge this PR after your CI passes on it
- `@dependabot squash and merge` will squash and merge this PR after your CI passes on it
- `@dependabot cancel merge` will cancel a previously requested merge and block automerging
- `@dependabot reopen` will reopen this PR if it is closed
- `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- `@dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency
- `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/stale_bot.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/stale_bot.yml b/.github/workflows/stale_bot.yml index 502a78f8e..c5a1ba9e5 100644 --- a/.github/workflows/stale_bot.yml +++ b/.github/workflows/stale_bot.yml @@ -24,7 +24,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Stale Action - uses: actions/stale@3a9db7e6a41a89f618792c92c0e97cc736e1b13f # v10.0.0 + uses: actions/stale@5f858e3efba33a5ca4407a664cc011ad407f2008 # v10.1.0 with: stale-issue-label: 'stale' stale-issue-message: > From 5d711d4bcb1e7e84ca5325f88f3943b1fb274d79 Mon Sep 17 00:00:00 2001 From: Bill Murdock Date: Wed, 8 Oct 2025 07:29:43 -0400 Subject: [PATCH 7/9] fix: Update watsonx.ai provider to use LiteLLM mixin and list all models (#3674) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # What does this PR do? - The watsonx.ai provider now uses the LiteLLM mixin instead of using IBM's library, which does not seem to be working (see #3165 for context). - The watsonx.ai provider now lists all the models available by calling the watsonx.ai server instead of having a hard coded list of known models. (That list gets out of date quickly) - An edge case in [llama_stack/core/routers/inference.py](https://github.com/llamastack/llama-stack/pull/3674/files#diff-a34bc966ed9befd9f13d4883c23705dff49be0ad6211c850438cdda6113f3455) is addressed that was causing my manual tests to fail. - Fixes `b64_encode_openai_embeddings_response` which was trying to enumerate over a dictionary and then reference elements of the dictionary using .field instead of ["field"]. That method is called by the LiteLLM mixin for embedding models, so it is needed to get the watsonx.ai embedding models to work. - A unit test along the lines of the one in #3348 is added. A more comprehensive plan for automatically testing the end-to-end functionality for inference providers would be a good idea, but is out of scope for this PR. - Updates to the watsonx distribution. Some were in response to the switch to LiteLLM (e.g., updating the Python packages needed). Others seem to be things that were already broken that I found along the way (e.g., a reference to a watsonx specific doc template that doesn't seem to exist). Closes #3165 Also it is related to a line-item in #3387 but doesn't really address that goal (because it uses the LiteLLM mixin, not the OpenAI one). I tried the OpenAI one and it doesn't work with watsonx.ai, presumably because the watsonx.ai service is not OpenAI compatible. It works with LiteLLM because LiteLLM has a provider implementation for watsonx.ai. ## Test Plan The test script below goes back and forth between the OpenAI and watsonx providers. The idea is that the OpenAI provider shows how it should work and then the watsonx provider output shows that it is also working with watsonx. Note that the result from the MCP test is not as good (the Llama 3.3 70b model does not choose tools as wisely as gpt-4o), but it is still working and providing a valid response. For more details on setup and the MCP server being used for testing, see [the AI Alliance sample notebook](https://github.com/The-AI-Alliance/llama-stack-examples/blob/main/notebooks/01-responses/) that these examples are drawn from. 
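As a side note on the `b64_encode_openai_embeddings_response` fix above, here is a minimal standalone sketch (with made-up sample data, not the actual patch) of why the response entries must be read with dict subscripting rather than attribute access:

```python
import base64
import struct

# Each entry in the embeddings response is a plain dict, so its vector must be
# read with entry["embedding"]; entry.embedding raises AttributeError.
# The sample data below is made up for illustration.
response_data = [{"embedding": [0.1, -0.2, 0.3]}]

for i, entry in enumerate(response_data):
    values = entry["embedding"]
    byte_array = bytearray()
    for value in values:
        byte_array.extend(struct.pack("f", float(value)))
    print(i, base64.b64encode(byte_array).decode("utf-8"))
```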
```python #!/usr/bin/env python3 import json from llama_stack_client import LlamaStackClient from litellm import completion import http.client def print_response(response): """Print response in a nicely formatted way""" print(f"ID: {response.id}") print(f"Status: {response.status}") print(f"Model: {response.model}") print(f"Created at: {response.created_at}") print(f"Output items: {len(response.output)}") for i, output_item in enumerate(response.output): if len(response.output) > 1: print(f"\n--- Output Item {i+1} ---") print(f"Output type: {output_item.type}") if output_item.type in ("text", "message"): print(f"Response content: {output_item.content[0].text}") elif output_item.type == "file_search_call": print(f" Tool Call ID: {output_item.id}") print(f" Tool Status: {output_item.status}") # 'queries' is a list, so we join it for clean printing print(f" Queries: {', '.join(output_item.queries)}") # Display results if they exist, otherwise note they are empty print(f" Results: {output_item.results if output_item.results else 'None'}") elif output_item.type == "mcp_list_tools": print_mcp_list_tools(output_item) elif output_item.type == "mcp_call": print_mcp_call(output_item) else: print(f"Response content: {output_item.content}") def print_mcp_call(mcp_call): """Print MCP call in a nicely formatted way""" print(f"\n🛠️ MCP Tool Call: {mcp_call.name}") print(f" Server: {mcp_call.server_label}") print(f" ID: {mcp_call.id}") print(f" Arguments: {mcp_call.arguments}") if mcp_call.error: print("Error: {mcp_call.error}") elif mcp_call.output: print("Output:") # Try to format JSON output nicely try: parsed_output = json.loads(mcp_call.output) print(json.dumps(parsed_output, indent=4)) except: # If not valid JSON, print as-is print(f" {mcp_call.output}") else: print(" ⏳ No output yet") def print_mcp_list_tools(mcp_list_tools): """Print MCP list tools in a nicely formatted way""" print(f"\n🔧 MCP Server: {mcp_list_tools.server_label}") print(f" ID: {mcp_list_tools.id}") print(f" Available Tools: {len(mcp_list_tools.tools)}") print("=" * 80) for i, tool in enumerate(mcp_list_tools.tools, 1): print(f"\n{i}. {tool.name}") print(f" Description: {tool.description}") # Parse and display input schema schema = tool.input_schema if schema and 'properties' in schema: properties = schema['properties'] required = schema.get('required', []) print(" Parameters:") for param_name, param_info in properties.items(): param_type = param_info.get('type', 'unknown') param_desc = param_info.get('description', 'No description') required_marker = " (required)" if param_name in required else " (optional)" print(f" • {param_name} ({param_type}){required_marker}") if param_desc: print(f" {param_desc}") if i < len(mcp_list_tools.tools): print("-" * 40) def main(): """Main function to run all the tests""" # Configuration LLAMA_STACK_URL = "http://localhost:8321/" LLAMA_STACK_MODEL_IDS = [ "openai/gpt-3.5-turbo", "openai/gpt-4o", "llama-openai-compat/Llama-3.3-70B-Instruct", "watsonx/meta-llama/llama-3-3-70b-instruct" ] # Using gpt-4o for this demo, but feel free to try one of the others or add more to run.yaml. 
OPENAI_MODEL_ID = LLAMA_STACK_MODEL_IDS[1] WATSONX_MODEL_ID = LLAMA_STACK_MODEL_IDS[-1] NPS_MCP_URL = "http://localhost:3005/sse/" print("=== Llama Stack Testing Script ===") print(f"Using OpenAI model: {OPENAI_MODEL_ID}") print(f"Using WatsonX model: {WATSONX_MODEL_ID}") print(f"MCP URL: {NPS_MCP_URL}") print() # Initialize client print("Initializing LlamaStackClient...") client = LlamaStackClient(base_url="http://localhost:8321") # Test 1: List models print("\n=== Test 1: List Models ===") try: models = client.models.list() print(f"Found {len(models)} models") except Exception as e: print(f"Error listing models: {e}") raise e # Test 2: Basic chat completion with OpenAI print("\n=== Test 2: Basic Chat Completion (OpenAI) ===") try: chat_completion_response = client.chat.completions.create( model=OPENAI_MODEL_ID, messages=[{"role": "user", "content": "What is the capital of France?"}] ) print("OpenAI Response:") for chunk in chat_completion_response.choices[0].message.content: print(chunk, end="", flush=True) print() except Exception as e: print(f"Error with OpenAI chat completion: {e}") raise e # Test 3: Basic chat completion with WatsonX print("\n=== Test 3: Basic Chat Completion (WatsonX) ===") try: chat_completion_response_wxai = client.chat.completions.create( model=WATSONX_MODEL_ID, messages=[{"role": "user", "content": "What is the capital of France?"}], ) print("WatsonX Response:") for chunk in chat_completion_response_wxai.choices[0].message.content: print(chunk, end="", flush=True) print() except Exception as e: print(f"Error with WatsonX chat completion: {e}") raise e # Test 4: Tool calling with OpenAI print("\n=== Test 4: Tool Calling (OpenAI) ===") tools = [ { "type": "function", "function": { "name": "get_current_weather", "description": "Get the current weather for a specific location", "parameters": { "type": "object", "properties": { "location": { "type": "string", "description": "The city and state, e.g., San Francisco, CA", }, "unit": { "type": "string", "enum": ["celsius", "fahrenheit"] }, }, "required": ["location"], }, }, } ] messages = [ {"role": "user", "content": "What's the weather like in Boston, MA?"} ] try: print("--- Initial API Call ---") response = client.chat.completions.create( model=OPENAI_MODEL_ID, messages=messages, tools=tools, tool_choice="auto", # "auto" is the default ) print("OpenAI tool calling response received") except Exception as e: print(f"Error with OpenAI tool calling: {e}") raise e # Test 5: Tool calling with WatsonX print("\n=== Test 5: Tool Calling (WatsonX) ===") try: wxai_response = client.chat.completions.create( model=WATSONX_MODEL_ID, messages=messages, tools=tools, tool_choice="auto", # "auto" is the default ) print("WatsonX tool calling response received") except Exception as e: print(f"Error with WatsonX tool calling: {e}") raise e # Test 6: Streaming with WatsonX print("\n=== Test 6: Streaming Response (WatsonX) ===") try: chat_completion_response_wxai_stream = client.chat.completions.create( model=WATSONX_MODEL_ID, messages=[{"role": "user", "content": "What is the capital of France?"}], stream=True ) print("Model response: ", end="") for chunk in chat_completion_response_wxai_stream: # Each 'chunk' is a ChatCompletionChunk object. # We want the content from the 'delta' attribute. if hasattr(chunk, 'choices') and chunk.choices is not None: content = chunk.choices[0].delta.content # The first few chunks might have None content, so we check for it. 
if content is not None: print(content, end="", flush=True) print() except Exception as e: print(f"Error with streaming: {e}") raise e # Test 7: MCP with OpenAI print("\n=== Test 7: MCP Integration (OpenAI) ===") try: mcp_llama_stack_client_response = client.responses.create( model=OPENAI_MODEL_ID, input="Tell me about some parks in Rhode Island, and let me know if there are any upcoming events at them.", tools=[ { "type": "mcp", "server_url": NPS_MCP_URL, "server_label": "National Parks Service tools", "allowed_tools": ["search_parks", "get_park_events"], } ] ) print_response(mcp_llama_stack_client_response) except Exception as e: print(f"Error with MCP (OpenAI): {e}") raise e # Test 8: MCP with WatsonX print("\n=== Test 8: MCP Integration (WatsonX) ===") try: mcp_llama_stack_client_response = client.responses.create( model=WATSONX_MODEL_ID, input="What is the capital of France?" ) print_response(mcp_llama_stack_client_response) except Exception as e: print(f"Error with MCP (WatsonX): {e}") raise e # Test 9: MCP with Llama 3.3 print("\n=== Test 9: MCP Integration (Llama 3.3) ===") try: mcp_llama_stack_client_response = client.responses.create( model=WATSONX_MODEL_ID, input="Tell me about some parks in Rhode Island, and let me know if there are any upcoming events at them.", tools=[ { "type": "mcp", "server_url": NPS_MCP_URL, "server_label": "National Parks Service tools", "allowed_tools": ["search_parks", "get_park_events"], } ] ) print_response(mcp_llama_stack_client_response) except Exception as e: print(f"Error with MCP (Llama 3.3): {e}") raise e # Test 10: Embeddings print("\n=== Test 10: Embeddings ===") try: conn = http.client.HTTPConnection("localhost:8321") payload = json.dumps({ "model": "watsonx/ibm/granite-embedding-278m-multilingual", "input": "Hello, world!", }) headers = { 'Content-Type': 'application/json', 'Accept': 'application/json' } conn.request("POST", "/v1/openai/v1/embeddings", payload, headers) res = conn.getresponse() data = res.read() print(data.decode("utf-8")) except Exception as e: print(f"Error with Embeddings: {e}") raise e print("\n=== Testing Complete ===") if __name__ == "__main__": main() ``` --------- Signed-off-by: Bill Murdock Co-authored-by: github-actions[bot] --- .../providers/inference/remote_watsonx.mdx | 4 +- llama_stack/core/routers/inference.py | 2 +- llama_stack/distributions/watsonx/__init__.py | 2 + llama_stack/distributions/watsonx/build.yaml | 41 +-- llama_stack/distributions/watsonx/run.yaml | 103 +----- llama_stack/distributions/watsonx/watsonx.py | 36 +- llama_stack/providers/registry/inference.py | 2 +- .../remote/inference/watsonx/__init__.py | 11 +- .../remote/inference/watsonx/config.py | 22 +- .../remote/inference/watsonx/models.py | 47 --- .../remote/inference/watsonx/watsonx.py | 324 ++++++------------ .../utils/inference/litellm_openai_mixin.py | 29 +- .../utils/inference/openai_compat.py | 28 -- .../test_inference_client_caching.py | 28 ++ 14 files changed, 203 insertions(+), 476 deletions(-) delete mode 100644 llama_stack/providers/remote/inference/watsonx/models.py diff --git a/docs/docs/providers/inference/remote_watsonx.mdx b/docs/docs/providers/inference/remote_watsonx.mdx index 33bc5bbc3..f081703ab 100644 --- a/docs/docs/providers/inference/remote_watsonx.mdx +++ b/docs/docs/providers/inference/remote_watsonx.mdx @@ -17,8 +17,8 @@ IBM WatsonX inference provider for accessing AI models on IBM's WatsonX platform | `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model 
registry. If None, all models are allowed. | | `refresh_models` | `` | No | False | Whether to refresh models periodically from the provider | | `url` | `` | No | https://us-south.ml.cloud.ibm.com | A base url for accessing the watsonx.ai | -| `api_key` | `pydantic.types.SecretStr \| None` | No | | The watsonx API key | -| `project_id` | `str \| None` | No | | The Project ID key | +| `api_key` | `pydantic.types.SecretStr \| None` | No | | The watsonx.ai API key | +| `project_id` | `str \| None` | No | | The watsonx.ai project ID | | `timeout` | `` | No | 60 | Timeout for the HTTP requests | ## Sample Configuration diff --git a/llama_stack/core/routers/inference.py b/llama_stack/core/routers/inference.py index c4338e614..847f6a2d2 100644 --- a/llama_stack/core/routers/inference.py +++ b/llama_stack/core/routers/inference.py @@ -611,7 +611,7 @@ class InferenceRouter(Inference): completion_text += "".join(choice_data["content_parts"]) # Add metrics to the chunk - if self.telemetry and chunk.usage: + if self.telemetry and hasattr(chunk, "usage") and chunk.usage: metrics = self._construct_metrics( prompt_tokens=chunk.usage.prompt_tokens, completion_tokens=chunk.usage.completion_tokens, diff --git a/llama_stack/distributions/watsonx/__init__.py b/llama_stack/distributions/watsonx/__init__.py index 756f351d8..078d86144 100644 --- a/llama_stack/distributions/watsonx/__init__.py +++ b/llama_stack/distributions/watsonx/__init__.py @@ -3,3 +3,5 @@ # # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. + +from .watsonx import get_distribution_template # noqa: F401 diff --git a/llama_stack/distributions/watsonx/build.yaml b/llama_stack/distributions/watsonx/build.yaml index bf4be7eaf..06349a741 100644 --- a/llama_stack/distributions/watsonx/build.yaml +++ b/llama_stack/distributions/watsonx/build.yaml @@ -3,44 +3,33 @@ distribution_spec: description: Use watsonx for running LLM inference providers: inference: - - provider_id: watsonx - provider_type: remote::watsonx - - provider_id: sentence-transformers - provider_type: inline::sentence-transformers + - provider_type: remote::watsonx + - provider_type: inline::sentence-transformers vector_io: - - provider_id: faiss - provider_type: inline::faiss + - provider_type: inline::faiss safety: - - provider_id: llama-guard - provider_type: inline::llama-guard + - provider_type: inline::llama-guard agents: - - provider_id: meta-reference - provider_type: inline::meta-reference + - provider_type: inline::meta-reference telemetry: - - provider_id: meta-reference - provider_type: inline::meta-reference + - provider_type: inline::meta-reference eval: - - provider_id: meta-reference - provider_type: inline::meta-reference + - provider_type: inline::meta-reference datasetio: - - provider_id: huggingface - provider_type: remote::huggingface - - provider_id: localfs - provider_type: inline::localfs + - provider_type: remote::huggingface + - provider_type: inline::localfs scoring: - - provider_id: basic - provider_type: inline::basic - - provider_id: llm-as-judge - provider_type: inline::llm-as-judge - - provider_id: braintrust - provider_type: inline::braintrust + - provider_type: inline::basic + - provider_type: inline::llm-as-judge + - provider_type: inline::braintrust tool_runtime: - provider_type: remote::brave-search - provider_type: remote::tavily-search - provider_type: inline::rag-runtime - provider_type: remote::model-context-protocol + files: + - provider_type: inline::localfs image_type: 
venv additional_pip_packages: +- aiosqlite - sqlalchemy[asyncio] -- aiosqlite -- aiosqlite diff --git a/llama_stack/distributions/watsonx/run.yaml b/llama_stack/distributions/watsonx/run.yaml index 92f367910..e0c337f9d 100644 --- a/llama_stack/distributions/watsonx/run.yaml +++ b/llama_stack/distributions/watsonx/run.yaml @@ -4,13 +4,13 @@ apis: - agents - datasetio - eval +- files - inference - safety - scoring - telemetry - tool_runtime - vector_io -- files providers: inference: - provider_id: watsonx @@ -19,8 +19,6 @@ providers: url: ${env.WATSONX_BASE_URL:=https://us-south.ml.cloud.ibm.com} api_key: ${env.WATSONX_API_KEY:=} project_id: ${env.WATSONX_PROJECT_ID:=} - - provider_id: sentence-transformers - provider_type: inline::sentence-transformers vector_io: - provider_id: faiss provider_type: inline::faiss @@ -48,7 +46,7 @@ providers: provider_type: inline::meta-reference config: service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" - sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sinks: ${env.TELEMETRY_SINKS:=sqlite} sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/trace_store.db otel_exporter_otlp_endpoint: ${env.OTEL_EXPORTER_OTLP_ENDPOINT:=} eval: @@ -109,102 +107,7 @@ metadata_store: inference_store: type: sqlite db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/inference_store.db -models: -- metadata: {} - model_id: meta-llama/llama-3-3-70b-instruct - provider_id: watsonx - provider_model_id: meta-llama/llama-3-3-70b-instruct - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.3-70B-Instruct - provider_id: watsonx - provider_model_id: meta-llama/llama-3-3-70b-instruct - model_type: llm -- metadata: {} - model_id: meta-llama/llama-2-13b-chat - provider_id: watsonx - provider_model_id: meta-llama/llama-2-13b-chat - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-2-13b - provider_id: watsonx - provider_model_id: meta-llama/llama-2-13b-chat - model_type: llm -- metadata: {} - model_id: meta-llama/llama-3-1-70b-instruct - provider_id: watsonx - provider_model_id: meta-llama/llama-3-1-70b-instruct - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.1-70B-Instruct - provider_id: watsonx - provider_model_id: meta-llama/llama-3-1-70b-instruct - model_type: llm -- metadata: {} - model_id: meta-llama/llama-3-1-8b-instruct - provider_id: watsonx - provider_model_id: meta-llama/llama-3-1-8b-instruct - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.1-8B-Instruct - provider_id: watsonx - provider_model_id: meta-llama/llama-3-1-8b-instruct - model_type: llm -- metadata: {} - model_id: meta-llama/llama-3-2-11b-vision-instruct - provider_id: watsonx - provider_model_id: meta-llama/llama-3-2-11b-vision-instruct - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.2-11B-Vision-Instruct - provider_id: watsonx - provider_model_id: meta-llama/llama-3-2-11b-vision-instruct - model_type: llm -- metadata: {} - model_id: meta-llama/llama-3-2-1b-instruct - provider_id: watsonx - provider_model_id: meta-llama/llama-3-2-1b-instruct - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.2-1B-Instruct - provider_id: watsonx - provider_model_id: meta-llama/llama-3-2-1b-instruct - model_type: llm -- metadata: {} - model_id: meta-llama/llama-3-2-3b-instruct - provider_id: watsonx - provider_model_id: meta-llama/llama-3-2-3b-instruct - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.2-3B-Instruct - provider_id: watsonx - provider_model_id: meta-llama/llama-3-2-3b-instruct - model_type: llm 
-- metadata: {} - model_id: meta-llama/llama-3-2-90b-vision-instruct - provider_id: watsonx - provider_model_id: meta-llama/llama-3-2-90b-vision-instruct - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.2-90B-Vision-Instruct - provider_id: watsonx - provider_model_id: meta-llama/llama-3-2-90b-vision-instruct - model_type: llm -- metadata: {} - model_id: meta-llama/llama-guard-3-11b-vision - provider_id: watsonx - provider_model_id: meta-llama/llama-guard-3-11b-vision - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-Guard-3-11B-Vision - provider_id: watsonx - provider_model_id: meta-llama/llama-guard-3-11b-vision - model_type: llm -- metadata: - embedding_dimension: 384 - model_id: all-MiniLM-L6-v2 - provider_id: sentence-transformers - model_type: embedding +models: [] shields: [] vector_dbs: [] datasets: [] diff --git a/llama_stack/distributions/watsonx/watsonx.py b/llama_stack/distributions/watsonx/watsonx.py index c3cab5d1b..645770612 100644 --- a/llama_stack/distributions/watsonx/watsonx.py +++ b/llama_stack/distributions/watsonx/watsonx.py @@ -4,17 +4,11 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from pathlib import Path -from llama_stack.apis.models import ModelType -from llama_stack.core.datatypes import BuildProvider, ModelInput, Provider, ToolGroupInput -from llama_stack.distributions.template import DistributionTemplate, RunConfigSettings, get_model_registry +from llama_stack.core.datatypes import BuildProvider, Provider, ToolGroupInput +from llama_stack.distributions.template import DistributionTemplate, RunConfigSettings from llama_stack.providers.inline.files.localfs.config import LocalfsFilesImplConfig -from llama_stack.providers.inline.inference.sentence_transformers import ( - SentenceTransformersInferenceConfig, -) from llama_stack.providers.remote.inference.watsonx import WatsonXConfig -from llama_stack.providers.remote.inference.watsonx.models import MODEL_ENTRIES def get_distribution_template(name: str = "watsonx") -> DistributionTemplate: @@ -52,15 +46,6 @@ def get_distribution_template(name: str = "watsonx") -> DistributionTemplate: config=WatsonXConfig.sample_run_config(), ) - embedding_provider = Provider( - provider_id="sentence-transformers", - provider_type="inline::sentence-transformers", - config=SentenceTransformersInferenceConfig.sample_run_config(), - ) - - available_models = { - "watsonx": MODEL_ENTRIES, - } default_tool_groups = [ ToolGroupInput( toolgroup_id="builtin::websearch", @@ -72,36 +57,25 @@ def get_distribution_template(name: str = "watsonx") -> DistributionTemplate: ), ] - embedding_model = ModelInput( - model_id="all-MiniLM-L6-v2", - provider_id="sentence-transformers", - model_type=ModelType.embedding, - metadata={ - "embedding_dimension": 384, - }, - ) - files_provider = Provider( provider_id="meta-reference-files", provider_type="inline::localfs", config=LocalfsFilesImplConfig.sample_run_config(f"~/.llama/distributions/{name}"), ) - default_models, _ = get_model_registry(available_models) return DistributionTemplate( name=name, distro_type="remote_hosted", description="Use watsonx for running LLM inference", container_image=None, - template_path=Path(__file__).parent / "doc_template.md", + template_path=None, providers=providers, - available_models_by_provider=available_models, run_configs={ "run.yaml": RunConfigSettings( provider_overrides={ - "inference": [inference_provider, embedding_provider], + "inference": 
[inference_provider], "files": [files_provider], }, - default_models=default_models + [embedding_model], + default_models=[], default_tool_groups=default_tool_groups, ), }, diff --git a/llama_stack/providers/registry/inference.py b/llama_stack/providers/registry/inference.py index bf6a09b6c..f89565892 100644 --- a/llama_stack/providers/registry/inference.py +++ b/llama_stack/providers/registry/inference.py @@ -268,7 +268,7 @@ Available Models: api=Api.inference, adapter_type="watsonx", provider_type="remote::watsonx", - pip_packages=["ibm_watsonx_ai"], + pip_packages=["litellm"], module="llama_stack.providers.remote.inference.watsonx", config_class="llama_stack.providers.remote.inference.watsonx.WatsonXConfig", provider_data_validator="llama_stack.providers.remote.inference.watsonx.WatsonXProviderDataValidator", diff --git a/llama_stack/providers/remote/inference/watsonx/__init__.py b/llama_stack/providers/remote/inference/watsonx/__init__.py index e59e873b6..35e74a720 100644 --- a/llama_stack/providers/remote/inference/watsonx/__init__.py +++ b/llama_stack/providers/remote/inference/watsonx/__init__.py @@ -4,19 +4,12 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from llama_stack.apis.inference import Inference - from .config import WatsonXConfig -async def get_adapter_impl(config: WatsonXConfig, _deps) -> Inference: - # import dynamically so `llama stack build` does not fail due to missing dependencies +async def get_adapter_impl(config: WatsonXConfig, _deps): + # import dynamically so the import is used only when it is needed from .watsonx import WatsonXInferenceAdapter - if not isinstance(config, WatsonXConfig): - raise RuntimeError(f"Unexpected config type: {type(config)}") adapter = WatsonXInferenceAdapter(config) return adapter - - -__all__ = ["get_adapter_impl", "WatsonXConfig"] diff --git a/llama_stack/providers/remote/inference/watsonx/config.py b/llama_stack/providers/remote/inference/watsonx/config.py index 4bc0173c4..9e98d4003 100644 --- a/llama_stack/providers/remote/inference/watsonx/config.py +++ b/llama_stack/providers/remote/inference/watsonx/config.py @@ -7,16 +7,18 @@ import os from typing import Any -from pydantic import BaseModel, Field, SecretStr +from pydantic import BaseModel, ConfigDict, Field, SecretStr from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig from llama_stack.schema_utils import json_schema_type class WatsonXProviderDataValidator(BaseModel): - url: str - api_key: str - project_id: str + model_config = ConfigDict( + from_attributes=True, + extra="forbid", + ) + watsonx_api_key: str | None @json_schema_type @@ -25,13 +27,17 @@ class WatsonXConfig(RemoteInferenceProviderConfig): default_factory=lambda: os.getenv("WATSONX_BASE_URL", "https://us-south.ml.cloud.ibm.com"), description="A base url for accessing the watsonx.ai", ) + # This seems like it should be required, but none of the other remote inference + # providers require it, so this is optional here too for consistency. + # The OpenAIConfig uses default=None instead, so this is following that precedent. api_key: SecretStr | None = Field( - default_factory=lambda: os.getenv("WATSONX_API_KEY"), - description="The watsonx API key", + default=None, + description="The watsonx.ai API key", ) + # As above, this is optional here too for consistency. 
project_id: str | None = Field( - default_factory=lambda: os.getenv("WATSONX_PROJECT_ID"), - description="The Project ID key", + default=None, + description="The watsonx.ai project ID", ) timeout: int = Field( default=60, diff --git a/llama_stack/providers/remote/inference/watsonx/models.py b/llama_stack/providers/remote/inference/watsonx/models.py deleted file mode 100644 index d98f0510a..000000000 --- a/llama_stack/providers/remote/inference/watsonx/models.py +++ /dev/null @@ -1,47 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from llama_stack.models.llama.sku_types import CoreModelId -from llama_stack.providers.utils.inference.model_registry import build_hf_repo_model_entry - -MODEL_ENTRIES = [ - build_hf_repo_model_entry( - "meta-llama/llama-3-3-70b-instruct", - CoreModelId.llama3_3_70b_instruct.value, - ), - build_hf_repo_model_entry( - "meta-llama/llama-2-13b-chat", - CoreModelId.llama2_13b.value, - ), - build_hf_repo_model_entry( - "meta-llama/llama-3-1-70b-instruct", - CoreModelId.llama3_1_70b_instruct.value, - ), - build_hf_repo_model_entry( - "meta-llama/llama-3-1-8b-instruct", - CoreModelId.llama3_1_8b_instruct.value, - ), - build_hf_repo_model_entry( - "meta-llama/llama-3-2-11b-vision-instruct", - CoreModelId.llama3_2_11b_vision_instruct.value, - ), - build_hf_repo_model_entry( - "meta-llama/llama-3-2-1b-instruct", - CoreModelId.llama3_2_1b_instruct.value, - ), - build_hf_repo_model_entry( - "meta-llama/llama-3-2-3b-instruct", - CoreModelId.llama3_2_3b_instruct.value, - ), - build_hf_repo_model_entry( - "meta-llama/llama-3-2-90b-vision-instruct", - CoreModelId.llama3_2_90b_vision_instruct.value, - ), - build_hf_repo_model_entry( - "meta-llama/llama-guard-3-11b-vision", - CoreModelId.llama_guard_3_11b_vision.value, - ), -] diff --git a/llama_stack/providers/remote/inference/watsonx/watsonx.py b/llama_stack/providers/remote/inference/watsonx/watsonx.py index fc58691e2..d04472936 100644 --- a/llama_stack/providers/remote/inference/watsonx/watsonx.py +++ b/llama_stack/providers/remote/inference/watsonx/watsonx.py @@ -4,240 +4,120 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from collections.abc import AsyncGenerator, AsyncIterator from typing import Any -from ibm_watsonx_ai.foundation_models import Model -from ibm_watsonx_ai.metanames import GenTextParamsMetaNames as GenParams -from openai import AsyncOpenAI +import requests -from llama_stack.apis.inference import ( - ChatCompletionRequest, - CompletionRequest, - GreedySamplingStrategy, - Inference, - OpenAIChatCompletion, - OpenAIChatCompletionChunk, - OpenAICompletion, - OpenAIEmbeddingsResponse, - OpenAIMessageParam, - OpenAIResponseFormatParam, - TopKSamplingStrategy, - TopPSamplingStrategy, -) -from llama_stack.log import get_logger -from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper -from llama_stack.providers.utils.inference.openai_compat import ( - prepare_openai_completion_params, -) -from llama_stack.providers.utils.inference.prompt_adapter import ( - chat_completion_request_to_prompt, - completion_request_to_prompt, - request_has_media, -) - -from . 
import WatsonXConfig -from .models import MODEL_ENTRIES - -logger = get_logger(name=__name__, category="inference::watsonx") +from llama_stack.apis.inference import ChatCompletionRequest +from llama_stack.apis.models import Model +from llama_stack.apis.models.models import ModelType +from llama_stack.providers.remote.inference.watsonx.config import WatsonXConfig +from llama_stack.providers.utils.inference.litellm_openai_mixin import LiteLLMOpenAIMixin -# Note on structured output -# WatsonX returns responses with a json embedded into a string. -# Examples: +class WatsonXInferenceAdapter(LiteLLMOpenAIMixin): + _model_cache: dict[str, Model] = {} -# ChatCompletionResponse(completion_message=CompletionMessage(content='```json\n{\n -# "first_name": "Michael",\n "last_name": "Jordan",\n'...) -# Not even a valid JSON, but we can still extract the JSON from the content + def __init__(self, config: WatsonXConfig): + LiteLLMOpenAIMixin.__init__( + self, + litellm_provider_name="watsonx", + api_key_from_config=config.api_key.get_secret_value() if config.api_key else None, + provider_data_api_key_field="watsonx_api_key", + ) + self.available_models = None + self.config = config -# CompletionResponse(content=' \nThe best answer is $\\boxed{\\{"name": "Michael Jordan", -# "year_born": "1963", "year_retired": "2003"\\}}$') -# Find the start of the boxed content + def get_base_url(self) -> str: + return self.config.url + async def _get_params(self, request: ChatCompletionRequest) -> dict[str, Any]: + # Get base parameters from parent + params = await super()._get_params(request) -class WatsonXInferenceAdapter(Inference, ModelRegistryHelper): - def __init__(self, config: WatsonXConfig) -> None: - ModelRegistryHelper.__init__(self, model_entries=MODEL_ENTRIES) - - logger.info(f"Initializing watsonx InferenceAdapter({config.url})...") - self._config = config - self._openai_client: AsyncOpenAI | None = None - - self._project_id = self._config.project_id - - def _get_client(self, model_id) -> Model: - config_api_key = self._config.api_key.get_secret_value() if self._config.api_key else None - config_url = self._config.url - project_id = self._config.project_id - credentials = {"url": config_url, "apikey": config_api_key} - - return Model(model_id=model_id, credentials=credentials, project_id=project_id) - - def _get_openai_client(self) -> AsyncOpenAI: - if not self._openai_client: - self._openai_client = AsyncOpenAI( - base_url=f"{self._config.url}/openai/v1", - api_key=self._config.api_key, - ) - return self._openai_client - - async def _get_params(self, request: ChatCompletionRequest | CompletionRequest) -> dict: - input_dict = {"params": {}} - media_present = request_has_media(request) - llama_model = self.get_llama_model(request.model) - if isinstance(request, ChatCompletionRequest): - input_dict["prompt"] = await chat_completion_request_to_prompt(request, llama_model) - else: - assert not media_present, "Together does not support media for Completion requests" - input_dict["prompt"] = await completion_request_to_prompt(request) - if request.sampling_params: - if request.sampling_params.strategy: - input_dict["params"][GenParams.DECODING_METHOD] = request.sampling_params.strategy.type - if request.sampling_params.max_tokens: - input_dict["params"][GenParams.MAX_NEW_TOKENS] = request.sampling_params.max_tokens - if request.sampling_params.repetition_penalty: - input_dict["params"][GenParams.REPETITION_PENALTY] = request.sampling_params.repetition_penalty - - if isinstance(request.sampling_params.strategy, 
TopPSamplingStrategy): - input_dict["params"][GenParams.TOP_P] = request.sampling_params.strategy.top_p - input_dict["params"][GenParams.TEMPERATURE] = request.sampling_params.strategy.temperature - if isinstance(request.sampling_params.strategy, TopKSamplingStrategy): - input_dict["params"][GenParams.TOP_K] = request.sampling_params.strategy.top_k - if isinstance(request.sampling_params.strategy, GreedySamplingStrategy): - input_dict["params"][GenParams.TEMPERATURE] = 0.0 - - input_dict["params"][GenParams.STOP_SEQUENCES] = ["<|endoftext|>"] - - params = { - **input_dict, - } + # Add watsonx.ai specific parameters + params["project_id"] = self.config.project_id + params["time_limit"] = self.config.timeout return params - async def openai_embeddings( - self, - model: str, - input: str | list[str], - encoding_format: str | None = "float", - dimensions: int | None = None, - user: str | None = None, - ) -> OpenAIEmbeddingsResponse: - raise NotImplementedError() + # Copied from OpenAIMixin + async def check_model_availability(self, model: str) -> bool: + """ + Check if a specific model is available from the provider's /v1/models. - async def openai_completion( - self, - model: str, - prompt: str | list[str] | list[int] | list[list[int]], - best_of: int | None = None, - echo: bool | None = None, - frequency_penalty: float | None = None, - logit_bias: dict[str, float] | None = None, - logprobs: bool | None = None, - max_tokens: int | None = None, - n: int | None = None, - presence_penalty: float | None = None, - seed: int | None = None, - stop: str | list[str] | None = None, - stream: bool | None = None, - stream_options: dict[str, Any] | None = None, - temperature: float | None = None, - top_p: float | None = None, - user: str | None = None, - guided_choice: list[str] | None = None, - prompt_logprobs: int | None = None, - suffix: str | None = None, - ) -> OpenAICompletion: - model_obj = await self.model_store.get_model(model) - params = await prepare_openai_completion_params( - model=model_obj.provider_resource_id, - prompt=prompt, - best_of=best_of, - echo=echo, - frequency_penalty=frequency_penalty, - logit_bias=logit_bias, - logprobs=logprobs, - max_tokens=max_tokens, - n=n, - presence_penalty=presence_penalty, - seed=seed, - stop=stop, - stream=stream, - stream_options=stream_options, - temperature=temperature, - top_p=top_p, - user=user, - ) - return await self._get_openai_client().completions.create(**params) # type: ignore + :param model: The model identifier to check. + :return: True if the model is available dynamically, False otherwise. 
+ """ + if not self._model_cache: + await self.list_models() + return model in self._model_cache - async def openai_chat_completion( - self, - model: str, - messages: list[OpenAIMessageParam], - frequency_penalty: float | None = None, - function_call: str | dict[str, Any] | None = None, - functions: list[dict[str, Any]] | None = None, - logit_bias: dict[str, float] | None = None, - logprobs: bool | None = None, - max_completion_tokens: int | None = None, - max_tokens: int | None = None, - n: int | None = None, - parallel_tool_calls: bool | None = None, - presence_penalty: float | None = None, - response_format: OpenAIResponseFormatParam | None = None, - seed: int | None = None, - stop: str | list[str] | None = None, - stream: bool | None = None, - stream_options: dict[str, Any] | None = None, - temperature: float | None = None, - tool_choice: str | dict[str, Any] | None = None, - tools: list[dict[str, Any]] | None = None, - top_logprobs: int | None = None, - top_p: float | None = None, - user: str | None = None, - ) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]: - model_obj = await self.model_store.get_model(model) - params = await prepare_openai_completion_params( - model=model_obj.provider_resource_id, - messages=messages, - frequency_penalty=frequency_penalty, - function_call=function_call, - functions=functions, - logit_bias=logit_bias, - logprobs=logprobs, - max_completion_tokens=max_completion_tokens, - max_tokens=max_tokens, - n=n, - parallel_tool_calls=parallel_tool_calls, - presence_penalty=presence_penalty, - response_format=response_format, - seed=seed, - stop=stop, - stream=stream, - stream_options=stream_options, - temperature=temperature, - tool_choice=tool_choice, - tools=tools, - top_logprobs=top_logprobs, - top_p=top_p, - user=user, - ) - if params.get("stream", False): - return self._stream_openai_chat_completion(params) - return await self._get_openai_client().chat.completions.create(**params) # type: ignore + async def list_models(self) -> list[Model] | None: + self._model_cache = {} + models = [] + for model_spec in self._get_model_specs(): + functions = [f["id"] for f in model_spec.get("functions", [])] + # Format: {"embedding_dimension": 1536, "context_length": 8192} - async def _stream_openai_chat_completion(self, params: dict) -> AsyncGenerator: - # watsonx.ai sometimes adds usage data to the stream - include_usage = False - if params.get("stream_options", None): - include_usage = params["stream_options"].get("include_usage", False) - stream = await self._get_openai_client().chat.completions.create(**params) + # Example of an embedding model: + # {'model_id': 'ibm/granite-embedding-278m-multilingual', + # 'label': 'granite-embedding-278m-multilingual', + # 'model_limits': {'max_sequence_length': 512, 'embedding_dimension': 768}, + # ... 
+ provider_resource_id = f"{self.__provider_id__}/{model_spec['model_id']}" + if "embedding" in functions: + embedding_dimension = model_spec["model_limits"]["embedding_dimension"] + context_length = model_spec["model_limits"]["max_sequence_length"] + embedding_metadata = { + "embedding_dimension": embedding_dimension, + "context_length": context_length, + } + model = Model( + identifier=model_spec["model_id"], + provider_resource_id=provider_resource_id, + provider_id=self.__provider_id__, + metadata=embedding_metadata, + model_type=ModelType.embedding, + ) + self._model_cache[provider_resource_id] = model + models.append(model) + if "text_chat" in functions: + model = Model( + identifier=model_spec["model_id"], + provider_resource_id=provider_resource_id, + provider_id=self.__provider_id__, + metadata={}, + model_type=ModelType.llm, + ) + # In theory, I guess it is possible that a model could be both an embedding model and a text chat model. + # In that case, the cache will record the generator Model object, and the list which we return will have + # both the generator Model object and the text chat Model object. That's fine because the cache is + # only used for check_model_availability() anyway. + self._model_cache[provider_resource_id] = model + models.append(model) + return models - seen_finish_reason = False - async for chunk in stream: - # Final usage chunk with no choices that the user didn't request, so discard - if not include_usage and seen_finish_reason and len(chunk.choices) == 0: - break - yield chunk - for choice in chunk.choices: - if choice.finish_reason: - seen_finish_reason = True - break + # LiteLLM provides methods to list models for many providers, but not for watsonx.ai. + # So we need to implement our own method to list models by calling the watsonx.ai API. + def _get_model_specs(self) -> list[dict[str, Any]]: + """ + Retrieves foundation model specifications from the watsonx.ai API. + """ + url = f"{self.config.url}/ml/v1/foundation_model_specs?version=2023-10-25" + headers = { + # Note that there is no authorization header. Listing models does not require authentication. + "Content-Type": "application/json", + } + + response = requests.get(url, headers=headers) + + # --- Process the Response --- + # Raise an exception for bad status codes (4xx or 5xx) + response.raise_for_status() + + # If the request is successful, parse and return the JSON response. + # The response should contain a list of model specifications + response_data = response.json() + if "resources" not in response_data: + raise ValueError("Resources not found in response") + return response_data["resources"] diff --git a/llama_stack/providers/utils/inference/litellm_openai_mixin.py b/llama_stack/providers/utils/inference/litellm_openai_mixin.py index 6c8f61c3b..6bef97dd5 100644 --- a/llama_stack/providers/utils/inference/litellm_openai_mixin.py +++ b/llama_stack/providers/utils/inference/litellm_openai_mixin.py @@ -4,6 +4,8 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
+import base64 +import struct from collections.abc import AsyncIterator from typing import Any @@ -16,6 +18,7 @@ from llama_stack.apis.inference import ( OpenAIChatCompletion, OpenAIChatCompletionChunk, OpenAICompletion, + OpenAIEmbeddingData, OpenAIEmbeddingsResponse, OpenAIEmbeddingUsage, OpenAIMessageParam, @@ -26,7 +29,6 @@ from llama_stack.core.request_headers import NeedsRequestProviderData from llama_stack.log import get_logger from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper, ProviderModelEntry from llama_stack.providers.utils.inference.openai_compat import ( - b64_encode_openai_embeddings_response, convert_message_to_openai_dict_new, convert_tooldef_to_openai_tool, get_sampling_options, @@ -349,3 +351,28 @@ class LiteLLMOpenAIMixin( return False return model in litellm.models_by_provider[self.litellm_provider_name] + + +def b64_encode_openai_embeddings_response( + response_data: list[dict], encoding_format: str | None = "float" +) -> list[OpenAIEmbeddingData]: + """ + Process the OpenAI embeddings response to encode the embeddings in base64 format if specified. + """ + data = [] + for i, embedding_data in enumerate(response_data): + if encoding_format == "base64": + byte_array = bytearray() + for embedding_value in embedding_data["embedding"]: + byte_array.extend(struct.pack("f", float(embedding_value))) + + response_embedding = base64.b64encode(byte_array).decode("utf-8") + else: + response_embedding = embedding_data["embedding"] + data.append( + OpenAIEmbeddingData( + embedding=response_embedding, + index=i, + ) + ) + return data diff --git a/llama_stack/providers/utils/inference/openai_compat.py b/llama_stack/providers/utils/inference/openai_compat.py index d863eb53a..7e465a14c 100644 --- a/llama_stack/providers/utils/inference/openai_compat.py +++ b/llama_stack/providers/utils/inference/openai_compat.py @@ -3,9 +3,7 @@ # # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -import base64 import json -import struct import time import uuid import warnings @@ -103,7 +101,6 @@ from llama_stack.apis.inference import ( JsonSchemaResponseFormat, Message, OpenAIChatCompletion, - OpenAIEmbeddingData, OpenAIMessageParam, OpenAIResponseFormatParam, SamplingParams, @@ -1402,28 +1399,3 @@ def prepare_openai_embeddings_params( params["user"] = user return params - - -def b64_encode_openai_embeddings_response( - response_data: dict, encoding_format: str | None = "float" -) -> list[OpenAIEmbeddingData]: - """ - Process the OpenAI embeddings response to encode the embeddings in base64 format if specified. 
- """ - data = [] - for i, embedding_data in enumerate(response_data): - if encoding_format == "base64": - byte_array = bytearray() - for embedding_value in embedding_data.embedding: - byte_array.extend(struct.pack("f", float(embedding_value))) - - response_embedding = base64.b64encode(byte_array).decode("utf-8") - else: - response_embedding = embedding_data.embedding - data.append( - OpenAIEmbeddingData( - embedding=response_embedding, - index=i, - ) - ) - return data diff --git a/tests/unit/providers/inference/test_inference_client_caching.py b/tests/unit/providers/inference/test_inference_client_caching.py index d30b5b12a..55a6793c2 100644 --- a/tests/unit/providers/inference/test_inference_client_caching.py +++ b/tests/unit/providers/inference/test_inference_client_caching.py @@ -18,6 +18,8 @@ from llama_stack.providers.remote.inference.openai.config import OpenAIConfig from llama_stack.providers.remote.inference.openai.openai import OpenAIInferenceAdapter from llama_stack.providers.remote.inference.together.config import TogetherImplConfig from llama_stack.providers.remote.inference.together.together import TogetherInferenceAdapter +from llama_stack.providers.remote.inference.watsonx.config import WatsonXConfig +from llama_stack.providers.remote.inference.watsonx.watsonx import WatsonXInferenceAdapter @pytest.mark.parametrize( @@ -58,3 +60,29 @@ def test_openai_provider_data_used(config_cls, adapter_cls, provider_data_valida {"x-llamastack-provider-data": json.dumps({inference_adapter.provider_data_api_key_field: api_key})} ): assert inference_adapter.client.api_key == api_key + + +@pytest.mark.parametrize( + "config_cls,adapter_cls,provider_data_validator", + [ + ( + WatsonXConfig, + WatsonXInferenceAdapter, + "llama_stack.providers.remote.inference.watsonx.config.WatsonXProviderDataValidator", + ), + ], +) +def test_litellm_provider_data_used(config_cls, adapter_cls, provider_data_validator: str): + """Validate data for LiteLLM-based providers. Similar to test_openai_provider_data_used, but without the + assumption that there is an OpenAI-compatible client object.""" + + inference_adapter = adapter_cls(config=config_cls()) + + inference_adapter.__provider_spec__ = MagicMock() + inference_adapter.__provider_spec__.provider_data_validator = provider_data_validator + + for api_key in ["test1", "test2"]: + with request_provider_data_context( + {"x-llamastack-provider-data": json.dumps({inference_adapter.provider_data_api_key_field: api_key})} + ): + assert inference_adapter.get_api_key() == api_key From 96886afacaf16e45c74cdb341280c98fa0ce74f0 Mon Sep 17 00:00:00 2001 From: grs Date: Wed, 8 Oct 2025 15:47:17 +0100 Subject: [PATCH 8/9] fix(responses): fix regression in support for mcp tool require_approval argument (#3731) # What does this PR do? It prevents a tool call message being added to the chat completions message without a corresponding tool call result, which is needed in the case that an approval is required first or if the approval request is denied. In both these cases the tool call messages is popped of the next turn messages. 
Closes #3728 ## Test Plan Ran the integration tests Manual check of both approval and denial against gpt-4o Signed-off-by: Gordon Sim --- .../agents/meta_reference/responses/openai_responses.py | 2 +- .../inline/agents/meta_reference/responses/streaming.py | 8 +++++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py b/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py index 8ccdcb0e1..245203f10 100644 --- a/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +++ b/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py @@ -269,7 +269,7 @@ class OpenAIResponsesImpl: response_tools=tools, temperature=temperature, response_format=response_format, - inputs=input, + inputs=all_input, ) # Create orchestrator and delegate streaming logic diff --git a/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py b/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py index 8a662e6db..895d13a7f 100644 --- a/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +++ b/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py @@ -175,6 +175,8 @@ class StreamingResponseOrchestrator: ): yield stream_event + messages = next_turn_messages + if not function_tool_calls and not non_function_tool_calls: break @@ -187,9 +189,7 @@ class StreamingResponseOrchestrator: logger.info(f"Exiting inference loop since iteration count({n_iter}) exceeds {self.max_infer_iters=}") break - messages = next_turn_messages - - self.final_messages = messages.copy() + [current_response.choices[0].message] + self.final_messages = messages.copy() # Create final response final_response = OpenAIResponseObject( @@ -232,9 +232,11 @@ class StreamingResponseOrchestrator: non_function_tool_calls.append(tool_call) else: logger.info(f"Approval denied for {tool_call.id} on {tool_call.function.name}") + next_turn_messages.pop() else: logger.info(f"Requesting approval for {tool_call.id} on {tool_call.function.name}") approvals.append(tool_call) + next_turn_messages.pop() else: non_function_tool_calls.append(tool_call) From ed4e452de001b0143253379eca4e7ae197b7d167 Mon Sep 17 00:00:00 2001 From: Eric Huang Date: Wed, 8 Oct 2025 11:39:21 -0700 Subject: [PATCH 9/9] chore!: remove ALL telemetry APIs # What does this PR do? 
## Test Plan --- docs/static/deprecated-llama-stack-spec.html | 897 ------------ docs/static/deprecated-llama-stack-spec.yaml | 663 --------- .../static/experimental-llama-stack-spec.html | 899 +----------- .../static/experimental-llama-stack-spec.yaml | 663 --------- docs/static/llama-stack-spec.html | 391 ----- docs/static/llama-stack-spec.yaml | 291 ---- docs/static/stainless-llama-stack-spec.html | 1274 ----------------- docs/static/stainless-llama-stack-spec.yaml | 942 ------------ llama_stack/apis/telemetry/telemetry.py | 76 +- 9 files changed, 2 insertions(+), 6094 deletions(-) diff --git a/docs/static/deprecated-llama-stack-spec.html b/docs/static/deprecated-llama-stack-spec.html index 04a3dca9b..1b8e78446 100644 --- a/docs/static/deprecated-llama-stack-spec.html +++ b/docs/static/deprecated-llama-stack-spec.html @@ -3526,343 +3526,6 @@ }, "deprecated": true } - }, - "/v1/telemetry/metrics/{metric_name}": { - "post": { - "responses": { - "200": { - "description": "A QueryMetricsResponse.", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/QueryMetricsResponse" - } - } - } - }, - "400": { - "$ref": "#/components/responses/BadRequest400" - }, - "429": { - "$ref": "#/components/responses/TooManyRequests429" - }, - "500": { - "$ref": "#/components/responses/InternalServerError500" - }, - "default": { - "$ref": "#/components/responses/DefaultError" - } - }, - "tags": [ - "Telemetry" - ], - "summary": "Query metrics.", - "description": "Query metrics.", - "parameters": [ - { - "name": "metric_name", - "in": "path", - "description": "The name of the metric to query.", - "required": true, - "schema": { - "type": "string" - } - } - ], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/QueryMetricsRequest" - } - } - }, - "required": true - }, - "deprecated": true - } - }, - "/v1/telemetry/spans": { - "post": { - "responses": { - "200": { - "description": "A QuerySpansResponse.", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/QuerySpansResponse" - } - } - } - }, - "400": { - "$ref": "#/components/responses/BadRequest400" - }, - "429": { - "$ref": "#/components/responses/TooManyRequests429" - }, - "500": { - "$ref": "#/components/responses/InternalServerError500" - }, - "default": { - "$ref": "#/components/responses/DefaultError" - } - }, - "tags": [ - "Telemetry" - ], - "summary": "Query spans.", - "description": "Query spans.", - "parameters": [], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/QuerySpansRequest" - } - } - }, - "required": true - }, - "deprecated": true - } - }, - "/v1/telemetry/spans/export": { - "post": { - "responses": { - "200": { - "description": "OK" - }, - "400": { - "$ref": "#/components/responses/BadRequest400" - }, - "429": { - "$ref": "#/components/responses/TooManyRequests429" - }, - "500": { - "$ref": "#/components/responses/InternalServerError500" - }, - "default": { - "$ref": "#/components/responses/DefaultError" - } - }, - "tags": [ - "Telemetry" - ], - "summary": "Save spans to a dataset.", - "description": "Save spans to a dataset.", - "parameters": [], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/SaveSpansToDatasetRequest" - } - } - }, - "required": true - }, - "deprecated": true - } - }, - "/v1/telemetry/spans/{span_id}/tree": { - "post": { - "responses": { - "200": { - "description": "A QuerySpanTreeResponse.", - 
"content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/QuerySpanTreeResponse" - } - } - } - }, - "400": { - "$ref": "#/components/responses/BadRequest400" - }, - "429": { - "$ref": "#/components/responses/TooManyRequests429" - }, - "500": { - "$ref": "#/components/responses/InternalServerError500" - }, - "default": { - "$ref": "#/components/responses/DefaultError" - } - }, - "tags": [ - "Telemetry" - ], - "summary": "Get a span tree by its ID.", - "description": "Get a span tree by its ID.", - "parameters": [ - { - "name": "span_id", - "in": "path", - "description": "The ID of the span to get the tree from.", - "required": true, - "schema": { - "type": "string" - } - } - ], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/GetSpanTreeRequest" - } - } - }, - "required": true - }, - "deprecated": true - } - }, - "/v1/telemetry/traces": { - "post": { - "responses": { - "200": { - "description": "A QueryTracesResponse.", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/QueryTracesResponse" - } - } - } - }, - "400": { - "$ref": "#/components/responses/BadRequest400" - }, - "429": { - "$ref": "#/components/responses/TooManyRequests429" - }, - "500": { - "$ref": "#/components/responses/InternalServerError500" - }, - "default": { - "$ref": "#/components/responses/DefaultError" - } - }, - "tags": [ - "Telemetry" - ], - "summary": "Query traces.", - "description": "Query traces.", - "parameters": [], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/QueryTracesRequest" - } - } - }, - "required": true - }, - "deprecated": true - } - }, - "/v1/telemetry/traces/{trace_id}": { - "get": { - "responses": { - "200": { - "description": "A Trace.", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/Trace" - } - } - } - }, - "400": { - "$ref": "#/components/responses/BadRequest400" - }, - "429": { - "$ref": "#/components/responses/TooManyRequests429" - }, - "500": { - "$ref": "#/components/responses/InternalServerError500" - }, - "default": { - "$ref": "#/components/responses/DefaultError" - } - }, - "tags": [ - "Telemetry" - ], - "summary": "Get a trace by its ID.", - "description": "Get a trace by its ID.", - "parameters": [ - { - "name": "trace_id", - "in": "path", - "description": "The ID of the trace to get.", - "required": true, - "schema": { - "type": "string" - } - } - ], - "deprecated": true - } - }, - "/v1/telemetry/traces/{trace_id}/spans/{span_id}": { - "get": { - "responses": { - "200": { - "description": "A Span.", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/Span" - } - } - } - }, - "400": { - "$ref": "#/components/responses/BadRequest400" - }, - "429": { - "$ref": "#/components/responses/TooManyRequests429" - }, - "500": { - "$ref": "#/components/responses/InternalServerError500" - }, - "default": { - "$ref": "#/components/responses/DefaultError" - } - }, - "tags": [ - "Telemetry" - ], - "summary": "Get a span by its ID.", - "description": "Get a span by its ID.", - "parameters": [ - { - "name": "trace_id", - "in": "path", - "description": "The ID of the trace to get the span from.", - "required": true, - "schema": { - "type": "string" - } - }, - { - "name": "span_id", - "in": "path", - "description": "The ID of the span to get.", - "required": true, - "schema": { - "type": "string" - } - } - ], - "deprecated": true - } } }, "jsonSchemaDialect": 
"https://json-schema.org/draft/2020-12/schema", @@ -12716,561 +12379,6 @@ "logger_config" ], "title": "SupervisedFineTuneRequest" - }, - "QueryMetricsRequest": { - "type": "object", - "properties": { - "start_time": { - "type": "integer", - "description": "The start time of the metric to query." - }, - "end_time": { - "type": "integer", - "description": "The end time of the metric to query." - }, - "granularity": { - "type": "string", - "description": "The granularity of the metric to query." - }, - "query_type": { - "type": "string", - "enum": [ - "range", - "instant" - ], - "description": "The type of query to perform." - }, - "label_matchers": { - "type": "array", - "items": { - "type": "object", - "properties": { - "name": { - "type": "string", - "description": "The name of the label to match" - }, - "value": { - "type": "string", - "description": "The value to match against" - }, - "operator": { - "type": "string", - "enum": [ - "=", - "!=", - "=~", - "!~" - ], - "description": "The comparison operator to use for matching", - "default": "=" - } - }, - "additionalProperties": false, - "required": [ - "name", - "value", - "operator" - ], - "title": "MetricLabelMatcher", - "description": "A matcher for filtering metrics by label values." - }, - "description": "The label matchers to apply to the metric." - } - }, - "additionalProperties": false, - "required": [ - "start_time", - "query_type" - ], - "title": "QueryMetricsRequest" - }, - "MetricDataPoint": { - "type": "object", - "properties": { - "timestamp": { - "type": "integer", - "description": "Unix timestamp when the metric value was recorded" - }, - "value": { - "type": "number", - "description": "The numeric value of the metric at this timestamp" - }, - "unit": { - "type": "string" - } - }, - "additionalProperties": false, - "required": [ - "timestamp", - "value", - "unit" - ], - "title": "MetricDataPoint", - "description": "A single data point in a metric time series." - }, - "MetricLabel": { - "type": "object", - "properties": { - "name": { - "type": "string", - "description": "The name of the label" - }, - "value": { - "type": "string", - "description": "The value of the label" - } - }, - "additionalProperties": false, - "required": [ - "name", - "value" - ], - "title": "MetricLabel", - "description": "A label associated with a metric." - }, - "MetricSeries": { - "type": "object", - "properties": { - "metric": { - "type": "string", - "description": "The name of the metric" - }, - "labels": { - "type": "array", - "items": { - "$ref": "#/components/schemas/MetricLabel" - }, - "description": "List of labels associated with this metric series" - }, - "values": { - "type": "array", - "items": { - "$ref": "#/components/schemas/MetricDataPoint" - }, - "description": "List of data points in chronological order" - } - }, - "additionalProperties": false, - "required": [ - "metric", - "labels", - "values" - ], - "title": "MetricSeries", - "description": "A time series of metric data points." - }, - "QueryMetricsResponse": { - "type": "object", - "properties": { - "data": { - "type": "array", - "items": { - "$ref": "#/components/schemas/MetricSeries" - }, - "description": "List of metric series matching the query criteria" - } - }, - "additionalProperties": false, - "required": [ - "data" - ], - "title": "QueryMetricsResponse", - "description": "Response containing metric time series data." 
- }, - "QueryCondition": { - "type": "object", - "properties": { - "key": { - "type": "string", - "description": "The attribute key to filter on" - }, - "op": { - "$ref": "#/components/schemas/QueryConditionOp", - "description": "The comparison operator to apply" - }, - "value": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ], - "description": "The value to compare against" - } - }, - "additionalProperties": false, - "required": [ - "key", - "op", - "value" - ], - "title": "QueryCondition", - "description": "A condition for filtering query results." - }, - "QueryConditionOp": { - "type": "string", - "enum": [ - "eq", - "ne", - "gt", - "lt" - ], - "title": "QueryConditionOp", - "description": "Comparison operators for query conditions." - }, - "QuerySpansRequest": { - "type": "object", - "properties": { - "attribute_filters": { - "type": "array", - "items": { - "$ref": "#/components/schemas/QueryCondition" - }, - "description": "The attribute filters to apply to the spans." - }, - "attributes_to_return": { - "type": "array", - "items": { - "type": "string" - }, - "description": "The attributes to return in the spans." - }, - "max_depth": { - "type": "integer", - "description": "The maximum depth of the tree." - } - }, - "additionalProperties": false, - "required": [ - "attribute_filters", - "attributes_to_return" - ], - "title": "QuerySpansRequest" - }, - "Span": { - "type": "object", - "properties": { - "span_id": { - "type": "string", - "description": "Unique identifier for the span" - }, - "trace_id": { - "type": "string", - "description": "Unique identifier for the trace this span belongs to" - }, - "parent_span_id": { - "type": "string", - "description": "(Optional) Unique identifier for the parent span, if this is a child span" - }, - "name": { - "type": "string", - "description": "Human-readable name describing the operation this span represents" - }, - "start_time": { - "type": "string", - "format": "date-time", - "description": "Timestamp when the operation began" - }, - "end_time": { - "type": "string", - "format": "date-time", - "description": "(Optional) Timestamp when the operation finished, if completed" - }, - "attributes": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - }, - "description": "(Optional) Key-value pairs containing additional metadata about the span" - } - }, - "additionalProperties": false, - "required": [ - "span_id", - "trace_id", - "name", - "start_time" - ], - "title": "Span", - "description": "A span representing a single operation within a trace." - }, - "QuerySpansResponse": { - "type": "object", - "properties": { - "data": { - "type": "array", - "items": { - "$ref": "#/components/schemas/Span" - }, - "description": "List of spans matching the query criteria" - } - }, - "additionalProperties": false, - "required": [ - "data" - ], - "title": "QuerySpansResponse", - "description": "Response containing a list of spans." - }, - "SaveSpansToDatasetRequest": { - "type": "object", - "properties": { - "attribute_filters": { - "type": "array", - "items": { - "$ref": "#/components/schemas/QueryCondition" - }, - "description": "The attribute filters to apply to the spans." 
- }, - "attributes_to_save": { - "type": "array", - "items": { - "type": "string" - }, - "description": "The attributes to save to the dataset." - }, - "dataset_id": { - "type": "string", - "description": "The ID of the dataset to save the spans to." - }, - "max_depth": { - "type": "integer", - "description": "The maximum depth of the tree." - } - }, - "additionalProperties": false, - "required": [ - "attribute_filters", - "attributes_to_save", - "dataset_id" - ], - "title": "SaveSpansToDatasetRequest" - }, - "GetSpanTreeRequest": { - "type": "object", - "properties": { - "attributes_to_return": { - "type": "array", - "items": { - "type": "string" - }, - "description": "The attributes to return in the tree." - }, - "max_depth": { - "type": "integer", - "description": "The maximum depth of the tree." - } - }, - "additionalProperties": false, - "title": "GetSpanTreeRequest" - }, - "SpanStatus": { - "type": "string", - "enum": [ - "ok", - "error" - ], - "title": "SpanStatus", - "description": "The status of a span indicating whether it completed successfully or with an error." - }, - "SpanWithStatus": { - "type": "object", - "properties": { - "span_id": { - "type": "string", - "description": "Unique identifier for the span" - }, - "trace_id": { - "type": "string", - "description": "Unique identifier for the trace this span belongs to" - }, - "parent_span_id": { - "type": "string", - "description": "(Optional) Unique identifier for the parent span, if this is a child span" - }, - "name": { - "type": "string", - "description": "Human-readable name describing the operation this span represents" - }, - "start_time": { - "type": "string", - "format": "date-time", - "description": "Timestamp when the operation began" - }, - "end_time": { - "type": "string", - "format": "date-time", - "description": "(Optional) Timestamp when the operation finished, if completed" - }, - "attributes": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - }, - "description": "(Optional) Key-value pairs containing additional metadata about the span" - }, - "status": { - "$ref": "#/components/schemas/SpanStatus", - "description": "(Optional) The current status of the span" - } - }, - "additionalProperties": false, - "required": [ - "span_id", - "trace_id", - "name", - "start_time" - ], - "title": "SpanWithStatus", - "description": "A span that includes status information." - }, - "QuerySpanTreeResponse": { - "type": "object", - "properties": { - "data": { - "type": "object", - "additionalProperties": { - "$ref": "#/components/schemas/SpanWithStatus" - }, - "description": "Dictionary mapping span IDs to spans with status information" - } - }, - "additionalProperties": false, - "required": [ - "data" - ], - "title": "QuerySpanTreeResponse", - "description": "Response containing a tree structure of spans." - }, - "QueryTracesRequest": { - "type": "object", - "properties": { - "attribute_filters": { - "type": "array", - "items": { - "$ref": "#/components/schemas/QueryCondition" - }, - "description": "The attribute filters to apply to the traces." - }, - "limit": { - "type": "integer", - "description": "The limit of traces to return." - }, - "offset": { - "type": "integer", - "description": "The offset of the traces to return." 
- }, - "order_by": { - "type": "array", - "items": { - "type": "string" - }, - "description": "The order by of the traces to return." - } - }, - "additionalProperties": false, - "title": "QueryTracesRequest" - }, - "Trace": { - "type": "object", - "properties": { - "trace_id": { - "type": "string", - "description": "Unique identifier for the trace" - }, - "root_span_id": { - "type": "string", - "description": "Unique identifier for the root span that started this trace" - }, - "start_time": { - "type": "string", - "format": "date-time", - "description": "Timestamp when the trace began" - }, - "end_time": { - "type": "string", - "format": "date-time", - "description": "(Optional) Timestamp when the trace finished, if completed" - } - }, - "additionalProperties": false, - "required": [ - "trace_id", - "root_span_id", - "start_time" - ], - "title": "Trace", - "description": "A trace representing the complete execution path of a request across multiple operations." - }, - "QueryTracesResponse": { - "type": "object", - "properties": { - "data": { - "type": "array", - "items": { - "$ref": "#/components/schemas/Trace" - }, - "description": "List of traces matching the query criteria" - } - }, - "additionalProperties": false, - "required": [ - "data" - ], - "title": "QueryTracesResponse", - "description": "Response containing a list of traces." } }, "responses": { @@ -13387,10 +12495,6 @@ "description": "OpenAI-compatible Moderations API.", "x-displayName": "Safety" }, - { - "name": "Telemetry", - "description": "" - }, { "name": "VectorIO", "description": "" @@ -13410,7 +12514,6 @@ "Models", "PostTraining (Coming Soon)", "Safety", - "Telemetry", "VectorIO" ] } diff --git a/docs/static/deprecated-llama-stack-spec.yaml b/docs/static/deprecated-llama-stack-spec.yaml index 1a215b877..349f54224 100644 --- a/docs/static/deprecated-llama-stack-spec.yaml +++ b/docs/static/deprecated-llama-stack-spec.yaml @@ -2593,238 +2593,6 @@ paths: $ref: '#/components/schemas/SupervisedFineTuneRequest' required: true deprecated: true - /v1/telemetry/metrics/{metric_name}: - post: - responses: - '200': - description: A QueryMetricsResponse. - content: - application/json: - schema: - $ref: '#/components/schemas/QueryMetricsResponse' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Telemetry - summary: Query metrics. - description: Query metrics. - parameters: - - name: metric_name - in: path - description: The name of the metric to query. - required: true - schema: - type: string - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/QueryMetricsRequest' - required: true - deprecated: true - /v1/telemetry/spans: - post: - responses: - '200': - description: A QuerySpansResponse. - content: - application/json: - schema: - $ref: '#/components/schemas/QuerySpansResponse' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Telemetry - summary: Query spans. - description: Query spans. 
- parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/QuerySpansRequest' - required: true - deprecated: true - /v1/telemetry/spans/export: - post: - responses: - '200': - description: OK - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Telemetry - summary: Save spans to a dataset. - description: Save spans to a dataset. - parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/SaveSpansToDatasetRequest' - required: true - deprecated: true - /v1/telemetry/spans/{span_id}/tree: - post: - responses: - '200': - description: A QuerySpanTreeResponse. - content: - application/json: - schema: - $ref: '#/components/schemas/QuerySpanTreeResponse' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Telemetry - summary: Get a span tree by its ID. - description: Get a span tree by its ID. - parameters: - - name: span_id - in: path - description: The ID of the span to get the tree from. - required: true - schema: - type: string - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/GetSpanTreeRequest' - required: true - deprecated: true - /v1/telemetry/traces: - post: - responses: - '200': - description: A QueryTracesResponse. - content: - application/json: - schema: - $ref: '#/components/schemas/QueryTracesResponse' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Telemetry - summary: Query traces. - description: Query traces. - parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/QueryTracesRequest' - required: true - deprecated: true - /v1/telemetry/traces/{trace_id}: - get: - responses: - '200': - description: A Trace. - content: - application/json: - schema: - $ref: '#/components/schemas/Trace' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Telemetry - summary: Get a trace by its ID. - description: Get a trace by its ID. - parameters: - - name: trace_id - in: path - description: The ID of the trace to get. - required: true - schema: - type: string - deprecated: true - /v1/telemetry/traces/{trace_id}/spans/{span_id}: - get: - responses: - '200': - description: A Span. - content: - application/json: - schema: - $ref: '#/components/schemas/Span' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Telemetry - summary: Get a span by its ID. - description: Get a span by its ID. - parameters: - - name: trace_id - in: path - description: >- - The ID of the trace to get the span from. 
- required: true - schema: - type: string - - name: span_id - in: path - description: The ID of the span to get. - required: true - schema: - type: string - deprecated: true jsonSchemaDialect: >- https://json-schema.org/draft/2020-12/schema components: @@ -9510,434 +9278,6 @@ components: - hyperparam_search_config - logger_config title: SupervisedFineTuneRequest - QueryMetricsRequest: - type: object - properties: - start_time: - type: integer - description: The start time of the metric to query. - end_time: - type: integer - description: The end time of the metric to query. - granularity: - type: string - description: The granularity of the metric to query. - query_type: - type: string - enum: - - range - - instant - description: The type of query to perform. - label_matchers: - type: array - items: - type: object - properties: - name: - type: string - description: The name of the label to match - value: - type: string - description: The value to match against - operator: - type: string - enum: - - '=' - - '!=' - - =~ - - '!~' - description: >- - The comparison operator to use for matching - default: '=' - additionalProperties: false - required: - - name - - value - - operator - title: MetricLabelMatcher - description: >- - A matcher for filtering metrics by label values. - description: >- - The label matchers to apply to the metric. - additionalProperties: false - required: - - start_time - - query_type - title: QueryMetricsRequest - MetricDataPoint: - type: object - properties: - timestamp: - type: integer - description: >- - Unix timestamp when the metric value was recorded - value: - type: number - description: >- - The numeric value of the metric at this timestamp - unit: - type: string - additionalProperties: false - required: - - timestamp - - value - - unit - title: MetricDataPoint - description: >- - A single data point in a metric time series. - MetricLabel: - type: object - properties: - name: - type: string - description: The name of the label - value: - type: string - description: The value of the label - additionalProperties: false - required: - - name - - value - title: MetricLabel - description: A label associated with a metric. - MetricSeries: - type: object - properties: - metric: - type: string - description: The name of the metric - labels: - type: array - items: - $ref: '#/components/schemas/MetricLabel' - description: >- - List of labels associated with this metric series - values: - type: array - items: - $ref: '#/components/schemas/MetricDataPoint' - description: >- - List of data points in chronological order - additionalProperties: false - required: - - metric - - labels - - values - title: MetricSeries - description: A time series of metric data points. - QueryMetricsResponse: - type: object - properties: - data: - type: array - items: - $ref: '#/components/schemas/MetricSeries' - description: >- - List of metric series matching the query criteria - additionalProperties: false - required: - - data - title: QueryMetricsResponse - description: >- - Response containing metric time series data. 
- QueryCondition: - type: object - properties: - key: - type: string - description: The attribute key to filter on - op: - $ref: '#/components/schemas/QueryConditionOp' - description: The comparison operator to apply - value: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: The value to compare against - additionalProperties: false - required: - - key - - op - - value - title: QueryCondition - description: A condition for filtering query results. - QueryConditionOp: - type: string - enum: - - eq - - ne - - gt - - lt - title: QueryConditionOp - description: >- - Comparison operators for query conditions. - QuerySpansRequest: - type: object - properties: - attribute_filters: - type: array - items: - $ref: '#/components/schemas/QueryCondition' - description: >- - The attribute filters to apply to the spans. - attributes_to_return: - type: array - items: - type: string - description: The attributes to return in the spans. - max_depth: - type: integer - description: The maximum depth of the tree. - additionalProperties: false - required: - - attribute_filters - - attributes_to_return - title: QuerySpansRequest - Span: - type: object - properties: - span_id: - type: string - description: Unique identifier for the span - trace_id: - type: string - description: >- - Unique identifier for the trace this span belongs to - parent_span_id: - type: string - description: >- - (Optional) Unique identifier for the parent span, if this is a child span - name: - type: string - description: >- - Human-readable name describing the operation this span represents - start_time: - type: string - format: date-time - description: Timestamp when the operation began - end_time: - type: string - format: date-time - description: >- - (Optional) Timestamp when the operation finished, if completed - attributes: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - (Optional) Key-value pairs containing additional metadata about the span - additionalProperties: false - required: - - span_id - - trace_id - - name - - start_time - title: Span - description: >- - A span representing a single operation within a trace. - QuerySpansResponse: - type: object - properties: - data: - type: array - items: - $ref: '#/components/schemas/Span' - description: >- - List of spans matching the query criteria - additionalProperties: false - required: - - data - title: QuerySpansResponse - description: Response containing a list of spans. - SaveSpansToDatasetRequest: - type: object - properties: - attribute_filters: - type: array - items: - $ref: '#/components/schemas/QueryCondition' - description: >- - The attribute filters to apply to the spans. - attributes_to_save: - type: array - items: - type: string - description: The attributes to save to the dataset. - dataset_id: - type: string - description: >- - The ID of the dataset to save the spans to. - max_depth: - type: integer - description: The maximum depth of the tree. - additionalProperties: false - required: - - attribute_filters - - attributes_to_save - - dataset_id - title: SaveSpansToDatasetRequest - GetSpanTreeRequest: - type: object - properties: - attributes_to_return: - type: array - items: - type: string - description: The attributes to return in the tree. - max_depth: - type: integer - description: The maximum depth of the tree. 
- additionalProperties: false - title: GetSpanTreeRequest - SpanStatus: - type: string - enum: - - ok - - error - title: SpanStatus - description: >- - The status of a span indicating whether it completed successfully or with - an error. - SpanWithStatus: - type: object - properties: - span_id: - type: string - description: Unique identifier for the span - trace_id: - type: string - description: >- - Unique identifier for the trace this span belongs to - parent_span_id: - type: string - description: >- - (Optional) Unique identifier for the parent span, if this is a child span - name: - type: string - description: >- - Human-readable name describing the operation this span represents - start_time: - type: string - format: date-time - description: Timestamp when the operation began - end_time: - type: string - format: date-time - description: >- - (Optional) Timestamp when the operation finished, if completed - attributes: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - (Optional) Key-value pairs containing additional metadata about the span - status: - $ref: '#/components/schemas/SpanStatus' - description: >- - (Optional) The current status of the span - additionalProperties: false - required: - - span_id - - trace_id - - name - - start_time - title: SpanWithStatus - description: A span that includes status information. - QuerySpanTreeResponse: - type: object - properties: - data: - type: object - additionalProperties: - $ref: '#/components/schemas/SpanWithStatus' - description: >- - Dictionary mapping span IDs to spans with status information - additionalProperties: false - required: - - data - title: QuerySpanTreeResponse - description: >- - Response containing a tree structure of spans. - QueryTracesRequest: - type: object - properties: - attribute_filters: - type: array - items: - $ref: '#/components/schemas/QueryCondition' - description: >- - The attribute filters to apply to the traces. - limit: - type: integer - description: The limit of traces to return. - offset: - type: integer - description: The offset of the traces to return. - order_by: - type: array - items: - type: string - description: The order by of the traces to return. - additionalProperties: false - title: QueryTracesRequest - Trace: - type: object - properties: - trace_id: - type: string - description: Unique identifier for the trace - root_span_id: - type: string - description: >- - Unique identifier for the root span that started this trace - start_time: - type: string - format: date-time - description: Timestamp when the trace began - end_time: - type: string - format: date-time - description: >- - (Optional) Timestamp when the trace finished, if completed - additionalProperties: false - required: - - trace_id - - root_span_id - - start_time - title: Trace - description: >- - A trace representing the complete execution path of a request across multiple - operations. - QueryTracesResponse: - type: object - properties: - data: - type: array - items: - $ref: '#/components/schemas/Trace' - description: >- - List of traces matching the query criteria - additionalProperties: false - required: - - data - title: QueryTracesResponse - description: Response containing a list of traces. responses: BadRequest400: description: The request was invalid or malformed @@ -10043,8 +9383,6 @@ tags: - name: Safety description: OpenAI-compatible Moderations API. 
x-displayName: Safety - - name: Telemetry - description: '' - name: VectorIO description: '' x-tagGroups: @@ -10060,5 +9398,4 @@ x-tagGroups: - Models - PostTraining (Coming Soon) - Safety - - Telemetry - VectorIO diff --git a/docs/static/experimental-llama-stack-spec.html b/docs/static/experimental-llama-stack-spec.html index a84226c05..e3edf2ffc 100644 --- a/docs/static/experimental-llama-stack-spec.html +++ b/docs/static/experimental-llama-stack-spec.html @@ -1711,343 +1711,6 @@ }, "deprecated": false } - }, - "/v1alpha/telemetry/metrics/{metric_name}": { - "post": { - "responses": { - "200": { - "description": "A QueryMetricsResponse.", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/QueryMetricsResponse" - } - } - } - }, - "400": { - "$ref": "#/components/responses/BadRequest400" - }, - "429": { - "$ref": "#/components/responses/TooManyRequests429" - }, - "500": { - "$ref": "#/components/responses/InternalServerError500" - }, - "default": { - "$ref": "#/components/responses/DefaultError" - } - }, - "tags": [ - "Telemetry" - ], - "summary": "Query metrics.", - "description": "Query metrics.", - "parameters": [ - { - "name": "metric_name", - "in": "path", - "description": "The name of the metric to query.", - "required": true, - "schema": { - "type": "string" - } - } - ], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/QueryMetricsRequest" - } - } - }, - "required": true - }, - "deprecated": false - } - }, - "/v1alpha/telemetry/spans": { - "post": { - "responses": { - "200": { - "description": "A QuerySpansResponse.", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/QuerySpansResponse" - } - } - } - }, - "400": { - "$ref": "#/components/responses/BadRequest400" - }, - "429": { - "$ref": "#/components/responses/TooManyRequests429" - }, - "500": { - "$ref": "#/components/responses/InternalServerError500" - }, - "default": { - "$ref": "#/components/responses/DefaultError" - } - }, - "tags": [ - "Telemetry" - ], - "summary": "Query spans.", - "description": "Query spans.", - "parameters": [], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/QuerySpansRequest" - } - } - }, - "required": true - }, - "deprecated": false - } - }, - "/v1alpha/telemetry/spans/export": { - "post": { - "responses": { - "200": { - "description": "OK" - }, - "400": { - "$ref": "#/components/responses/BadRequest400" - }, - "429": { - "$ref": "#/components/responses/TooManyRequests429" - }, - "500": { - "$ref": "#/components/responses/InternalServerError500" - }, - "default": { - "$ref": "#/components/responses/DefaultError" - } - }, - "tags": [ - "Telemetry" - ], - "summary": "Save spans to a dataset.", - "description": "Save spans to a dataset.", - "parameters": [], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/SaveSpansToDatasetRequest" - } - } - }, - "required": true - }, - "deprecated": false - } - }, - "/v1alpha/telemetry/spans/{span_id}/tree": { - "post": { - "responses": { - "200": { - "description": "A QuerySpanTreeResponse.", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/QuerySpanTreeResponse" - } - } - } - }, - "400": { - "$ref": "#/components/responses/BadRequest400" - }, - "429": { - "$ref": "#/components/responses/TooManyRequests429" - }, - "500": { - "$ref": "#/components/responses/InternalServerError500" - }, - "default": { - 
"$ref": "#/components/responses/DefaultError" - } - }, - "tags": [ - "Telemetry" - ], - "summary": "Get a span tree by its ID.", - "description": "Get a span tree by its ID.", - "parameters": [ - { - "name": "span_id", - "in": "path", - "description": "The ID of the span to get the tree from.", - "required": true, - "schema": { - "type": "string" - } - } - ], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/GetSpanTreeRequest" - } - } - }, - "required": true - }, - "deprecated": false - } - }, - "/v1alpha/telemetry/traces": { - "post": { - "responses": { - "200": { - "description": "A QueryTracesResponse.", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/QueryTracesResponse" - } - } - } - }, - "400": { - "$ref": "#/components/responses/BadRequest400" - }, - "429": { - "$ref": "#/components/responses/TooManyRequests429" - }, - "500": { - "$ref": "#/components/responses/InternalServerError500" - }, - "default": { - "$ref": "#/components/responses/DefaultError" - } - }, - "tags": [ - "Telemetry" - ], - "summary": "Query traces.", - "description": "Query traces.", - "parameters": [], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/QueryTracesRequest" - } - } - }, - "required": true - }, - "deprecated": false - } - }, - "/v1alpha/telemetry/traces/{trace_id}": { - "get": { - "responses": { - "200": { - "description": "A Trace.", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/Trace" - } - } - } - }, - "400": { - "$ref": "#/components/responses/BadRequest400" - }, - "429": { - "$ref": "#/components/responses/TooManyRequests429" - }, - "500": { - "$ref": "#/components/responses/InternalServerError500" - }, - "default": { - "$ref": "#/components/responses/DefaultError" - } - }, - "tags": [ - "Telemetry" - ], - "summary": "Get a trace by its ID.", - "description": "Get a trace by its ID.", - "parameters": [ - { - "name": "trace_id", - "in": "path", - "description": "The ID of the trace to get.", - "required": true, - "schema": { - "type": "string" - } - } - ], - "deprecated": false - } - }, - "/v1alpha/telemetry/traces/{trace_id}/spans/{span_id}": { - "get": { - "responses": { - "200": { - "description": "A Span.", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/Span" - } - } - } - }, - "400": { - "$ref": "#/components/responses/BadRequest400" - }, - "429": { - "$ref": "#/components/responses/TooManyRequests429" - }, - "500": { - "$ref": "#/components/responses/InternalServerError500" - }, - "default": { - "$ref": "#/components/responses/DefaultError" - } - }, - "tags": [ - "Telemetry" - ], - "summary": "Get a span by its ID.", - "description": "Get a span by its ID.", - "parameters": [ - { - "name": "trace_id", - "in": "path", - "description": "The ID of the trace to get the span from.", - "required": true, - "schema": { - "type": "string" - } - }, - { - "name": "span_id", - "in": "path", - "description": "The ID of the span to get.", - "required": true, - "schema": { - "type": "string" - } - } - ], - "deprecated": false - } } }, "jsonSchemaDialect": "https://json-schema.org/draft/2020-12/schema", @@ -5765,561 +5428,6 @@ "logger_config" ], "title": "SupervisedFineTuneRequest" - }, - "QueryMetricsRequest": { - "type": "object", - "properties": { - "start_time": { - "type": "integer", - "description": "The start time of the metric to query." 
- }, - "end_time": { - "type": "integer", - "description": "The end time of the metric to query." - }, - "granularity": { - "type": "string", - "description": "The granularity of the metric to query." - }, - "query_type": { - "type": "string", - "enum": [ - "range", - "instant" - ], - "description": "The type of query to perform." - }, - "label_matchers": { - "type": "array", - "items": { - "type": "object", - "properties": { - "name": { - "type": "string", - "description": "The name of the label to match" - }, - "value": { - "type": "string", - "description": "The value to match against" - }, - "operator": { - "type": "string", - "enum": [ - "=", - "!=", - "=~", - "!~" - ], - "description": "The comparison operator to use for matching", - "default": "=" - } - }, - "additionalProperties": false, - "required": [ - "name", - "value", - "operator" - ], - "title": "MetricLabelMatcher", - "description": "A matcher for filtering metrics by label values." - }, - "description": "The label matchers to apply to the metric." - } - }, - "additionalProperties": false, - "required": [ - "start_time", - "query_type" - ], - "title": "QueryMetricsRequest" - }, - "MetricDataPoint": { - "type": "object", - "properties": { - "timestamp": { - "type": "integer", - "description": "Unix timestamp when the metric value was recorded" - }, - "value": { - "type": "number", - "description": "The numeric value of the metric at this timestamp" - }, - "unit": { - "type": "string" - } - }, - "additionalProperties": false, - "required": [ - "timestamp", - "value", - "unit" - ], - "title": "MetricDataPoint", - "description": "A single data point in a metric time series." - }, - "MetricLabel": { - "type": "object", - "properties": { - "name": { - "type": "string", - "description": "The name of the label" - }, - "value": { - "type": "string", - "description": "The value of the label" - } - }, - "additionalProperties": false, - "required": [ - "name", - "value" - ], - "title": "MetricLabel", - "description": "A label associated with a metric." - }, - "MetricSeries": { - "type": "object", - "properties": { - "metric": { - "type": "string", - "description": "The name of the metric" - }, - "labels": { - "type": "array", - "items": { - "$ref": "#/components/schemas/MetricLabel" - }, - "description": "List of labels associated with this metric series" - }, - "values": { - "type": "array", - "items": { - "$ref": "#/components/schemas/MetricDataPoint" - }, - "description": "List of data points in chronological order" - } - }, - "additionalProperties": false, - "required": [ - "metric", - "labels", - "values" - ], - "title": "MetricSeries", - "description": "A time series of metric data points." - }, - "QueryMetricsResponse": { - "type": "object", - "properties": { - "data": { - "type": "array", - "items": { - "$ref": "#/components/schemas/MetricSeries" - }, - "description": "List of metric series matching the query criteria" - } - }, - "additionalProperties": false, - "required": [ - "data" - ], - "title": "QueryMetricsResponse", - "description": "Response containing metric time series data." 
- }, - "QueryCondition": { - "type": "object", - "properties": { - "key": { - "type": "string", - "description": "The attribute key to filter on" - }, - "op": { - "$ref": "#/components/schemas/QueryConditionOp", - "description": "The comparison operator to apply" - }, - "value": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ], - "description": "The value to compare against" - } - }, - "additionalProperties": false, - "required": [ - "key", - "op", - "value" - ], - "title": "QueryCondition", - "description": "A condition for filtering query results." - }, - "QueryConditionOp": { - "type": "string", - "enum": [ - "eq", - "ne", - "gt", - "lt" - ], - "title": "QueryConditionOp", - "description": "Comparison operators for query conditions." - }, - "QuerySpansRequest": { - "type": "object", - "properties": { - "attribute_filters": { - "type": "array", - "items": { - "$ref": "#/components/schemas/QueryCondition" - }, - "description": "The attribute filters to apply to the spans." - }, - "attributes_to_return": { - "type": "array", - "items": { - "type": "string" - }, - "description": "The attributes to return in the spans." - }, - "max_depth": { - "type": "integer", - "description": "The maximum depth of the tree." - } - }, - "additionalProperties": false, - "required": [ - "attribute_filters", - "attributes_to_return" - ], - "title": "QuerySpansRequest" - }, - "Span": { - "type": "object", - "properties": { - "span_id": { - "type": "string", - "description": "Unique identifier for the span" - }, - "trace_id": { - "type": "string", - "description": "Unique identifier for the trace this span belongs to" - }, - "parent_span_id": { - "type": "string", - "description": "(Optional) Unique identifier for the parent span, if this is a child span" - }, - "name": { - "type": "string", - "description": "Human-readable name describing the operation this span represents" - }, - "start_time": { - "type": "string", - "format": "date-time", - "description": "Timestamp when the operation began" - }, - "end_time": { - "type": "string", - "format": "date-time", - "description": "(Optional) Timestamp when the operation finished, if completed" - }, - "attributes": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - }, - "description": "(Optional) Key-value pairs containing additional metadata about the span" - } - }, - "additionalProperties": false, - "required": [ - "span_id", - "trace_id", - "name", - "start_time" - ], - "title": "Span", - "description": "A span representing a single operation within a trace." - }, - "QuerySpansResponse": { - "type": "object", - "properties": { - "data": { - "type": "array", - "items": { - "$ref": "#/components/schemas/Span" - }, - "description": "List of spans matching the query criteria" - } - }, - "additionalProperties": false, - "required": [ - "data" - ], - "title": "QuerySpansResponse", - "description": "Response containing a list of spans." - }, - "SaveSpansToDatasetRequest": { - "type": "object", - "properties": { - "attribute_filters": { - "type": "array", - "items": { - "$ref": "#/components/schemas/QueryCondition" - }, - "description": "The attribute filters to apply to the spans." 
- }, - "attributes_to_save": { - "type": "array", - "items": { - "type": "string" - }, - "description": "The attributes to save to the dataset." - }, - "dataset_id": { - "type": "string", - "description": "The ID of the dataset to save the spans to." - }, - "max_depth": { - "type": "integer", - "description": "The maximum depth of the tree." - } - }, - "additionalProperties": false, - "required": [ - "attribute_filters", - "attributes_to_save", - "dataset_id" - ], - "title": "SaveSpansToDatasetRequest" - }, - "GetSpanTreeRequest": { - "type": "object", - "properties": { - "attributes_to_return": { - "type": "array", - "items": { - "type": "string" - }, - "description": "The attributes to return in the tree." - }, - "max_depth": { - "type": "integer", - "description": "The maximum depth of the tree." - } - }, - "additionalProperties": false, - "title": "GetSpanTreeRequest" - }, - "SpanStatus": { - "type": "string", - "enum": [ - "ok", - "error" - ], - "title": "SpanStatus", - "description": "The status of a span indicating whether it completed successfully or with an error." - }, - "SpanWithStatus": { - "type": "object", - "properties": { - "span_id": { - "type": "string", - "description": "Unique identifier for the span" - }, - "trace_id": { - "type": "string", - "description": "Unique identifier for the trace this span belongs to" - }, - "parent_span_id": { - "type": "string", - "description": "(Optional) Unique identifier for the parent span, if this is a child span" - }, - "name": { - "type": "string", - "description": "Human-readable name describing the operation this span represents" - }, - "start_time": { - "type": "string", - "format": "date-time", - "description": "Timestamp when the operation began" - }, - "end_time": { - "type": "string", - "format": "date-time", - "description": "(Optional) Timestamp when the operation finished, if completed" - }, - "attributes": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - }, - "description": "(Optional) Key-value pairs containing additional metadata about the span" - }, - "status": { - "$ref": "#/components/schemas/SpanStatus", - "description": "(Optional) The current status of the span" - } - }, - "additionalProperties": false, - "required": [ - "span_id", - "trace_id", - "name", - "start_time" - ], - "title": "SpanWithStatus", - "description": "A span that includes status information." - }, - "QuerySpanTreeResponse": { - "type": "object", - "properties": { - "data": { - "type": "object", - "additionalProperties": { - "$ref": "#/components/schemas/SpanWithStatus" - }, - "description": "Dictionary mapping span IDs to spans with status information" - } - }, - "additionalProperties": false, - "required": [ - "data" - ], - "title": "QuerySpanTreeResponse", - "description": "Response containing a tree structure of spans." - }, - "QueryTracesRequest": { - "type": "object", - "properties": { - "attribute_filters": { - "type": "array", - "items": { - "$ref": "#/components/schemas/QueryCondition" - }, - "description": "The attribute filters to apply to the traces." - }, - "limit": { - "type": "integer", - "description": "The limit of traces to return." - }, - "offset": { - "type": "integer", - "description": "The offset of the traces to return." 
- }, - "order_by": { - "type": "array", - "items": { - "type": "string" - }, - "description": "The order by of the traces to return." - } - }, - "additionalProperties": false, - "title": "QueryTracesRequest" - }, - "Trace": { - "type": "object", - "properties": { - "trace_id": { - "type": "string", - "description": "Unique identifier for the trace" - }, - "root_span_id": { - "type": "string", - "description": "Unique identifier for the root span that started this trace" - }, - "start_time": { - "type": "string", - "format": "date-time", - "description": "Timestamp when the trace began" - }, - "end_time": { - "type": "string", - "format": "date-time", - "description": "(Optional) Timestamp when the trace finished, if completed" - } - }, - "additionalProperties": false, - "required": [ - "trace_id", - "root_span_id", - "start_time" - ], - "title": "Trace", - "description": "A trace representing the complete execution path of a request across multiple operations." - }, - "QueryTracesResponse": { - "type": "object", - "properties": { - "data": { - "type": "array", - "items": { - "$ref": "#/components/schemas/Trace" - }, - "description": "List of traces matching the query criteria" - } - }, - "additionalProperties": false, - "required": [ - "data" - ], - "title": "QueryTracesResponse", - "description": "Response containing a list of traces." } }, "responses": { @@ -6416,10 +5524,6 @@ { "name": "PostTraining (Coming Soon)", "description": "" - }, - { - "name": "Telemetry", - "description": "" } ], "x-tagGroups": [ @@ -6431,8 +5535,7 @@ "DatasetIO", "Datasets", "Eval", - "PostTraining (Coming Soon)", - "Telemetry" + "PostTraining (Coming Soon)" ] } ] diff --git a/docs/static/experimental-llama-stack-spec.yaml b/docs/static/experimental-llama-stack-spec.yaml index a08c0cc87..7ee5a6cdf 100644 --- a/docs/static/experimental-llama-stack-spec.yaml +++ b/docs/static/experimental-llama-stack-spec.yaml @@ -1224,238 +1224,6 @@ paths: $ref: '#/components/schemas/SupervisedFineTuneRequest' required: true deprecated: false - /v1alpha/telemetry/metrics/{metric_name}: - post: - responses: - '200': - description: A QueryMetricsResponse. - content: - application/json: - schema: - $ref: '#/components/schemas/QueryMetricsResponse' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Telemetry - summary: Query metrics. - description: Query metrics. - parameters: - - name: metric_name - in: path - description: The name of the metric to query. - required: true - schema: - type: string - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/QueryMetricsRequest' - required: true - deprecated: false - /v1alpha/telemetry/spans: - post: - responses: - '200': - description: A QuerySpansResponse. - content: - application/json: - schema: - $ref: '#/components/schemas/QuerySpansResponse' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Telemetry - summary: Query spans. - description: Query spans. 
- parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/QuerySpansRequest' - required: true - deprecated: false - /v1alpha/telemetry/spans/export: - post: - responses: - '200': - description: OK - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Telemetry - summary: Save spans to a dataset. - description: Save spans to a dataset. - parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/SaveSpansToDatasetRequest' - required: true - deprecated: false - /v1alpha/telemetry/spans/{span_id}/tree: - post: - responses: - '200': - description: A QuerySpanTreeResponse. - content: - application/json: - schema: - $ref: '#/components/schemas/QuerySpanTreeResponse' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Telemetry - summary: Get a span tree by its ID. - description: Get a span tree by its ID. - parameters: - - name: span_id - in: path - description: The ID of the span to get the tree from. - required: true - schema: - type: string - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/GetSpanTreeRequest' - required: true - deprecated: false - /v1alpha/telemetry/traces: - post: - responses: - '200': - description: A QueryTracesResponse. - content: - application/json: - schema: - $ref: '#/components/schemas/QueryTracesResponse' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Telemetry - summary: Query traces. - description: Query traces. - parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/QueryTracesRequest' - required: true - deprecated: false - /v1alpha/telemetry/traces/{trace_id}: - get: - responses: - '200': - description: A Trace. - content: - application/json: - schema: - $ref: '#/components/schemas/Trace' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Telemetry - summary: Get a trace by its ID. - description: Get a trace by its ID. - parameters: - - name: trace_id - in: path - description: The ID of the trace to get. - required: true - schema: - type: string - deprecated: false - /v1alpha/telemetry/traces/{trace_id}/spans/{span_id}: - get: - responses: - '200': - description: A Span. - content: - application/json: - schema: - $ref: '#/components/schemas/Span' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Telemetry - summary: Get a span by its ID. - description: Get a span by its ID. - parameters: - - name: trace_id - in: path - description: >- - The ID of the trace to get the span from. 
- required: true - schema: - type: string - - name: span_id - in: path - description: The ID of the span to get. - required: true - schema: - type: string - deprecated: false jsonSchemaDialect: >- https://json-schema.org/draft/2020-12/schema components: @@ -4249,434 +4017,6 @@ components: - hyperparam_search_config - logger_config title: SupervisedFineTuneRequest - QueryMetricsRequest: - type: object - properties: - start_time: - type: integer - description: The start time of the metric to query. - end_time: - type: integer - description: The end time of the metric to query. - granularity: - type: string - description: The granularity of the metric to query. - query_type: - type: string - enum: - - range - - instant - description: The type of query to perform. - label_matchers: - type: array - items: - type: object - properties: - name: - type: string - description: The name of the label to match - value: - type: string - description: The value to match against - operator: - type: string - enum: - - '=' - - '!=' - - =~ - - '!~' - description: >- - The comparison operator to use for matching - default: '=' - additionalProperties: false - required: - - name - - value - - operator - title: MetricLabelMatcher - description: >- - A matcher for filtering metrics by label values. - description: >- - The label matchers to apply to the metric. - additionalProperties: false - required: - - start_time - - query_type - title: QueryMetricsRequest - MetricDataPoint: - type: object - properties: - timestamp: - type: integer - description: >- - Unix timestamp when the metric value was recorded - value: - type: number - description: >- - The numeric value of the metric at this timestamp - unit: - type: string - additionalProperties: false - required: - - timestamp - - value - - unit - title: MetricDataPoint - description: >- - A single data point in a metric time series. - MetricLabel: - type: object - properties: - name: - type: string - description: The name of the label - value: - type: string - description: The value of the label - additionalProperties: false - required: - - name - - value - title: MetricLabel - description: A label associated with a metric. - MetricSeries: - type: object - properties: - metric: - type: string - description: The name of the metric - labels: - type: array - items: - $ref: '#/components/schemas/MetricLabel' - description: >- - List of labels associated with this metric series - values: - type: array - items: - $ref: '#/components/schemas/MetricDataPoint' - description: >- - List of data points in chronological order - additionalProperties: false - required: - - metric - - labels - - values - title: MetricSeries - description: A time series of metric data points. - QueryMetricsResponse: - type: object - properties: - data: - type: array - items: - $ref: '#/components/schemas/MetricSeries' - description: >- - List of metric series matching the query criteria - additionalProperties: false - required: - - data - title: QueryMetricsResponse - description: >- - Response containing metric time series data. 
- QueryCondition: - type: object - properties: - key: - type: string - description: The attribute key to filter on - op: - $ref: '#/components/schemas/QueryConditionOp' - description: The comparison operator to apply - value: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: The value to compare against - additionalProperties: false - required: - - key - - op - - value - title: QueryCondition - description: A condition for filtering query results. - QueryConditionOp: - type: string - enum: - - eq - - ne - - gt - - lt - title: QueryConditionOp - description: >- - Comparison operators for query conditions. - QuerySpansRequest: - type: object - properties: - attribute_filters: - type: array - items: - $ref: '#/components/schemas/QueryCondition' - description: >- - The attribute filters to apply to the spans. - attributes_to_return: - type: array - items: - type: string - description: The attributes to return in the spans. - max_depth: - type: integer - description: The maximum depth of the tree. - additionalProperties: false - required: - - attribute_filters - - attributes_to_return - title: QuerySpansRequest - Span: - type: object - properties: - span_id: - type: string - description: Unique identifier for the span - trace_id: - type: string - description: >- - Unique identifier for the trace this span belongs to - parent_span_id: - type: string - description: >- - (Optional) Unique identifier for the parent span, if this is a child span - name: - type: string - description: >- - Human-readable name describing the operation this span represents - start_time: - type: string - format: date-time - description: Timestamp when the operation began - end_time: - type: string - format: date-time - description: >- - (Optional) Timestamp when the operation finished, if completed - attributes: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - (Optional) Key-value pairs containing additional metadata about the span - additionalProperties: false - required: - - span_id - - trace_id - - name - - start_time - title: Span - description: >- - A span representing a single operation within a trace. - QuerySpansResponse: - type: object - properties: - data: - type: array - items: - $ref: '#/components/schemas/Span' - description: >- - List of spans matching the query criteria - additionalProperties: false - required: - - data - title: QuerySpansResponse - description: Response containing a list of spans. - SaveSpansToDatasetRequest: - type: object - properties: - attribute_filters: - type: array - items: - $ref: '#/components/schemas/QueryCondition' - description: >- - The attribute filters to apply to the spans. - attributes_to_save: - type: array - items: - type: string - description: The attributes to save to the dataset. - dataset_id: - type: string - description: >- - The ID of the dataset to save the spans to. - max_depth: - type: integer - description: The maximum depth of the tree. - additionalProperties: false - required: - - attribute_filters - - attributes_to_save - - dataset_id - title: SaveSpansToDatasetRequest - GetSpanTreeRequest: - type: object - properties: - attributes_to_return: - type: array - items: - type: string - description: The attributes to return in the tree. - max_depth: - type: integer - description: The maximum depth of the tree. 
- additionalProperties: false - title: GetSpanTreeRequest - SpanStatus: - type: string - enum: - - ok - - error - title: SpanStatus - description: >- - The status of a span indicating whether it completed successfully or with - an error. - SpanWithStatus: - type: object - properties: - span_id: - type: string - description: Unique identifier for the span - trace_id: - type: string - description: >- - Unique identifier for the trace this span belongs to - parent_span_id: - type: string - description: >- - (Optional) Unique identifier for the parent span, if this is a child span - name: - type: string - description: >- - Human-readable name describing the operation this span represents - start_time: - type: string - format: date-time - description: Timestamp when the operation began - end_time: - type: string - format: date-time - description: >- - (Optional) Timestamp when the operation finished, if completed - attributes: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - (Optional) Key-value pairs containing additional metadata about the span - status: - $ref: '#/components/schemas/SpanStatus' - description: >- - (Optional) The current status of the span - additionalProperties: false - required: - - span_id - - trace_id - - name - - start_time - title: SpanWithStatus - description: A span that includes status information. - QuerySpanTreeResponse: - type: object - properties: - data: - type: object - additionalProperties: - $ref: '#/components/schemas/SpanWithStatus' - description: >- - Dictionary mapping span IDs to spans with status information - additionalProperties: false - required: - - data - title: QuerySpanTreeResponse - description: >- - Response containing a tree structure of spans. - QueryTracesRequest: - type: object - properties: - attribute_filters: - type: array - items: - $ref: '#/components/schemas/QueryCondition' - description: >- - The attribute filters to apply to the traces. - limit: - type: integer - description: The limit of traces to return. - offset: - type: integer - description: The offset of the traces to return. - order_by: - type: array - items: - type: string - description: The order by of the traces to return. - additionalProperties: false - title: QueryTracesRequest - Trace: - type: object - properties: - trace_id: - type: string - description: Unique identifier for the trace - root_span_id: - type: string - description: >- - Unique identifier for the root span that started this trace - start_time: - type: string - format: date-time - description: Timestamp when the trace began - end_time: - type: string - format: date-time - description: >- - (Optional) Timestamp when the trace finished, if completed - additionalProperties: false - required: - - trace_id - - root_span_id - - start_time - title: Trace - description: >- - A trace representing the complete execution path of a request across multiple - operations. - QueryTracesResponse: - type: object - properties: - data: - type: array - items: - $ref: '#/components/schemas/Trace' - description: >- - List of traces matching the query criteria - additionalProperties: false - required: - - data - title: QueryTracesResponse - description: Response containing a list of traces. responses: BadRequest400: description: The request was invalid or malformed @@ -4784,8 +4124,6 @@ tags: Llama Stack Evaluation API for running evaluations on model and agent candidates. 
- name: PostTraining (Coming Soon) description: '' - - name: Telemetry - description: '' x-tagGroups: - name: Operations tags: @@ -4795,4 +4133,3 @@ x-tagGroups: - Datasets - Eval - PostTraining (Coming Soon) - - Telemetry diff --git a/docs/static/llama-stack-spec.html b/docs/static/llama-stack-spec.html index 9cd526176..8ac30d548 100644 --- a/docs/static/llama-stack-spec.html +++ b/docs/static/llama-stack-spec.html @@ -2525,44 +2525,6 @@ "deprecated": false } }, - "/v1/telemetry/events": { - "post": { - "responses": { - "200": { - "description": "OK" - }, - "400": { - "$ref": "#/components/responses/BadRequest400" - }, - "429": { - "$ref": "#/components/responses/TooManyRequests429" - }, - "500": { - "$ref": "#/components/responses/InternalServerError500" - }, - "default": { - "$ref": "#/components/responses/DefaultError" - } - }, - "tags": [ - "Telemetry" - ], - "summary": "Log an event.", - "description": "Log an event.", - "parameters": [], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/LogEventRequest" - } - } - }, - "required": true - }, - "deprecated": false - } - }, "/v1/tool-runtime/invoke": { "post": { "responses": { @@ -10364,354 +10326,6 @@ "title": "SyntheticDataGenerationResponse", "description": "Response from the synthetic data generation. Batch of (prompt, response, score) tuples that pass the threshold." }, - "Event": { - "oneOf": [ - { - "$ref": "#/components/schemas/UnstructuredLogEvent" - }, - { - "$ref": "#/components/schemas/MetricEvent" - }, - { - "$ref": "#/components/schemas/StructuredLogEvent" - } - ], - "discriminator": { - "propertyName": "type", - "mapping": { - "unstructured_log": "#/components/schemas/UnstructuredLogEvent", - "metric": "#/components/schemas/MetricEvent", - "structured_log": "#/components/schemas/StructuredLogEvent" - } - } - }, - "EventType": { - "type": "string", - "enum": [ - "unstructured_log", - "structured_log", - "metric" - ], - "title": "EventType", - "description": "The type of telemetry event being logged." - }, - "LogSeverity": { - "type": "string", - "enum": [ - "verbose", - "debug", - "info", - "warn", - "error", - "critical" - ], - "title": "LogSeverity", - "description": "The severity level of a log message." 
- }, - "MetricEvent": { - "type": "object", - "properties": { - "trace_id": { - "type": "string", - "description": "Unique identifier for the trace this event belongs to" - }, - "span_id": { - "type": "string", - "description": "Unique identifier for the span this event belongs to" - }, - "timestamp": { - "type": "string", - "format": "date-time", - "description": "Timestamp when the event occurred" - }, - "attributes": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "string" - }, - { - "type": "integer" - }, - { - "type": "number" - }, - { - "type": "boolean" - }, - { - "type": "null" - } - ] - }, - "description": "(Optional) Key-value pairs containing additional metadata about the event" - }, - "type": { - "$ref": "#/components/schemas/EventType", - "const": "metric", - "default": "metric", - "description": "Event type identifier set to METRIC" - }, - "metric": { - "type": "string", - "description": "The name of the metric being measured" - }, - "value": { - "oneOf": [ - { - "type": "integer" - }, - { - "type": "number" - } - ], - "description": "The numeric value of the metric measurement" - }, - "unit": { - "type": "string", - "description": "The unit of measurement for the metric value" - } - }, - "additionalProperties": false, - "required": [ - "trace_id", - "span_id", - "timestamp", - "type", - "metric", - "value", - "unit" - ], - "title": "MetricEvent", - "description": "A metric event containing a measured value." - }, - "SpanEndPayload": { - "type": "object", - "properties": { - "type": { - "$ref": "#/components/schemas/StructuredLogType", - "const": "span_end", - "default": "span_end", - "description": "Payload type identifier set to SPAN_END" - }, - "status": { - "$ref": "#/components/schemas/SpanStatus", - "description": "The final status of the span indicating success or failure" - } - }, - "additionalProperties": false, - "required": [ - "type", - "status" - ], - "title": "SpanEndPayload", - "description": "Payload for a span end event." - }, - "SpanStartPayload": { - "type": "object", - "properties": { - "type": { - "$ref": "#/components/schemas/StructuredLogType", - "const": "span_start", - "default": "span_start", - "description": "Payload type identifier set to SPAN_START" - }, - "name": { - "type": "string", - "description": "Human-readable name describing the operation this span represents" - }, - "parent_span_id": { - "type": "string", - "description": "(Optional) Unique identifier for the parent span, if this is a child span" - } - }, - "additionalProperties": false, - "required": [ - "type", - "name" - ], - "title": "SpanStartPayload", - "description": "Payload for a span start event." - }, - "SpanStatus": { - "type": "string", - "enum": [ - "ok", - "error" - ], - "title": "SpanStatus", - "description": "The status of a span indicating whether it completed successfully or with an error." 
- }, - "StructuredLogEvent": { - "type": "object", - "properties": { - "trace_id": { - "type": "string", - "description": "Unique identifier for the trace this event belongs to" - }, - "span_id": { - "type": "string", - "description": "Unique identifier for the span this event belongs to" - }, - "timestamp": { - "type": "string", - "format": "date-time", - "description": "Timestamp when the event occurred" - }, - "attributes": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "string" - }, - { - "type": "integer" - }, - { - "type": "number" - }, - { - "type": "boolean" - }, - { - "type": "null" - } - ] - }, - "description": "(Optional) Key-value pairs containing additional metadata about the event" - }, - "type": { - "$ref": "#/components/schemas/EventType", - "const": "structured_log", - "default": "structured_log", - "description": "Event type identifier set to STRUCTURED_LOG" - }, - "payload": { - "oneOf": [ - { - "$ref": "#/components/schemas/SpanStartPayload" - }, - { - "$ref": "#/components/schemas/SpanEndPayload" - } - ], - "discriminator": { - "propertyName": "type", - "mapping": { - "span_start": "#/components/schemas/SpanStartPayload", - "span_end": "#/components/schemas/SpanEndPayload" - } - }, - "description": "The structured payload data for the log event" - } - }, - "additionalProperties": false, - "required": [ - "trace_id", - "span_id", - "timestamp", - "type", - "payload" - ], - "title": "StructuredLogEvent", - "description": "A structured log event containing typed payload data." - }, - "StructuredLogType": { - "type": "string", - "enum": [ - "span_start", - "span_end" - ], - "title": "StructuredLogType", - "description": "The type of structured log event payload." - }, - "UnstructuredLogEvent": { - "type": "object", - "properties": { - "trace_id": { - "type": "string", - "description": "Unique identifier for the trace this event belongs to" - }, - "span_id": { - "type": "string", - "description": "Unique identifier for the span this event belongs to" - }, - "timestamp": { - "type": "string", - "format": "date-time", - "description": "Timestamp when the event occurred" - }, - "attributes": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "string" - }, - { - "type": "integer" - }, - { - "type": "number" - }, - { - "type": "boolean" - }, - { - "type": "null" - } - ] - }, - "description": "(Optional) Key-value pairs containing additional metadata about the event" - }, - "type": { - "$ref": "#/components/schemas/EventType", - "const": "unstructured_log", - "default": "unstructured_log", - "description": "Event type identifier set to UNSTRUCTURED_LOG" - }, - "message": { - "type": "string", - "description": "The log message text" - }, - "severity": { - "$ref": "#/components/schemas/LogSeverity", - "description": "The severity level of the log message" - } - }, - "additionalProperties": false, - "required": [ - "trace_id", - "span_id", - "timestamp", - "type", - "message", - "severity" - ], - "title": "UnstructuredLogEvent", - "description": "An unstructured log event containing a simple text message." - }, - "LogEventRequest": { - "type": "object", - "properties": { - "event": { - "$ref": "#/components/schemas/Event", - "description": "The event to log." - }, - "ttl_seconds": { - "type": "integer", - "description": "The time to live of the event." 
- } - }, - "additionalProperties": false, - "required": [ - "event", - "ttl_seconds" - ], - "title": "LogEventRequest" - }, "InvokeToolRequest": { "type": "object", "properties": { @@ -12962,10 +12576,6 @@ "name": "SyntheticDataGeneration (Coming Soon)", "description": "" }, - { - "name": "Telemetry", - "description": "" - }, { "name": "ToolGroups", "description": "" @@ -13000,7 +12610,6 @@ "ScoringFunctions", "Shields", "SyntheticDataGeneration (Coming Soon)", - "Telemetry", "ToolGroups", "ToolRuntime", "VectorDBs", diff --git a/docs/static/llama-stack-spec.yaml b/docs/static/llama-stack-spec.yaml index 66ce8e38a..6b6d8a83d 100644 --- a/docs/static/llama-stack-spec.yaml +++ b/docs/static/llama-stack-spec.yaml @@ -1944,33 +1944,6 @@ paths: $ref: '#/components/schemas/SyntheticDataGenerateRequest' required: true deprecated: false - /v1/telemetry/events: - post: - responses: - '200': - description: OK - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Telemetry - summary: Log an event. - description: Log an event. - parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/LogEventRequest' - required: true - deprecated: false /v1/tool-runtime/invoke: post: responses: @@ -7840,267 +7813,6 @@ components: description: >- Response from the synthetic data generation. Batch of (prompt, response, score) tuples that pass the threshold. - Event: - oneOf: - - $ref: '#/components/schemas/UnstructuredLogEvent' - - $ref: '#/components/schemas/MetricEvent' - - $ref: '#/components/schemas/StructuredLogEvent' - discriminator: - propertyName: type - mapping: - unstructured_log: '#/components/schemas/UnstructuredLogEvent' - metric: '#/components/schemas/MetricEvent' - structured_log: '#/components/schemas/StructuredLogEvent' - EventType: - type: string - enum: - - unstructured_log - - structured_log - - metric - title: EventType - description: >- - The type of telemetry event being logged. - LogSeverity: - type: string - enum: - - verbose - - debug - - info - - warn - - error - - critical - title: LogSeverity - description: The severity level of a log message. - MetricEvent: - type: object - properties: - trace_id: - type: string - description: >- - Unique identifier for the trace this event belongs to - span_id: - type: string - description: >- - Unique identifier for the span this event belongs to - timestamp: - type: string - format: date-time - description: Timestamp when the event occurred - attributes: - type: object - additionalProperties: - oneOf: - - type: string - - type: integer - - type: number - - type: boolean - - type: 'null' - description: >- - (Optional) Key-value pairs containing additional metadata about the event - type: - $ref: '#/components/schemas/EventType' - const: metric - default: metric - description: Event type identifier set to METRIC - metric: - type: string - description: The name of the metric being measured - value: - oneOf: - - type: integer - - type: number - description: >- - The numeric value of the metric measurement - unit: - type: string - description: >- - The unit of measurement for the metric value - additionalProperties: false - required: - - trace_id - - span_id - - timestamp - - type - - metric - - value - - unit - title: MetricEvent - description: >- - A metric event containing a measured value. 
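The `Event` union and `MetricEvent` schema deleted above fed the `POST /v1/telemetry/events` route that this patch also removes. A minimal sketch of a metric submission under those schemas; the server address, trace/span IDs, and metric name are assumptions:

```python
# Minimal sketch of logging a MetricEvent through the removed
# POST /v1/telemetry/events route (a LogEventRequest wrapping the Event union).
# ASSUMPTIONS: server address, trace/span IDs, and metric name are illustrative.
from datetime import datetime, timezone

import requests

event = {
    "type": "metric",                                # Event discriminator value
    "trace_id": "0af7651916cd43dd8448eb211c80319c",  # hypothetical trace ID
    "span_id": "b7ad6b7169203331",                   # hypothetical span ID
    "timestamp": datetime.now(timezone.utc).isoformat(),
    "metric": "completion_tokens",                   # assumed metric name
    "value": 128,
    "unit": "tokens",
}
requests.post(
    "http://localhost:8321/v1/telemetry/events",     # assumed local server
    json={"event": event, "ttl_seconds": 3600},      # both fields are required
).raise_for_status()
```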
- SpanEndPayload: - type: object - properties: - type: - $ref: '#/components/schemas/StructuredLogType' - const: span_end - default: span_end - description: Payload type identifier set to SPAN_END - status: - $ref: '#/components/schemas/SpanStatus' - description: >- - The final status of the span indicating success or failure - additionalProperties: false - required: - - type - - status - title: SpanEndPayload - description: Payload for a span end event. - SpanStartPayload: - type: object - properties: - type: - $ref: '#/components/schemas/StructuredLogType' - const: span_start - default: span_start - description: >- - Payload type identifier set to SPAN_START - name: - type: string - description: >- - Human-readable name describing the operation this span represents - parent_span_id: - type: string - description: >- - (Optional) Unique identifier for the parent span, if this is a child span - additionalProperties: false - required: - - type - - name - title: SpanStartPayload - description: Payload for a span start event. - SpanStatus: - type: string - enum: - - ok - - error - title: SpanStatus - description: >- - The status of a span indicating whether it completed successfully or with - an error. - StructuredLogEvent: - type: object - properties: - trace_id: - type: string - description: >- - Unique identifier for the trace this event belongs to - span_id: - type: string - description: >- - Unique identifier for the span this event belongs to - timestamp: - type: string - format: date-time - description: Timestamp when the event occurred - attributes: - type: object - additionalProperties: - oneOf: - - type: string - - type: integer - - type: number - - type: boolean - - type: 'null' - description: >- - (Optional) Key-value pairs containing additional metadata about the event - type: - $ref: '#/components/schemas/EventType' - const: structured_log - default: structured_log - description: >- - Event type identifier set to STRUCTURED_LOG - payload: - oneOf: - - $ref: '#/components/schemas/SpanStartPayload' - - $ref: '#/components/schemas/SpanEndPayload' - discriminator: - propertyName: type - mapping: - span_start: '#/components/schemas/SpanStartPayload' - span_end: '#/components/schemas/SpanEndPayload' - description: >- - The structured payload data for the log event - additionalProperties: false - required: - - trace_id - - span_id - - timestamp - - type - - payload - title: StructuredLogEvent - description: >- - A structured log event containing typed payload data. - StructuredLogType: - type: string - enum: - - span_start - - span_end - title: StructuredLogType - description: >- - The type of structured log event payload. 
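Structured log events came in `span_start`/`span_end` pairs, per the payload schemas above. A sketch of one such pair under stated assumptions (IDs, span name, and timestamps are illustrative):

```python
# Minimal sketch of the span_start/span_end pair a StructuredLogEvent carried,
# per the SpanStartPayload/SpanEndPayload/SpanStatus schemas above.
# ASSUMPTIONS: IDs, span name, and timestamps are illustrative.
span_start_event = {
    "type": "structured_log",
    "trace_id": "0af7651916cd43dd8448eb211c80319c",
    "span_id": "b7ad6b7169203331",
    "timestamp": "2025-10-07T19:16:51+00:00",
    "payload": {"type": "span_start", "name": "inference.chat_completion"},
}
span_end_event = {
    **span_start_event,                               # same trace/span IDs
    "timestamp": "2025-10-07T19:16:53+00:00",
    "payload": {"type": "span_end", "status": "ok"},  # SpanStatus: ok | error
}
```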
- UnstructuredLogEvent: - type: object - properties: - trace_id: - type: string - description: >- - Unique identifier for the trace this event belongs to - span_id: - type: string - description: >- - Unique identifier for the span this event belongs to - timestamp: - type: string - format: date-time - description: Timestamp when the event occurred - attributes: - type: object - additionalProperties: - oneOf: - - type: string - - type: integer - - type: number - - type: boolean - - type: 'null' - description: >- - (Optional) Key-value pairs containing additional metadata about the event - type: - $ref: '#/components/schemas/EventType' - const: unstructured_log - default: unstructured_log - description: >- - Event type identifier set to UNSTRUCTURED_LOG - message: - type: string - description: The log message text - severity: - $ref: '#/components/schemas/LogSeverity' - description: The severity level of the log message - additionalProperties: false - required: - - trace_id - - span_id - - timestamp - - type - - message - - severity - title: UnstructuredLogEvent - description: >- - An unstructured log event containing a simple text message. - LogEventRequest: - type: object - properties: - event: - $ref: '#/components/schemas/Event' - description: The event to log. - ttl_seconds: - type: integer - description: The time to live of the event. - additionalProperties: false - required: - - event - - ttl_seconds - title: LogEventRequest InvokeToolRequest: type: object properties: @@ -9833,8 +9545,6 @@ tags: description: '' - name: SyntheticDataGeneration (Coming Soon) description: '' - - name: Telemetry - description: '' - name: ToolGroups description: '' - name: ToolRuntime @@ -9859,7 +9569,6 @@ x-tagGroups: - ScoringFunctions - Shields - SyntheticDataGeneration (Coming Soon) - - Telemetry - ToolGroups - ToolRuntime - VectorDBs diff --git a/docs/static/stainless-llama-stack-spec.html b/docs/static/stainless-llama-stack-spec.html index 3478d3338..2bda06ea5 100644 --- a/docs/static/stainless-llama-stack-spec.html +++ b/docs/static/stainless-llama-stack-spec.html @@ -2525,44 +2525,6 @@ "deprecated": false } }, - "/v1/telemetry/events": { - "post": { - "responses": { - "200": { - "description": "OK" - }, - "400": { - "$ref": "#/components/responses/BadRequest400" - }, - "429": { - "$ref": "#/components/responses/TooManyRequests429" - }, - "500": { - "$ref": "#/components/responses/InternalServerError500" - }, - "default": { - "$ref": "#/components/responses/DefaultError" - } - }, - "tags": [ - "Telemetry" - ], - "summary": "Log an event.", - "description": "Log an event.", - "parameters": [], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/LogEventRequest" - } - } - }, - "required": true - }, - "deprecated": false - } - }, "/v1/tool-runtime/invoke": { "post": { "responses": { @@ -5873,343 +5835,6 @@ }, "deprecated": false } - }, - "/v1alpha/telemetry/metrics/{metric_name}": { - "post": { - "responses": { - "200": { - "description": "A QueryMetricsResponse.", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/QueryMetricsResponse" - } - } - } - }, - "400": { - "$ref": "#/components/responses/BadRequest400" - }, - "429": { - "$ref": "#/components/responses/TooManyRequests429" - }, - "500": { - "$ref": "#/components/responses/InternalServerError500" - }, - "default": { - "$ref": "#/components/responses/DefaultError" - } - }, - "tags": [ - "Telemetry" - ], - "summary": "Query metrics.", - "description": "Query 
metrics.", - "parameters": [ - { - "name": "metric_name", - "in": "path", - "description": "The name of the metric to query.", - "required": true, - "schema": { - "type": "string" - } - } - ], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/QueryMetricsRequest" - } - } - }, - "required": true - }, - "deprecated": false - } - }, - "/v1alpha/telemetry/spans": { - "post": { - "responses": { - "200": { - "description": "A QuerySpansResponse.", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/QuerySpansResponse" - } - } - } - }, - "400": { - "$ref": "#/components/responses/BadRequest400" - }, - "429": { - "$ref": "#/components/responses/TooManyRequests429" - }, - "500": { - "$ref": "#/components/responses/InternalServerError500" - }, - "default": { - "$ref": "#/components/responses/DefaultError" - } - }, - "tags": [ - "Telemetry" - ], - "summary": "Query spans.", - "description": "Query spans.", - "parameters": [], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/QuerySpansRequest" - } - } - }, - "required": true - }, - "deprecated": false - } - }, - "/v1alpha/telemetry/spans/export": { - "post": { - "responses": { - "200": { - "description": "OK" - }, - "400": { - "$ref": "#/components/responses/BadRequest400" - }, - "429": { - "$ref": "#/components/responses/TooManyRequests429" - }, - "500": { - "$ref": "#/components/responses/InternalServerError500" - }, - "default": { - "$ref": "#/components/responses/DefaultError" - } - }, - "tags": [ - "Telemetry" - ], - "summary": "Save spans to a dataset.", - "description": "Save spans to a dataset.", - "parameters": [], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/SaveSpansToDatasetRequest" - } - } - }, - "required": true - }, - "deprecated": false - } - }, - "/v1alpha/telemetry/spans/{span_id}/tree": { - "post": { - "responses": { - "200": { - "description": "A QuerySpanTreeResponse.", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/QuerySpanTreeResponse" - } - } - } - }, - "400": { - "$ref": "#/components/responses/BadRequest400" - }, - "429": { - "$ref": "#/components/responses/TooManyRequests429" - }, - "500": { - "$ref": "#/components/responses/InternalServerError500" - }, - "default": { - "$ref": "#/components/responses/DefaultError" - } - }, - "tags": [ - "Telemetry" - ], - "summary": "Get a span tree by its ID.", - "description": "Get a span tree by its ID.", - "parameters": [ - { - "name": "span_id", - "in": "path", - "description": "The ID of the span to get the tree from.", - "required": true, - "schema": { - "type": "string" - } - } - ], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/GetSpanTreeRequest" - } - } - }, - "required": true - }, - "deprecated": false - } - }, - "/v1alpha/telemetry/traces": { - "post": { - "responses": { - "200": { - "description": "A QueryTracesResponse.", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/QueryTracesResponse" - } - } - } - }, - "400": { - "$ref": "#/components/responses/BadRequest400" - }, - "429": { - "$ref": "#/components/responses/TooManyRequests429" - }, - "500": { - "$ref": "#/components/responses/InternalServerError500" - }, - "default": { - "$ref": "#/components/responses/DefaultError" - } - }, - "tags": [ - "Telemetry" - ], - "summary": "Query traces.", - 
"description": "Query traces.", - "parameters": [], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/QueryTracesRequest" - } - } - }, - "required": true - }, - "deprecated": false - } - }, - "/v1alpha/telemetry/traces/{trace_id}": { - "get": { - "responses": { - "200": { - "description": "A Trace.", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/Trace" - } - } - } - }, - "400": { - "$ref": "#/components/responses/BadRequest400" - }, - "429": { - "$ref": "#/components/responses/TooManyRequests429" - }, - "500": { - "$ref": "#/components/responses/InternalServerError500" - }, - "default": { - "$ref": "#/components/responses/DefaultError" - } - }, - "tags": [ - "Telemetry" - ], - "summary": "Get a trace by its ID.", - "description": "Get a trace by its ID.", - "parameters": [ - { - "name": "trace_id", - "in": "path", - "description": "The ID of the trace to get.", - "required": true, - "schema": { - "type": "string" - } - } - ], - "deprecated": false - } - }, - "/v1alpha/telemetry/traces/{trace_id}/spans/{span_id}": { - "get": { - "responses": { - "200": { - "description": "A Span.", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/Span" - } - } - } - }, - "400": { - "$ref": "#/components/responses/BadRequest400" - }, - "429": { - "$ref": "#/components/responses/TooManyRequests429" - }, - "500": { - "$ref": "#/components/responses/InternalServerError500" - }, - "default": { - "$ref": "#/components/responses/DefaultError" - } - }, - "tags": [ - "Telemetry" - ], - "summary": "Get a span by its ID.", - "description": "Get a span by its ID.", - "parameters": [ - { - "name": "trace_id", - "in": "path", - "description": "The ID of the trace to get the span from.", - "required": true, - "schema": { - "type": "string" - } - }, - { - "name": "span_id", - "in": "path", - "description": "The ID of the span to get.", - "required": true, - "schema": { - "type": "string" - } - } - ], - "deprecated": false - } } }, "jsonSchemaDialect": "https://json-schema.org/draft/2020-12/schema", @@ -12373,354 +11998,6 @@ "title": "SyntheticDataGenerationResponse", "description": "Response from the synthetic data generation. Batch of (prompt, response, score) tuples that pass the threshold." }, - "Event": { - "oneOf": [ - { - "$ref": "#/components/schemas/UnstructuredLogEvent" - }, - { - "$ref": "#/components/schemas/MetricEvent" - }, - { - "$ref": "#/components/schemas/StructuredLogEvent" - } - ], - "discriminator": { - "propertyName": "type", - "mapping": { - "unstructured_log": "#/components/schemas/UnstructuredLogEvent", - "metric": "#/components/schemas/MetricEvent", - "structured_log": "#/components/schemas/StructuredLogEvent" - } - } - }, - "EventType": { - "type": "string", - "enum": [ - "unstructured_log", - "structured_log", - "metric" - ], - "title": "EventType", - "description": "The type of telemetry event being logged." - }, - "LogSeverity": { - "type": "string", - "enum": [ - "verbose", - "debug", - "info", - "warn", - "error", - "critical" - ], - "title": "LogSeverity", - "description": "The severity level of a log message." 
- }, - "MetricEvent": { - "type": "object", - "properties": { - "trace_id": { - "type": "string", - "description": "Unique identifier for the trace this event belongs to" - }, - "span_id": { - "type": "string", - "description": "Unique identifier for the span this event belongs to" - }, - "timestamp": { - "type": "string", - "format": "date-time", - "description": "Timestamp when the event occurred" - }, - "attributes": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "string" - }, - { - "type": "integer" - }, - { - "type": "number" - }, - { - "type": "boolean" - }, - { - "type": "null" - } - ] - }, - "description": "(Optional) Key-value pairs containing additional metadata about the event" - }, - "type": { - "$ref": "#/components/schemas/EventType", - "const": "metric", - "default": "metric", - "description": "Event type identifier set to METRIC" - }, - "metric": { - "type": "string", - "description": "The name of the metric being measured" - }, - "value": { - "oneOf": [ - { - "type": "integer" - }, - { - "type": "number" - } - ], - "description": "The numeric value of the metric measurement" - }, - "unit": { - "type": "string", - "description": "The unit of measurement for the metric value" - } - }, - "additionalProperties": false, - "required": [ - "trace_id", - "span_id", - "timestamp", - "type", - "metric", - "value", - "unit" - ], - "title": "MetricEvent", - "description": "A metric event containing a measured value." - }, - "SpanEndPayload": { - "type": "object", - "properties": { - "type": { - "$ref": "#/components/schemas/StructuredLogType", - "const": "span_end", - "default": "span_end", - "description": "Payload type identifier set to SPAN_END" - }, - "status": { - "$ref": "#/components/schemas/SpanStatus", - "description": "The final status of the span indicating success or failure" - } - }, - "additionalProperties": false, - "required": [ - "type", - "status" - ], - "title": "SpanEndPayload", - "description": "Payload for a span end event." - }, - "SpanStartPayload": { - "type": "object", - "properties": { - "type": { - "$ref": "#/components/schemas/StructuredLogType", - "const": "span_start", - "default": "span_start", - "description": "Payload type identifier set to SPAN_START" - }, - "name": { - "type": "string", - "description": "Human-readable name describing the operation this span represents" - }, - "parent_span_id": { - "type": "string", - "description": "(Optional) Unique identifier for the parent span, if this is a child span" - } - }, - "additionalProperties": false, - "required": [ - "type", - "name" - ], - "title": "SpanStartPayload", - "description": "Payload for a span start event." - }, - "SpanStatus": { - "type": "string", - "enum": [ - "ok", - "error" - ], - "title": "SpanStatus", - "description": "The status of a span indicating whether it completed successfully or with an error." 
- }, - "StructuredLogEvent": { - "type": "object", - "properties": { - "trace_id": { - "type": "string", - "description": "Unique identifier for the trace this event belongs to" - }, - "span_id": { - "type": "string", - "description": "Unique identifier for the span this event belongs to" - }, - "timestamp": { - "type": "string", - "format": "date-time", - "description": "Timestamp when the event occurred" - }, - "attributes": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "string" - }, - { - "type": "integer" - }, - { - "type": "number" - }, - { - "type": "boolean" - }, - { - "type": "null" - } - ] - }, - "description": "(Optional) Key-value pairs containing additional metadata about the event" - }, - "type": { - "$ref": "#/components/schemas/EventType", - "const": "structured_log", - "default": "structured_log", - "description": "Event type identifier set to STRUCTURED_LOG" - }, - "payload": { - "oneOf": [ - { - "$ref": "#/components/schemas/SpanStartPayload" - }, - { - "$ref": "#/components/schemas/SpanEndPayload" - } - ], - "discriminator": { - "propertyName": "type", - "mapping": { - "span_start": "#/components/schemas/SpanStartPayload", - "span_end": "#/components/schemas/SpanEndPayload" - } - }, - "description": "The structured payload data for the log event" - } - }, - "additionalProperties": false, - "required": [ - "trace_id", - "span_id", - "timestamp", - "type", - "payload" - ], - "title": "StructuredLogEvent", - "description": "A structured log event containing typed payload data." - }, - "StructuredLogType": { - "type": "string", - "enum": [ - "span_start", - "span_end" - ], - "title": "StructuredLogType", - "description": "The type of structured log event payload." - }, - "UnstructuredLogEvent": { - "type": "object", - "properties": { - "trace_id": { - "type": "string", - "description": "Unique identifier for the trace this event belongs to" - }, - "span_id": { - "type": "string", - "description": "Unique identifier for the span this event belongs to" - }, - "timestamp": { - "type": "string", - "format": "date-time", - "description": "Timestamp when the event occurred" - }, - "attributes": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "string" - }, - { - "type": "integer" - }, - { - "type": "number" - }, - { - "type": "boolean" - }, - { - "type": "null" - } - ] - }, - "description": "(Optional) Key-value pairs containing additional metadata about the event" - }, - "type": { - "$ref": "#/components/schemas/EventType", - "const": "unstructured_log", - "default": "unstructured_log", - "description": "Event type identifier set to UNSTRUCTURED_LOG" - }, - "message": { - "type": "string", - "description": "The log message text" - }, - "severity": { - "$ref": "#/components/schemas/LogSeverity", - "description": "The severity level of the log message" - } - }, - "additionalProperties": false, - "required": [ - "trace_id", - "span_id", - "timestamp", - "type", - "message", - "severity" - ], - "title": "UnstructuredLogEvent", - "description": "An unstructured log event containing a simple text message." - }, - "LogEventRequest": { - "type": "object", - "properties": { - "event": { - "$ref": "#/components/schemas/Event", - "description": "The event to log." - }, - "ttl_seconds": { - "type": "integer", - "description": "The time to live of the event." 
- } - }, - "additionalProperties": false, - "required": [ - "event", - "ttl_seconds" - ], - "title": "LogEventRequest" - }, "InvokeToolRequest": { "type": "object", "properties": { @@ -17841,552 +17118,6 @@ "logger_config" ], "title": "SupervisedFineTuneRequest" - }, - "QueryMetricsRequest": { - "type": "object", - "properties": { - "start_time": { - "type": "integer", - "description": "The start time of the metric to query." - }, - "end_time": { - "type": "integer", - "description": "The end time of the metric to query." - }, - "granularity": { - "type": "string", - "description": "The granularity of the metric to query." - }, - "query_type": { - "type": "string", - "enum": [ - "range", - "instant" - ], - "description": "The type of query to perform." - }, - "label_matchers": { - "type": "array", - "items": { - "type": "object", - "properties": { - "name": { - "type": "string", - "description": "The name of the label to match" - }, - "value": { - "type": "string", - "description": "The value to match against" - }, - "operator": { - "type": "string", - "enum": [ - "=", - "!=", - "=~", - "!~" - ], - "description": "The comparison operator to use for matching", - "default": "=" - } - }, - "additionalProperties": false, - "required": [ - "name", - "value", - "operator" - ], - "title": "MetricLabelMatcher", - "description": "A matcher for filtering metrics by label values." - }, - "description": "The label matchers to apply to the metric." - } - }, - "additionalProperties": false, - "required": [ - "start_time", - "query_type" - ], - "title": "QueryMetricsRequest" - }, - "MetricDataPoint": { - "type": "object", - "properties": { - "timestamp": { - "type": "integer", - "description": "Unix timestamp when the metric value was recorded" - }, - "value": { - "type": "number", - "description": "The numeric value of the metric at this timestamp" - }, - "unit": { - "type": "string" - } - }, - "additionalProperties": false, - "required": [ - "timestamp", - "value", - "unit" - ], - "title": "MetricDataPoint", - "description": "A single data point in a metric time series." - }, - "MetricLabel": { - "type": "object", - "properties": { - "name": { - "type": "string", - "description": "The name of the label" - }, - "value": { - "type": "string", - "description": "The value of the label" - } - }, - "additionalProperties": false, - "required": [ - "name", - "value" - ], - "title": "MetricLabel", - "description": "A label associated with a metric." - }, - "MetricSeries": { - "type": "object", - "properties": { - "metric": { - "type": "string", - "description": "The name of the metric" - }, - "labels": { - "type": "array", - "items": { - "$ref": "#/components/schemas/MetricLabel" - }, - "description": "List of labels associated with this metric series" - }, - "values": { - "type": "array", - "items": { - "$ref": "#/components/schemas/MetricDataPoint" - }, - "description": "List of data points in chronological order" - } - }, - "additionalProperties": false, - "required": [ - "metric", - "labels", - "values" - ], - "title": "MetricSeries", - "description": "A time series of metric data points." - }, - "QueryMetricsResponse": { - "type": "object", - "properties": { - "data": { - "type": "array", - "items": { - "$ref": "#/components/schemas/MetricSeries" - }, - "description": "List of metric series matching the query criteria" - } - }, - "additionalProperties": false, - "required": [ - "data" - ], - "title": "QueryMetricsResponse", - "description": "Response containing metric time series data." 
- }, - "QueryCondition": { - "type": "object", - "properties": { - "key": { - "type": "string", - "description": "The attribute key to filter on" - }, - "op": { - "$ref": "#/components/schemas/QueryConditionOp", - "description": "The comparison operator to apply" - }, - "value": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ], - "description": "The value to compare against" - } - }, - "additionalProperties": false, - "required": [ - "key", - "op", - "value" - ], - "title": "QueryCondition", - "description": "A condition for filtering query results." - }, - "QueryConditionOp": { - "type": "string", - "enum": [ - "eq", - "ne", - "gt", - "lt" - ], - "title": "QueryConditionOp", - "description": "Comparison operators for query conditions." - }, - "QuerySpansRequest": { - "type": "object", - "properties": { - "attribute_filters": { - "type": "array", - "items": { - "$ref": "#/components/schemas/QueryCondition" - }, - "description": "The attribute filters to apply to the spans." - }, - "attributes_to_return": { - "type": "array", - "items": { - "type": "string" - }, - "description": "The attributes to return in the spans." - }, - "max_depth": { - "type": "integer", - "description": "The maximum depth of the tree." - } - }, - "additionalProperties": false, - "required": [ - "attribute_filters", - "attributes_to_return" - ], - "title": "QuerySpansRequest" - }, - "Span": { - "type": "object", - "properties": { - "span_id": { - "type": "string", - "description": "Unique identifier for the span" - }, - "trace_id": { - "type": "string", - "description": "Unique identifier for the trace this span belongs to" - }, - "parent_span_id": { - "type": "string", - "description": "(Optional) Unique identifier for the parent span, if this is a child span" - }, - "name": { - "type": "string", - "description": "Human-readable name describing the operation this span represents" - }, - "start_time": { - "type": "string", - "format": "date-time", - "description": "Timestamp when the operation began" - }, - "end_time": { - "type": "string", - "format": "date-time", - "description": "(Optional) Timestamp when the operation finished, if completed" - }, - "attributes": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - }, - "description": "(Optional) Key-value pairs containing additional metadata about the span" - } - }, - "additionalProperties": false, - "required": [ - "span_id", - "trace_id", - "name", - "start_time" - ], - "title": "Span", - "description": "A span representing a single operation within a trace." - }, - "QuerySpansResponse": { - "type": "object", - "properties": { - "data": { - "type": "array", - "items": { - "$ref": "#/components/schemas/Span" - }, - "description": "List of spans matching the query criteria" - } - }, - "additionalProperties": false, - "required": [ - "data" - ], - "title": "QuerySpansResponse", - "description": "Response containing a list of spans." - }, - "SaveSpansToDatasetRequest": { - "type": "object", - "properties": { - "attribute_filters": { - "type": "array", - "items": { - "$ref": "#/components/schemas/QueryCondition" - }, - "description": "The attribute filters to apply to the spans." 
- }, - "attributes_to_save": { - "type": "array", - "items": { - "type": "string" - }, - "description": "The attributes to save to the dataset." - }, - "dataset_id": { - "type": "string", - "description": "The ID of the dataset to save the spans to." - }, - "max_depth": { - "type": "integer", - "description": "The maximum depth of the tree." - } - }, - "additionalProperties": false, - "required": [ - "attribute_filters", - "attributes_to_save", - "dataset_id" - ], - "title": "SaveSpansToDatasetRequest" - }, - "GetSpanTreeRequest": { - "type": "object", - "properties": { - "attributes_to_return": { - "type": "array", - "items": { - "type": "string" - }, - "description": "The attributes to return in the tree." - }, - "max_depth": { - "type": "integer", - "description": "The maximum depth of the tree." - } - }, - "additionalProperties": false, - "title": "GetSpanTreeRequest" - }, - "SpanWithStatus": { - "type": "object", - "properties": { - "span_id": { - "type": "string", - "description": "Unique identifier for the span" - }, - "trace_id": { - "type": "string", - "description": "Unique identifier for the trace this span belongs to" - }, - "parent_span_id": { - "type": "string", - "description": "(Optional) Unique identifier for the parent span, if this is a child span" - }, - "name": { - "type": "string", - "description": "Human-readable name describing the operation this span represents" - }, - "start_time": { - "type": "string", - "format": "date-time", - "description": "Timestamp when the operation began" - }, - "end_time": { - "type": "string", - "format": "date-time", - "description": "(Optional) Timestamp when the operation finished, if completed" - }, - "attributes": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - }, - "description": "(Optional) Key-value pairs containing additional metadata about the span" - }, - "status": { - "$ref": "#/components/schemas/SpanStatus", - "description": "(Optional) The current status of the span" - } - }, - "additionalProperties": false, - "required": [ - "span_id", - "trace_id", - "name", - "start_time" - ], - "title": "SpanWithStatus", - "description": "A span that includes status information." - }, - "QuerySpanTreeResponse": { - "type": "object", - "properties": { - "data": { - "type": "object", - "additionalProperties": { - "$ref": "#/components/schemas/SpanWithStatus" - }, - "description": "Dictionary mapping span IDs to spans with status information" - } - }, - "additionalProperties": false, - "required": [ - "data" - ], - "title": "QuerySpanTreeResponse", - "description": "Response containing a tree structure of spans." - }, - "QueryTracesRequest": { - "type": "object", - "properties": { - "attribute_filters": { - "type": "array", - "items": { - "$ref": "#/components/schemas/QueryCondition" - }, - "description": "The attribute filters to apply to the traces." - }, - "limit": { - "type": "integer", - "description": "The limit of traces to return." - }, - "offset": { - "type": "integer", - "description": "The offset of the traces to return." - }, - "order_by": { - "type": "array", - "items": { - "type": "string" - }, - "description": "The order by of the traces to return." 
- } - }, - "additionalProperties": false, - "title": "QueryTracesRequest" - }, - "Trace": { - "type": "object", - "properties": { - "trace_id": { - "type": "string", - "description": "Unique identifier for the trace" - }, - "root_span_id": { - "type": "string", - "description": "Unique identifier for the root span that started this trace" - }, - "start_time": { - "type": "string", - "format": "date-time", - "description": "Timestamp when the trace began" - }, - "end_time": { - "type": "string", - "format": "date-time", - "description": "(Optional) Timestamp when the trace finished, if completed" - } - }, - "additionalProperties": false, - "required": [ - "trace_id", - "root_span_id", - "start_time" - ], - "title": "Trace", - "description": "A trace representing the complete execution path of a request across multiple operations." - }, - "QueryTracesResponse": { - "type": "object", - "properties": { - "data": { - "type": "array", - "items": { - "$ref": "#/components/schemas/Trace" - }, - "description": "List of traces matching the query criteria" - } - }, - "additionalProperties": false, - "required": [ - "data" - ], - "title": "QueryTracesResponse", - "description": "Response containing a list of traces." } }, "responses": { @@ -18539,10 +17270,6 @@ "name": "SyntheticDataGeneration (Coming Soon)", "description": "" }, - { - "name": "Telemetry", - "description": "" - }, { "name": "ToolGroups", "description": "" @@ -18582,7 +17309,6 @@ "ScoringFunctions", "Shields", "SyntheticDataGeneration (Coming Soon)", - "Telemetry", "ToolGroups", "ToolRuntime", "VectorDBs", diff --git a/docs/static/stainless-llama-stack-spec.yaml b/docs/static/stainless-llama-stack-spec.yaml index 6c04542bf..56035ad5f 100644 --- a/docs/static/stainless-llama-stack-spec.yaml +++ b/docs/static/stainless-llama-stack-spec.yaml @@ -1947,33 +1947,6 @@ paths: $ref: '#/components/schemas/SyntheticDataGenerateRequest' required: true deprecated: false - /v1/telemetry/events: - post: - responses: - '200': - description: OK - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Telemetry - summary: Log an event. - description: Log an event. - parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/LogEventRequest' - required: true - deprecated: false /v1/tool-runtime/invoke: post: responses: @@ -4392,238 +4365,6 @@ paths: $ref: '#/components/schemas/SupervisedFineTuneRequest' required: true deprecated: false - /v1alpha/telemetry/metrics/{metric_name}: - post: - responses: - '200': - description: A QueryMetricsResponse. - content: - application/json: - schema: - $ref: '#/components/schemas/QueryMetricsResponse' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Telemetry - summary: Query metrics. - description: Query metrics. - parameters: - - name: metric_name - in: path - description: The name of the metric to query. 
- required: true - schema: - type: string - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/QueryMetricsRequest' - required: true - deprecated: false - /v1alpha/telemetry/spans: - post: - responses: - '200': - description: A QuerySpansResponse. - content: - application/json: - schema: - $ref: '#/components/schemas/QuerySpansResponse' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Telemetry - summary: Query spans. - description: Query spans. - parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/QuerySpansRequest' - required: true - deprecated: false - /v1alpha/telemetry/spans/export: - post: - responses: - '200': - description: OK - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Telemetry - summary: Save spans to a dataset. - description: Save spans to a dataset. - parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/SaveSpansToDatasetRequest' - required: true - deprecated: false - /v1alpha/telemetry/spans/{span_id}/tree: - post: - responses: - '200': - description: A QuerySpanTreeResponse. - content: - application/json: - schema: - $ref: '#/components/schemas/QuerySpanTreeResponse' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Telemetry - summary: Get a span tree by its ID. - description: Get a span tree by its ID. - parameters: - - name: span_id - in: path - description: The ID of the span to get the tree from. - required: true - schema: - type: string - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/GetSpanTreeRequest' - required: true - deprecated: false - /v1alpha/telemetry/traces: - post: - responses: - '200': - description: A QueryTracesResponse. - content: - application/json: - schema: - $ref: '#/components/schemas/QueryTracesResponse' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Telemetry - summary: Query traces. - description: Query traces. - parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/QueryTracesRequest' - required: true - deprecated: false - /v1alpha/telemetry/traces/{trace_id}: - get: - responses: - '200': - description: A Trace. - content: - application/json: - schema: - $ref: '#/components/schemas/Trace' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Telemetry - summary: Get a trace by its ID. - description: Get a trace by its ID. - parameters: - - name: trace_id - in: path - description: The ID of the trace to get. 
- required: true - schema: - type: string - deprecated: false - /v1alpha/telemetry/traces/{trace_id}/spans/{span_id}: - get: - responses: - '200': - description: A Span. - content: - application/json: - schema: - $ref: '#/components/schemas/Span' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Telemetry - summary: Get a span by its ID. - description: Get a span by its ID. - parameters: - - name: trace_id - in: path - description: >- - The ID of the trace to get the span from. - required: true - schema: - type: string - - name: span_id - in: path - description: The ID of the span to get. - required: true - schema: - type: string - deprecated: false jsonSchemaDialect: >- https://json-schema.org/draft/2020-12/schema components: @@ -9285,267 +9026,6 @@ components: description: >- Response from the synthetic data generation. Batch of (prompt, response, score) tuples that pass the threshold. - Event: - oneOf: - - $ref: '#/components/schemas/UnstructuredLogEvent' - - $ref: '#/components/schemas/MetricEvent' - - $ref: '#/components/schemas/StructuredLogEvent' - discriminator: - propertyName: type - mapping: - unstructured_log: '#/components/schemas/UnstructuredLogEvent' - metric: '#/components/schemas/MetricEvent' - structured_log: '#/components/schemas/StructuredLogEvent' - EventType: - type: string - enum: - - unstructured_log - - structured_log - - metric - title: EventType - description: >- - The type of telemetry event being logged. - LogSeverity: - type: string - enum: - - verbose - - debug - - info - - warn - - error - - critical - title: LogSeverity - description: The severity level of a log message. - MetricEvent: - type: object - properties: - trace_id: - type: string - description: >- - Unique identifier for the trace this event belongs to - span_id: - type: string - description: >- - Unique identifier for the span this event belongs to - timestamp: - type: string - format: date-time - description: Timestamp when the event occurred - attributes: - type: object - additionalProperties: - oneOf: - - type: string - - type: integer - - type: number - - type: boolean - - type: 'null' - description: >- - (Optional) Key-value pairs containing additional metadata about the event - type: - $ref: '#/components/schemas/EventType' - const: metric - default: metric - description: Event type identifier set to METRIC - metric: - type: string - description: The name of the metric being measured - value: - oneOf: - - type: integer - - type: number - description: >- - The numeric value of the metric measurement - unit: - type: string - description: >- - The unit of measurement for the metric value - additionalProperties: false - required: - - trace_id - - span_id - - timestamp - - type - - metric - - value - - unit - title: MetricEvent - description: >- - A metric event containing a measured value. - SpanEndPayload: - type: object - properties: - type: - $ref: '#/components/schemas/StructuredLogType' - const: span_end - default: span_end - description: Payload type identifier set to SPAN_END - status: - $ref: '#/components/schemas/SpanStatus' - description: >- - The final status of the span indicating success or failure - additionalProperties: false - required: - - type - - status - title: SpanEndPayload - description: Payload for a span end event. 
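For completeness, the span-query side of the removed surface: a sketch of a `QuerySpansRequest` with a `QueryCondition` attribute filter, per the schemas deleted earlier in this patch. The server address, attribute keys, and session ID are hypothetical:

```python
# Minimal sketch of a QuerySpansRequest with a QueryCondition attribute filter,
# sent to the removed POST /v1alpha/telemetry/spans route.
# ASSUMPTIONS: server address, attribute keys, and session ID are hypothetical.
import requests

body = {
    "attribute_filters": [                    # required; op is one of eq/ne/gt/lt
        {"key": "session_id", "op": "eq", "value": "sess-1234"},
    ],
    "attributes_to_return": ["model_id"],     # required
    "max_depth": 2,                           # optional
}
resp = requests.post("http://localhost:8321/v1alpha/telemetry/spans", json=body)
resp.raise_for_status()
spans = resp.json()["data"]  # QuerySpansResponse.data: list[Span]
```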
-    SpanStartPayload:
-      type: object
-      properties:
-        type:
-          $ref: '#/components/schemas/StructuredLogType'
-          const: span_start
-          default: span_start
-          description: >-
-            Payload type identifier set to SPAN_START
-        name:
-          type: string
-          description: >-
-            Human-readable name describing the operation this span represents
-        parent_span_id:
-          type: string
-          description: >-
-            (Optional) Unique identifier for the parent span, if this is a child span
-      additionalProperties: false
-      required:
-        - type
-        - name
-      title: SpanStartPayload
-      description: Payload for a span start event.
-    SpanStatus:
-      type: string
-      enum:
-        - ok
-        - error
-      title: SpanStatus
-      description: >-
-        The status of a span indicating whether it completed successfully or with
-        an error.
-    StructuredLogEvent:
-      type: object
-      properties:
-        trace_id:
-          type: string
-          description: >-
-            Unique identifier for the trace this event belongs to
-        span_id:
-          type: string
-          description: >-
-            Unique identifier for the span this event belongs to
-        timestamp:
-          type: string
-          format: date-time
-          description: Timestamp when the event occurred
-        attributes:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: string
-              - type: integer
-              - type: number
-              - type: boolean
-              - type: 'null'
-          description: >-
-            (Optional) Key-value pairs containing additional metadata about the event
-        type:
-          $ref: '#/components/schemas/EventType'
-          const: structured_log
-          default: structured_log
-          description: >-
-            Event type identifier set to STRUCTURED_LOG
-        payload:
-          oneOf:
-            - $ref: '#/components/schemas/SpanStartPayload'
-            - $ref: '#/components/schemas/SpanEndPayload'
-          discriminator:
-            propertyName: type
-            mapping:
-              span_start: '#/components/schemas/SpanStartPayload'
-              span_end: '#/components/schemas/SpanEndPayload'
-          description: >-
-            The structured payload data for the log event
-      additionalProperties: false
-      required:
-        - trace_id
-        - span_id
-        - timestamp
-        - type
-        - payload
-      title: StructuredLogEvent
-      description: >-
-        A structured log event containing typed payload data.
-    StructuredLogType:
-      type: string
-      enum:
-        - span_start
-        - span_end
-      title: StructuredLogType
-      description: >-
-        The type of structured log event payload.
-    UnstructuredLogEvent:
-      type: object
-      properties:
-        trace_id:
-          type: string
-          description: >-
-            Unique identifier for the trace this event belongs to
-        span_id:
-          type: string
-          description: >-
-            Unique identifier for the span this event belongs to
-        timestamp:
-          type: string
-          format: date-time
-          description: Timestamp when the event occurred
-        attributes:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: string
-              - type: integer
-              - type: number
-              - type: boolean
-              - type: 'null'
-          description: >-
-            (Optional) Key-value pairs containing additional metadata about the event
-        type:
-          $ref: '#/components/schemas/EventType'
-          const: unstructured_log
-          default: unstructured_log
-          description: >-
-            Event type identifier set to UNSTRUCTURED_LOG
-        message:
-          type: string
-          description: The log message text
-        severity:
-          $ref: '#/components/schemas/LogSeverity'
-          description: The severity level of the log message
-      additionalProperties: false
-      required:
-        - trace_id
-        - span_id
-        - timestamp
-        - type
-        - message
-        - severity
-      title: UnstructuredLogEvent
-      description: >-
-        An unstructured log event containing a simple text message.
-    LogEventRequest:
-      type: object
-      properties:
-        event:
-          $ref: '#/components/schemas/Event'
-          description: The event to log.
-        ttl_seconds:
-          type: integer
-          description: The time to live of the event.
-      additionalProperties: false
-      required:
-        - event
-        - ttl_seconds
-      title: LogEventRequest
     InvokeToolRequest:
       type: object
       properties:
@@ -13349,425 +12829,6 @@ components:
         - hyperparam_search_config
         - logger_config
       title: SupervisedFineTuneRequest
-    QueryMetricsRequest:
-      type: object
-      properties:
-        start_time:
-          type: integer
-          description: The start time of the metric to query.
-        end_time:
-          type: integer
-          description: The end time of the metric to query.
-        granularity:
-          type: string
-          description: The granularity of the metric to query.
-        query_type:
-          type: string
-          enum:
-            - range
-            - instant
-          description: The type of query to perform.
-        label_matchers:
-          type: array
-          items:
-            type: object
-            properties:
-              name:
-                type: string
-                description: The name of the label to match
-              value:
-                type: string
-                description: The value to match against
-              operator:
-                type: string
-                enum:
-                  - '='
-                  - '!='
-                  - =~
-                  - '!~'
-                description: >-
-                  The comparison operator to use for matching
-                default: '='
-            additionalProperties: false
-            required:
-              - name
-              - value
-              - operator
-            title: MetricLabelMatcher
-            description: >-
-              A matcher for filtering metrics by label values.
-          description: >-
-            The label matchers to apply to the metric.
-      additionalProperties: false
-      required:
-        - start_time
-        - query_type
-      title: QueryMetricsRequest
-    MetricDataPoint:
-      type: object
-      properties:
-        timestamp:
-          type: integer
-          description: >-
-            Unix timestamp when the metric value was recorded
-        value:
-          type: number
-          description: >-
-            The numeric value of the metric at this timestamp
-        unit:
-          type: string
-      additionalProperties: false
-      required:
-        - timestamp
-        - value
-        - unit
-      title: MetricDataPoint
-      description: >-
-        A single data point in a metric time series.
-    MetricLabel:
-      type: object
-      properties:
-        name:
-          type: string
-          description: The name of the label
-        value:
-          type: string
-          description: The value of the label
-      additionalProperties: false
-      required:
-        - name
-        - value
-      title: MetricLabel
-      description: A label associated with a metric.
-    MetricSeries:
-      type: object
-      properties:
-        metric:
-          type: string
-          description: The name of the metric
-        labels:
-          type: array
-          items:
-            $ref: '#/components/schemas/MetricLabel'
-          description: >-
-            List of labels associated with this metric series
-        values:
-          type: array
-          items:
-            $ref: '#/components/schemas/MetricDataPoint'
-          description: >-
-            List of data points in chronological order
-      additionalProperties: false
-      required:
-        - metric
-        - labels
-        - values
-      title: MetricSeries
-      description: A time series of metric data points.
-    QueryMetricsResponse:
-      type: object
-      properties:
-        data:
-          type: array
-          items:
-            $ref: '#/components/schemas/MetricSeries'
-          description: >-
-            List of metric series matching the query criteria
-      additionalProperties: false
-      required:
-        - data
-      title: QueryMetricsResponse
-      description: >-
-        Response containing metric time series data.
-    QueryCondition:
-      type: object
-      properties:
-        key:
-          type: string
-          description: The attribute key to filter on
-        op:
-          $ref: '#/components/schemas/QueryConditionOp'
-          description: The comparison operator to apply
-        value:
-          oneOf:
-            - type: 'null'
-            - type: boolean
-            - type: number
-            - type: string
-            - type: array
-            - type: object
-          description: The value to compare against
-      additionalProperties: false
-      required:
-        - key
-        - op
-        - value
-      title: QueryCondition
-      description: A condition for filtering query results.
-    QueryConditionOp:
-      type: string
-      enum:
-        - eq
-        - ne
-        - gt
-        - lt
-      title: QueryConditionOp
-      description: >-
-        Comparison operators for query conditions.
-    QuerySpansRequest:
-      type: object
-      properties:
-        attribute_filters:
-          type: array
-          items:
-            $ref: '#/components/schemas/QueryCondition'
-          description: >-
-            The attribute filters to apply to the spans.
-        attributes_to_return:
-          type: array
-          items:
-            type: string
-          description: The attributes to return in the spans.
-        max_depth:
-          type: integer
-          description: The maximum depth of the tree.
-      additionalProperties: false
-      required:
-        - attribute_filters
-        - attributes_to_return
-      title: QuerySpansRequest
-    Span:
-      type: object
-      properties:
-        span_id:
-          type: string
-          description: Unique identifier for the span
-        trace_id:
-          type: string
-          description: >-
-            Unique identifier for the trace this span belongs to
-        parent_span_id:
-          type: string
-          description: >-
-            (Optional) Unique identifier for the parent span, if this is a child span
-        name:
-          type: string
-          description: >-
-            Human-readable name describing the operation this span represents
-        start_time:
-          type: string
-          format: date-time
-          description: Timestamp when the operation began
-        end_time:
-          type: string
-          format: date-time
-          description: >-
-            (Optional) Timestamp when the operation finished, if completed
-        attributes:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: >-
-            (Optional) Key-value pairs containing additional metadata about the span
-      additionalProperties: false
-      required:
-        - span_id
-        - trace_id
-        - name
-        - start_time
-      title: Span
-      description: >-
-        A span representing a single operation within a trace.
-    QuerySpansResponse:
-      type: object
-      properties:
-        data:
-          type: array
-          items:
-            $ref: '#/components/schemas/Span'
-          description: >-
-            List of spans matching the query criteria
-      additionalProperties: false
-      required:
-        - data
-      title: QuerySpansResponse
-      description: Response containing a list of spans.
-    SaveSpansToDatasetRequest:
-      type: object
-      properties:
-        attribute_filters:
-          type: array
-          items:
-            $ref: '#/components/schemas/QueryCondition'
-          description: >-
-            The attribute filters to apply to the spans.
-        attributes_to_save:
-          type: array
-          items:
-            type: string
-          description: The attributes to save to the dataset.
-        dataset_id:
-          type: string
-          description: >-
-            The ID of the dataset to save the spans to.
-        max_depth:
-          type: integer
-          description: The maximum depth of the tree.
-      additionalProperties: false
-      required:
-        - attribute_filters
-        - attributes_to_save
-        - dataset_id
-      title: SaveSpansToDatasetRequest
-    GetSpanTreeRequest:
-      type: object
-      properties:
-        attributes_to_return:
-          type: array
-          items:
-            type: string
-          description: The attributes to return in the tree.
-        max_depth:
-          type: integer
-          description: The maximum depth of the tree.
-      additionalProperties: false
-      title: GetSpanTreeRequest
-    SpanWithStatus:
-      type: object
-      properties:
-        span_id:
-          type: string
-          description: Unique identifier for the span
-        trace_id:
-          type: string
-          description: >-
-            Unique identifier for the trace this span belongs to
-        parent_span_id:
-          type: string
-          description: >-
-            (Optional) Unique identifier for the parent span, if this is a child span
-        name:
-          type: string
-          description: >-
-            Human-readable name describing the operation this span represents
-        start_time:
-          type: string
-          format: date-time
-          description: Timestamp when the operation began
-        end_time:
-          type: string
-          format: date-time
-          description: >-
-            (Optional) Timestamp when the operation finished, if completed
-        attributes:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: >-
-            (Optional) Key-value pairs containing additional metadata about the span
-        status:
-          $ref: '#/components/schemas/SpanStatus'
-          description: >-
-            (Optional) The current status of the span
-      additionalProperties: false
-      required:
-        - span_id
-        - trace_id
-        - name
-        - start_time
-      title: SpanWithStatus
-      description: A span that includes status information.
-    QuerySpanTreeResponse:
-      type: object
-      properties:
-        data:
-          type: object
-          additionalProperties:
-            $ref: '#/components/schemas/SpanWithStatus'
-          description: >-
-            Dictionary mapping span IDs to spans with status information
-      additionalProperties: false
-      required:
-        - data
-      title: QuerySpanTreeResponse
-      description: >-
-        Response containing a tree structure of spans.
-    QueryTracesRequest:
-      type: object
-      properties:
-        attribute_filters:
-          type: array
-          items:
-            $ref: '#/components/schemas/QueryCondition'
-          description: >-
-            The attribute filters to apply to the traces.
-        limit:
-          type: integer
-          description: The limit of traces to return.
-        offset:
-          type: integer
-          description: The offset of the traces to return.
-        order_by:
-          type: array
-          items:
-            type: string
-          description: The order by of the traces to return.
-      additionalProperties: false
-      title: QueryTracesRequest
-    Trace:
-      type: object
-      properties:
-        trace_id:
-          type: string
-          description: Unique identifier for the trace
-        root_span_id:
-          type: string
-          description: >-
-            Unique identifier for the root span that started this trace
-        start_time:
-          type: string
-          format: date-time
-          description: Timestamp when the trace began
-        end_time:
-          type: string
-          format: date-time
-          description: >-
-            (Optional) Timestamp when the trace finished, if completed
-      additionalProperties: false
-      required:
-        - trace_id
-        - root_span_id
-        - start_time
-      title: Trace
-      description: >-
-        A trace representing the complete execution path of a request across multiple
-        operations.
-    QueryTracesResponse:
-      type: object
-      properties:
-        data:
-          type: array
-          items:
-            $ref: '#/components/schemas/Trace'
-          description: >-
-            List of traces matching the query criteria
-      additionalProperties: false
-      required:
-        - data
-      title: QueryTracesResponse
-      description: Response containing a list of traces.
   responses:
     BadRequest400:
       description: The request was invalid or malformed
@@ -13881,8 +12942,6 @@ tags:
     description: ''
   - name: SyntheticDataGeneration (Coming Soon)
     description: ''
-  - name: Telemetry
-    description: ''
   - name: ToolGroups
     description: ''
   - name: ToolRuntime
@@ -13912,7 +12971,6 @@ x-tagGroups:
       - ScoringFunctions
       - Shields
       - SyntheticDataGeneration (Coming Soon)
-      - Telemetry
       - ToolGroups
       - ToolRuntime
       - VectorDBs
diff --git a/llama_stack/apis/telemetry/telemetry.py b/llama_stack/apis/telemetry/telemetry.py
index 0e772da6a..b2999ad33 100644
--- a/llama_stack/apis/telemetry/telemetry.py
+++ b/llama_stack/apis/telemetry/telemetry.py
@@ -16,15 +16,12 @@ from typing import (

 from pydantic import BaseModel, Field

-from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1ALPHA
 from llama_stack.models.llama.datatypes import Primitive
-from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
+from llama_stack.schema_utils import json_schema_type, register_schema

 # Add this constant near the top of the file, after the imports
 DEFAULT_TTL_DAYS = 7

-REQUIRED_SCOPE = "telemetry.read"
-

 @json_schema_type
 class SpanStatus(Enum):
@@ -413,7 +410,6 @@ class QueryMetricsResponse(BaseModel):

 @runtime_checkable
 class Telemetry(Protocol):
-    @webmethod(route="/telemetry/events", method="POST", level=LLAMA_STACK_API_V1)
     async def log_event(
         self,
         event: Event,
@@ -426,14 +422,6 @@
         """
         ...

-    @webmethod(
-        route="/telemetry/traces",
-        method="POST",
-        required_scope=REQUIRED_SCOPE,
-        deprecated=True,
-        level=LLAMA_STACK_API_V1,
-    )
-    @webmethod(route="/telemetry/traces", method="POST", required_scope=REQUIRED_SCOPE, level=LLAMA_STACK_API_V1ALPHA)
     async def query_traces(
         self,
         attribute_filters: list[QueryCondition] | None = None,
@@ -451,19 +439,6 @@
         """
         ...

-    @webmethod(
-        route="/telemetry/traces/{trace_id:path}",
-        method="GET",
-        required_scope=REQUIRED_SCOPE,
-        deprecated=True,
-        level=LLAMA_STACK_API_V1,
-    )
-    @webmethod(
-        route="/telemetry/traces/{trace_id:path}",
-        method="GET",
-        required_scope=REQUIRED_SCOPE,
-        level=LLAMA_STACK_API_V1ALPHA,
-    )
     async def get_trace(self, trace_id: str) -> Trace:
         """Get a trace by its ID.

@@ -472,19 +447,6 @@
         """
         ...

-    @webmethod(
-        route="/telemetry/traces/{trace_id:path}/spans/{span_id:path}",
-        method="GET",
-        required_scope=REQUIRED_SCOPE,
-        deprecated=True,
-        level=LLAMA_STACK_API_V1,
-    )
-    @webmethod(
-        route="/telemetry/traces/{trace_id:path}/spans/{span_id:path}",
-        method="GET",
-        required_scope=REQUIRED_SCOPE,
-        level=LLAMA_STACK_API_V1ALPHA,
-    )
     async def get_span(self, trace_id: str, span_id: str) -> Span:
         """Get a span by its ID.

@@ -494,19 +456,6 @@
         """
         ...

-    @webmethod(
-        route="/telemetry/spans/{span_id:path}/tree",
-        method="POST",
-        deprecated=True,
-        required_scope=REQUIRED_SCOPE,
-        level=LLAMA_STACK_API_V1,
-    )
-    @webmethod(
-        route="/telemetry/spans/{span_id:path}/tree",
-        method="POST",
-        required_scope=REQUIRED_SCOPE,
-        level=LLAMA_STACK_API_V1ALPHA,
-    )
     async def get_span_tree(
         self,
         span_id: str,
@@ -522,14 +471,6 @@
         """
         ...

-    @webmethod(
-        route="/telemetry/spans",
-        method="POST",
-        required_scope=REQUIRED_SCOPE,
-        deprecated=True,
-        level=LLAMA_STACK_API_V1,
-    )
-    @webmethod(route="/telemetry/spans", method="POST", required_scope=REQUIRED_SCOPE, level=LLAMA_STACK_API_V1ALPHA)
     async def query_spans(
         self,
         attribute_filters: list[QueryCondition],
@@ -545,8 +486,6 @@
         """
         ...

-    @webmethod(route="/telemetry/spans/export", method="POST", deprecated=True, level=LLAMA_STACK_API_V1)
-    @webmethod(route="/telemetry/spans/export", method="POST", level=LLAMA_STACK_API_V1ALPHA)
     async def save_spans_to_dataset(
         self,
         attribute_filters: list[QueryCondition],
@@ -563,19 +502,6 @@
         """
         ...

-    @webmethod(
-        route="/telemetry/metrics/{metric_name}",
-        method="POST",
-        required_scope=REQUIRED_SCOPE,
-        deprecated=True,
-        level=LLAMA_STACK_API_V1,
-    )
-    @webmethod(
-        route="/telemetry/metrics/{metric_name}",
-        method="POST",
-        required_scope=REQUIRED_SCOPE,
-        level=LLAMA_STACK_API_V1ALPHA,
-    )
     async def query_metrics(
         self,
         metric_name: str,