From dc4665af179e83b8f93d5e3a004e5751761e55a5 Mon Sep 17 00:00:00 2001 From: Ken Dreyer Date: Fri, 21 Nov 2025 09:48:05 -0500 Subject: [PATCH 01/17] feat!: change bedrock bearer token env variable to match AWS docs & boto3 convention (#4152) Rename `AWS_BEDROCK_API_KEY` to `AWS_BEARER_TOKEN_BEDROCK` to align with the naming convention used in AWS Bedrock documentation and the AWS web console UI. This reduces confusion when developers compare LLS docs with AWS docs. Closes #4147 --- docs/docs/providers/inference/remote_bedrock.mdx | 2 +- .../distributions/ci-tests/run-with-postgres-store.yaml | 2 +- src/llama_stack/distributions/ci-tests/run.yaml | 2 +- .../distributions/starter-gpu/run-with-postgres-store.yaml | 2 +- src/llama_stack/distributions/starter-gpu/run.yaml | 2 +- .../distributions/starter/run-with-postgres-store.yaml | 2 +- src/llama_stack/distributions/starter/run.yaml | 2 +- .../providers/remote/inference/bedrock/bedrock.py | 4 ++-- .../providers/remote/inference/bedrock/config.py | 6 +++--- tests/unit/providers/inference/test_bedrock_adapter.py | 4 ++-- tests/unit/providers/inference/test_bedrock_config.py | 4 ++-- 11 files changed, 16 insertions(+), 16 deletions(-) diff --git a/docs/docs/providers/inference/remote_bedrock.mdx b/docs/docs/providers/inference/remote_bedrock.mdx index 86bef3000..0b36ea01a 100644 --- a/docs/docs/providers/inference/remote_bedrock.mdx +++ b/docs/docs/providers/inference/remote_bedrock.mdx @@ -22,6 +22,6 @@ AWS Bedrock inference provider using OpenAI compatible endpoint. ## Sample Configuration ```yaml -api_key: ${env.AWS_BEDROCK_API_KEY:=} +api_key: ${env.AWS_BEARER_TOKEN_BEDROCK:=} region_name: ${env.AWS_DEFAULT_REGION:=us-east-2} ``` diff --git a/src/llama_stack/distributions/ci-tests/run-with-postgres-store.yaml b/src/llama_stack/distributions/ci-tests/run-with-postgres-store.yaml index d942c23a4..7721138c7 100644 --- a/src/llama_stack/distributions/ci-tests/run-with-postgres-store.yaml +++ b/src/llama_stack/distributions/ci-tests/run-with-postgres-store.yaml @@ -47,7 +47,7 @@ providers: - provider_id: bedrock provider_type: remote::bedrock config: - api_key: ${env.AWS_BEDROCK_API_KEY:=} + api_key: ${env.AWS_BEARER_TOKEN_BEDROCK:=} region_name: ${env.AWS_DEFAULT_REGION:=us-east-2} - provider_id: ${env.NVIDIA_API_KEY:+nvidia} provider_type: remote::nvidia diff --git a/src/llama_stack/distributions/ci-tests/run.yaml b/src/llama_stack/distributions/ci-tests/run.yaml index 8b1cd2bb2..b791e1488 100644 --- a/src/llama_stack/distributions/ci-tests/run.yaml +++ b/src/llama_stack/distributions/ci-tests/run.yaml @@ -47,7 +47,7 @@ providers: - provider_id: bedrock provider_type: remote::bedrock config: - api_key: ${env.AWS_BEDROCK_API_KEY:=} + api_key: ${env.AWS_BEARER_TOKEN_BEDROCK:=} region_name: ${env.AWS_DEFAULT_REGION:=us-east-2} - provider_id: ${env.NVIDIA_API_KEY:+nvidia} provider_type: remote::nvidia diff --git a/src/llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml b/src/llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml index 75cc9d188..9c250c05a 100644 --- a/src/llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +++ b/src/llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml @@ -47,7 +47,7 @@ providers: - provider_id: bedrock provider_type: remote::bedrock config: - api_key: ${env.AWS_BEDROCK_API_KEY:=} + api_key: ${env.AWS_BEARER_TOKEN_BEDROCK:=} region_name: ${env.AWS_DEFAULT_REGION:=us-east-2} - provider_id: ${env.NVIDIA_API_KEY:+nvidia} provider_type: remote::nvidia diff --git 
a/src/llama_stack/distributions/starter-gpu/run.yaml b/src/llama_stack/distributions/starter-gpu/run.yaml index 09c7be5a1..65f9ae326 100644 --- a/src/llama_stack/distributions/starter-gpu/run.yaml +++ b/src/llama_stack/distributions/starter-gpu/run.yaml @@ -47,7 +47,7 @@ providers: - provider_id: bedrock provider_type: remote::bedrock config: - api_key: ${env.AWS_BEDROCK_API_KEY:=} + api_key: ${env.AWS_BEARER_TOKEN_BEDROCK:=} region_name: ${env.AWS_DEFAULT_REGION:=us-east-2} - provider_id: ${env.NVIDIA_API_KEY:+nvidia} provider_type: remote::nvidia diff --git a/src/llama_stack/distributions/starter/run-with-postgres-store.yaml b/src/llama_stack/distributions/starter/run-with-postgres-store.yaml index f59c809d2..3314bb9e9 100644 --- a/src/llama_stack/distributions/starter/run-with-postgres-store.yaml +++ b/src/llama_stack/distributions/starter/run-with-postgres-store.yaml @@ -47,7 +47,7 @@ providers: - provider_id: bedrock provider_type: remote::bedrock config: - api_key: ${env.AWS_BEDROCK_API_KEY:=} + api_key: ${env.AWS_BEARER_TOKEN_BEDROCK:=} region_name: ${env.AWS_DEFAULT_REGION:=us-east-2} - provider_id: ${env.NVIDIA_API_KEY:+nvidia} provider_type: remote::nvidia diff --git a/src/llama_stack/distributions/starter/run.yaml b/src/llama_stack/distributions/starter/run.yaml index 435bb22a7..e88539e6a 100644 --- a/src/llama_stack/distributions/starter/run.yaml +++ b/src/llama_stack/distributions/starter/run.yaml @@ -47,7 +47,7 @@ providers: - provider_id: bedrock provider_type: remote::bedrock config: - api_key: ${env.AWS_BEDROCK_API_KEY:=} + api_key: ${env.AWS_BEARER_TOKEN_BEDROCK:=} region_name: ${env.AWS_DEFAULT_REGION:=us-east-2} - provider_id: ${env.NVIDIA_API_KEY:+nvidia} provider_type: remote::nvidia diff --git a/src/llama_stack/providers/remote/inference/bedrock/bedrock.py b/src/llama_stack/providers/remote/inference/bedrock/bedrock.py index 70ee95916..451549db8 100644 --- a/src/llama_stack/providers/remote/inference/bedrock/bedrock.py +++ b/src/llama_stack/providers/remote/inference/bedrock/bedrock.py @@ -37,7 +37,7 @@ class BedrockInferenceAdapter(OpenAIMixin): """ config: BedrockConfig - provider_data_api_key_field: str = "aws_bedrock_api_key" + provider_data_api_key_field: str = "aws_bearer_token_bedrock" def get_base_url(self) -> str: """Get base URL for OpenAI client.""" @@ -111,7 +111,7 @@ class BedrockInferenceAdapter(OpenAIMixin): logger.error(f"AWS Bedrock authentication token expired: {error_msg}") raise ValueError( "AWS Bedrock authentication failed: Bearer token has expired. " - "The AWS_BEDROCK_API_KEY environment variable contains an expired pre-signed URL. " + "The AWS_BEARER_TOKEN_BEDROCK environment variable contains an expired pre-signed URL. " "Please refresh your token by generating a new pre-signed URL with AWS credentials. " "Refer to AWS Bedrock documentation for details on OpenAI-compatible endpoints." 
) from e diff --git a/src/llama_stack/providers/remote/inference/bedrock/config.py b/src/llama_stack/providers/remote/inference/bedrock/config.py index 631a6e7ef..f31db63aa 100644 --- a/src/llama_stack/providers/remote/inference/bedrock/config.py +++ b/src/llama_stack/providers/remote/inference/bedrock/config.py @@ -12,9 +12,9 @@ from llama_stack.providers.utils.inference.model_registry import RemoteInference class BedrockProviderDataValidator(BaseModel): - aws_bedrock_api_key: str | None = Field( + aws_bearer_token_bedrock: str | None = Field( default=None, - description="API key for Amazon Bedrock", + description="API Key (Bearer token) for Amazon Bedrock", ) @@ -27,6 +27,6 @@ class BedrockConfig(RemoteInferenceProviderConfig): @classmethod def sample_run_config(cls, **kwargs): return { - "api_key": "${env.AWS_BEDROCK_API_KEY:=}", + "api_key": "${env.AWS_BEARER_TOKEN_BEDROCK:=}", "region_name": "${env.AWS_DEFAULT_REGION:=us-east-2}", } diff --git a/tests/unit/providers/inference/test_bedrock_adapter.py b/tests/unit/providers/inference/test_bedrock_adapter.py index a20f2860a..2a1ca769b 100644 --- a/tests/unit/providers/inference/test_bedrock_adapter.py +++ b/tests/unit/providers/inference/test_bedrock_adapter.py @@ -40,8 +40,8 @@ def test_api_key_from_header_overrides_config(): """Test API key from request header overrides config via client property""" config = BedrockConfig(api_key="config-key", region_name="us-east-1") adapter = BedrockInferenceAdapter(config=config) - adapter.provider_data_api_key_field = "aws_bedrock_api_key" - adapter.get_request_provider_data = MagicMock(return_value=SimpleNamespace(aws_bedrock_api_key="header-key")) + adapter.provider_data_api_key_field = "aws_bearer_token_bedrock" + adapter.get_request_provider_data = MagicMock(return_value=SimpleNamespace(aws_bearer_token_bedrock="header-key")) # The client property is where header override happens (in OpenAIMixin) assert adapter.client.api_key == "header-key" diff --git a/tests/unit/providers/inference/test_bedrock_config.py b/tests/unit/providers/inference/test_bedrock_config.py index 4c1fd56a2..622080426 100644 --- a/tests/unit/providers/inference/test_bedrock_config.py +++ b/tests/unit/providers/inference/test_bedrock_config.py @@ -9,7 +9,7 @@ from llama_stack.providers.remote.inference.bedrock.config import BedrockConfig def test_bedrock_config_defaults_no_env(monkeypatch): """Test BedrockConfig defaults when env vars are not set""" - monkeypatch.delenv("AWS_BEDROCK_API_KEY", raising=False) + monkeypatch.delenv("AWS_BEARER_TOKEN_BEDROCK", raising=False) monkeypatch.delenv("AWS_DEFAULT_REGION", raising=False) config = BedrockConfig() assert config.auth_credential is None @@ -35,5 +35,5 @@ def test_bedrock_config_sample(): sample = BedrockConfig.sample_run_config() assert "api_key" in sample assert "region_name" in sample - assert sample["api_key"] == "${env.AWS_BEDROCK_API_KEY:=}" + assert sample["api_key"] == "${env.AWS_BEARER_TOKEN_BEDROCK:=}" assert sample["region_name"] == "${env.AWS_DEFAULT_REGION:=us-east-2}" From 74dceb30da601fac809f7d9d04d83c66c1aac7d2 Mon Sep 17 00:00:00 2001 From: raghotham Date: Sat, 22 Nov 2025 00:30:36 +0530 Subject: [PATCH 02/17] chore: Add @cdoern as a code owner (#4209) We went through the nomination process for CODEOWNERS in the codeowners discord channel. Welcome to the code owners group @cdoern! Thanks for your contributions and we look forward to working with you! 
--- .github/CODEOWNERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 418d3113a..75636525e 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -2,4 +2,4 @@ # These owners will be the default owners for everything in # the repo. Unless a later match takes precedence, -* @ashwinb @raghotham @ehhuang @leseb @bbrowning @mattf @franciscojavierarceo +* @ashwinb @raghotham @ehhuang @leseb @bbrowning @mattf @franciscojavierarceo @cdoern From dabebdd2303af1401c5dd9d92654b530c4db5050 Mon Sep 17 00:00:00 2001 From: Ken Dreyer Date: Fri, 21 Nov 2025 16:12:01 -0500 Subject: [PATCH 03/17] fix: update hard-coded google model names (#4212) # What does this PR do? When we send the model names to Google's openai API, we must use the "google" name prefix. Google does not recognize the "vertexai" model names. Closes #4211 ## Test Plan ```bash uv venv --python python312 . .venv/bin/activate llama stack list-deps starter | xargs -L1 uv pip install llama stack run starter ``` Test that this shows the gemini models with their correct names: ```bash curl http://127.0.0.1:8321/v1/models | jq '.data | map(select(.custom_metadata.provider_id == "vertexai"))' ``` Test that this chat completion works: ```bash curl -X POST -H "Content-Type: application/json" "http://127.0.0.1:8321/v1/chat/completions" -d '{ "model": "vertexai/google/gemini-2.5-flash", "messages": [ { "role": "system", "content": "You are a helpful assistant." }, { "role": "user", "content": "Hello! Can you tell me a joke?" } ], "temperature": 1.0, "max_tokens": 256 }' ``` --- src/llama_stack/providers/remote/inference/vertexai/vertexai.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/llama_stack/providers/remote/inference/vertexai/vertexai.py b/src/llama_stack/providers/remote/inference/vertexai/vertexai.py index b91430fd0..7941f8c89 100644 --- a/src/llama_stack/providers/remote/inference/vertexai/vertexai.py +++ b/src/llama_stack/providers/remote/inference/vertexai/vertexai.py @@ -51,4 +51,4 @@ class VertexAIInferenceAdapter(OpenAIMixin): :return: An iterable of model IDs """ - return ["vertexai/gemini-2.0-flash", "vertexai/gemini-2.5-flash", "vertexai/gemini-2.5-pro"] + return ["google/gemini-2.0-flash", "google/gemini-2.5-flash", "google/gemini-2.5-pro"] From 3434c92a1446cf5eee8147541b030bbbe32c7823 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sun, 23 Nov 2025 22:32:58 -0500 Subject: [PATCH 04/17] chore(github-deps): bump actions/setup-node from 4.1.0 to 6.0.0 (#4216) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps [actions/setup-node](https://github.com/actions/setup-node) from 4.1.0 to 6.0.0.
Release notes

Sourced from actions/setup-node's releases (truncated).

v6.0.0: breaking changes and dependency upgrades. Full Changelog: https://github.com/actions/setup-node/compare/v5...v6.0.0

v5.0.0: introduces automatic caching when a valid `packageManager` field is present in your `package.json`. This aims to improve workflow performance and make dependency management more seamless. To disable this automatic caching, set `package-manager-cache: false`:

```yaml
steps:
- uses: actions/checkout@v5
- uses: actions/setup-node@v5
  with:
    package-manager-cache: false
```

Make sure your runner is on version v2.327.1 or later to ensure compatibility with this release. Full Changelog: https://github.com/actions/setup-node/compare/v4...v5.0.0

v4.4.0 and earlier: ... (truncated)
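As a concrete illustration of the v5+ caching behavior described above, here is a minimal sketch; the job shape, Node version, and comments are illustrative assumptions, not part of this patch series:

```yaml
steps:
  - uses: actions/checkout@v6
  # With a valid "packageManager" field in package.json, setup-node v5+
  # caches dependencies automatically, so no explicit cache input is needed.
  - uses: actions/setup-node@v6
    with:
      node-version: '20'
      # Uncomment to opt out of the automatic caching:
      # package-manager-cache: false
```

Note that this repo's own workflows instead pin the action by commit SHA and pass `cache: 'npm'` explicitly, as the diff below shows.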

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=actions/setup-node&package-manager=github_actions&previous-version=4.1.0&new-version=6.0.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/integration-tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/integration-tests.yml b/.github/workflows/integration-tests.yml index 8073f6a15..721c3b5a0 100644 --- a/.github/workflows/integration-tests.yml +++ b/.github/workflows/integration-tests.yml @@ -95,7 +95,7 @@ jobs: - name: Setup Node.js for TypeScript client tests if: ${{ matrix.client == 'server' }} - uses: actions/setup-node@39370e3970a6d050c480ffad4ff0ed4d3fdee5af # v4.1.0 + uses: actions/setup-node@2028fbc5c25fe9cf00d9f06a71cc4710d4507903 # v6.0.0 with: node-version: '20' cache: 'npm' From e86cf2c1534c2d45a3a7d9192510ee7c60a0ea6d Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 24 Nov 2025 09:32:41 -0800 Subject: [PATCH 05/17] chore(github-deps): bump actions/checkout from 5.0.0 to 6.0.0 (#4217) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps [actions/checkout](https://github.com/actions/checkout) from 5.0.0 to 6.0.0.
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/integration-tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/integration-tests.yml b/.github/workflows/integration-tests.yml index 8073f6a15..721c3b5a0 100644 --- a/.github/workflows/integration-tests.yml +++ b/.github/workflows/integration-tests.yml @@ -95,7 +95,7 @@ jobs: - name: Setup Node.js for TypeScript client tests if: ${{ matrix.client == 'server' }} - uses: actions/setup-node@39370e3970a6d050c480ffad4ff0ed4d3fdee5af # v4.1.0 + uses: actions/setup-node@2028fbc5c25fe9cf00d9f06a71cc4710d4507903 # v6.0.0 with: node-version: '20' cache: 'npm' From e86cf2c1534c2d45a3a7d9192510ee7c60a0ea6d Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 24 Nov 2025 09:32:41 -0800 Subject: [PATCH 05/17] chore(github-deps): bump actions/checkout from 5.0.0 to 6.0.0 (#4217) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps [actions/checkout](https://github.com/actions/checkout) from 5.0.0 to 6.0.0.
Release notes

Sourced from actions/checkout's releases and changelog (truncated).

v6.0.0: Full Changelog: https://github.com/actions/checkout/compare/v5.0.0...v6.0.0

v6-beta: updated `persist-credentials` to store the credentials under `$RUNNER_TEMP` instead of directly in the local git config. This requires a minimum Actions Runner version of v2.329.0 to access the persisted credentials for Docker container action scenarios.

v5.0.1: Full Changelog: https://github.com/actions/checkout/compare/v5...v5.0.1

The changelog also lists earlier releases (V5.0.0, V4.3.1, V4.3.0, v4.2.2, v4.2.1, v4.2.0, v4.1.7, v4.1.6, v4.1.5, ... truncated).
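A hedged sketch of what the v6 credential change above means in a workflow; `persist-credentials` is an existing checkout input, and the comments are drawn only from the notes above:

```yaml
steps:
  # In v6, persisted credentials live under $RUNNER_TEMP rather than in the
  # local git config; Docker container actions need runner >= v2.329.0 to
  # read them.
  - uses: actions/checkout@v6
    with:
      persist-credentials: false  # opt out entirely if no later step pushes
```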

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=actions/checkout&package-manager=github_actions&previous-version=5.0.0&new-version=6.0.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR: - `@dependabot rebase` will rebase this PR - `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it - `@dependabot merge` will merge this PR after your CI passes on it - `@dependabot squash and merge` will squash and merge this PR after your CI passes on it - `@dependabot cancel merge` will cancel a previously requested merge and block automerging - `@dependabot reopen` will reopen this PR if it is closed - `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually - `@dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency - `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/backward-compat.yml | 8 ++++---- .github/workflows/changelog.yml | 2 +- .github/workflows/conformance.yml | 4 ++-- .github/workflows/install-script-ci.yml | 4 ++-- .github/workflows/integration-auth-tests.yml | 2 +- .github/workflows/integration-sql-store-tests.yml | 2 +- .github/workflows/integration-tests.yml | 4 ++-- .github/workflows/integration-vector-io-tests.yml | 2 +- .github/workflows/pre-commit.yml | 2 +- .github/workflows/providers-build.yml | 10 +++++----- .github/workflows/providers-list-deps.yml | 8 ++++---- .github/workflows/python-build-test.yml | 2 +- .github/workflows/record-integration-tests.yml | 2 +- .github/workflows/stainless-builds.yml | 4 ++-- .github/workflows/test-external-provider-module.yml | 2 +- .github/workflows/test-external.yml | 2 +- .github/workflows/ui-unit-tests.yml | 2 +- .github/workflows/unit-tests.yml | 2 +- 18 files changed, 32 insertions(+), 32 deletions(-) diff --git a/.github/workflows/backward-compat.yml b/.github/workflows/backward-compat.yml index 9f950a8b9..190b4cee4 100644 --- a/.github/workflows/backward-compat.yml +++ b/.github/workflows/backward-compat.yml @@ -27,7 +27,7 @@ jobs: steps: - name: Checkout PR branch - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 with: fetch-depth: 0 # Need full history to access main branch @@ -151,7 +151,7 @@ jobs: steps: - name: Checkout PR branch - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 with: fetch-depth: 0 @@ -236,7 +236,7 @@ jobs: steps: - name: Checkout PR branch - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 with: fetch-depth: 0 @@ -405,7 +405,7 @@ jobs: steps: - name: Checkout PR branch - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 with: fetch-depth: 0 diff --git a/.github/workflows/changelog.yml b/.github/workflows/changelog.yml index 7a75d85f6..4b5132d8e 100644 --- a/.github/workflows/changelog.yml +++ b/.github/workflows/changelog.yml @@ -17,7 +17,7 @@ jobs: pull-requests: write # for peter-evans/create-pull-request to create a PR runs-on: ubuntu-latest steps: - - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 with: ref: main fetch-depth: 0 diff --git a/.github/workflows/conformance.yml b/.github/workflows/conformance.yml index 73e9678b2..466337232 100644 --- a/.github/workflows/conformance.yml +++ b/.github/workflows/conformance.yml @@ -35,7 +35,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout PR Code - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 with: fetch-depth: 0 @@ -59,7 +59,7 @@ jobs: # This allows us to diff the current changes against the previous state - name: Checkout Base Branch if: steps.skip-check.outputs.skip != 'true' - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 with: ref: ${{ github.event.pull_request.base.ref }} path: 
'base' diff --git a/.github/workflows/install-script-ci.yml b/.github/workflows/install-script-ci.yml index bbdaefb50..2af2512cd 100644 --- a/.github/workflows/install-script-ci.yml +++ b/.github/workflows/install-script-ci.yml @@ -16,14 +16,14 @@ jobs: lint: runs-on: ubuntu-latest steps: - - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # 5.0.0 + - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # 6.0.0 - name: Run ShellCheck on install.sh run: shellcheck scripts/install.sh smoke-test-on-dev: runs-on: ubuntu-latest steps: - name: Checkout repository - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 - name: Install dependencies uses: ./.github/actions/setup-runner diff --git a/.github/workflows/integration-auth-tests.yml b/.github/workflows/integration-auth-tests.yml index 1ec06bc29..626eb245b 100644 --- a/.github/workflows/integration-auth-tests.yml +++ b/.github/workflows/integration-auth-tests.yml @@ -35,7 +35,7 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 - name: Install dependencies uses: ./.github/actions/setup-runner diff --git a/.github/workflows/integration-sql-store-tests.yml b/.github/workflows/integration-sql-store-tests.yml index 8c3e51dd4..c5a7d13f9 100644 --- a/.github/workflows/integration-sql-store-tests.yml +++ b/.github/workflows/integration-sql-store-tests.yml @@ -48,7 +48,7 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 - name: Install dependencies uses: ./.github/actions/setup-runner diff --git a/.github/workflows/integration-tests.yml b/.github/workflows/integration-tests.yml index 721c3b5a0..12923bb55 100644 --- a/.github/workflows/integration-tests.yml +++ b/.github/workflows/integration-tests.yml @@ -50,7 +50,7 @@ jobs: matrix: ${{ steps.set-matrix.outputs.matrix }} steps: - name: Checkout repository - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 - name: Generate test matrix id: set-matrix @@ -81,7 +81,7 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 - name: Setup test environment if: ${{ matrix.config.allowed_clients == null || contains(matrix.config.allowed_clients, matrix.client) }} diff --git a/.github/workflows/integration-vector-io-tests.yml b/.github/workflows/integration-vector-io-tests.yml index 1962629c2..fc6ac0600 100644 --- a/.github/workflows/integration-vector-io-tests.yml +++ b/.github/workflows/integration-vector-io-tests.yml @@ -37,7 +37,7 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 - name: Install dependencies uses: ./.github/actions/setup-runner diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml index ac125bba5..8c1ba08dd 100644 --- a/.github/workflows/pre-commit.yml +++ b/.github/workflows/pre-commit.yml @@ -22,7 +22,7 @@ jobs: steps: - name: Checkout code - uses: 
actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 with: # For dependabot PRs, we need to checkout with a token that can push changes token: ${{ github.actor == 'dependabot[bot]' && secrets.GITHUB_TOKEN || github.token }} diff --git a/.github/workflows/providers-build.yml b/.github/workflows/providers-build.yml index f2559a258..9affe3d3f 100644 --- a/.github/workflows/providers-build.yml +++ b/.github/workflows/providers-build.yml @@ -40,7 +40,7 @@ jobs: distros: ${{ steps.set-matrix.outputs.distros }} steps: - name: Checkout repository - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 - name: Generate Distribution List id: set-matrix @@ -59,7 +59,7 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 - name: Install dependencies uses: ./.github/actions/setup-runner @@ -93,7 +93,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout repository - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 - name: Install dependencies uses: ./.github/actions/setup-runner @@ -106,7 +106,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout repository - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 - name: Install dependencies uses: ./.github/actions/setup-runner @@ -146,7 +146,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout repository - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 - name: Install dependencies uses: ./.github/actions/setup-runner diff --git a/.github/workflows/providers-list-deps.yml b/.github/workflows/providers-list-deps.yml index 88659dbe3..a8024546d 100644 --- a/.github/workflows/providers-list-deps.yml +++ b/.github/workflows/providers-list-deps.yml @@ -36,7 +36,7 @@ jobs: distros: ${{ steps.set-matrix.outputs.distros }} steps: - name: Checkout repository - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 - name: Generate Distribution List id: set-matrix @@ -55,7 +55,7 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 - name: Install dependencies uses: ./.github/actions/setup-runner @@ -79,7 +79,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout repository - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 - name: Install dependencies uses: ./.github/actions/setup-runner @@ -92,7 +92,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout repository - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 - name: Install dependencies uses: ./.github/actions/setup-runner diff --git a/.github/workflows/python-build-test.yml b/.github/workflows/python-build-test.yml index a498ef0a0..3162a3e68 100644 --- 
a/.github/workflows/python-build-test.yml +++ b/.github/workflows/python-build-test.yml @@ -21,7 +21,7 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 - name: Install uv uses: astral-sh/setup-uv@85856786d1ce8acfbcc2f13a5f3fbd6b938f9f41 # v7.1.2 diff --git a/.github/workflows/record-integration-tests.yml b/.github/workflows/record-integration-tests.yml index 57f95580e..2093bca04 100644 --- a/.github/workflows/record-integration-tests.yml +++ b/.github/workflows/record-integration-tests.yml @@ -46,7 +46,7 @@ jobs: echo "::endgroup::" - name: Checkout repository - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 with: fetch-depth: 0 diff --git a/.github/workflows/stainless-builds.yml b/.github/workflows/stainless-builds.yml index 28869fdd8..08fc3d114 100644 --- a/.github/workflows/stainless-builds.yml +++ b/.github/workflows/stainless-builds.yml @@ -87,7 +87,7 @@ jobs: # Checkout the PR's code to access the OpenAPI spec and config files. # This is necessary to read the spec/config from the PR (including from forks). - name: Checkout repository - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 with: repository: ${{ github.event.pull_request.head.repo.full_name }} ref: ${{ github.event.pull_request.head.sha }} @@ -119,7 +119,7 @@ jobs: # Checkout the PR's code to access the OpenAPI spec and config files. # This is necessary to read the spec/config from the PR (including from forks). - name: Checkout repository - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 with: repository: ${{ github.event.pull_request.head.repo.full_name }} ref: ${{ github.event.pull_request.head.sha }} diff --git a/.github/workflows/test-external-provider-module.yml b/.github/workflows/test-external-provider-module.yml index 39f2356aa..e2dbe00e6 100644 --- a/.github/workflows/test-external-provider-module.yml +++ b/.github/workflows/test-external-provider-module.yml @@ -27,7 +27,7 @@ jobs: # container and point 'uv pip install' to the correct path... steps: - name: Checkout repository - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 - name: Install dependencies uses: ./.github/actions/setup-runner diff --git a/.github/workflows/test-external.yml b/.github/workflows/test-external.yml index a99719718..7a306643d 100644 --- a/.github/workflows/test-external.yml +++ b/.github/workflows/test-external.yml @@ -27,7 +27,7 @@ jobs: # container and point 'uv pip install' to the correct path... 
steps: - name: Checkout repository - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 - name: Install dependencies uses: ./.github/actions/setup-runner diff --git a/.github/workflows/ui-unit-tests.yml b/.github/workflows/ui-unit-tests.yml index f5e4a5967..0b8b0ae78 100644 --- a/.github/workflows/ui-unit-tests.yml +++ b/.github/workflows/ui-unit-tests.yml @@ -26,7 +26,7 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 - name: Setup Node.js uses: actions/setup-node@2028fbc5c25fe9cf00d9f06a71cc4710d4507903 # v6.0.0 diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml index 52a8b0124..dde129870 100644 --- a/.github/workflows/unit-tests.yml +++ b/.github/workflows/unit-tests.yml @@ -36,7 +36,7 @@ jobs: - "3.13" steps: - name: Checkout repository - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 - name: Install dependencies uses: ./.github/actions/setup-runner From adab95259b48b7df228b27568ccca16d15e81598 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 24 Nov 2025 09:32:51 -0800 Subject: [PATCH 06/17] chore(github-deps): bump astral-sh/setup-uv from 7.1.2 to 7.1.4 (#4215) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps [astral-sh/setup-uv](https://github.com/astral-sh/setup-uv) from 7.1.2 to 7.1.4.
Release notes

Sourced from astral-sh/setup-uv's releases.

v7.1.4 🌈 Fix libuv closing bug on Windows: fixes the `Assertion failed: !(handle->flags & UV_HANDLE_CLOSING)` error on Windows runners, plus bug fixes and maintenance.

v7.1.3 🌈 Support act: adds support for https://github.com/nektos/act, which was previously broken by a too-new undici version and the TS transpilation target. Compatibility with act is now tested automatically. Includes bug fixes, maintenance, and documentation updates.

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=astral-sh/setup-uv&package-manager=github_actions&previous-version=7.1.2&new-version=7.1.4)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/backward-compat.yml | 4 ++-- .github/workflows/pre-commit.yml | 2 +- .github/workflows/python-build-test.yml | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/backward-compat.yml b/.github/workflows/backward-compat.yml index 190b4cee4..fd2c52d64 100644 --- a/.github/workflows/backward-compat.yml +++ b/.github/workflows/backward-compat.yml @@ -37,7 +37,7 @@ jobs: python-version: '3.12' - name: Install uv - uses: astral-sh/setup-uv@85856786d1ce8acfbcc2f13a5f3fbd6b938f9f41 # v7.1.2 + uses: astral-sh/setup-uv@1e862dfacbd1d6d858c55d9b792c756523627244 # v7.1.4 with: enable-cache: true @@ -415,7 +415,7 @@ jobs: python-version: '3.12' - name: Install uv - uses: astral-sh/setup-uv@85856786d1ce8acfbcc2f13a5f3fbd6b938f9f41 # v7.1.2 + uses: astral-sh/setup-uv@1e862dfacbd1d6d858c55d9b792c756523627244 # v7.1.4 with: enable-cache: true diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml index 8c1ba08dd..edf4ca859 100644 --- a/.github/workflows/pre-commit.yml +++ b/.github/workflows/pre-commit.yml @@ -46,7 +46,7 @@ jobs: cache-dependency-path: 'src/llama_stack_ui/' - name: Set up uv - uses: astral-sh/setup-uv@85856786d1ce8acfbcc2f13a5f3fbd6b938f9f41 # v7.1.2 + uses: astral-sh/setup-uv@1e862dfacbd1d6d858c55d9b792c756523627244 # v7.1.4 - name: Install npm dependencies run: npm ci diff --git a/.github/workflows/python-build-test.yml b/.github/workflows/python-build-test.yml index 3162a3e68..9c79021cf 100644 --- a/.github/workflows/python-build-test.yml +++ b/.github/workflows/python-build-test.yml @@ -24,7 +24,7 @@ jobs: uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 - name: Install uv - uses: astral-sh/setup-uv@85856786d1ce8acfbcc2f13a5f3fbd6b938f9f41 # v7.1.2 + uses: astral-sh/setup-uv@1e862dfacbd1d6d858c55d9b792c756523627244 # v7.1.4 with: python-version: ${{ matrix.python-version }} activate-environment: true From 5948c5e08ef0594b86faf934a9a3c56d4f2811e4 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 24 Nov 2025 09:33:25 -0800 Subject: [PATCH 07/17] chore(github-deps): bump stainless-api/upload-openapi-spec-action from 1.6.0 to 1.7.0 (#4214) Bumps [stainless-api/upload-openapi-spec-action](https://github.com/stainless-api/upload-openapi-spec-action) from 1.6.0 to 1.7.0.
Release notes

Sourced from stainless-api/upload-openapi-spec-action's releases and changelog.

1.7.0 (2025-11-17) Features:
  • preview: add output documented_spec_path to preview action (#135) (5e80cc4)
  • preview: add output_dir input and write documented spec to file (#137) (d30490c)

Earlier changelog entries: 1.6.0 (2025-10-30): features and bug fixes; 1.5.5 (2025-09-26): rollback filtering diagnostics by target (54328a3); 1.5.4 (2025-09-25): check for latestRun before commenting (53fef9f), filter diagnostics by target (#125) (102dc97); 1.5.3 (2025-09-16): filter by branch when finding base build (#120) (b6506ad); 1.5.2 (2025-09-15): bug fixes. ... (truncated)

Commits
  • 9133735 chore(main): release 1.7.0 (#136)
  • 641c28a chore(build): Update dist
  • d30490c feat(preview): add output_dir input and write documented spec to file (#137)
  • 5e80cc4 feat(preview): add output documented_spec_path to preview action (#135)
  • 6daa518 chore(docs): document OIDC org-matching requirement
  • See full diff in compare view
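To make the 1.7.0 additions concrete, a minimal sketch wiring the new input and output together; the `output_dir` value and the echo step are assumptions, while the pinned SHA, secret, and `org` mirror this repo's workflow below:

```yaml
- name: Run preview builds
  id: preview
  uses: stainless-api/upload-openapi-spec-action/preview@9133735bca5ce0a1df7d3b26e75364e26137a016 # 1.7.0
  with:
    stainless_api_key: ${{ secrets.STAINLESS_API_KEY }}
    org: ${{ env.STAINLESS_ORG }}
    output_dir: ./stainless-preview  # new in 1.7.0: write the documented spec to a file
# The preview action now also exposes a documented_spec_path output:
- name: Show documented spec path
  run: echo "${{ steps.preview.outputs.documented_spec_path }}"
```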

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=stainless-api/upload-openapi-spec-action&package-manager=github_actions&previous-version=1.6.0&new-version=1.7.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/stainless-builds.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/stainless-builds.yml b/.github/workflows/stainless-builds.yml index 08fc3d114..9217d5b3f 100644 --- a/.github/workflows/stainless-builds.yml +++ b/.github/workflows/stainless-builds.yml @@ -94,7 +94,7 @@ jobs: fetch-depth: 2 - name: Run preview builds - uses: stainless-api/upload-openapi-spec-action/preview@32823b096b4319c53ee948d702d9052873af485f # 1.6.0 + uses: stainless-api/upload-openapi-spec-action/preview@9133735bca5ce0a1df7d3b26e75364e26137a016 # 1.7.0 with: stainless_api_key: ${{ secrets.STAINLESS_API_KEY }} org: ${{ env.STAINLESS_ORG }} @@ -132,7 +132,7 @@ jobs: # against the preview branch to complete, but assuming that # the preview job happens before the PR merge, it should be fine. - name: Run merge build - uses: stainless-api/upload-openapi-spec-action/merge@32823b096b4319c53ee948d702d9052873af485f # 1.6.0 + uses: stainless-api/upload-openapi-spec-action/merge@9133735bca5ce0a1df7d3b26e75364e26137a016 # 1.7.0 with: stainless_api_key: ${{ secrets.STAINLESS_API_KEY }} org: ${{ env.STAINLESS_ORG }} From b1c5b8fa9f2c994f0ed15849fef99b6304440b5b Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 24 Nov 2025 09:33:32 -0800 Subject: [PATCH 08/17] chore(github-deps): bump peter-evans/create-pull-request from 7.0.8 to 7.0.9 (#4213) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps [peter-evans/create-pull-request](https://github.com/peter-evans/create-pull-request) from 7.0.8 to 7.0.9.
Release notes

Sourced from peter-evans/create-pull-request's releases.

Create Pull Request v7.0.9: ⚙️ Fixes an incompatibility with the recently released actions/checkout@v6. Full Changelog: https://github.com/peter-evans/create-pull-request/compare/v7.0.8...v7.0.9

Commits
  • 84ae59a fix: compatibility with actions/checkout@v6 (#4230)
  • b4733b9 build(deps-dev): bump js-yaml from 4.1.0 to 4.1.1 (#4222)
  • 0edc001 build(deps-dev): bump the npm group with 2 updates (#4201)
  • 430aea0 build(deps): bump the github-actions group with 3 updates (#4200)
  • 46cdba7 build(deps-dev): bump the npm group with 3 updates (#4185)
  • b937339 build(deps): bump the github-actions group with 2 updates (#4184)
  • e9af275 ci: update dependabot config
  • d3e081a build(deps-dev): bump @types/node from 18.19.127 to 18.19.128 (#4178)
  • 9ec683e build(deps-dev): bump @types/node from 18.19.125 to 18.19.127 (#4165)
  • 65d8d10 build(deps-dev): bump ts-jest from 29.4.2 to 29.4.4 (#4163)
  • Additional commits viewable in compare view
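Since the headline change is checkout@v6 compatibility, here is a hedged sketch pairing the two actions; the title, commit message, and branch values are placeholders, and the inputs are the same ones used by this repo's changelog workflow (removed later in this series):

```yaml
steps:
  - uses: actions/checkout@v6  # v7.0.9 restores compatibility with checkout v6
  - uses: peter-evans/create-pull-request@84ae59a2cdc2258d6fa0732dd66352dddae2a412 # v7.0.9
    with:
      title: 'docs: example automated PR'          # placeholder
      commit-message: 'docs: example automated PR' # placeholder
      branch: create-pull-request/example          # placeholder
      signoff: true
```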

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=peter-evans/create-pull-request&package-manager=github_actions&previous-version=7.0.8&new-version=7.0.9)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/changelog.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/changelog.yml b/.github/workflows/changelog.yml index 4b5132d8e..8224dc664 100644 --- a/.github/workflows/changelog.yml +++ b/.github/workflows/changelog.yml @@ -23,7 +23,7 @@ jobs: fetch-depth: 0 - run: | python ./scripts/gen-changelog.py - - uses: peter-evans/create-pull-request@271a8d0340265f705b14b6d32b9829c1cb33d45e # v7.0.8 + - uses: peter-evans/create-pull-request@84ae59a2cdc2258d6fa0732dd66352dddae2a412 # v7.0.9 with: title: 'docs: update CHANGELOG.md for ${{ github.ref_name }}' commit-message: 'docs: update CHANGELOG.md for ${{ github.ref_name }}' From aac494c5baca31fca434c197e65567f1ee8672b2 Mon Sep 17 00:00:00 2001 From: Charlie Doern Date: Wed, 26 Nov 2025 06:16:28 -0500 Subject: [PATCH 09/17] fix: bind to proper default hosts (#4232) # What does this PR do? We used to have ` host = config.server.host or ["::", "0.0.0.0"]`, but now we bind only to ` host = config.server.host or "0.0.0.0"`. Revert to the old logic; this allows us to curl http://localhost:8321/v1/models on Fedora, which defaults to using IPv6. Resolves #4210 Signed-off-by: Charlie Doern --- src/llama_stack/cli/stack/run.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/llama_stack/cli/stack/run.py b/src/llama_stack/cli/stack/run.py index 73d8d13d5..bc4ef70fd 100644 --- a/src/llama_stack/cli/stack/run.py +++ b/src/llama_stack/cli/stack/run.py @@ -197,7 +197,7 @@ class StackRun(Subcommand): config = StackRunConfig(**cast_image_name_to_string(replace_env_vars(config_contents))) port = args.port or config.server.port - host = config.server.host or "0.0.0.0" + host = config.server.host or ["::", "0.0.0.0"] # Set the config file in environment so create_app can find it os.environ["LLAMA_STACK_CONFIG"] = str(config_file) From d1a7bc36a2286a2681ad9b1a151cb776080aaf72 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Han?= Date: Wed, 26 Nov 2025 17:48:32 +0100 Subject: [PATCH 10/17] chore: rm CHANGELOG.md (#4240) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # What does this PR do? We don't do a good job of maintaining this file, and the GH action does not seem to be running. Let's stick with GH release notes instead. Signed-off-by: Sébastien Han --- .github/workflows/README.md | 1 - .github/workflows/changelog.yml | 31 -- CHANGELOG.md | 614 -------------------------------- scripts/gen-changelog.py | 74 ---- 4 files changed, 720 deletions(-) delete mode 100644 .github/workflows/changelog.yml delete mode 100644 CHANGELOG.md delete mode 100755 scripts/gen-changelog.py diff --git a/.github/workflows/README.md b/.github/workflows/README.md index bb848209f..8eb31c79b 100644 --- a/.github/workflows/README.md +++ b/.github/workflows/README.md @@ -5,7 +5,6 @@ Llama Stack uses GitHub Actions for Continuous Integration (CI). Below is a tabl | Name | File | Purpose | | ---- | ---- | ------- | | Backward Compatibility Check | [backward-compat.yml](backward-compat.yml) | Check backward compatibility for run.yaml configs | -| Update Changelog | [changelog.yml](changelog.yml) | Creates PR for updating the CHANGELOG.md | | API Conformance Tests | [conformance.yml](conformance.yml) | Run the API Conformance test suite on the changes.
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/changelog.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/changelog.yml b/.github/workflows/changelog.yml index 4b5132d8e..8224dc664 100644 --- a/.github/workflows/changelog.yml +++ b/.github/workflows/changelog.yml @@ -23,7 +23,7 @@ jobs: fetch-depth: 0 - run: | python ./scripts/gen-changelog.py - - uses: peter-evans/create-pull-request@271a8d0340265f705b14b6d32b9829c1cb33d45e # v7.0.8 + - uses: peter-evans/create-pull-request@84ae59a2cdc2258d6fa0732dd66352dddae2a412 # v7.0.9 with: title: 'docs: update CHANGELOG.md for ${{ github.ref_name }}' commit-message: 'docs: update CHANGELOG.md for ${{ github.ref_name }}' From aac494c5baca31fca434c197e65567f1ee8672b2 Mon Sep 17 00:00:00 2001 From: Charlie Doern Date: Wed, 26 Nov 2025 06:16:28 -0500 Subject: [PATCH 09/17] fix: bind to proper default hosts (#4232) # What does this PR do? we used to have ` host = config.server.host or ["::", "0.0.0.0"]` but now only bind to ` host = config.server.host or "0.0.0.0"` revert back to the old logic, this allows us to curl http://localhost:8321/v1/models on fedora, which defaults to using IPv6. resolves #4210 Signed-off-by: Charlie Doern --- src/llama_stack/cli/stack/run.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/llama_stack/cli/stack/run.py b/src/llama_stack/cli/stack/run.py index 73d8d13d5..bc4ef70fd 100644 --- a/src/llama_stack/cli/stack/run.py +++ b/src/llama_stack/cli/stack/run.py @@ -197,7 +197,7 @@ class StackRun(Subcommand): config = StackRunConfig(**cast_image_name_to_string(replace_env_vars(config_contents))) port = args.port or config.server.port - host = config.server.host or "0.0.0.0" + host = config.server.host or ["::", "0.0.0.0"] # Set the config file in environment so create_app can find it os.environ["LLAMA_STACK_CONFIG"] = str(config_file) From d1a7bc36a2286a2681ad9b1a151cb776080aaf72 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Han?= Date: Wed, 26 Nov 2025 17:48:32 +0100 Subject: [PATCH 10/17] chore: rm CHANGELOG.md (#4240) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # What does this PR do? We don't do a good job at maintaining this file, also the GH action does not seem to be running. Let's stick with GH release notes instead. Signed-off-by: Sébastien Han --- .github/workflows/README.md | 1 - .github/workflows/changelog.yml | 31 -- CHANGELOG.md | 614 -------------------------------- scripts/gen-changelog.py | 74 ---- 4 files changed, 720 deletions(-) delete mode 100644 .github/workflows/changelog.yml delete mode 100644 CHANGELOG.md delete mode 100755 scripts/gen-changelog.py diff --git a/.github/workflows/README.md b/.github/workflows/README.md index bb848209f..8eb31c79b 100644 --- a/.github/workflows/README.md +++ b/.github/workflows/README.md @@ -5,7 +5,6 @@ Llama Stack uses GitHub Actions for Continuous Integration (CI). Below is a tabl | Name | File | Purpose | | ---- | ---- | ------- | | Backward Compatibility Check | [backward-compat.yml](backward-compat.yml) | Check backward compatibility for run.yaml configs | -| Update Changelog | [changelog.yml](changelog.yml) | Creates PR for updating the CHANGELOG.md | | API Conformance Tests | [conformance.yml](conformance.yml) | Run the API Conformance test suite on the changes. 
| | Installer CI | [install-script-ci.yml](install-script-ci.yml) | Test the installation script | | Integration Auth Tests | [integration-auth-tests.yml](integration-auth-tests.yml) | Run the integration test suite with Kubernetes authentication | diff --git a/.github/workflows/changelog.yml b/.github/workflows/changelog.yml deleted file mode 100644 index 8224dc664..000000000 --- a/.github/workflows/changelog.yml +++ /dev/null @@ -1,31 +0,0 @@ -name: Update Changelog - -run-name: Creates PR for updating the CHANGELOG.md - -on: - release: - types: [published, unpublished, created, edited, deleted, released] - -permissions: - contents: read - -jobs: - generate_changelog: - name: Generate changelog - permissions: - contents: write # for peter-evans/create-pull-request to create branch - pull-requests: write # for peter-evans/create-pull-request to create a PR - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 - with: - ref: main - fetch-depth: 0 - - run: | - python ./scripts/gen-changelog.py - - uses: peter-evans/create-pull-request@84ae59a2cdc2258d6fa0732dd66352dddae2a412 # v7.0.9 - with: - title: 'docs: update CHANGELOG.md for ${{ github.ref_name }}' - commit-message: 'docs: update CHANGELOG.md for ${{ github.ref_name }}' - branch: create-pull-request/changelog - signoff: true diff --git a/CHANGELOG.md b/CHANGELOG.md deleted file mode 100644 index c51a1b2aa..000000000 --- a/CHANGELOG.md +++ /dev/null @@ -1,614 +0,0 @@ -# Changelog - -# v0.2.20 -Published on: 2025-08-29T22:25:32Z - -Here are some key changes that are coming as part of this release. - -### Build and Environment - -- Environment improvements: fixed env var replacement to preserve types. -- Docker stability: fixed container startup failures for Fireworks AI provider. -- Removed absolute paths in build for better portability. - -### Features - -- UI Enhancements: Implemented file upload and VectorDB creation/configuration directly in UI. -- Vector Store Improvements: Added keyword, vector, and hybrid search inside vector store. -- Added S3 authorization support for file providers. -- SQL Store: Added inequality support to where clause. - -### Documentation - -- Fixed post-training docs. -- Added Contributor Guidelines for creating Internal vs. External providers. - -### Fixes - -- Removed unsupported bfcl scoring function. -- Multiple reliability and configuration fixes for providers and environment handling. - -### Engineering / Chores - -- Cleaner internal development setup with consistent paths. -- Incremental improvements to provider integration and vector store behavior. 
- - -### New Contributors -- @omertuc made their first contribution in #3270 -- @r3v5 made their first contribution in vector store hybrid search - ---- - -# v0.2.19 -Published on: 2025-08-26T22:06:55Z - -## Highlights -* feat: Add CORS configuration support for server by @skamenan7 in https://github.com/llamastack/llama-stack/pull/3201 -* feat(api): introduce /rerank by @ehhuang in https://github.com/llamastack/llama-stack/pull/2940 -* feat: Add S3 Files Provider by @mattf in https://github.com/llamastack/llama-stack/pull/3202 - - ---- - -# v0.2.18 -Published on: 2025-08-20T01:09:27Z - -## Highlights -* Add moderations create API -* Hybrid search in Milvus -* Numerous Responses API improvements -* Documentation updates - - ---- - -# v0.2.17 -Published on: 2025-08-05T01:51:14Z - -## Highlights - -* feat(tests): introduce inference record/replay to increase test reliability by @ashwinb in https://github.com/meta-llama/llama-stack/pull/2941 -* fix(library_client): improve initialization error handling and prevent AttributeError by @mattf in https://github.com/meta-llama/llama-stack/pull/2944 -* fix: use OLLAMA_URL to activate Ollama provider in starter by @ashwinb in https://github.com/meta-llama/llama-stack/pull/2963 -* feat(UI): adding MVP playground UI by @franciscojavierarceo in https://github.com/meta-llama/llama-stack/pull/2828 -* Standardization of errors (@nathan-weinberg) -* feat: Enable DPO training with HuggingFace inline provider by @Nehanth in https://github.com/meta-llama/llama-stack/pull/2825 -* chore: rename templates to distributions by @ashwinb in https://github.com/meta-llama/llama-stack/pull/3035 - - ---- - -# v0.2.16 -Published on: 2025-07-28T23:35:23Z - -## Highlights - -* Automatic model registration for self-hosted providers (ollama and vllm currently). No need for `INFERENCE_MODEL` environment variables which need to be updated, etc. -* Much simplified starter distribution. Most `ENABLE_` env variables are now gone. When you set `VLLM_URL`, the `vllm` provider is auto-enabled. Similar for `MILVUS_URL`, `PGVECTOR_DB`, etc. Check the [run.yaml](https://github.com/meta-llama/llama-stack/blob/main/llama_stack/templates/starter/run.yaml) for more details. -* All tests migrated to pytest now (thanks @Elbehery) -* DPO implementation in the post-training provider (thanks @Nehanth) -* (Huge!) Support for external APIs and providers thereof (thanks @leseb, @cdoern and others). This is a really big deal -- you can now add more APIs completely out of tree and experiment with them before (optionally) wanting to contribute back. -* `inline::vllm` provider is gone thank you very much -* several improvements to OpenAI inference implementations and LiteLLM backend (thanks @mattf) -* Chroma now supports Vector Store API (thanks @franciscojavierarceo). -* Authorization improvements: Vector Store/File APIs now supports access control (thanks @franciscojavierarceo); Telemetry read APIs are gated according to logged-in user's roles. 
- - - ---- - -# v0.2.15 -Published on: 2025-07-16T03:30:01Z - - - ---- - -# v0.2.14 -Published on: 2025-07-04T16:06:48Z - -## Highlights - -* Support for Llama Guard 4 -* Added Milvus support to vector-stores API -* Documentation and zero-to-hero updates for latest APIs - - ---- - -# v0.2.13 -Published on: 2025-06-28T04:28:11Z - -## Highlights -* search_mode support in OpenAI vector store API -* Security fixes - - ---- - -# v0.2.12 -Published on: 2025-06-20T22:52:12Z - -## Highlights -* Filter support in file search -* Support auth attributes in inference and response stores - - ---- - -# v0.2.11 -Published on: 2025-06-17T20:26:26Z - -## Highlights -* OpenAI-compatible vector store APIs -* Hybrid Search in Sqlite-vec -* File search tool in Responses API -* Pagination in inference and response stores -* Added `suffix` to completions API for fill-in-the-middle tasks - - ---- - -# v0.2.10.1 -Published on: 2025-06-06T20:11:02Z - -## Highlights -* ChromaDB provider fix - - ---- - -# v0.2.10 -Published on: 2025-06-05T23:21:45Z - -## Highlights - -* OpenAI-compatible embeddings API -* OpenAI-compatible Files API -* Postgres support in starter distro -* Enable ingestion of precomputed embeddings -* Full multi-turn support in Responses API -* Fine-grained access control policy - - ---- - -# v0.2.9 -Published on: 2025-05-30T20:01:56Z - -## Highlights -* Added initial streaming support in Responses API -* UI view for Responses -* Postgres inference store support - - ---- - -# v0.2.8 -Published on: 2025-05-27T21:03:47Z - -# Release v0.2.8 - -## Highlights - -* Server-side MCP with auth firewalls now works in the Stack - both for Agents and Responses -* Get chat completions APIs and UI to show chat completions -* Enable keyword search for sqlite-vec - - ---- - -# v0.2.7 -Published on: 2025-05-16T20:38:10Z - -## Highlights - -This is a small update. But a couple highlights: - -* feat: function tools in OpenAI Responses by @bbrowning in https://github.com/meta-llama/llama-stack/pull/2094, getting closer to ready. Streaming is the next missing piece. -* feat: Adding support for customizing chunk context in RAG insertion and querying by @franciscojavierarceo in https://github.com/meta-llama/llama-stack/pull/2134 -* feat: scaffolding for Llama Stack UI by @ehhuang in https://github.com/meta-llama/llama-stack/pull/2149, more to come in the coming releases. - - ---- - -# v0.2.6 -Published on: 2025-05-12T18:06:52Z - - - ---- - -# v0.2.5 -Published on: 2025-05-04T20:16:49Z - - - ---- - -# v0.2.4 -Published on: 2025-04-29T17:26:01Z - -## Highlights - -* One-liner to install and run Llama Stack yay! by @reluctantfuturist in https://github.com/meta-llama/llama-stack/pull/1383 -* support for NVIDIA NeMo datastore by @raspawar in https://github.com/meta-llama/llama-stack/pull/1852 -* (yuge!) Kubernetes authentication by @leseb in https://github.com/meta-llama/llama-stack/pull/1778 -* (yuge!) OpenAI Responses API by @bbrowning in https://github.com/meta-llama/llama-stack/pull/1989 -* add api.llama provider, llama-guard-4 model by @ashwinb in https://github.com/meta-llama/llama-stack/pull/2058 - - ---- - -# v0.2.3 -Published on: 2025-04-25T22:46:21Z - -## Highlights - -* OpenAI compatible inference endpoints and client-SDK support. `client.chat.completions.create()` now works. -* significant improvements and functionality added to the nVIDIA distribution -* many improvements to the test verification suite. 
-* new inference providers: Ramalama, IBM WatsonX -* many improvements to the Playground UI - - ---- - -# v0.2.2 -Published on: 2025-04-13T01:19:49Z - -## Main changes - -- Bring Your Own Provider (@leseb) - use out-of-tree provider code to execute the distribution server -- OpenAI compatible inference API in progress (@bbrowning) -- Provider verifications (@ehhuang) -- Many updates and fixes to playground -- Several llama4 related fixes - - ---- - -# v0.2.1 -Published on: 2025-04-05T23:13:00Z - - - ---- - -# v0.2.0 -Published on: 2025-04-05T19:04:29Z - -## Llama 4 Support - -Checkout more at https://www.llama.com - - - ---- - -# v0.1.9 -Published on: 2025-03-29T00:52:23Z - -### Build and Test Agents -* Agents: Entire document context with attachments -* RAG: Documentation with sqlite-vec faiss comparison -* Getting started: Fixes to getting started notebook. - -### Agent Evals and Model Customization -* (**New**) Post-training: Add nemo customizer - -### Better Engineering -* Moved sqlite-vec to non-blocking calls -* Don't return a payload on file delete - - - ---- - -# v0.1.8 -Published on: 2025-03-24T01:28:50Z - -# v0.1.8 Release Notes - -### Build and Test Agents -* Safety: Integrated NVIDIA as a safety provider. -* VectorDB: Added Qdrant as an inline provider. -* Agents: Added support for multiple tool groups in agents. -* Agents: Simplified imports for Agents in client package - - -### Agent Evals and Model Customization -* Introduced DocVQA and IfEval benchmarks. - -### Deploying and Monitoring Agents -* Introduced a Containerfile and image workflow for the Playground. -* Implemented support for Bearer (API Key) authentication. -* Added attribute-based access control for resources. -* Fixes on docker deployments: use --pull always and standardized the default port to 8321 -* Deprecated: /v1/inspect/providers use /v1/providers/ instead - -### Better Engineering -* Consolidated scripts under the ./scripts directory. -* Addressed mypy violations in various modules. -* Added Dependabot scans for Python dependencies. -* Implemented a scheduled workflow to update the changelog automatically. -* Enforced concurrency to reduce CI loads. 
- - -### New Contributors -* @cmodi-meta made their first contribution in https://github.com/meta-llama/llama-stack/pull/1650 -* @jeffmaury made their first contribution in https://github.com/meta-llama/llama-stack/pull/1671 -* @derekhiggins made their first contribution in https://github.com/meta-llama/llama-stack/pull/1698 -* @Bobbins228 made their first contribution in https://github.com/meta-llama/llama-stack/pull/1745 - -**Full Changelog**: https://github.com/meta-llama/llama-stack/compare/v0.1.7...v0.1.8 - ---- - -# v0.1.7 -Published on: 2025-03-14T22:30:51Z - -## 0.1.7 Release Notes - -### Build and Test Agents -* Inference: ImageType is now refactored to LlamaStackImageType -* Inference: Added tests to measure TTFT -* Inference: Bring back usage metrics -* Agents: Added endpoints to get an agent, list agents, and list sessions -* Agents: Automated conversion of type hints in client tools to the LiteLLM format -* Agents: Deprecated ToolResponseMessage in the agent.resume API -* Added Provider API for listing and inspecting provider info - -### Agent Evals and Model Customization -* Eval: Added new eval benchmarks Math 500 and BFCL v3 - -### Deploy and Monitoring of Agents -* Telemetry: Fix tracing to work across coroutines - -### Better Engineering -* Display code coverage for unit tests -* Updated call sites (inference, tool calls, agents) to move to async non-blocking calls -* Unit tests also run on Python 3.11, 3.12, and 3.13 -* Added ollama inference to the Integration tests CI -* Improved documentation across examples, testing, CLI; updated the providers table
- -### Deploy and Monitoring of Agents -* Better support for different log levels across all components for better monitoring - -### Better Engineering -* Enhance OpenAPI spec to include Error types across all APIs -* Moved all tests to /tests and created unit tests to run on each PR -* Removed all dependencies on llama-models repo - - ---- - -# v0.1.5.1 -Published on: 2025-02-28T22:37:44Z - -## 0.1.5.1 Release Notes -* Fixes for security risk in https://github.com/meta-llama/llama-stack/pull/1327 and https://github.com/meta-llama/llama-stack/pull/1328 - -**Full Changelog**: https://github.com/meta-llama/llama-stack/compare/v0.1.5...v0.1.5.1 - ---- - -# v0.1.5 -Published on: 2025-02-28T18:14:01Z - -## 0.1.5 Release Notes -### Build Agents -* Inference: Support more non-llama models (openai, anthropic, gemini) -* Inference: Can use the provider's model name in addition to the HF alias -* Inference: Fixed issues with calling tools that weren't specified in the prompt -* RAG: Improved system prompt for RAG and no more need for hard-coded rag-tool calling -* Embeddings: Added support for Nemo retriever embedding models -* Tools: Added support for MCP tools in Ollama Distribution -* Distributions: Added new Groq distribution - -### Customize Models -* Save post-trained checkpoint in SafeTensor format to allow Ollama inference provider to use the post-trained model - -### Monitor agents -* More comprehensive logging of agent steps including client tools -* Telemetry inputs/outputs are now structured and queryable -* Ability to retrieve agents session, turn, step by ids - -### Better Engineering -* Moved executorch Swift code out of this repo into the llama-stack-client-swift repo, similar to kotlin -* Move most logging to use logger instead of prints -* Completed text /chat-completion and /completion tests - - ---- - -# v0.1.4 -Published on: 2025-02-25T00:02:43Z - -## v0.1.4 Release Notes -Here are the key changes coming as part of this release: - -### Build and Test Agents -* Inference: Added support for non-llama models -* Inference: Added option to list all downloaded models and remove models -* Agent: Introduce new api agents.resume_turn to include client side tool execution in the same turn -* Agent: AgentConfig introduces new variable “tool_config” that allows for better tool configuration and system prompt overrides -* Agent: Added logging for agent step start and completion times -* Agent: Added support for logging for tool execution metadata -* Embedding: Updated /inference/embeddings to support asymmetric models, truncation and variable sized outputs -* Embedding: Updated embedding models for Ollama, Together, and Fireworks with available defaults -* VectorIO: Improved performance of sqlite-vec using chunked writes -### Agent Evals and Model Customization -* Deprecated api /eval-tasks. Use /eval/benchmark instead -* Added CPU training support for TorchTune -### Deploy and Monitoring of Agents -* Consistent view of client and server tool calls in telemetry -### Better Engineering -* Made tests more data-driven for consistent evaluation -* Fixed documentation links and improved API reference generation -* Various small fixes for build scripts and system reliability - - - ---- - -# v0.1.3 -Published on: 2025-02-14T20:24:32Z - -## v0.1.3 Release - -Here are some key changes that are coming as part of this release. 
- -### Build and Test Agents -Streamlined the initial development experience -- Added support for llama stack run --image-type venv -- Enhanced vector store options with new sqlite-vec provider and improved Qdrant integration -- vLLM improvements for tool calling and logprobs -- Better handling of sporadic code_interpreter tool calls - -### Agent Evals -Better benchmarking and Agent performance assessment -- Renamed eval API /eval-task to /benchmarks -- Improved documentation and notebooks for RAG and evals - -### Deploy and Monitoring of Agents -Improved production readiness -- Added usage metrics collection for chat completions -- CLI improvements for provider information -- Improved error handling and system reliability -- Better model endpoint handling and accessibility -- Improved signal handling on distro server - -### Better Engineering -Infrastructure and code quality improvements -- Faster text-based chat completion tests -- Improved testing for non-streaming agent APIs -- Standardized import formatting with ruff linter -- Added conventional commits standard -- Fixed documentation parsing issues - - ---- - -# v0.1.2 -Published on: 2025-02-07T22:06:49Z - -# TL;DR -- Several stabilizations to development flows after the switch to `uv` -- Migrated CI workflows to new OSS repo - [llama-stack-ops](https://github.com/meta-llama/llama-stack-ops) -- Added automated rebuilds for ReadTheDocs -- Llama Stack server supports HTTPS -- Added system prompt overrides support -- Several bug fixes and improvements to documentation (check out the Kubernetes deployment guide by @terrytangyuan) - - ---- - -# v0.1.1 -Published on: 2025-02-02T02:29:24Z - -A bunch of small / big improvements everywhere, including support for Windows, switching to `uv`, and many provider improvements. - - ---- - -# v0.1.0 -Published on: 2025-01-24T17:47:47Z - -We are excited to announce a stable API release of Llama Stack, which enables developers to build RAG applications and Agents using tools and safety shields, monitor those agents with telemetry, and evaluate them with scoring functions. - -## Context -GenAI application developers need more than just an LLM - they need to integrate tools, connect with their data sources, establish guardrails, and ground the LLM responses effectively. Currently, developers must piece together various tools and APIs, complicating the development lifecycle and increasing costs. The result is that developers are spending more time on these integrations rather than focusing on the application logic itself. The bespoke coupling of components also makes it challenging to adopt state-of-the-art solutions in the rapidly evolving GenAI space. This is particularly difficult for open models like Llama, as best practices are not widely established in the open. - -Llama Stack was created to provide developers with a comprehensive and coherent interface that simplifies AI application development and codifies best practices across the Llama ecosystem. Since our launch in September 2024, we have seen a huge uptick in interest in Llama Stack APIs from both AI developers and partners building AI services with Llama models. Partners like Nvidia, Fireworks, and Ollama have collaborated with us to develop implementations across various APIs, including inference, memory, and safety. - -With Llama Stack, you can easily build a RAG agent which can also search the web, do complex math, and call custom tools. You can use telemetry to inspect those traces, and convert telemetry into evals datasets.
And with Llama Stack’s plugin architecture and prepackaged distributions, you can choose to run your agent anywhere - in the cloud with our partners, deploy your own environment using virtualenv or Docker, operate locally with Ollama, or even run on mobile devices with our SDKs. Llama Stack offers unprecedented flexibility while also simplifying the developer experience. - -## Release -After iterating on the APIs for the last 3 months, today we’re launching a stable release (V1) of the Llama Stack APIs and the corresponding llama-stack server and client packages (v0.1.0). We now have automated tests for providers. These tests make sure that all provider implementations are verified. Developers can now easily and reliably select distributions or providers based on their specific requirements. - -There are example standalone apps in llama-stack-apps. - - -## Key Features of this release - -- **Unified API Layer** - - Inference: Run LLM models - - RAG: Store and retrieve knowledge for RAG - - Agents: Build multi-step agentic workflows - - Tools: Register tools that can be called by the agent - - Safety: Apply content filtering and safety policies - - Evaluation: Test model and agent quality - - Telemetry: Collect and analyze usage data and complex agentic traces - - Post Training (Coming Soon): Fine-tune models for specific use cases - -- **Rich Provider Ecosystem** - - Local Development: Meta's Reference, Ollama - - Cloud: Fireworks, Together, Nvidia, AWS Bedrock, Groq, Cerebras - - On-premises: Nvidia NIM, vLLM, TGI, Dell-TGI - - On-device: iOS and Android support - -- **Built for Production** - - Pre-packaged distributions for common deployment scenarios - - Backwards compatibility across model versions - - Comprehensive evaluation capabilities - - Full observability and monitoring - -- **Multiple developer interfaces** - - CLI: Command line interface - - Python SDK - - Swift iOS SDK - - Kotlin Android SDK - -- **Sample llama stack applications** - - Python - - iOS - - Android - - - ---- - -# v0.1.0rc12 -Published on: 2025-01-22T22:24:01Z - - - ---- - -# v0.0.63 -Published on: 2024-12-18T07:17:43Z - -A small but important bug-fix release to update the URL datatype for the client-SDKs. The issue especially affected multimodal agentic turns. - -**Full Changelog**: https://github.com/meta-llama/llama-stack/compare/v0.0.62...v0.0.63 - ---- - diff --git a/scripts/gen-changelog.py b/scripts/gen-changelog.py deleted file mode 100755 index 3df2af06b..000000000 --- a/scripts/gen-changelog.py +++ /dev/null @@ -1,74 +0,0 @@ -#!/usr/bin/env python -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree.
- -import os - -import requests - - -def get_all_releases(token): - url = "https://api.github.com/repos/meta-llama/llama-stack/releases" - headers = {"Accept": "application/vnd.github.v3+json"} - - if token: - headers["Authorization"] = f"token {token}" - - response = requests.get(url, headers=headers) - - if response.status_code == 200: - return response.json() - else: - raise Exception(f"Error fetching releases: {response.status_code}, {response.text}") - - -def clean_release_body(body): - """Remove '## All changes' sections from release notes.""" - lines = body.split("\n") - cleaned_lines = [] - skip_mode = False - - for line in lines: - if line.strip() in [ - "## All changes", - "### What's Changed", - "## What's Changed", - "## New Contributors", - ]: - skip_mode = True - elif skip_mode and line.startswith("##"): - # Found a new section, stop skipping - skip_mode = False - cleaned_lines.append(line) - elif not skip_mode: - cleaned_lines.append(line) - - return "\n".join(cleaned_lines) - - -def merge_release_notes(output_file, token=None): - releases = get_all_releases(token) - - with open(output_file, "w", encoding="utf-8") as md_file: - md_file.write("# Changelog\n\n") - - for release in releases: - md_file.write(f"# {release['tag_name']}\n") - md_file.write(f"Published on: {release['published_at']}\n\n") - - # Clean the release body to remove "## All changes" sections - cleaned_body = clean_release_body(release["body"]) - md_file.write(f"{cleaned_body}\n\n") - - md_file.write("---\n\n") - - print(f"Merged release notes saved to {output_file}") - - -if __name__ == "__main__": - OUTPUT_FILE = "CHANGELOG.md" - TOKEN = os.getenv("GITHUB_TOKEN") - merge_release_notes(OUTPUT_FILE, TOKEN) From a7c7c724679b2c19683925d78c33b63e79d2aff3 Mon Sep 17 00:00:00 2001 From: Derek Higgins Date: Fri, 28 Nov 2025 12:43:44 +0000 Subject: [PATCH 11/17] docs: fix logging environment variable separator in example (#4254) Correct the separator to comma in LLAMA_STACK_LOGGING example. --- docs/docs/distributions/starting_llama_stack_server.mdx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/docs/distributions/starting_llama_stack_server.mdx b/docs/docs/distributions/starting_llama_stack_server.mdx index 20bcfa1e4..ed1964444 100644 --- a/docs/docs/distributions/starting_llama_stack_server.mdx +++ b/docs/docs/distributions/starting_llama_stack_server.mdx @@ -27,7 +27,7 @@ If you have built a container image and want to deploy it in a Kubernetes cluste Control log output via environment variables before starting the server. -- `LLAMA_STACK_LOGGING` sets per-component levels, e.g. `LLAMA_STACK_LOGGING=server=debug;core=info`. +- `LLAMA_STACK_LOGGING` sets per-component levels, e.g. `LLAMA_STACK_LOGGING=server=debug,core=info`. - Supported categories: `all`, `core`, `server`, `router`, `inference`, `agents`, `safety`, `eval`, `tools`, `client`. - Levels: `debug`, `info`, `warning`, `error`, `critical` (default is `info`). Use `all=` to apply globally. - `LLAMA_STACK_LOG_FILE=/path/to/log` mirrors logs to a file while still printing to stdout. 
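The corrected example composes with the other documented variables. A minimal sketch of a debugging session, assuming the `starter` distribution:

```sh
# Comma-separated per-category levels; all= sets the global default
export LLAMA_STACK_LOGGING="server=debug,core=info,all=warning"
# Mirror logs to a file while still printing to stdout
export LLAMA_STACK_LOG_FILE=/tmp/llama-stack.log

llama stack run starter
```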
From dbaa9ae5e3ddfe9e287f3a70d04b16d0fec007f9 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 1 Dec 2025 09:55:56 -0800 Subject: [PATCH 12/17] chore(github-deps): bump actions/setup-python from 6.0.0 to 6.1.0 (#4259) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps [actions/setup-python](https://github.com/actions/setup-python) from 6.0.0 to 6.1.0.
Release notes

Sourced from actions/setup-python's releases.

v6.1.0: the What's Changed notes cover enhancements, dependency and documentation updates, and new contributors.
Full Changelog: https://github.com/actions/setup-python/compare/v6...v6.1.0

Commits
  • 83679a8 Bump @types/node from 24.1.0 to 24.9.1 and update macos-13 to macos-15-intel ...
  • bfc4944 Bump prettier from 3.5.3 to 3.6.2 (#1234)
  • 97aeb3e Bump requests from 2.32.2 to 2.32.4 in /tests/data (#1130)
  • 443da59 Bump actions/publish-action from 0.3.0 to 0.4.0 & Documentation update for pi...
  • cfd55ca graalpy: add graalpy early-access and windows builds (#880)
  • bba65e5 Bump typescript from 5.4.2 to 5.9.3 and update docs/advanced-usage.md (#1094)
  • 18566f8 Improve wording and "fix example" (remove 3.13) on testing against pre-releas...
  • 2e3e4b1 Add support for pip-install input (#1201)
  • 4267e28 Bump urllib3 from 1.26.19 to 2.5.0 in /tests/data and document breaking c...
  • See full diff in compare view

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=actions/setup-python&package-manager=github_actions&previous-version=6.0.0&new-version=6.1.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR: - `@dependabot rebase` will rebase this PR - `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it - `@dependabot merge` will merge this PR after your CI passes on it - `@dependabot squash and merge` will squash and merge this PR after your CI passes on it - `@dependabot cancel merge` will cancel a previously requested merge and block automerging - `@dependabot reopen` will reopen this PR if it is closed - `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually - `@dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency - `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
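The workflow hunks below follow the standard convention for pinning GitHub Actions: the action is referenced by a full commit SHA, with the release tag kept as a trailing comment so tools and reviewers can see which version the SHA corresponds to. A minimal sketch of the pattern (the job layout is illustrative):

```yaml
jobs:
  test:
    runs-on: ubuntu-latest
    steps:
      # Pin to an immutable commit SHA rather than a movable tag; the trailing
      # comment records the release tag that the SHA corresponds to.
      - uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0
        with:
          python-version: '3.12'
```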
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/backward-compat.yml | 4 ++-- .github/workflows/pre-commit.yml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/backward-compat.yml b/.github/workflows/backward-compat.yml index fd2c52d64..cf74e8d7f 100644 --- a/.github/workflows/backward-compat.yml +++ b/.github/workflows/backward-compat.yml @@ -32,7 +32,7 @@ jobs: fetch-depth: 0 # Need full history to access main branch - name: Set up Python - uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6.0.0 + uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0 with: python-version: '3.12' @@ -410,7 +410,7 @@ jobs: fetch-depth: 0 - name: Set up Python - uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6.0.0 + uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0 with: python-version: '3.12' diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml index edf4ca859..10a663514 100644 --- a/.github/workflows/pre-commit.yml +++ b/.github/workflows/pre-commit.yml @@ -30,7 +30,7 @@ jobs: fetch-depth: ${{ github.actor == 'dependabot[bot]' && 0 || 1 }} - name: Set up Python - uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6.0.0 + uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0 with: python-version: '3.12' cache: pip From 8d01baeb596b2889b6c778dfe973c61f6d088aae Mon Sep 17 00:00:00 2001 From: Derek Higgins Date: Mon, 1 Dec 2025 17:57:44 +0000 Subject: [PATCH 13/17] test: Update JWKS tests to properly mock authentication (#4257) PyJWKClient uses urllib.request.urlopen to fetch JWKS keys, not httpx.AsyncClient.get; the wrong patch target caused real HTTP requests to non-existent URLs, causing timeouts.
Closes: #4256 Signed-off-by: Derek Higgins --- tests/unit/server/test_auth.py | 76 ++++++++++++++++++++-------------- 1 file changed, 46 insertions(+), 30 deletions(-) diff --git a/tests/unit/server/test_auth.py b/tests/unit/server/test_auth.py index 57a552514..2db60c91c 100644 --- a/tests/unit/server/test_auth.py +++ b/tests/unit/server/test_auth.py @@ -364,23 +364,6 @@ def test_invalid_auth_header_format_oauth2(oauth2_client): assert "Invalid Authorization header format" in response.json()["error"]["message"] -async def mock_jwks_response(*args, **kwargs): - return MockResponse( - 200, - { - "keys": [ - { - "kid": "1234567890", - "kty": "oct", - "alg": "HS256", - "use": "sig", - "k": base64.b64encode(b"foobarbaz").decode(), - } - ] - }, - ) - - @pytest.fixture def jwt_token_valid(): import jwt @@ -421,28 +404,60 @@ def mock_jwks_urlopen(): yield mock_urlopen +@pytest.fixture +def mock_jwks_urlopen_with_auth_required(): + """Mock urllib.request.urlopen that requires Bearer token for JWKS requests.""" + with patch("urllib.request.urlopen") as mock_urlopen: + + def side_effect(request, **kwargs): + # Check if Authorization header is present + auth_header = request.headers.get("Authorization") if hasattr(request, "headers") else None + + if not auth_header or not auth_header.startswith("Bearer "): + # Simulate 401 Unauthorized + import urllib.error + + raise urllib.error.HTTPError( + url=request.full_url if hasattr(request, "full_url") else "", + code=401, + msg="Unauthorized", + hdrs={}, + fp=None, + ) + + # Mock the JWKS response for PyJWKClient + mock_response = Mock() + mock_response.read.return_value = json.dumps( + { + "keys": [ + { + "kid": "1234567890", + "kty": "oct", + "alg": "HS256", + "use": "sig", + "k": base64.b64encode(b"foobarbaz").decode(), + } + ] + } + ).encode() + return mock_response + + mock_urlopen.side_effect = side_effect + yield mock_urlopen + + def test_valid_oauth2_authentication(oauth2_client, jwt_token_valid, mock_jwks_urlopen): response = oauth2_client.get("/test", headers={"Authorization": f"Bearer {jwt_token_valid}"}) assert response.status_code == 200 assert response.json() == {"message": "Authentication successful"} -@patch("httpx.AsyncClient.get", new=mock_jwks_response) -def test_invalid_oauth2_authentication(oauth2_client, invalid_token, suppress_auth_errors): +def test_invalid_oauth2_authentication(oauth2_client, invalid_token, mock_jwks_urlopen, suppress_auth_errors): response = oauth2_client.get("/test", headers={"Authorization": f"Bearer {invalid_token}"}) assert response.status_code == 401 assert "Invalid JWT token" in response.json()["error"]["message"] -async def mock_auth_jwks_response(*args, **kwargs): - if "headers" not in kwargs or "Authorization" not in kwargs["headers"]: - return MockResponse(401, {}) - authz = kwargs["headers"]["Authorization"] - if authz != "Bearer my-jwks-token": - return MockResponse(401, {}) - return await mock_jwks_response(args, kwargs) - - @pytest.fixture def oauth2_app_with_jwks_token(): app = FastAPI() @@ -472,8 +487,9 @@ def oauth2_client_with_jwks_token(oauth2_app_with_jwks_token): return TestClient(oauth2_app_with_jwks_token) -@patch("httpx.AsyncClient.get", new=mock_auth_jwks_response) -def test_oauth2_with_jwks_token_expected(oauth2_client, jwt_token_valid, suppress_auth_errors): +def test_oauth2_with_jwks_token_expected( + oauth2_client, jwt_token_valid, mock_jwks_urlopen_with_auth_required, suppress_auth_errors +): response = oauth2_client.get("/test", headers={"Authorization": f"Bearer {jwt_token_valid}"}) 
assert response.status_code == 401 From 7da733091a48f85e40ce8cc71c98ee3c1522d932 Mon Sep 17 00:00:00 2001 From: Emilio Garcia Date: Mon, 1 Dec 2025 13:33:18 -0500 Subject: [PATCH 14/17] feat!: Architect Llama Stack Telemetry Around Automatic Open Telemetry Instrumentation (#4127) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # What does this PR do? Fixes: https://github.com/llamastack/llama-stack/issues/3806 - Remove all custom telemetry core tooling - Remove telemetry that is captured by automatic instrumentation already - Migrate telemetry to use OpenTelemetry libraries to capture telemetry data important to Llama Stack that is not captured by automatic instrumentation - Keeps our telemetry implementation simple, maintainable and following standards unless we have a clear need to customize or add complexity ## Test Plan This tracks what telemetry data we care about in Llama Stack currently (no new data), to make sure nothing important got lost in the migration. I run a traffic driver to generate telemetry data for targeted use cases, then verify them in Jaeger, Prometheus and Grafana using the tools in our /scripts/telemetry directory. ### Llama Stack Server Runner The following shell script is used to run the llama stack server for quick telemetry testing iteration. ```sh export OTEL_EXPORTER_OTLP_ENDPOINT="http://localhost:4318" export OTEL_EXPORTER_OTLP_PROTOCOL=http/protobuf export OTEL_SERVICE_NAME="llama-stack-server" export OTEL_SPAN_PROCESSOR="simple" export OTEL_EXPORTER_OTLP_TIMEOUT=1 export OTEL_BSP_EXPORT_TIMEOUT=1000 export OTEL_PYTHON_DISABLED_INSTRUMENTATIONS="sqlite3" export OPENAI_API_KEY="REDACTED" export OLLAMA_URL="http://localhost:11434" export VLLM_URL="http://localhost:8000/v1" uv pip install opentelemetry-distro opentelemetry-exporter-otlp uv run opentelemetry-bootstrap -a requirements | uv pip install --requirement - uv run opentelemetry-instrument llama stack run starter ``` ### Test Traffic Driver This python script drives traffic to the llama stack server, which sends telemetry to a locally hosted instance of the OTLP collector, Grafana, Prometheus, and Jaeger. 
```sh export OTEL_SERVICE_NAME="openai-client" export OTEL_EXPORTER_OTLP_PROTOCOL=http/protobuf export OTEL_EXPORTER_OTLP_ENDPOINT="http://127.0.0.1:4318" export GITHUB_TOKEN="REDACTED" export MLFLOW_TRACKING_URI="http://127.0.0.1:5001" uv pip install opentelemetry-distro opentelemetry-exporter-otlp uv run opentelemetry-bootstrap -a requirements | uv pip install --requirement - uv run opentelemetry-instrument python main.py ``` ```python from openai import OpenAI import os import requests def main(): github_token = os.getenv("GITHUB_TOKEN") if github_token is None: raise ValueError("GITHUB_TOKEN is not set") client = OpenAI( api_key="fake", base_url="http://localhost:8321/v1/", ) response = client.chat.completions.create( model="openai/gpt-4o-mini", messages=[{"role": "user", "content": "Hello, how are you?"}] ) print("Sync response: ", response.choices[0].message.content) streaming_response = client.chat.completions.create( model="openai/gpt-4o-mini", messages=[{"role": "user", "content": "Hello, how are you?"}], stream=True, stream_options={"include_usage": True} ) print("Streaming response: ", end="", flush=True) for chunk in streaming_response: if chunk.usage is not None: print("Usage: ", chunk.usage) if chunk.choices and chunk.choices[0].delta is not None: print(chunk.choices[0].delta.content, end="", flush=True) print() ollama_response = client.chat.completions.create( model="ollama/llama3.2:3b-instruct-fp16", messages=[{"role": "user", "content": "How are you doing today?"}] ) print("Ollama response: ", ollama_response.choices[0].message.content) vllm_response = client.chat.completions.create( model="vllm/Qwen/Qwen3-0.6B", messages=[{"role": "user", "content": "How are you doing today?"}] ) print("VLLM response: ", vllm_response.choices[0].message.content) responses_list_tools_response = client.responses.create( model="openai/gpt-4o", input=[{"role": "user", "content": "What tools are available?"}], tools=[ { "type": "mcp", "server_label": "github", "server_url": "https://api.githubcopilot.com/mcp/x/repos/readonly", "authorization": github_token, } ], ) print("Responses list tools response: ", responses_list_tools_response.output_text) responses_tool_call_response = client.responses.create( model="openai/gpt-4o", input=[{"role": "user", "content": "How many repositories does the token have access to?"}], tools=[ { "type": "mcp", "server_label": "github", "server_url": "https://api.githubcopilot.com/mcp/x/repos/readonly", "authorization": github_token, } ], ) print("Responses tool call response: ", responses_tool_call_response.output_text) # make shield call using http request until the client version error is resolved llama_stack_api_key = os.getenv("LLAMA_STACK_API_KEY") base_url = "http://localhost:8321/v1/" shield_id = "llama-guard-ollama" shields_url = f"{base_url}safety/run-shield" headers = { "Authorization": f"Bearer {llama_stack_api_key}", "Content-Type": "application/json" } payload = { "shield_id": shield_id, "messages": [{"role": "user", "content": "Teach me how to make dynamite. 
I want to do a crime with it."}], "params": {} }
    shields_response = requests.post(shields_url, json=payload, headers=headers)
    shields_response.raise_for_status()
    print("risk assessment response: ", shields_response.json())

if __name__ == "__main__":
    main()
```

### Span Data

#### Inference

| Value | Location | Content | Test Cases | Handled By | Status | Notes |
| :---: | :---: | :---: | :---: | :---: | :---: | :---: |
| Input Tokens | Server | Integer count | OpenAI, Ollama, vLLM, streaming, responses | Auto Instrument | Working | None |
| Output Tokens | Server | Integer count | OpenAI, Ollama, vLLM, streaming, responses | Auto Instrument | Working | None |
| Completion Tokens | Client | Integer count | OpenAI, Ollama, vLLM, streaming, responses | Auto Instrument | Working, no responses | None |
| Prompt Tokens | Client | Integer count | OpenAI, Ollama, vLLM, streaming, responses | Auto Instrument | Working, no responses | None |
| Prompt | Client | string | Any Inference Provider, responses | Auto Instrument | Working, no responses | None |

#### Safety

| Value | Location | Content | Testing | Handled By | Status | Notes |
| :---: | :---: | :---: | :---: | :---: | :---: | :---: |
| [Shield ID](https://github.com/iamemilio/llama-stack/blob/ecdfecb9f0bd821bf7800e4a742ee8fed59a486b/src/llama_stack/core/telemetry/constants.py) | Server | string | Llama-guard shield call | Custom Code | Working | Not Following Semconv |
| [Metadata](https://github.com/iamemilio/llama-stack/blob/ecdfecb9f0bd821bf7800e4a742ee8fed59a486b/src/llama_stack/core/telemetry/constants.py) | Server | JSON string | Llama-guard shield call | Custom Code | Working | Not Following Semconv |
| [Messages](https://github.com/iamemilio/llama-stack/blob/ecdfecb9f0bd821bf7800e4a742ee8fed59a486b/src/llama_stack/core/telemetry/constants.py) | Server | JSON string | Llama-guard shield call | Custom Code | Working | Not Following Semconv |
| [Response](https://github.com/iamemilio/llama-stack/blob/ecdfecb9f0bd821bf7800e4a742ee8fed59a486b/src/llama_stack/core/telemetry/constants.py) | Server | string | Llama-guard shield call | Custom Code | Working | Not Following Semconv |
| [Status](https://github.com/iamemilio/llama-stack/blob/ecdfecb9f0bd821bf7800e4a742ee8fed59a486b/src/llama_stack/core/telemetry/constants.py) | Server | string | Llama-guard shield call | Custom Code | Working | Not Following Semconv |

#### Remote Tool Listing & Execution

| Value | Location | Content | Testing | Handled By | Status | Notes |
| ----- | :---: | :---: | :---: | :---: | :---: | :---: |
| Tool name | server | string | Tool call occurs | Custom Code | working | [Not following semconv](https://opentelemetry.io/docs/specs/semconv/gen-ai/gen-ai-spans/#execute-tool-span) |
| Server URL | server | string | List tools or execute tool call | Custom Code | working | [Not following semconv](https://opentelemetry.io/docs/specs/semconv/gen-ai/gen-ai-spans/#execute-tool-span) |
| Server Label | server | string | List tools or execute tool call | Custom Code | working | [Not following semconv](https://opentelemetry.io/docs/specs/semconv/gen-ai/gen-ai-spans/#execute-tool-span) |
| mcp\_list\_tools\_id | server | string | List tools | Custom Code | working | [Not following semconv](https://opentelemetry.io/docs/specs/semconv/gen-ai/gen-ai-spans/#execute-tool-span) |

### Metrics

- Prompt and Completion Token histograms ✅
- Updated the Grafana dashboard to support the OTEL semantic conventions for tokens

### Observations

* sqlite spans get orphaned from the completions endpoint
  * Known OTEL issue; the recommended workaround is to disable sqlite instrumentation, since it is double wrapped and already covered by sqlalchemy. This is covered in the documentation.

  ```shell
  export OTEL_PYTHON_DISABLED_INSTRUMENTATIONS="sqlite3"
  ```

* Responses API instrumentation is [missing](https://github.com/open-telemetry/opentelemetry-python-contrib/issues/3436) in OpenTelemetry for OpenAI clients, even with traceloop or openllmetry
  * Upstream issues in opentelemetry-python-contrib
* A span is created for each streaming response, and each chunk is recorded, so very large spans get created; this is not ideal, but it is the intended behavior
* MCP telemetry needs to be updated to follow semantic conventions. We can probably use a library for this and handle it in a separate issue.

### Updated Grafana Dashboard

Screenshot 2025-11-17 at 12 53
52 PM ## Status ✅ Everything appears to be working and the data we expect is getting captured in the format we expect it. ## Follow Ups 1. Make tool calling spans follow semconv and capture more data 1. Consider using existing tracing library 2. Make shield spans follow semconv 3. Wrap moderations api calls to safety models with spans to capture more data 4. Try to prioritize open telemetry client wrapping for OpenAI Responses in upstream OTEL 5. This would break the telemetry tests, and they are currently disabled. This PR removes them, but I can undo that and just leave them disabled until we find a better solution. 6. Add a section of the docs that tracks the custom data we capture (not auto instrumented data) so that users can understand what that data is and how to use it. Commit those changes to the OTEL-gen_ai SIG if possible as well. Here is an [example](https://opentelemetry.io/docs/specs/semconv/gen-ai/aws-bedrock/) of how bedrock handles it. --- .../k8s-benchmark/stack-configmap.yaml | 7 - .../k8s-benchmark/stack_run_config.yaml | 2 - client-sdks/stainless/openapi.yml | 451 ------------- docs/docs/building_applications/telemetry.mdx | 201 +----- docs/docs/concepts/apis/index.mdx | 1 - .../distributions/k8s/stack-configmap.yaml | 7 - .../distributions/k8s/stack_run_config.yaml | 2 - .../self_hosted_distro/starter.md | 6 +- .../references/python_sdk_reference/index.md | 26 - docs/src/pages/index.js | 4 +- docs/static/deprecated-llama-stack-spec.yaml | 451 ------------- .../static/experimental-llama-stack-spec.yaml | 451 ------------- docs/static/llama-stack-spec.yaml | 451 ------------- docs/static/stainless-llama-stack-spec.yaml | 451 ------------- scripts/integration-tests.sh | 14 +- .../openapi_generator/schema_collection.py | 21 - scripts/telemetry/llama-stack-dashboard.json | 360 +++++++--- src/llama_stack/core/datatypes.py | 18 - src/llama_stack/core/library_client.py | 35 +- src/llama_stack/core/resolver.py | 14 - src/llama_stack/core/routers/__init__.py | 2 - src/llama_stack/core/routers/inference.py | 101 --- src/llama_stack/core/routers/safety.py | 22 +- src/llama_stack/core/server/server.py | 11 +- src/llama_stack/core/server/tracing.py | 80 --- src/llama_stack/core/telemetry/__init__.py | 32 - src/llama_stack/core/telemetry/telemetry.py | 629 ------------------ .../core/telemetry/trace_protocol.py | 154 ----- src/llama_stack/core/telemetry/tracing.py | 388 ----------- src/llama_stack/core/utils/context.py | 14 +- .../ci-tests/run-with-postgres-store.yaml | 2 - .../distributions/ci-tests/run.yaml | 2 - .../distributions/dell/run-with-safety.yaml | 2 - src/llama_stack/distributions/dell/run.yaml | 2 - .../meta-reference-gpu/run-with-safety.yaml | 2 - .../distributions/meta-reference-gpu/run.yaml | 2 - .../distributions/nvidia/run-with-safety.yaml | 2 - src/llama_stack/distributions/nvidia/run.yaml | 2 - src/llama_stack/distributions/oci/run.yaml | 2 - .../distributions/open-benchmark/run.yaml | 2 - .../distributions/postgres-demo/run.yaml | 2 - .../starter-gpu/run-with-postgres-store.yaml | 2 - .../distributions/starter-gpu/run.yaml | 2 - .../starter/run-with-postgres-store.yaml | 2 - .../distributions/starter/run.yaml | 2 - src/llama_stack/distributions/template.py | 3 - .../distributions/watsonx/run.yaml | 2 - src/llama_stack/log.py | 1 - .../inline/agents/meta_reference/__init__.py | 2 - .../inline/agents/meta_reference/agents.py | 2 - .../meta_reference/responses/streaming.py | 14 +- .../meta_reference/responses/tool_executor.py | 15 +- 
.../inline/agents/meta_reference/safety.py | 16 +- .../remote/inference/bedrock/bedrock.py | 3 +- .../remote/inference/watsonx/watsonx.py | 3 +- .../utils/inference/litellm_openai_mixin.py | 3 +- src/llama_stack/providers/utils/tools/mcp.py | 1 + src/llama_stack/telemetry/__init__.py | 5 + src/llama_stack/telemetry/constants.py | 27 + src/llama_stack/telemetry/helpers.py | 43 ++ .../inference/test_provider_data_routing.py | 7 +- .../telemetry/collectors/in_memory.py | 6 +- tests/integration/telemetry/conftest.py | 2 + tests/unit/cli/test_stack_config.py | 5 +- .../meta_reference/test_safety_optional.py | 4 +- 65 files changed, 438 insertions(+), 4162 deletions(-) delete mode 100644 src/llama_stack/core/server/tracing.py delete mode 100644 src/llama_stack/core/telemetry/__init__.py delete mode 100644 src/llama_stack/core/telemetry/telemetry.py delete mode 100644 src/llama_stack/core/telemetry/trace_protocol.py delete mode 100644 src/llama_stack/core/telemetry/tracing.py create mode 100644 src/llama_stack/telemetry/__init__.py create mode 100644 src/llama_stack/telemetry/constants.py create mode 100644 src/llama_stack/telemetry/helpers.py diff --git a/benchmarking/k8s-benchmark/stack-configmap.yaml b/benchmarking/k8s-benchmark/stack-configmap.yaml index 58518ec18..aed3b97c2 100644 --- a/benchmarking/k8s-benchmark/stack-configmap.yaml +++ b/benchmarking/k8s-benchmark/stack-configmap.yaml @@ -9,7 +9,6 @@ data: - inference - files - safety - - telemetry - tool_runtime - vector_io providers: @@ -67,12 +66,6 @@ data: db: ${env.POSTGRES_DB:=llamastack} user: ${env.POSTGRES_USER:=llamastack} password: ${env.POSTGRES_PASSWORD:=llamastack} - telemetry: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" - sinks: ${env.TELEMETRY_SINKS:=console} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search diff --git a/benchmarking/k8s-benchmark/stack_run_config.yaml b/benchmarking/k8s-benchmark/stack_run_config.yaml index 7992eb3c7..a0d636e09 100644 --- a/benchmarking/k8s-benchmark/stack_run_config.yaml +++ b/benchmarking/k8s-benchmark/stack_run_config.yaml @@ -126,8 +126,6 @@ registered_resources: provider_id: rag-runtime server: port: 8323 -telemetry: - enabled: true vector_stores: default_provider_id: chromadb default_embedding_model: diff --git a/client-sdks/stainless/openapi.yml b/client-sdks/stainless/openapi.yml index 9269b7e39..9703f94b5 100644 --- a/client-sdks/stainless/openapi.yml +++ b/client-sdks/stainless/openapi.yml @@ -12180,227 +12180,6 @@ components: - $ref: '#/components/schemas/OpenAIResponseContentPartReasoningText' title: OpenAIResponseContentPartReasoningText title: OpenAIResponseContentPartOutputText | OpenAIResponseContentPartRefusal | OpenAIResponseContentPartReasoningText - SpanEndPayload: - description: Payload for a span end event. - properties: - type: - const: span_end - default: span_end - title: Type - type: string - status: - $ref: '#/components/schemas/SpanStatus' - required: - - status - title: SpanEndPayload - type: object - SpanStartPayload: - description: Payload for a span start event. - properties: - type: - const: span_start - default: span_start - title: Type - type: string - name: - title: Name - type: string - parent_span_id: - anyOf: - - type: string - - type: 'null' - nullable: true - required: - - name - title: SpanStartPayload - type: object - SpanStatus: - description: The status of a span indicating whether it completed successfully or with an error. 
- enum: - - ok - - error - title: SpanStatus - type: string - StructuredLogPayload: - discriminator: - mapping: - span_end: '#/components/schemas/SpanEndPayload' - span_start: '#/components/schemas/SpanStartPayload' - propertyName: type - oneOf: - - $ref: '#/components/schemas/SpanStartPayload' - title: SpanStartPayload - - $ref: '#/components/schemas/SpanEndPayload' - title: SpanEndPayload - title: SpanStartPayload | SpanEndPayload - LogSeverity: - description: The severity level of a log message. - enum: - - verbose - - debug - - info - - warn - - error - - critical - title: LogSeverity - type: string - MetricEvent: - description: A metric event containing a measured value. - properties: - trace_id: - title: Trace Id - type: string - span_id: - title: Span Id - type: string - timestamp: - format: date-time - title: Timestamp - type: string - attributes: - anyOf: - - additionalProperties: - anyOf: - - type: string - - type: integer - - type: number - - type: boolean - - type: 'null' - title: string | ... (4 variants) - type: object - - type: 'null' - type: - const: metric - default: metric - title: Type - type: string - metric: - title: Metric - type: string - value: - anyOf: - - type: integer - - type: number - title: integer | number - unit: - title: Unit - type: string - required: - - trace_id - - span_id - - timestamp - - metric - - value - - unit - title: MetricEvent - type: object - StructuredLogEvent: - description: A structured log event containing typed payload data. - properties: - trace_id: - title: Trace Id - type: string - span_id: - title: Span Id - type: string - timestamp: - format: date-time - title: Timestamp - type: string - attributes: - anyOf: - - additionalProperties: - anyOf: - - type: string - - type: integer - - type: number - - type: boolean - - type: 'null' - title: string | ... (4 variants) - type: object - - type: 'null' - type: - const: structured_log - default: structured_log - title: Type - type: string - payload: - discriminator: - mapping: - span_end: '#/components/schemas/SpanEndPayload' - span_start: '#/components/schemas/SpanStartPayload' - propertyName: type - oneOf: - - $ref: '#/components/schemas/SpanStartPayload' - title: SpanStartPayload - - $ref: '#/components/schemas/SpanEndPayload' - title: SpanEndPayload - title: SpanStartPayload | SpanEndPayload - required: - - trace_id - - span_id - - timestamp - - payload - title: StructuredLogEvent - type: object - UnstructuredLogEvent: - description: An unstructured log event containing a simple text message. - properties: - trace_id: - title: Trace Id - type: string - span_id: - title: Span Id - type: string - timestamp: - format: date-time - title: Timestamp - type: string - attributes: - anyOf: - - additionalProperties: - anyOf: - - type: string - - type: integer - - type: number - - type: boolean - - type: 'null' - title: string | ... 
(4 variants) - type: object - - type: 'null' - type: - const: unstructured_log - default: unstructured_log - title: Type - type: string - message: - title: Message - type: string - severity: - $ref: '#/components/schemas/LogSeverity' - required: - - trace_id - - span_id - - timestamp - - message - - severity - title: UnstructuredLogEvent - type: object - Event: - discriminator: - mapping: - metric: '#/components/schemas/MetricEvent' - structured_log: '#/components/schemas/StructuredLogEvent' - unstructured_log: '#/components/schemas/UnstructuredLogEvent' - propertyName: type - oneOf: - - $ref: '#/components/schemas/UnstructuredLogEvent' - title: UnstructuredLogEvent - - $ref: '#/components/schemas/MetricEvent' - title: MetricEvent - - $ref: '#/components/schemas/StructuredLogEvent' - title: StructuredLogEvent - title: UnstructuredLogEvent | MetricEvent | StructuredLogEvent MetricInResponse: description: A metric value included in API responses. properties: @@ -13225,236 +13004,6 @@ components: - logger_config title: PostTrainingRLHFRequest type: object - Span: - description: A span representing a single operation within a trace. - properties: - span_id: - title: Span Id - type: string - trace_id: - title: Trace Id - type: string - parent_span_id: - anyOf: - - type: string - - type: 'null' - nullable: true - name: - title: Name - type: string - start_time: - format: date-time - title: Start Time - type: string - end_time: - anyOf: - - format: date-time - type: string - - type: 'null' - nullable: true - attributes: - anyOf: - - additionalProperties: true - type: object - - type: 'null' - required: - - span_id - - trace_id - - name - - start_time - title: Span - type: object - Trace: - description: A trace representing the complete execution path of a request across multiple operations. - properties: - trace_id: - title: Trace Id - type: string - root_span_id: - title: Root Span Id - type: string - start_time: - format: date-time - title: Start Time - type: string - end_time: - anyOf: - - format: date-time - type: string - - type: 'null' - nullable: true - required: - - trace_id - - root_span_id - - start_time - title: Trace - type: object - EventType: - description: The type of telemetry event being logged. - enum: - - unstructured_log - - structured_log - - metric - title: EventType - type: string - StructuredLogType: - description: The type of structured log event payload. - enum: - - span_start - - span_end - title: StructuredLogType - type: string - EvalTrace: - description: A trace record for evaluation purposes. - properties: - session_id: - title: Session Id - type: string - step: - title: Step - type: string - input: - title: Input - type: string - output: - title: Output - type: string - expected_output: - title: Expected Output - type: string - required: - - session_id - - step - - input - - output - - expected_output - title: EvalTrace - type: object - SpanWithStatus: - description: A span that includes status information. 
- properties: - span_id: - title: Span Id - type: string - trace_id: - title: Trace Id - type: string - parent_span_id: - anyOf: - - type: string - - type: 'null' - nullable: true - name: - title: Name - type: string - start_time: - format: date-time - title: Start Time - type: string - end_time: - anyOf: - - format: date-time - type: string - - type: 'null' - nullable: true - attributes: - anyOf: - - additionalProperties: true - type: object - - type: 'null' - status: - anyOf: - - $ref: '#/components/schemas/SpanStatus' - title: SpanStatus - - type: 'null' - nullable: true - title: SpanStatus - required: - - span_id - - trace_id - - name - - start_time - title: SpanWithStatus - type: object - QueryConditionOp: - description: Comparison operators for query conditions. - enum: - - eq - - ne - - gt - - lt - title: QueryConditionOp - type: string - QueryCondition: - description: A condition for filtering query results. - properties: - key: - title: Key - type: string - op: - $ref: '#/components/schemas/QueryConditionOp' - value: - title: Value - required: - - key - - op - - value - title: QueryCondition - type: object - MetricLabel: - description: A label associated with a metric. - properties: - name: - title: Name - type: string - value: - title: Value - type: string - required: - - name - - value - title: MetricLabel - type: object - MetricDataPoint: - description: A single data point in a metric time series. - properties: - timestamp: - title: Timestamp - type: integer - value: - title: Value - type: number - unit: - title: Unit - type: string - required: - - timestamp - - value - - unit - title: MetricDataPoint - type: object - MetricSeries: - description: A time series of metric data points. - properties: - metric: - title: Metric - type: string - labels: - items: - $ref: '#/components/schemas/MetricLabel' - title: Labels - type: array - values: - items: - $ref: '#/components/schemas/MetricDataPoint' - title: Values - type: array - required: - - metric - - labels - - values - title: MetricSeries - type: object responses: BadRequest400: description: The request was invalid or malformed diff --git a/docs/docs/building_applications/telemetry.mdx b/docs/docs/building_applications/telemetry.mdx index 2f1d80d41..761f444ef 100644 --- a/docs/docs/building_applications/telemetry.mdx +++ b/docs/docs/building_applications/telemetry.mdx @@ -10,203 +10,34 @@ import TabItem from '@theme/TabItem'; # Telemetry -The Llama Stack uses OpenTelemetry to provide comprehensive tracing, metrics, and logging capabilities. +The preferred way to instrument Llama Stack is with OpenTelemetry. Llama Stack enriches the data +collected by OpenTelemetry to capture helpful information about the performance and behavior of your +application. Here is an example of how to forward your telemetry to an OTLP collector from Llama Stack: +```sh +export OTEL_EXPORTER_OTLP_ENDPOINT="http://127.0.0.1:4318" +export OTEL_EXPORTER_OTLP_PROTOCOL=http/protobuf +export OTEL_SERVICE_NAME="llama-stack-server" -## Automatic Metrics Generation +uv pip install opentelemetry-distro opentelemetry-exporter-otlp +uv run opentelemetry-bootstrap -a requirements | uv pip install --requirement - -Llama Stack automatically generates metrics during inference operations. These metrics are aggregated at the **inference request level** and provide insights into token usage and model performance. 
- -### Available Metrics - -The following metrics are automatically generated for each inference request: - -| Metric Name | Type | Unit | Description | Labels | -|-------------|------|------|-------------|--------| -| `llama_stack_prompt_tokens_total` | Counter | `tokens` | Number of tokens in the input prompt | `model_id`, `provider_id` | -| `llama_stack_completion_tokens_total` | Counter | `tokens` | Number of tokens in the generated response | `model_id`, `provider_id` | -| `llama_stack_tokens_total` | Counter | `tokens` | Total tokens used (prompt + completion) | `model_id`, `provider_id` | - -### Metric Generation Flow - -1. **Token Counting**: During inference operations (chat completion, completion, etc.), the system counts tokens in both input prompts and generated responses -2. **Metric Construction**: For each request, `MetricEvent` objects are created with the token counts -3. **Telemetry Logging**: Metrics are sent to the configured telemetry sinks -4. **OpenTelemetry Export**: When OpenTelemetry is enabled, metrics are exposed as standard OpenTelemetry counters - -### Metric Aggregation Level - -All metrics are generated and aggregated at the **inference request level**. This means: - -- Each individual inference request generates its own set of metrics -- Metrics are not pre-aggregated across multiple requests -- Aggregation (sums, averages, etc.) can be performed by your observability tools (Prometheus, Grafana, etc.) -- Each metric includes labels for `model_id` and `provider_id` to enable filtering and grouping - -### Example Metric Event - -```python -MetricEvent( - trace_id="1234567890abcdef", - span_id="abcdef1234567890", - metric="total_tokens", - value=150, - timestamp=1703123456.789, - unit="tokens", - attributes={ - "model_id": "meta-llama/Llama-3.2-3B-Instruct", - "provider_id": "tgi" - }, -) +uv run opentelemetry-instrument llama stack run run.yaml ``` -## Telemetry Sinks -Choose from multiple sink types based on your observability needs: +### Known issues - - +Some database instrumentation libraries have a known bug where spans get wrapped twice, or do not get connected to a trace. +To prevent this, you can disable database specific tracing, and rely just on the SQLAlchemy tracing. If you are using +`sqlite3` as your database, for example, you can disable the additional tracing like this: -Send events to an OpenTelemetry Collector for integration with observability platforms: - -**Use Cases:** -- Visualizing traces in tools like Jaeger -- Collecting metrics for Prometheus -- Integration with enterprise observability stacks - -**Features:** -- Standard OpenTelemetry format -- Compatible with all OpenTelemetry collectors -- Supports both traces and metrics - - - - -Print events to the console for immediate debugging: - -**Use Cases:** -- Development and testing -- Quick debugging sessions -- Simple logging without external tools - -**Features:** -- Immediate output visibility -- No setup required -- Human-readable format - - - - -## Configuration - -### Meta-Reference Provider - -Currently, only the meta-reference provider is implemented. 
It can be configured to send events to multiple sink types: - -```yaml -telemetry: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - service_name: "llama-stack-service" - sinks: ['console', 'otel_trace', 'otel_metric'] - otel_exporter_otlp_endpoint: "http://localhost:4318" +```sh +export OTEL_PYTHON_DISABLED_INSTRUMENTATIONS="sqlite3" ``` -### Environment Variables - -Configure telemetry behavior using environment variables: - -- **`OTEL_EXPORTER_OTLP_ENDPOINT`**: OpenTelemetry Collector endpoint (default: `http://localhost:4318`) -- **`OTEL_SERVICE_NAME`**: Service name for telemetry (default: empty string) -- **`TELEMETRY_SINKS`**: Comma-separated list of sinks (default: `[]`) - -### Quick Setup: Complete Telemetry Stack - -Use the automated setup script to launch the complete telemetry stack (Jaeger, OpenTelemetry Collector, Prometheus, and Grafana): - -```bash -./scripts/telemetry/setup_telemetry.sh -``` - -This sets up: -- **Jaeger UI**: http://localhost:16686 (traces visualization) -- **Prometheus**: http://localhost:9090 (metrics) -- **Grafana**: http://localhost:3000 (dashboards with auto-configured data sources) -- **OTEL Collector**: http://localhost:4318 (OTLP endpoint) - -Once running, you can visualize traces by navigating to [Grafana](http://localhost:3000/) and login with login `admin` and password `admin`. - -## Querying Metrics - -When using the OpenTelemetry sink, metrics are exposed in standard format and can be queried through various tools: - - - - -Example Prometheus queries for analyzing token usage: - -```promql -# Total tokens used across all models -sum(llama_stack_tokens_total) - -# Tokens per model -sum by (model_id) (llama_stack_tokens_total) - -# Average tokens per request over 5 minutes -rate(llama_stack_tokens_total[5m]) - -# Token usage by provider -sum by (provider_id) (llama_stack_tokens_total) -``` - - - - -Create dashboards using Prometheus as a data source: - -- **Token Usage Over Time**: Line charts showing token consumption trends -- **Model Performance**: Comparison of different models by token efficiency -- **Provider Analysis**: Breakdown of usage across different providers -- **Request Patterns**: Understanding peak usage times and patterns - - - - -Forward metrics to other observability systems: - -- Export to multiple backends simultaneously -- Apply transformations and filtering -- Integrate with existing monitoring infrastructure - - - - -## Best Practices - -### 🔍 **Monitoring Strategy** -- Use OpenTelemetry for production environments -- Set up alerts on key metrics like token usage and error rates - -### 📊 **Metrics Analysis** -- Track token usage trends to optimize costs -- Monitor response times across different models -- Analyze usage patterns to improve resource allocation - -### 🚨 **Alerting & Debugging** -- Set up alerts for unusual token consumption spikes -- Use trace data to debug performance issues -- Monitor error rates and failure patterns - -### 🔧 **Configuration Management** -- Use environment variables for flexible deployment -- Ensure proper network access to OpenTelemetry collectors - ## Related Resources -- **[Agents](./agent)** - Monitoring agent execution with telemetry -- **[Evaluations](./evals)** - Using telemetry data for performance evaluation -- **[Getting Started Notebook](https://github.com/meta-llama/llama-stack/blob/main/docs/getting_started.ipynb)** - Telemetry examples and queries - **[OpenTelemetry Documentation](https://opentelemetry.io/)** - Comprehensive observability 
-### Environment Variables
-
-Configure telemetry behavior using environment variables:
-
-- **`OTEL_EXPORTER_OTLP_ENDPOINT`**: OpenTelemetry Collector endpoint (default: `http://localhost:4318`)
-- **`OTEL_SERVICE_NAME`**: Service name for telemetry (default: empty string)
-- **`TELEMETRY_SINKS`**: Comma-separated list of sinks (default: `[]`)
-
-### Quick Setup: Complete Telemetry Stack
-
-Use the automated setup script to launch the complete telemetry stack (Jaeger, OpenTelemetry Collector, Prometheus, and Grafana):
-
-```bash
-./scripts/telemetry/setup_telemetry.sh
-```
-
-This sets up:
-- **Jaeger UI**: http://localhost:16686 (traces visualization)
-- **Prometheus**: http://localhost:9090 (metrics)
-- **Grafana**: http://localhost:3000 (dashboards with auto-configured data sources)
-- **OTEL Collector**: http://localhost:4318 (OTLP endpoint)
-
-Once running, you can visualize traces by navigating to [Grafana](http://localhost:3000/) and login with login `admin` and password `admin`.
-
-## Querying Metrics
-
-When using the OpenTelemetry sink, metrics are exposed in standard format and can be queried through various tools:
-
-
-
-Example Prometheus queries for analyzing token usage:
-
-```promql
-# Total tokens used across all models
-sum(llama_stack_tokens_total)
-
-# Tokens per model
-sum by (model_id) (llama_stack_tokens_total)
-
-# Average tokens per request over 5 minutes
-rate(llama_stack_tokens_total[5m])
-
-# Token usage by provider
-sum by (provider_id) (llama_stack_tokens_total)
-```
-
-
-
-Create dashboards using Prometheus as a data source:
-
-- **Token Usage Over Time**: Line charts showing token consumption trends
-- **Model Performance**: Comparison of different models by token efficiency
-- **Provider Analysis**: Breakdown of usage across different providers
-- **Request Patterns**: Understanding peak usage times and patterns
-
-
-
-Forward metrics to other observability systems:
-
-- Export to multiple backends simultaneously
-- Apply transformations and filtering
-- Integrate with existing monitoring infrastructure
-
-
-
-## Best Practices
-
-### 🔍 **Monitoring Strategy**
-- Use OpenTelemetry for production environments
-- Set up alerts on key metrics like token usage and error rates
-
-### 📊 **Metrics Analysis**
-- Track token usage trends to optimize costs
-- Monitor response times across different models
-- Analyze usage patterns to improve resource allocation
-
-### 🚨 **Alerting & Debugging**
-- Set up alerts for unusual token consumption spikes
-- Use trace data to debug performance issues
-- Monitor error rates and failure patterns
-
-### 🔧 **Configuration Management**
-- Use environment variables for flexible deployment
-- Ensure proper network access to OpenTelemetry collectors
-
 ## Related Resources
 
-- **[Agents](./agent)** - Monitoring agent execution with telemetry
-- **[Evaluations](./evals)** - Using telemetry data for performance evaluation
-- **[Getting Started Notebook](https://github.com/meta-llama/llama-stack/blob/main/docs/getting_started.ipynb)** - Telemetry examples and queries
 - **[OpenTelemetry Documentation](https://opentelemetry.io/)** - Comprehensive observability framework
 - **[Jaeger Documentation](https://www.jaegertracing.io/)** - Distributed tracing visualization
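The `OTEL_PYTHON_DISABLED_INSTRUMENTATIONS` workaround added to the telemetry guide above can also be applied when launching the server programmatically. A minimal sketch, assuming the `uv run opentelemetry-instrument` invocation from the new docs and an illustrative `run.yaml` path:

```python
import os
import subprocess

# Disable the buggy sqlite3 auto-instrumentation before the agent starts;
# SQLAlchemy tracing still covers the same database calls.
env = dict(os.environ, OTEL_PYTHON_DISABLED_INSTRUMENTATIONS="sqlite3")

# Equivalent to: uv run opentelemetry-instrument llama stack run run.yaml
subprocess.run(
    ["uv", "run", "opentelemetry-instrument", "llama", "stack", "run", "run.yaml"],
    env=env,
    check=True,
)
```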
diff --git a/docs/docs/concepts/apis/index.mdx b/docs/docs/concepts/apis/index.mdx
index 7d12478ed..c4b561606 100644
--- a/docs/docs/concepts/apis/index.mdx
+++ b/docs/docs/concepts/apis/index.mdx
@@ -17,7 +17,6 @@ A Llama Stack API is described as a collection of REST endpoints following OpenA
 - **Eval**: generate outputs (via Inference or Agents) and perform scoring
 - **VectorIO**: perform operations on vector stores, such as adding documents, searching, and deleting documents
 - **Files**: manage file uploads, storage, and retrieval
-- **Telemetry**: collect telemetry data from the system
 - **Post Training**: fine-tune a model
 - **Tool Runtime**: interact with various tools and protocols
 - **Responses**: generate responses from an LLM
diff --git a/docs/docs/distributions/k8s/stack-configmap.yaml b/docs/docs/distributions/k8s/stack-configmap.yaml
index 255e39ac2..d0e083d29 100644
--- a/docs/docs/distributions/k8s/stack-configmap.yaml
+++ b/docs/docs/distributions/k8s/stack-configmap.yaml
@@ -8,7 +8,6 @@ data:
     - inference
     - files
     - safety
-    - telemetry
     - tool_runtime
     - vector_io
     providers:
@@ -73,12 +72,6 @@ data:
           db: ${env.POSTGRES_DB:=llamastack}
           user: ${env.POSTGRES_USER:=llamastack}
           password: ${env.POSTGRES_PASSWORD:=llamastack}
-      telemetry:
-        - provider_id: meta-reference
-          provider_type: inline::meta-reference
-          config:
-            service_name: "${env.OTEL_SERVICE_NAME:=\u200B}"
-            sinks: ${env.TELEMETRY_SINKS:=console}
       tool_runtime:
       - provider_id: brave-search
         provider_type: remote::brave-search
diff --git a/docs/docs/distributions/k8s/stack_run_config.yaml b/docs/docs/distributions/k8s/stack_run_config.yaml
index 3dde74bbf..d8306613b 100644
--- a/docs/docs/distributions/k8s/stack_run_config.yaml
+++ b/docs/docs/distributions/k8s/stack_run_config.yaml
@@ -140,8 +140,6 @@ server:
   auth:
     provider_config:
       type: github_token
-telemetry:
-  enabled: true
 vector_stores:
   default_provider_id: chromadb
   default_embedding_model:
diff --git a/docs/docs/distributions/self_hosted_distro/starter.md b/docs/docs/distributions/self_hosted_distro/starter.md
index 84c35f3d3..37599a8dd 100644
--- a/docs/docs/distributions/self_hosted_distro/starter.md
+++ b/docs/docs/distributions/self_hosted_distro/starter.md
@@ -116,10 +116,6 @@ The following environment variables can be configured:
 - `BRAVE_SEARCH_API_KEY`: Brave Search API key
 - `TAVILY_SEARCH_API_KEY`: Tavily Search API key
 
-### Telemetry Configuration
-- `OTEL_SERVICE_NAME`: OpenTelemetry service name
-- `OTEL_EXPORTER_OTLP_ENDPOINT`: OpenTelemetry collector endpoint URL
-
 ## Enabling Providers
 
 You can enable specific providers by setting appropriate environment variables. For example,
@@ -265,7 +261,7 @@ The starter distribution uses SQLite for local storage of various components:
 2. **Flexible Configuration**: Easy to enable/disable providers based on your needs
 3. **No Local GPU Required**: Most providers are cloud-based, making it accessible to developers without high-end hardware
 4. **Easy Migration**: Start with hosted providers and gradually move to local ones as needed
-5. **Production Ready**: Includes safety, evaluation, and telemetry components
+5. **Production Ready**: Includes safety and evaluation
 6. **Tool Integration**: Comes with web search, RAG, and model context protocol tools
 
 The starter distribution is ideal for developers who want to experiment with different AI providers, build prototypes quickly, or create applications that can work with multiple AI backends.
diff --git a/docs/docs/references/python_sdk_reference/index.md b/docs/docs/references/python_sdk_reference/index.md
index 686567458..532341a4d 100644
--- a/docs/docs/references/python_sdk_reference/index.md
+++ b/docs/docs/references/python_sdk_reference/index.md
@@ -360,32 +360,6 @@ Methods:
 - client.synthetic_data_generation.generate(\*\*params) -> SyntheticDataGenerationResponse
 
-## Telemetry
-
-Types:
-
-```python
-from llama_stack_client.types import (
-    QuerySpansResponse,
-    SpanWithStatus,
-    Trace,
-    TelemetryGetSpanResponse,
-    TelemetryGetSpanTreeResponse,
-    TelemetryQuerySpansResponse,
-    TelemetryQueryTracesResponse,
-)
-```
-
-Methods:
-
-- client.telemetry.get_span(span_id, \*, trace_id) -> TelemetryGetSpanResponse
-- client.telemetry.get_span_tree(span_id, \*\*params) -> TelemetryGetSpanTreeResponse
-- client.telemetry.get_trace(trace_id) -> Trace
-- client.telemetry.log_event(\*\*params) -> None
-- client.telemetry.query_spans(\*\*params) -> TelemetryQuerySpansResponse
-- client.telemetry.query_traces(\*\*params) -> TelemetryQueryTracesResponse
-- client.telemetry.save_spans_to_dataset(\*\*params) -> None
-
 ## Datasetio
 
 Types:
diff --git a/docs/src/pages/index.js b/docs/src/pages/index.js
index f460d6f27..7489682aa 100644
--- a/docs/src/pages/index.js
+++ b/docs/src/pages/index.js
@@ -13,7 +13,7 @@ function HomepageHeader() {

Build AI Applications with Llama Stack

- Unified APIs for Inference, RAG, Agents, Tools, Safety, and Telemetry
+ Unified APIs for Inference, RAG, Agents, Tools, and Safety

+ description="The open-source framework for building generative AI applications with unified APIs for Inference, RAG, Agents, Tools, Safety, and Evals.">
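The spec hunks that follow delete the bespoke telemetry schemas (`Span`, `Trace`, `MetricEvent`, and the log-event types). Code that previously logged through `client.telemetry` can emit standard OpenTelemetry spans instead; a minimal sketch using the `opentelemetry-api` package, with illustrative attribute names:

```python
from opentelemetry import trace

# Exporter and endpoint configuration come from the usual OTEL_* environment
# variables when the server runs under opentelemetry-instrument.
tracer = trace.get_tracer("llama_stack.example")

with tracer.start_as_current_span("inference-request") as span:
    span.set_attribute("model_id", "meta-llama/Llama-3.2-3B-Instruct")
    span.set_attribute("provider_id", "tgi")
    # ... perform the request inside the span ...
```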
diff --git a/docs/static/deprecated-llama-stack-spec.yaml b/docs/static/deprecated-llama-stack-spec.yaml index cf9bd14c4..70ecf9b03 100644 --- a/docs/static/deprecated-llama-stack-spec.yaml +++ b/docs/static/deprecated-llama-stack-spec.yaml @@ -9023,227 +9023,6 @@ components: - $ref: '#/components/schemas/OpenAIResponseContentPartReasoningText' title: OpenAIResponseContentPartReasoningText title: OpenAIResponseContentPartOutputText | OpenAIResponseContentPartRefusal | OpenAIResponseContentPartReasoningText - SpanEndPayload: - description: Payload for a span end event. - properties: - type: - const: span_end - default: span_end - title: Type - type: string - status: - $ref: '#/components/schemas/SpanStatus' - required: - - status - title: SpanEndPayload - type: object - SpanStartPayload: - description: Payload for a span start event. - properties: - type: - const: span_start - default: span_start - title: Type - type: string - name: - title: Name - type: string - parent_span_id: - anyOf: - - type: string - - type: 'null' - nullable: true - required: - - name - title: SpanStartPayload - type: object - SpanStatus: - description: The status of a span indicating whether it completed successfully or with an error. - enum: - - ok - - error - title: SpanStatus - type: string - StructuredLogPayload: - discriminator: - mapping: - span_end: '#/components/schemas/SpanEndPayload' - span_start: '#/components/schemas/SpanStartPayload' - propertyName: type - oneOf: - - $ref: '#/components/schemas/SpanStartPayload' - title: SpanStartPayload - - $ref: '#/components/schemas/SpanEndPayload' - title: SpanEndPayload - title: SpanStartPayload | SpanEndPayload - LogSeverity: - description: The severity level of a log message. - enum: - - verbose - - debug - - info - - warn - - error - - critical - title: LogSeverity - type: string - MetricEvent: - description: A metric event containing a measured value. - properties: - trace_id: - title: Trace Id - type: string - span_id: - title: Span Id - type: string - timestamp: - format: date-time - title: Timestamp - type: string - attributes: - anyOf: - - additionalProperties: - anyOf: - - type: string - - type: integer - - type: number - - type: boolean - - type: 'null' - title: string | ... (4 variants) - type: object - - type: 'null' - type: - const: metric - default: metric - title: Type - type: string - metric: - title: Metric - type: string - value: - anyOf: - - type: integer - - type: number - title: integer | number - unit: - title: Unit - type: string - required: - - trace_id - - span_id - - timestamp - - metric - - value - - unit - title: MetricEvent - type: object - StructuredLogEvent: - description: A structured log event containing typed payload data. - properties: - trace_id: - title: Trace Id - type: string - span_id: - title: Span Id - type: string - timestamp: - format: date-time - title: Timestamp - type: string - attributes: - anyOf: - - additionalProperties: - anyOf: - - type: string - - type: integer - - type: number - - type: boolean - - type: 'null' - title: string | ... 
(4 variants) - type: object - - type: 'null' - type: - const: structured_log - default: structured_log - title: Type - type: string - payload: - discriminator: - mapping: - span_end: '#/components/schemas/SpanEndPayload' - span_start: '#/components/schemas/SpanStartPayload' - propertyName: type - oneOf: - - $ref: '#/components/schemas/SpanStartPayload' - title: SpanStartPayload - - $ref: '#/components/schemas/SpanEndPayload' - title: SpanEndPayload - title: SpanStartPayload | SpanEndPayload - required: - - trace_id - - span_id - - timestamp - - payload - title: StructuredLogEvent - type: object - UnstructuredLogEvent: - description: An unstructured log event containing a simple text message. - properties: - trace_id: - title: Trace Id - type: string - span_id: - title: Span Id - type: string - timestamp: - format: date-time - title: Timestamp - type: string - attributes: - anyOf: - - additionalProperties: - anyOf: - - type: string - - type: integer - - type: number - - type: boolean - - type: 'null' - title: string | ... (4 variants) - type: object - - type: 'null' - type: - const: unstructured_log - default: unstructured_log - title: Type - type: string - message: - title: Message - type: string - severity: - $ref: '#/components/schemas/LogSeverity' - required: - - trace_id - - span_id - - timestamp - - message - - severity - title: UnstructuredLogEvent - type: object - Event: - discriminator: - mapping: - metric: '#/components/schemas/MetricEvent' - structured_log: '#/components/schemas/StructuredLogEvent' - unstructured_log: '#/components/schemas/UnstructuredLogEvent' - propertyName: type - oneOf: - - $ref: '#/components/schemas/UnstructuredLogEvent' - title: UnstructuredLogEvent - - $ref: '#/components/schemas/MetricEvent' - title: MetricEvent - - $ref: '#/components/schemas/StructuredLogEvent' - title: StructuredLogEvent - title: UnstructuredLogEvent | MetricEvent | StructuredLogEvent MetricInResponse: description: A metric value included in API responses. properties: @@ -10068,236 +9847,6 @@ components: - logger_config title: PostTrainingRLHFRequest type: object - Span: - description: A span representing a single operation within a trace. - properties: - span_id: - title: Span Id - type: string - trace_id: - title: Trace Id - type: string - parent_span_id: - anyOf: - - type: string - - type: 'null' - nullable: true - name: - title: Name - type: string - start_time: - format: date-time - title: Start Time - type: string - end_time: - anyOf: - - format: date-time - type: string - - type: 'null' - nullable: true - attributes: - anyOf: - - additionalProperties: true - type: object - - type: 'null' - required: - - span_id - - trace_id - - name - - start_time - title: Span - type: object - Trace: - description: A trace representing the complete execution path of a request across multiple operations. - properties: - trace_id: - title: Trace Id - type: string - root_span_id: - title: Root Span Id - type: string - start_time: - format: date-time - title: Start Time - type: string - end_time: - anyOf: - - format: date-time - type: string - - type: 'null' - nullable: true - required: - - trace_id - - root_span_id - - start_time - title: Trace - type: object - EventType: - description: The type of telemetry event being logged. - enum: - - unstructured_log - - structured_log - - metric - title: EventType - type: string - StructuredLogType: - description: The type of structured log event payload. 
- enum: - - span_start - - span_end - title: StructuredLogType - type: string - EvalTrace: - description: A trace record for evaluation purposes. - properties: - session_id: - title: Session Id - type: string - step: - title: Step - type: string - input: - title: Input - type: string - output: - title: Output - type: string - expected_output: - title: Expected Output - type: string - required: - - session_id - - step - - input - - output - - expected_output - title: EvalTrace - type: object - SpanWithStatus: - description: A span that includes status information. - properties: - span_id: - title: Span Id - type: string - trace_id: - title: Trace Id - type: string - parent_span_id: - anyOf: - - type: string - - type: 'null' - nullable: true - name: - title: Name - type: string - start_time: - format: date-time - title: Start Time - type: string - end_time: - anyOf: - - format: date-time - type: string - - type: 'null' - nullable: true - attributes: - anyOf: - - additionalProperties: true - type: object - - type: 'null' - status: - anyOf: - - $ref: '#/components/schemas/SpanStatus' - title: SpanStatus - - type: 'null' - nullable: true - title: SpanStatus - required: - - span_id - - trace_id - - name - - start_time - title: SpanWithStatus - type: object - QueryConditionOp: - description: Comparison operators for query conditions. - enum: - - eq - - ne - - gt - - lt - title: QueryConditionOp - type: string - QueryCondition: - description: A condition for filtering query results. - properties: - key: - title: Key - type: string - op: - $ref: '#/components/schemas/QueryConditionOp' - value: - title: Value - required: - - key - - op - - value - title: QueryCondition - type: object - MetricLabel: - description: A label associated with a metric. - properties: - name: - title: Name - type: string - value: - title: Value - type: string - required: - - name - - value - title: MetricLabel - type: object - MetricDataPoint: - description: A single data point in a metric time series. - properties: - timestamp: - title: Timestamp - type: integer - value: - title: Value - type: number - unit: - title: Unit - type: string - required: - - timestamp - - value - - unit - title: MetricDataPoint - type: object - MetricSeries: - description: A time series of metric data points. - properties: - metric: - title: Metric - type: string - labels: - items: - $ref: '#/components/schemas/MetricLabel' - title: Labels - type: array - values: - items: - $ref: '#/components/schemas/MetricDataPoint' - title: Values - type: array - required: - - metric - - labels - - values - title: MetricSeries - type: object responses: BadRequest400: description: The request was invalid or malformed diff --git a/docs/static/experimental-llama-stack-spec.yaml b/docs/static/experimental-llama-stack-spec.yaml index 18ce75562..18cf3415f 100644 --- a/docs/static/experimental-llama-stack-spec.yaml +++ b/docs/static/experimental-llama-stack-spec.yaml @@ -7952,227 +7952,6 @@ components: - $ref: '#/components/schemas/OpenAIResponseContentPartReasoningText' title: OpenAIResponseContentPartReasoningText title: OpenAIResponseContentPartOutputText | OpenAIResponseContentPartRefusal | OpenAIResponseContentPartReasoningText - SpanEndPayload: - description: Payload for a span end event. - properties: - type: - const: span_end - default: span_end - title: Type - type: string - status: - $ref: '#/components/schemas/SpanStatus' - required: - - status - title: SpanEndPayload - type: object - SpanStartPayload: - description: Payload for a span start event. 
- properties: - type: - const: span_start - default: span_start - title: Type - type: string - name: - title: Name - type: string - parent_span_id: - anyOf: - - type: string - - type: 'null' - nullable: true - required: - - name - title: SpanStartPayload - type: object - SpanStatus: - description: The status of a span indicating whether it completed successfully or with an error. - enum: - - ok - - error - title: SpanStatus - type: string - StructuredLogPayload: - discriminator: - mapping: - span_end: '#/components/schemas/SpanEndPayload' - span_start: '#/components/schemas/SpanStartPayload' - propertyName: type - oneOf: - - $ref: '#/components/schemas/SpanStartPayload' - title: SpanStartPayload - - $ref: '#/components/schemas/SpanEndPayload' - title: SpanEndPayload - title: SpanStartPayload | SpanEndPayload - LogSeverity: - description: The severity level of a log message. - enum: - - verbose - - debug - - info - - warn - - error - - critical - title: LogSeverity - type: string - MetricEvent: - description: A metric event containing a measured value. - properties: - trace_id: - title: Trace Id - type: string - span_id: - title: Span Id - type: string - timestamp: - format: date-time - title: Timestamp - type: string - attributes: - anyOf: - - additionalProperties: - anyOf: - - type: string - - type: integer - - type: number - - type: boolean - - type: 'null' - title: string | ... (4 variants) - type: object - - type: 'null' - type: - const: metric - default: metric - title: Type - type: string - metric: - title: Metric - type: string - value: - anyOf: - - type: integer - - type: number - title: integer | number - unit: - title: Unit - type: string - required: - - trace_id - - span_id - - timestamp - - metric - - value - - unit - title: MetricEvent - type: object - StructuredLogEvent: - description: A structured log event containing typed payload data. - properties: - trace_id: - title: Trace Id - type: string - span_id: - title: Span Id - type: string - timestamp: - format: date-time - title: Timestamp - type: string - attributes: - anyOf: - - additionalProperties: - anyOf: - - type: string - - type: integer - - type: number - - type: boolean - - type: 'null' - title: string | ... (4 variants) - type: object - - type: 'null' - type: - const: structured_log - default: structured_log - title: Type - type: string - payload: - discriminator: - mapping: - span_end: '#/components/schemas/SpanEndPayload' - span_start: '#/components/schemas/SpanStartPayload' - propertyName: type - oneOf: - - $ref: '#/components/schemas/SpanStartPayload' - title: SpanStartPayload - - $ref: '#/components/schemas/SpanEndPayload' - title: SpanEndPayload - title: SpanStartPayload | SpanEndPayload - required: - - trace_id - - span_id - - timestamp - - payload - title: StructuredLogEvent - type: object - UnstructuredLogEvent: - description: An unstructured log event containing a simple text message. - properties: - trace_id: - title: Trace Id - type: string - span_id: - title: Span Id - type: string - timestamp: - format: date-time - title: Timestamp - type: string - attributes: - anyOf: - - additionalProperties: - anyOf: - - type: string - - type: integer - - type: number - - type: boolean - - type: 'null' - title: string | ... 
(4 variants) - type: object - - type: 'null' - type: - const: unstructured_log - default: unstructured_log - title: Type - type: string - message: - title: Message - type: string - severity: - $ref: '#/components/schemas/LogSeverity' - required: - - trace_id - - span_id - - timestamp - - message - - severity - title: UnstructuredLogEvent - type: object - Event: - discriminator: - mapping: - metric: '#/components/schemas/MetricEvent' - structured_log: '#/components/schemas/StructuredLogEvent' - unstructured_log: '#/components/schemas/UnstructuredLogEvent' - propertyName: type - oneOf: - - $ref: '#/components/schemas/UnstructuredLogEvent' - title: UnstructuredLogEvent - - $ref: '#/components/schemas/MetricEvent' - title: MetricEvent - - $ref: '#/components/schemas/StructuredLogEvent' - title: StructuredLogEvent - title: UnstructuredLogEvent | MetricEvent | StructuredLogEvent MetricInResponse: description: A metric value included in API responses. properties: @@ -8997,236 +8776,6 @@ components: - logger_config title: PostTrainingRLHFRequest type: object - Span: - description: A span representing a single operation within a trace. - properties: - span_id: - title: Span Id - type: string - trace_id: - title: Trace Id - type: string - parent_span_id: - anyOf: - - type: string - - type: 'null' - nullable: true - name: - title: Name - type: string - start_time: - format: date-time - title: Start Time - type: string - end_time: - anyOf: - - format: date-time - type: string - - type: 'null' - nullable: true - attributes: - anyOf: - - additionalProperties: true - type: object - - type: 'null' - required: - - span_id - - trace_id - - name - - start_time - title: Span - type: object - Trace: - description: A trace representing the complete execution path of a request across multiple operations. - properties: - trace_id: - title: Trace Id - type: string - root_span_id: - title: Root Span Id - type: string - start_time: - format: date-time - title: Start Time - type: string - end_time: - anyOf: - - format: date-time - type: string - - type: 'null' - nullable: true - required: - - trace_id - - root_span_id - - start_time - title: Trace - type: object - EventType: - description: The type of telemetry event being logged. - enum: - - unstructured_log - - structured_log - - metric - title: EventType - type: string - StructuredLogType: - description: The type of structured log event payload. - enum: - - span_start - - span_end - title: StructuredLogType - type: string - EvalTrace: - description: A trace record for evaluation purposes. - properties: - session_id: - title: Session Id - type: string - step: - title: Step - type: string - input: - title: Input - type: string - output: - title: Output - type: string - expected_output: - title: Expected Output - type: string - required: - - session_id - - step - - input - - output - - expected_output - title: EvalTrace - type: object - SpanWithStatus: - description: A span that includes status information. 
- properties: - span_id: - title: Span Id - type: string - trace_id: - title: Trace Id - type: string - parent_span_id: - anyOf: - - type: string - - type: 'null' - nullable: true - name: - title: Name - type: string - start_time: - format: date-time - title: Start Time - type: string - end_time: - anyOf: - - format: date-time - type: string - - type: 'null' - nullable: true - attributes: - anyOf: - - additionalProperties: true - type: object - - type: 'null' - status: - anyOf: - - $ref: '#/components/schemas/SpanStatus' - title: SpanStatus - - type: 'null' - nullable: true - title: SpanStatus - required: - - span_id - - trace_id - - name - - start_time - title: SpanWithStatus - type: object - QueryConditionOp: - description: Comparison operators for query conditions. - enum: - - eq - - ne - - gt - - lt - title: QueryConditionOp - type: string - QueryCondition: - description: A condition for filtering query results. - properties: - key: - title: Key - type: string - op: - $ref: '#/components/schemas/QueryConditionOp' - value: - title: Value - required: - - key - - op - - value - title: QueryCondition - type: object - MetricLabel: - description: A label associated with a metric. - properties: - name: - title: Name - type: string - value: - title: Value - type: string - required: - - name - - value - title: MetricLabel - type: object - MetricDataPoint: - description: A single data point in a metric time series. - properties: - timestamp: - title: Timestamp - type: integer - value: - title: Value - type: number - unit: - title: Unit - type: string - required: - - timestamp - - value - - unit - title: MetricDataPoint - type: object - MetricSeries: - description: A time series of metric data points. - properties: - metric: - title: Metric - type: string - labels: - items: - $ref: '#/components/schemas/MetricLabel' - title: Labels - type: array - values: - items: - $ref: '#/components/schemas/MetricDataPoint' - title: Values - type: array - required: - - metric - - labels - - values - title: MetricSeries - type: object responses: BadRequest400: description: The request was invalid or malformed diff --git a/docs/static/llama-stack-spec.yaml b/docs/static/llama-stack-spec.yaml index 9f7b2ed64..19239e722 100644 --- a/docs/static/llama-stack-spec.yaml +++ b/docs/static/llama-stack-spec.yaml @@ -10850,227 +10850,6 @@ components: - $ref: '#/components/schemas/OpenAIResponseContentPartReasoningText' title: OpenAIResponseContentPartReasoningText title: OpenAIResponseContentPartOutputText | OpenAIResponseContentPartRefusal | OpenAIResponseContentPartReasoningText - SpanEndPayload: - description: Payload for a span end event. - properties: - type: - const: span_end - default: span_end - title: Type - type: string - status: - $ref: '#/components/schemas/SpanStatus' - required: - - status - title: SpanEndPayload - type: object - SpanStartPayload: - description: Payload for a span start event. - properties: - type: - const: span_start - default: span_start - title: Type - type: string - name: - title: Name - type: string - parent_span_id: - anyOf: - - type: string - - type: 'null' - nullable: true - required: - - name - title: SpanStartPayload - type: object - SpanStatus: - description: The status of a span indicating whether it completed successfully or with an error. 
- enum: - - ok - - error - title: SpanStatus - type: string - StructuredLogPayload: - discriminator: - mapping: - span_end: '#/components/schemas/SpanEndPayload' - span_start: '#/components/schemas/SpanStartPayload' - propertyName: type - oneOf: - - $ref: '#/components/schemas/SpanStartPayload' - title: SpanStartPayload - - $ref: '#/components/schemas/SpanEndPayload' - title: SpanEndPayload - title: SpanStartPayload | SpanEndPayload - LogSeverity: - description: The severity level of a log message. - enum: - - verbose - - debug - - info - - warn - - error - - critical - title: LogSeverity - type: string - MetricEvent: - description: A metric event containing a measured value. - properties: - trace_id: - title: Trace Id - type: string - span_id: - title: Span Id - type: string - timestamp: - format: date-time - title: Timestamp - type: string - attributes: - anyOf: - - additionalProperties: - anyOf: - - type: string - - type: integer - - type: number - - type: boolean - - type: 'null' - title: string | ... (4 variants) - type: object - - type: 'null' - type: - const: metric - default: metric - title: Type - type: string - metric: - title: Metric - type: string - value: - anyOf: - - type: integer - - type: number - title: integer | number - unit: - title: Unit - type: string - required: - - trace_id - - span_id - - timestamp - - metric - - value - - unit - title: MetricEvent - type: object - StructuredLogEvent: - description: A structured log event containing typed payload data. - properties: - trace_id: - title: Trace Id - type: string - span_id: - title: Span Id - type: string - timestamp: - format: date-time - title: Timestamp - type: string - attributes: - anyOf: - - additionalProperties: - anyOf: - - type: string - - type: integer - - type: number - - type: boolean - - type: 'null' - title: string | ... (4 variants) - type: object - - type: 'null' - type: - const: structured_log - default: structured_log - title: Type - type: string - payload: - discriminator: - mapping: - span_end: '#/components/schemas/SpanEndPayload' - span_start: '#/components/schemas/SpanStartPayload' - propertyName: type - oneOf: - - $ref: '#/components/schemas/SpanStartPayload' - title: SpanStartPayload - - $ref: '#/components/schemas/SpanEndPayload' - title: SpanEndPayload - title: SpanStartPayload | SpanEndPayload - required: - - trace_id - - span_id - - timestamp - - payload - title: StructuredLogEvent - type: object - UnstructuredLogEvent: - description: An unstructured log event containing a simple text message. - properties: - trace_id: - title: Trace Id - type: string - span_id: - title: Span Id - type: string - timestamp: - format: date-time - title: Timestamp - type: string - attributes: - anyOf: - - additionalProperties: - anyOf: - - type: string - - type: integer - - type: number - - type: boolean - - type: 'null' - title: string | ... 
(4 variants) - type: object - - type: 'null' - type: - const: unstructured_log - default: unstructured_log - title: Type - type: string - message: - title: Message - type: string - severity: - $ref: '#/components/schemas/LogSeverity' - required: - - trace_id - - span_id - - timestamp - - message - - severity - title: UnstructuredLogEvent - type: object - Event: - discriminator: - mapping: - metric: '#/components/schemas/MetricEvent' - structured_log: '#/components/schemas/StructuredLogEvent' - unstructured_log: '#/components/schemas/UnstructuredLogEvent' - propertyName: type - oneOf: - - $ref: '#/components/schemas/UnstructuredLogEvent' - title: UnstructuredLogEvent - - $ref: '#/components/schemas/MetricEvent' - title: MetricEvent - - $ref: '#/components/schemas/StructuredLogEvent' - title: StructuredLogEvent - title: UnstructuredLogEvent | MetricEvent | StructuredLogEvent MetricInResponse: description: A metric value included in API responses. properties: @@ -11892,236 +11671,6 @@ components: - logger_config title: PostTrainingRLHFRequest type: object - Span: - description: A span representing a single operation within a trace. - properties: - span_id: - title: Span Id - type: string - trace_id: - title: Trace Id - type: string - parent_span_id: - anyOf: - - type: string - - type: 'null' - nullable: true - name: - title: Name - type: string - start_time: - format: date-time - title: Start Time - type: string - end_time: - anyOf: - - format: date-time - type: string - - type: 'null' - nullable: true - attributes: - anyOf: - - additionalProperties: true - type: object - - type: 'null' - required: - - span_id - - trace_id - - name - - start_time - title: Span - type: object - Trace: - description: A trace representing the complete execution path of a request across multiple operations. - properties: - trace_id: - title: Trace Id - type: string - root_span_id: - title: Root Span Id - type: string - start_time: - format: date-time - title: Start Time - type: string - end_time: - anyOf: - - format: date-time - type: string - - type: 'null' - nullable: true - required: - - trace_id - - root_span_id - - start_time - title: Trace - type: object - EventType: - description: The type of telemetry event being logged. - enum: - - unstructured_log - - structured_log - - metric - title: EventType - type: string - StructuredLogType: - description: The type of structured log event payload. - enum: - - span_start - - span_end - title: StructuredLogType - type: string - EvalTrace: - description: A trace record for evaluation purposes. - properties: - session_id: - title: Session Id - type: string - step: - title: Step - type: string - input: - title: Input - type: string - output: - title: Output - type: string - expected_output: - title: Expected Output - type: string - required: - - session_id - - step - - input - - output - - expected_output - title: EvalTrace - type: object - SpanWithStatus: - description: A span that includes status information. 
- properties: - span_id: - title: Span Id - type: string - trace_id: - title: Trace Id - type: string - parent_span_id: - anyOf: - - type: string - - type: 'null' - nullable: true - name: - title: Name - type: string - start_time: - format: date-time - title: Start Time - type: string - end_time: - anyOf: - - format: date-time - type: string - - type: 'null' - nullable: true - attributes: - anyOf: - - additionalProperties: true - type: object - - type: 'null' - status: - anyOf: - - $ref: '#/components/schemas/SpanStatus' - title: SpanStatus - - type: 'null' - nullable: true - title: SpanStatus - required: - - span_id - - trace_id - - name - - start_time - title: SpanWithStatus - type: object - QueryConditionOp: - description: Comparison operators for query conditions. - enum: - - eq - - ne - - gt - - lt - title: QueryConditionOp - type: string - QueryCondition: - description: A condition for filtering query results. - properties: - key: - title: Key - type: string - op: - $ref: '#/components/schemas/QueryConditionOp' - value: - title: Value - required: - - key - - op - - value - title: QueryCondition - type: object - MetricLabel: - description: A label associated with a metric. - properties: - name: - title: Name - type: string - value: - title: Value - type: string - required: - - name - - value - title: MetricLabel - type: object - MetricDataPoint: - description: A single data point in a metric time series. - properties: - timestamp: - title: Timestamp - type: integer - value: - title: Value - type: number - unit: - title: Unit - type: string - required: - - timestamp - - value - - unit - title: MetricDataPoint - type: object - MetricSeries: - description: A time series of metric data points. - properties: - metric: - title: Metric - type: string - labels: - items: - $ref: '#/components/schemas/MetricLabel' - title: Labels - type: array - values: - items: - $ref: '#/components/schemas/MetricDataPoint' - title: Values - type: array - required: - - metric - - labels - - values - title: MetricSeries - type: object responses: BadRequest400: description: The request was invalid or malformed diff --git a/docs/static/stainless-llama-stack-spec.yaml b/docs/static/stainless-llama-stack-spec.yaml index 9269b7e39..9703f94b5 100644 --- a/docs/static/stainless-llama-stack-spec.yaml +++ b/docs/static/stainless-llama-stack-spec.yaml @@ -12180,227 +12180,6 @@ components: - $ref: '#/components/schemas/OpenAIResponseContentPartReasoningText' title: OpenAIResponseContentPartReasoningText title: OpenAIResponseContentPartOutputText | OpenAIResponseContentPartRefusal | OpenAIResponseContentPartReasoningText - SpanEndPayload: - description: Payload for a span end event. - properties: - type: - const: span_end - default: span_end - title: Type - type: string - status: - $ref: '#/components/schemas/SpanStatus' - required: - - status - title: SpanEndPayload - type: object - SpanStartPayload: - description: Payload for a span start event. - properties: - type: - const: span_start - default: span_start - title: Type - type: string - name: - title: Name - type: string - parent_span_id: - anyOf: - - type: string - - type: 'null' - nullable: true - required: - - name - title: SpanStartPayload - type: object - SpanStatus: - description: The status of a span indicating whether it completed successfully or with an error. 
- enum: - - ok - - error - title: SpanStatus - type: string - StructuredLogPayload: - discriminator: - mapping: - span_end: '#/components/schemas/SpanEndPayload' - span_start: '#/components/schemas/SpanStartPayload' - propertyName: type - oneOf: - - $ref: '#/components/schemas/SpanStartPayload' - title: SpanStartPayload - - $ref: '#/components/schemas/SpanEndPayload' - title: SpanEndPayload - title: SpanStartPayload | SpanEndPayload - LogSeverity: - description: The severity level of a log message. - enum: - - verbose - - debug - - info - - warn - - error - - critical - title: LogSeverity - type: string - MetricEvent: - description: A metric event containing a measured value. - properties: - trace_id: - title: Trace Id - type: string - span_id: - title: Span Id - type: string - timestamp: - format: date-time - title: Timestamp - type: string - attributes: - anyOf: - - additionalProperties: - anyOf: - - type: string - - type: integer - - type: number - - type: boolean - - type: 'null' - title: string | ... (4 variants) - type: object - - type: 'null' - type: - const: metric - default: metric - title: Type - type: string - metric: - title: Metric - type: string - value: - anyOf: - - type: integer - - type: number - title: integer | number - unit: - title: Unit - type: string - required: - - trace_id - - span_id - - timestamp - - metric - - value - - unit - title: MetricEvent - type: object - StructuredLogEvent: - description: A structured log event containing typed payload data. - properties: - trace_id: - title: Trace Id - type: string - span_id: - title: Span Id - type: string - timestamp: - format: date-time - title: Timestamp - type: string - attributes: - anyOf: - - additionalProperties: - anyOf: - - type: string - - type: integer - - type: number - - type: boolean - - type: 'null' - title: string | ... (4 variants) - type: object - - type: 'null' - type: - const: structured_log - default: structured_log - title: Type - type: string - payload: - discriminator: - mapping: - span_end: '#/components/schemas/SpanEndPayload' - span_start: '#/components/schemas/SpanStartPayload' - propertyName: type - oneOf: - - $ref: '#/components/schemas/SpanStartPayload' - title: SpanStartPayload - - $ref: '#/components/schemas/SpanEndPayload' - title: SpanEndPayload - title: SpanStartPayload | SpanEndPayload - required: - - trace_id - - span_id - - timestamp - - payload - title: StructuredLogEvent - type: object - UnstructuredLogEvent: - description: An unstructured log event containing a simple text message. - properties: - trace_id: - title: Trace Id - type: string - span_id: - title: Span Id - type: string - timestamp: - format: date-time - title: Timestamp - type: string - attributes: - anyOf: - - additionalProperties: - anyOf: - - type: string - - type: integer - - type: number - - type: boolean - - type: 'null' - title: string | ... 
(4 variants) - type: object - - type: 'null' - type: - const: unstructured_log - default: unstructured_log - title: Type - type: string - message: - title: Message - type: string - severity: - $ref: '#/components/schemas/LogSeverity' - required: - - trace_id - - span_id - - timestamp - - message - - severity - title: UnstructuredLogEvent - type: object - Event: - discriminator: - mapping: - metric: '#/components/schemas/MetricEvent' - structured_log: '#/components/schemas/StructuredLogEvent' - unstructured_log: '#/components/schemas/UnstructuredLogEvent' - propertyName: type - oneOf: - - $ref: '#/components/schemas/UnstructuredLogEvent' - title: UnstructuredLogEvent - - $ref: '#/components/schemas/MetricEvent' - title: MetricEvent - - $ref: '#/components/schemas/StructuredLogEvent' - title: StructuredLogEvent - title: UnstructuredLogEvent | MetricEvent | StructuredLogEvent MetricInResponse: description: A metric value included in API responses. properties: @@ -13225,236 +13004,6 @@ components: - logger_config title: PostTrainingRLHFRequest type: object - Span: - description: A span representing a single operation within a trace. - properties: - span_id: - title: Span Id - type: string - trace_id: - title: Trace Id - type: string - parent_span_id: - anyOf: - - type: string - - type: 'null' - nullable: true - name: - title: Name - type: string - start_time: - format: date-time - title: Start Time - type: string - end_time: - anyOf: - - format: date-time - type: string - - type: 'null' - nullable: true - attributes: - anyOf: - - additionalProperties: true - type: object - - type: 'null' - required: - - span_id - - trace_id - - name - - start_time - title: Span - type: object - Trace: - description: A trace representing the complete execution path of a request across multiple operations. - properties: - trace_id: - title: Trace Id - type: string - root_span_id: - title: Root Span Id - type: string - start_time: - format: date-time - title: Start Time - type: string - end_time: - anyOf: - - format: date-time - type: string - - type: 'null' - nullable: true - required: - - trace_id - - root_span_id - - start_time - title: Trace - type: object - EventType: - description: The type of telemetry event being logged. - enum: - - unstructured_log - - structured_log - - metric - title: EventType - type: string - StructuredLogType: - description: The type of structured log event payload. - enum: - - span_start - - span_end - title: StructuredLogType - type: string - EvalTrace: - description: A trace record for evaluation purposes. - properties: - session_id: - title: Session Id - type: string - step: - title: Step - type: string - input: - title: Input - type: string - output: - title: Output - type: string - expected_output: - title: Expected Output - type: string - required: - - session_id - - step - - input - - output - - expected_output - title: EvalTrace - type: object - SpanWithStatus: - description: A span that includes status information. 
- properties: - span_id: - title: Span Id - type: string - trace_id: - title: Trace Id - type: string - parent_span_id: - anyOf: - - type: string - - type: 'null' - nullable: true - name: - title: Name - type: string - start_time: - format: date-time - title: Start Time - type: string - end_time: - anyOf: - - format: date-time - type: string - - type: 'null' - nullable: true - attributes: - anyOf: - - additionalProperties: true - type: object - - type: 'null' - status: - anyOf: - - $ref: '#/components/schemas/SpanStatus' - title: SpanStatus - - type: 'null' - nullable: true - title: SpanStatus - required: - - span_id - - trace_id - - name - - start_time - title: SpanWithStatus - type: object - QueryConditionOp: - description: Comparison operators for query conditions. - enum: - - eq - - ne - - gt - - lt - title: QueryConditionOp - type: string - QueryCondition: - description: A condition for filtering query results. - properties: - key: - title: Key - type: string - op: - $ref: '#/components/schemas/QueryConditionOp' - value: - title: Value - required: - - key - - op - - value - title: QueryCondition - type: object - MetricLabel: - description: A label associated with a metric. - properties: - name: - title: Name - type: string - value: - title: Value - type: string - required: - - name - - value - title: MetricLabel - type: object - MetricDataPoint: - description: A single data point in a metric time series. - properties: - timestamp: - title: Timestamp - type: integer - value: - title: Value - type: number - unit: - title: Unit - type: string - required: - - timestamp - - value - - unit - title: MetricDataPoint - type: object - MetricSeries: - description: A time series of metric data points. - properties: - metric: - title: Metric - type: string - labels: - items: - $ref: '#/components/schemas/MetricLabel' - title: Labels - type: array - values: - items: - $ref: '#/components/schemas/MetricDataPoint' - title: Values - type: array - required: - - metric - - labels - - values - title: MetricSeries - type: object responses: BadRequest400: description: The request was invalid or malformed diff --git a/scripts/integration-tests.sh b/scripts/integration-tests.sh index 2adef892d..9907cd0bb 100755 --- a/scripts/integration-tests.sh +++ b/scripts/integration-tests.sh @@ -171,10 +171,18 @@ if [[ "$COLLECT_ONLY" == false ]]; then # Set MCP host for in-process MCP server tests # - For library client and server mode: localhost (both on same host) - # - For docker mode: host.docker.internal (container needs to reach host) + # - For docker mode on Linux: localhost (container uses host network, shares network namespace) + # - For docker mode on macOS/Windows: host.docker.internal (container uses bridge network) if [[ "$STACK_CONFIG" == docker:* ]]; then - export LLAMA_STACK_TEST_MCP_HOST="host.docker.internal" - echo "Setting MCP host: host.docker.internal (docker mode)" + if [[ "$(uname)" != "Darwin" ]] && [[ "$(uname)" != *"MINGW"* ]]; then + # On Linux with host network mode, container shares host network namespace + export LLAMA_STACK_TEST_MCP_HOST="localhost" + echo "Setting MCP host: localhost (docker mode with host network)" + else + # On macOS/Windows with bridge network, need special host access + export LLAMA_STACK_TEST_MCP_HOST="host.docker.internal" + echo "Setting MCP host: host.docker.internal (docker mode with bridge network)" + fi else export LLAMA_STACK_TEST_MCP_HOST="localhost" echo "Setting MCP host: localhost (library/server mode)" diff --git 
a/scripts/openapi_generator/schema_collection.py b/scripts/openapi_generator/schema_collection.py index 51a70c62a..127f6da9c 100644 --- a/scripts/openapi_generator/schema_collection.py +++ b/scripts/openapi_generator/schema_collection.py @@ -8,7 +8,6 @@ Schema discovery and collection for OpenAPI generation. """ -import importlib from typing import Any @@ -20,23 +19,6 @@ def _ensure_components_schemas(openapi_schema: dict[str, Any]) -> None: openapi_schema["components"]["schemas"] = {} -def _load_extra_schema_modules() -> None: - """ - Import modules outside llama_stack_api that use schema_utils to register schemas. - - The API package already imports its submodules via __init__, but server-side modules - like telemetry need to be imported explicitly so their decorator side effects run. - """ - extra_modules = [ - "llama_stack.core.telemetry.telemetry", - ] - for module_name in extra_modules: - try: - importlib.import_module(module_name) - except ImportError: - continue - - def _extract_and_fix_defs(schema: dict[str, Any], openapi_schema: dict[str, Any]) -> None: """ Extract $defs from a schema, move them to components/schemas, and fix references. @@ -79,9 +61,6 @@ def _ensure_json_schema_types_included(openapi_schema: dict[str, Any]) -> dict[s iter_registered_schema_types, ) - # Import extra modules (e.g., telemetry) whose schema registrations live outside llama_stack_api - _load_extra_schema_modules() - # Handle explicitly registered schemas first (union types, Annotated structs, etc.) for registration_info in iter_registered_schema_types(): schema_type = registration_info.type diff --git a/scripts/telemetry/llama-stack-dashboard.json b/scripts/telemetry/llama-stack-dashboard.json index a9f8ac7a2..a8db9713c 100644 --- a/scripts/telemetry/llama-stack-dashboard.json +++ b/scripts/telemetry/llama-stack-dashboard.json @@ -1,11 +1,24 @@ { "annotations": { - "list": [] + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] }, "editable": true, "fiscalYearStartMonth": 0, "graphTooltip": 0, - "id": null, + "id": 1, "links": [], "liveNow": false, "panels": [ @@ -16,11 +29,40 @@ }, "fieldConfig": { "defaults": { + "color": { + "mode": "palette-classic" + }, "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, "showPoints": "auto", - "fillOpacity": 10 + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } }, "mappings": [], "thresholds": { @@ -32,7 +74,8 @@ } ] } - } + }, + "overrides": [] }, "gridPos": { "h": 8, @@ -40,15 +83,16 @@ "x": 0, "y": 0 }, - "id": 1, + "id": 2, "options": { "legend": { "calcs": [], - "displayMode": "table", + "displayMode": "list", "placement": "bottom", "showLegend": true }, "tooltip": { + "maxHeight": 600, "mode": "multi", "sort": "none" } @@ -59,9 +103,112 @@ "type": "prometheus", "uid": "prometheus" }, - "expr": "llama_stack_completion_tokens_total", - "legendFormat": "{{model_id}} ({{provider_id}})", - "refId": "A" + "disableTextWrap": 
false, + "editorMode": "builder", + "expr": "sum by(gen_ai_request_model) (llama_stack_gen_ai_client_token_usage_sum{gen_ai_token_type=\"input\"})", + "fullMetaSearch": false, + "includeNullMetadata": true, + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Prompt Tokens", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 0 + }, + "id": 1, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "multi", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "disableTextWrap": false, + "editorMode": "builder", + "exemplar": false, + "expr": "sum by(gen_ai_request_model) (llama_stack_gen_ai_client_token_usage_sum{gen_ai_token_type=\"output\"})", + "fullMetaSearch": false, + "includeNullMetadata": true, + "interval": "", + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false } ], "title": "Completion Tokens", @@ -74,78 +221,40 @@ }, "fieldConfig": { "defaults": { + "color": { + "mode": "palette-classic" + }, "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, "showPoints": "auto", - "fillOpacity": 10 - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - } - } - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 0 - }, - "id": 2, - "options": { - "legend": { - "calcs": [], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "llama_stack_prompt_tokens_total", - "legendFormat": "Prompt - {{model_id}}", - "refId": "A" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "llama_stack_tokens_total", - "legendFormat": "Total - {{model_id}}", - "refId": "B" - } - ], - "title": "Prompt & Total Tokens", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "fieldConfig": { - "defaults": { - "custom": { - 
"drawStyle": "line", - "lineInterpolation": "linear", - "showPoints": "auto", - "fillOpacity": 10 + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } }, "mappings": [], "thresholds": { @@ -158,7 +267,8 @@ ] }, "unit": "ms" - } + }, + "overrides": [] }, "gridPos": { "h": 8, @@ -175,6 +285,7 @@ "showLegend": true }, "tooltip": { + "maxHeight": 600, "mode": "multi", "sort": "none" } @@ -219,7 +330,8 @@ } ] } - } + }, + "overrides": [] }, "gridPos": { "h": 8, @@ -240,8 +352,11 @@ "fields": "", "values": false }, - "textMode": "auto" + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true }, + "pluginVersion": "11.0.0", "targets": [ { "datasource": { @@ -272,7 +387,8 @@ } ] } - } + }, + "overrides": [] }, "gridPos": { "h": 8, @@ -293,8 +409,11 @@ "fields": "", "values": false }, - "textMode": "auto" + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true }, + "pluginVersion": "11.0.0", "targets": [ { "datasource": { @@ -315,11 +434,40 @@ }, "fieldConfig": { "defaults": { + "color": { + "mode": "palette-classic" + }, "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, "showPoints": "auto", - "fillOpacity": 10 + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } }, "mappings": [], "thresholds": { @@ -332,7 +480,8 @@ ] }, "unit": "reqps" - } + }, + "overrides": [] }, "gridPos": { "h": 8, @@ -349,6 +498,7 @@ "showLegend": true }, "tooltip": { + "maxHeight": 600, "mode": "multi", "sort": "none" } @@ -374,11 +524,40 @@ }, "fieldConfig": { "defaults": { + "color": { + "mode": "palette-classic" + }, "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, "showPoints": "auto", - "fillOpacity": 10 + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } }, "mappings": [], "thresholds": { @@ -391,7 +570,8 @@ ] }, "unit": "Bps" - } + }, + "overrides": [] }, "gridPos": { "h": 8, @@ -408,6 +588,7 @@ "showLegend": true }, "tooltip": { + "maxHeight": 600, "mode": "multi", "sort": "none" } @@ -437,7 +618,7 @@ } ], "refresh": "5s", - "schemaVersion": 38, + "schemaVersion": 39, "tags": [ "llama-stack" ], @@ -445,13 +626,14 @@ "list": [] }, "time": { - "from": "now-15m", + "from": "now-3h", "to": "now" }, + "timeRangeUpdatedDuringEditOrView": false, "timepicker": {}, "timezone": "browser", "title": "Llama Stack Metrics", "uid": "llama-stack-metrics", - "version": 0, + "version": 17, "weekStart": "" } diff --git a/src/llama_stack/core/datatypes.py b/src/llama_stack/core/datatypes.py index 1e29690ff..f64286ef5 100644 --- a/src/llama_stack/core/datatypes.py +++ b/src/llama_stack/core/datatypes.py @@ -191,22 +191,6 @@ class 
DistributionSpec(BaseModel):
     )
 
 
-class TelemetryConfig(BaseModel):
-    """
-    Configuration for telemetry.
-
-    Llama Stack uses OpenTelemetry for telemetry. Please refer to https://opentelemetry.io/docs/languages/sdk-configuration/
-    for env variables to configure the OpenTelemetry SDK.
-
-    Example:
-    ```bash
-    OTEL_SERVICE_NAME=llama-stack OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318 uv run llama stack run starter
-    ```
-    """
-
-    enabled: bool = Field(default=False, description="enable or disable telemetry")
-
-
 class OAuth2JWKSConfig(BaseModel):
     # The JWKS URI for collecting public keys
     uri: str
@@ -527,8 +511,6 @@ can be instantiated multiple times (with different configs) if necessary.
 
     logging: LoggingConfig | None = Field(default=None, description="Configuration for Llama Stack Logging")
 
-    telemetry: TelemetryConfig = Field(default_factory=TelemetryConfig, description="Configuration for telemetry")
-
     server: ServerConfig = Field(
         default_factory=ServerConfig,
         description="Configuration for the HTTP(S) server",
diff --git a/src/llama_stack/core/library_client.py b/src/llama_stack/core/library_client.py
index d6be7aeca..7ae29ad0d 100644
--- a/src/llama_stack/core/library_client.py
+++ b/src/llama_stack/core/library_client.py
@@ -46,8 +46,6 @@ from llama_stack.core.request_headers import PROVIDER_DATA_VAR, request_provider
 from llama_stack.core.resolver import ProviderRegistry
 from llama_stack.core.server.routes import RouteImpls, find_matching_route, initialize_route_impls
 from llama_stack.core.stack import Stack, get_stack_run_config_from_distro, replace_env_vars
-from llama_stack.core.telemetry import Telemetry
-from llama_stack.core.telemetry.tracing import CURRENT_TRACE_CONTEXT, end_trace, setup_logger, start_trace
 from llama_stack.core.utils.config import redact_sensitive_fields
 from llama_stack.core.utils.context import preserve_contexts_async_generator
 from llama_stack.core.utils.exec import in_notebook
@@ -204,13 +202,6 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):
         super().__init__()
         # Initialize logging from environment variables first
         setup_logging()
-
-        # when using the library client, we should not log to console since many
-        # of our logs are intended for server-side usage
-        if sinks_from_env := os.environ.get("TELEMETRY_SINKS", None):
-            current_sinks = sinks_from_env.strip().lower().split(",")
-            os.environ["TELEMETRY_SINKS"] = ",".join(sink for sink in current_sinks if sink != "console")
-
         if in_notebook():
             import nest_asyncio
@@ -295,8 +286,6 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):
                 raise _e
 
         assert self.impls is not None
-        if self.config.telemetry.enabled:
-            setup_logger(Telemetry())
 
         if not os.environ.get("PYTEST_CURRENT_TEST"):
             console = Console()
@@ -392,13 +381,7 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):
             body, field_names = self._handle_file_uploads(options, body)
 
         body = self._convert_body(matched_func, body, exclude_params=set(field_names))
-
-        trace_path = webmethod.descriptive_name or route_path
-        await start_trace(trace_path, {"__location__": "library_client"})
-        try:
-            result = await matched_func(**body)
-        finally:
-            await end_trace()
+        result = await matched_func(**body)
 
         # Handle FastAPI Response objects (e.g., from file content retrieval)
         if isinstance(result, FastAPIResponse):
@@ -457,19 +440,13 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):
 
         # Prepare body for the function call (handles both Pydantic and traditional params)
         body = self._convert_body(func, body)
-        trace_path = webmethod.descriptive_name or route_path
-        await start_trace(trace_path, {"__location__": "library_client"})
 
         async def gen():
-            try:
-                async for chunk in await func(**body):
-                    data = json.dumps(convert_pydantic_to_json_value(chunk))
-                    sse_event = f"data: {data}\n\n"
-                    yield sse_event.encode("utf-8")
-            finally:
-                await end_trace()
+            async for chunk in await func(**body):
+                data = json.dumps(convert_pydantic_to_json_value(chunk))
+                sse_event = f"data: {data}\n\n"
+                yield sse_event.encode("utf-8")
 
-        wrapped_gen = preserve_contexts_async_generator(gen(), [CURRENT_TRACE_CONTEXT, PROVIDER_DATA_VAR])
+        wrapped_gen = preserve_contexts_async_generator(gen(), [PROVIDER_DATA_VAR])
 
         mock_response = httpx.Response(
             status_code=httpx.codes.OK,
diff --git a/src/llama_stack/core/resolver.py b/src/llama_stack/core/resolver.py
index 6bc32c2d0..15720df95 100644
--- a/src/llama_stack/core/resolver.py
+++ b/src/llama_stack/core/resolver.py
@@ -392,8 +392,6 @@ async def instantiate_provider(
         args = [config, deps]
         if "policy" in inspect.signature(getattr(module, method)).parameters:
             args.append(policy)
-        if "telemetry_enabled" in inspect.signature(getattr(module, method)).parameters and run_config.telemetry:
-            args.append(run_config.telemetry.enabled)
 
     fn = getattr(module, method)
     impl = await fn(*args)
@@ -401,18 +399,6 @@
     impl.__provider_spec__ = provider_spec
     impl.__provider_config__ = config
 
-    # Apply tracing if telemetry is enabled and any base class has __marked_for_tracing__ marker
-    if run_config.telemetry.enabled:
-        traced_classes = [
-            base for base in reversed(impl.__class__.__mro__) if getattr(base, "__marked_for_tracing__", False)
-        ]
-
-        if traced_classes:
-            from llama_stack.core.telemetry.trace_protocol import trace_protocol
-
-            for cls in traced_classes:
-                trace_protocol(cls)
-
     protocols = api_protocol_map_for_compliance_check(run_config)
     additional_protocols = additional_protocols_map()
     # TODO: check compliance for special tool groups
diff --git a/src/llama_stack/core/routers/__init__.py b/src/llama_stack/core/routers/__init__.py
index 289755bcb..c6f8a7ac2 100644
--- a/src/llama_stack/core/routers/__init__.py
+++ b/src/llama_stack/core/routers/__init__.py
@@ -85,8 +85,6 @@ async def get_auto_router_impl(
         )
         await inference_store.initialize()
         api_to_dep_impl["store"] = inference_store
-        api_to_dep_impl["telemetry_enabled"] = run_config.telemetry.enabled
-
     elif api == Api.vector_io:
         api_to_dep_impl["vector_stores_config"] = run_config.vector_stores
     elif api == Api.safety:
diff --git a/src/llama_stack/core/routers/inference.py b/src/llama_stack/core/routers/inference.py
index 719624e86..8a7ffaa5f 100644
--- a/src/llama_stack/core/routers/inference.py
+++ b/src/llama_stack/core/routers/inference.py
@@ -7,7 +7,6 @@
 import asyncio
 import time
 from collections.abc import AsyncIterator
-from datetime import UTC, datetime
 from typing import Annotated, Any
 
 from fastapi import Body
@@ -15,11 +14,7 @@ from openai.types.chat import ChatCompletionToolChoiceOptionParam as OpenAIChatC
 from openai.types.chat import ChatCompletionToolParam as OpenAIChatCompletionToolParam
 from pydantic import TypeAdapter
 
-from llama_stack.core.telemetry.telemetry import MetricEvent
-from llama_stack.core.telemetry.tracing import enqueue_event, get_current_span
 from llama_stack.log import get_logger
-from llama_stack.models.llama.llama3.chat_format import ChatFormat
-from llama_stack.models.llama.llama3.tokenizer import Tokenizer
 from llama_stack.providers.utils.inference.inference_store import InferenceStore
 from llama_stack_api import (
     HealthResponse,
@@ -60,15 +55,10 @@ class InferenceRouter(Inference):
         self,
         routing_table: RoutingTable,
         store: InferenceStore | None = None,
-        telemetry_enabled: bool = False,
     ) -> None:
         logger.debug("Initializing InferenceRouter")
         self.routing_table = routing_table
-        self.telemetry_enabled = telemetry_enabled
         self.store = store
-        if self.telemetry_enabled:
-            self.tokenizer = Tokenizer.get_instance()
-            self.formatter = ChatFormat(self.tokenizer)
 
     async def initialize(self) -> None:
         logger.debug("InferenceRouter.initialize")
@@ -94,54 +84,6 @@ class InferenceRouter(Inference):
         )
         await self.routing_table.register_model(model_id, provider_model_id, provider_id, metadata, model_type)
 
-    def _construct_metrics(
-        self,
-        prompt_tokens: int,
-        completion_tokens: int,
-        total_tokens: int,
-        fully_qualified_model_id: str,
-        provider_id: str,
-    ) -> list[MetricEvent]:
-        """Constructs a list of MetricEvent objects containing token usage metrics.
-
-        Args:
-            prompt_tokens: Number of tokens in the prompt
-            completion_tokens: Number of tokens in the completion
-            total_tokens: Total number of tokens used
-            fully_qualified_model_id:
-            provider_id: The provider identifier
-
-        Returns:
-            List of MetricEvent objects with token usage metrics
-        """
-        span = get_current_span()
-        if span is None:
-            logger.warning("No span found for token usage metrics")
-            return []
-
-        metrics = [
-            ("prompt_tokens", prompt_tokens),
-            ("completion_tokens", completion_tokens),
-            ("total_tokens", total_tokens),
-        ]
-        metric_events = []
-        for metric_name, value in metrics:
-            metric_events.append(
-                MetricEvent(
-                    trace_id=span.trace_id,
-                    span_id=span.span_id,
-                    metric=metric_name,
-                    value=value,
-                    timestamp=datetime.now(UTC),
-                    unit="tokens",
-                    attributes={
-                        "model_id": fully_qualified_model_id,
-                        "provider_id": provider_id,
-                    },
-                )
-            )
-        return metric_events
-
     async def _get_model_provider(self, model_id: str, expected_model_type: str) -> tuple[Inference, str]:
         model = await self.routing_table.get_object_by_identifier("model", model_id)
         if model:
@@ -186,26 +128,9 @@
         if params.stream:
             return await provider.openai_completion(params)
 
-        # TODO: Metrics do NOT work with openai_completion stream=True due to the fact
-        # that we do not return an AsyncIterator, our tests expect a stream of chunks we cannot intercept currently.
         response = await provider.openai_completion(params)
         response.model = request_model_id
-        if self.telemetry_enabled and response.usage is not None:
-            metrics = self._construct_metrics(
-                prompt_tokens=response.usage.prompt_tokens,
-                completion_tokens=response.usage.completion_tokens,
-                total_tokens=response.usage.total_tokens,
-                fully_qualified_model_id=request_model_id,
-                provider_id=provider.__provider_id__,
-            )
-            for metric in metrics:
-                enqueue_event(metric)
-
-            # these metrics will show up in the client response.
- response.metrics = ( - metrics if not hasattr(response, "metrics") or response.metrics is None else response.metrics + metrics - ) return response async def openai_chat_completion( @@ -254,20 +179,6 @@ class InferenceRouter(Inference): if self.store: asyncio.create_task(self.store.store_chat_completion(response, params.messages)) - if self.telemetry_enabled and response.usage is not None: - metrics = self._construct_metrics( - prompt_tokens=response.usage.prompt_tokens, - completion_tokens=response.usage.completion_tokens, - total_tokens=response.usage.total_tokens, - fully_qualified_model_id=request_model_id, - provider_id=provider.__provider_id__, - ) - for metric in metrics: - enqueue_event(metric) - # these metrics will show up in the client response. - response.metrics = ( - metrics if not hasattr(response, "metrics") or response.metrics is None else response.metrics + metrics - ) return response async def openai_embeddings( @@ -411,18 +322,6 @@ class InferenceRouter(Inference): for choice_data in choices_data.values(): completion_text += "".join(choice_data["content_parts"]) - # Add metrics to the chunk - if self.telemetry_enabled and hasattr(chunk, "usage") and chunk.usage: - metrics = self._construct_metrics( - prompt_tokens=chunk.usage.prompt_tokens, - completion_tokens=chunk.usage.completion_tokens, - total_tokens=chunk.usage.total_tokens, - fully_qualified_model_id=fully_qualified_model_id, - provider_id=provider_id, - ) - for metric in metrics: - enqueue_event(metric) - yield chunk finally: # Store the final assembled completion diff --git a/src/llama_stack/core/routers/safety.py b/src/llama_stack/core/routers/safety.py index 2bc99f14f..10c21ea88 100644 --- a/src/llama_stack/core/routers/safety.py +++ b/src/llama_stack/core/routers/safety.py @@ -6,11 +6,15 @@ from typing import Any +from opentelemetry import trace + from llama_stack.core.datatypes import SafetyConfig from llama_stack.log import get_logger +from llama_stack.telemetry.helpers import safety_request_span_attributes, safety_span_name from llama_stack_api import ModerationObject, OpenAIMessageParam, RoutingTable, RunShieldResponse, Safety, Shield logger = get_logger(name=__name__, category="core::routers") +tracer = trace.get_tracer(__name__) class SafetyRouter(Safety): @@ -51,13 +55,17 @@ class SafetyRouter(Safety): messages: list[OpenAIMessageParam], params: dict[str, Any] = None, ) -> RunShieldResponse: - logger.debug(f"SafetyRouter.run_shield: {shield_id}") - provider = await self.routing_table.get_provider_impl(shield_id) - return await provider.run_shield( - shield_id=shield_id, - messages=messages, - params=params, - ) + with tracer.start_as_current_span(name=safety_span_name(shield_id)): + logger.debug(f"SafetyRouter.run_shield: {shield_id}") + provider = await self.routing_table.get_provider_impl(shield_id) + response = await provider.run_shield( + shield_id=shield_id, + messages=messages, + params=params, + ) + + safety_request_span_attributes(shield_id, messages, response) + return response async def run_moderation(self, input: str | list[str], model: str | None = None) -> ModerationObject: list_shields_response = await self.routing_table.list_shields() diff --git a/src/llama_stack/core/server/server.py b/src/llama_stack/core/server/server.py index 0d3513980..9a01eb75e 100644 --- a/src/llama_stack/core/server/server.py +++ b/src/llama_stack/core/server/server.py @@ -50,8 +50,6 @@ from llama_stack.core.stack import ( cast_image_name_to_string, replace_env_vars, ) -from llama_stack.core.telemetry import 
Telemetry -from llama_stack.core.telemetry.tracing import CURRENT_TRACE_CONTEXT, setup_logger from llama_stack.core.utils.config import redact_sensitive_fields from llama_stack.core.utils.config_resolution import Mode, resolve_config_or_distro from llama_stack.core.utils.context import preserve_contexts_async_generator @@ -60,7 +58,6 @@ from llama_stack_api import Api, ConflictError, PaginatedResponse, ResourceNotFo from .auth import AuthenticationMiddleware from .quota import QuotaMiddleware -from .tracing import TracingMiddleware REPO_ROOT = Path(__file__).parent.parent.parent.parent @@ -263,7 +260,7 @@ def create_dynamic_typed_route(func: Any, method: str, route: str) -> Callable: try: if is_streaming: - context_vars = [CURRENT_TRACE_CONTEXT, PROVIDER_DATA_VAR] + context_vars = [PROVIDER_DATA_VAR] if test_context_var is not None: context_vars.append(test_context_var) gen = preserve_contexts_async_generator(sse_generator(func(**kwargs)), context_vars) @@ -441,9 +438,6 @@ def create_app() -> StackApp: if cors_config: app.add_middleware(CORSMiddleware, **cors_config.model_dump()) - if config.telemetry.enabled: - setup_logger(Telemetry()) - # Load external APIs if configured external_apis = load_external_apis(config) all_routes = get_all_api_routes(external_apis) @@ -500,9 +494,6 @@ def create_app() -> StackApp: app.exception_handler(RequestValidationError)(global_exception_handler) app.exception_handler(Exception)(global_exception_handler) - if config.telemetry.enabled: - app.add_middleware(TracingMiddleware, impls=impls, external_apis=external_apis) - return app diff --git a/src/llama_stack/core/server/tracing.py b/src/llama_stack/core/server/tracing.py deleted file mode 100644 index c4901d9b1..000000000 --- a/src/llama_stack/core/server/tracing.py +++ /dev/null @@ -1,80 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
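The module deleted below is the hand-rolled ASGI middleware that matched requests to routes, extracted W3C traceparent/tracestate headers, and stamped an x-trace-id response header. The patch removes it without adding an HTTP-layer replacement; if inbound context propagation is still wanted, the conventional route with the OpenTelemetry SDK is its FastAPI instrumentation. A sketch under that assumption (this is not something the patch itself wires up):

from opentelemetry.instrumentation.fastapi import FastAPIInstrumentor

# Hypothetical wiring: instruments every route and honors incoming
# traceparent/tracestate headers; exporters and endpoints come from the
# standard OTEL_* environment variables rather than app code.
FastAPIInstrumentor.instrument_app(app)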
-from aiohttp import hdrs - -from llama_stack.core.external import ExternalApiSpec -from llama_stack.core.server.routes import find_matching_route, initialize_route_impls -from llama_stack.core.telemetry.tracing import end_trace, start_trace -from llama_stack.log import get_logger - -logger = get_logger(name=__name__, category="core::server") - - -class TracingMiddleware: - def __init__(self, app, impls, external_apis: dict[str, ExternalApiSpec]): - self.app = app - self.impls = impls - self.external_apis = external_apis - # FastAPI built-in paths that should bypass custom routing - self.fastapi_paths = ("/docs", "/redoc", "/openapi.json", "/favicon.ico", "/static") - - async def __call__(self, scope, receive, send): - if scope.get("type") == "lifespan": - return await self.app(scope, receive, send) - - path = scope.get("path", "") - - # Check if the path is a FastAPI built-in path - if path.startswith(self.fastapi_paths): - # Pass through to FastAPI's built-in handlers - logger.debug(f"Bypassing custom routing for FastAPI built-in path: {path}") - return await self.app(scope, receive, send) - - if not hasattr(self, "route_impls"): - self.route_impls = initialize_route_impls(self.impls, self.external_apis) - - try: - _, _, route_path, webmethod = find_matching_route( - scope.get("method", hdrs.METH_GET), path, self.route_impls - ) - except ValueError: - # If no matching endpoint is found, pass through to FastAPI - logger.debug(f"No matching route found for path: {path}, falling back to FastAPI") - return await self.app(scope, receive, send) - - # Log deprecation warning if route is deprecated - if getattr(webmethod, "deprecated", False): - logger.warning( - f"DEPRECATED ROUTE USED: {scope.get('method', 'GET')} {path} - " - f"This route is deprecated and may be removed in a future version. " - f"Please check the docs for the supported version." - ) - - trace_attributes = {"__location__": "server", "raw_path": path} - - # Extract W3C trace context headers and store as trace attributes - headers = dict(scope.get("headers", [])) - traceparent = headers.get(b"traceparent", b"").decode() - if traceparent: - trace_attributes["traceparent"] = traceparent - tracestate = headers.get(b"tracestate", b"").decode() - if tracestate: - trace_attributes["tracestate"] = tracestate - - trace_path = webmethod.descriptive_name or route_path - trace_context = await start_trace(trace_path, trace_attributes) - - async def send_with_trace_id(message): - if message["type"] == "http.response.start": - headers = message.get("headers", []) - headers.append([b"x-trace-id", str(trace_context.trace_id).encode()]) - message["headers"] = headers - await send(message) - - try: - return await self.app(scope, receive, send_with_trace_id) - finally: - await end_trace() diff --git a/src/llama_stack/core/telemetry/__init__.py b/src/llama_stack/core/telemetry/__init__.py deleted file mode 100644 index bab612c0d..000000000 --- a/src/llama_stack/core/telemetry/__init__.py +++ /dev/null @@ -1,32 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
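The package deleted below re-exported the in-house tracing surface (span, start_trace, enqueue_event, and friends). Call sites now use the OpenTelemetry API directly, as the SafetyRouter hunk above (and the responses-streaming hunks further down) already do. A minimal sketch of that pattern, with an illustrative span name and attribute:

from opentelemetry import trace

tracer = trace.get_tracer(__name__)

with tracer.start_as_current_span("run_shield") as span:
    # Attributes attach to the active span; where the span is exported
    # is decided by SDK configuration, not by the call site.
    span.set_attribute("shield_id", "llama-guard")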
- -from .telemetry import Telemetry -from .trace_protocol import serialize_value, trace_protocol -from .tracing import ( - CURRENT_TRACE_CONTEXT, - ROOT_SPAN_MARKERS, - end_trace, - enqueue_event, - get_current_span, - setup_logger, - span, - start_trace, -) - -__all__ = [ - "Telemetry", - "trace_protocol", - "serialize_value", - "CURRENT_TRACE_CONTEXT", - "ROOT_SPAN_MARKERS", - "end_trace", - "enqueue_event", - "get_current_span", - "setup_logger", - "span", - "start_trace", -] diff --git a/src/llama_stack/core/telemetry/telemetry.py b/src/llama_stack/core/telemetry/telemetry.py deleted file mode 100644 index 5268fa641..000000000 --- a/src/llama_stack/core/telemetry/telemetry.py +++ /dev/null @@ -1,629 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -import os -import threading -from collections.abc import Mapping, Sequence -from datetime import datetime -from enum import Enum -from typing import ( - Annotated, - Any, - Literal, - cast, -) - -from opentelemetry import metrics, trace -from opentelemetry.exporter.otlp.proto.http.metric_exporter import OTLPMetricExporter -from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter -from opentelemetry.sdk.metrics import MeterProvider -from opentelemetry.sdk.metrics.export import PeriodicExportingMetricReader -from opentelemetry.sdk.trace import TracerProvider -from opentelemetry.sdk.trace.export import BatchSpanProcessor -from opentelemetry.trace.propagation.tracecontext import TraceContextTextMapPropagator -from pydantic import BaseModel, Field - -from llama_stack.log import get_logger -from llama_stack.models.llama.datatypes import Primitive -from llama_stack_api import json_schema_type, register_schema - -ROOT_SPAN_MARKERS = ["__root__", "__root_span__"] - -# Type alias for OpenTelemetry attribute values (excludes None) -AttributeValue = str | bool | int | float | Sequence[str] | Sequence[bool] | Sequence[int] | Sequence[float] -Attributes = Mapping[str, AttributeValue] - - -@json_schema_type -class SpanStatus(Enum): - """The status of a span indicating whether it completed successfully or with an error. - :cvar OK: Span completed successfully without errors - :cvar ERROR: Span completed with an error or failure - """ - - OK = "ok" - ERROR = "error" - - -@json_schema_type -class Span(BaseModel): - """A span representing a single operation within a trace. 
- :param span_id: Unique identifier for the span - :param trace_id: Unique identifier for the trace this span belongs to - :param parent_span_id: (Optional) Unique identifier for the parent span, if this is a child span - :param name: Human-readable name describing the operation this span represents - :param start_time: Timestamp when the operation began - :param end_time: (Optional) Timestamp when the operation finished, if completed - :param attributes: (Optional) Key-value pairs containing additional metadata about the span - """ - - span_id: str - trace_id: str - parent_span_id: str | None = None - name: str - start_time: datetime - end_time: datetime | None = None - attributes: dict[str, Any] | None = Field(default_factory=lambda: {}) - - def set_attribute(self, key: str, value: Any): - if self.attributes is None: - self.attributes = {} - self.attributes[key] = value - - -@json_schema_type -class Trace(BaseModel): - """A trace representing the complete execution path of a request across multiple operations. - :param trace_id: Unique identifier for the trace - :param root_span_id: Unique identifier for the root span that started this trace - :param start_time: Timestamp when the trace began - :param end_time: (Optional) Timestamp when the trace finished, if completed - """ - - trace_id: str - root_span_id: str - start_time: datetime - end_time: datetime | None = None - - -@json_schema_type -class EventType(Enum): - """The type of telemetry event being logged. - :cvar UNSTRUCTURED_LOG: A simple log message with severity level - :cvar STRUCTURED_LOG: A structured log event with typed payload data - :cvar METRIC: A metric measurement with value and unit - """ - - UNSTRUCTURED_LOG = "unstructured_log" - STRUCTURED_LOG = "structured_log" - METRIC = "metric" - - -@json_schema_type -class LogSeverity(Enum): - """The severity level of a log message. - :cvar VERBOSE: Detailed diagnostic information for troubleshooting - :cvar DEBUG: Debug information useful during development - :cvar INFO: General informational messages about normal operation - :cvar WARN: Warning messages about potentially problematic situations - :cvar ERROR: Error messages indicating failures that don't stop execution - :cvar CRITICAL: Critical error messages indicating severe failures - """ - - VERBOSE = "verbose" - DEBUG = "debug" - INFO = "info" - WARN = "warn" - ERROR = "error" - CRITICAL = "critical" - - -class EventCommon(BaseModel): - """Common fields shared by all telemetry events. - :param trace_id: Unique identifier for the trace this event belongs to - :param span_id: Unique identifier for the span this event belongs to - :param timestamp: Timestamp when the event occurred - :param attributes: (Optional) Key-value pairs containing additional metadata about the event - """ - - trace_id: str - span_id: str - timestamp: datetime - attributes: dict[str, Primitive] | None = Field(default_factory=lambda: {}) - - -@json_schema_type -class UnstructuredLogEvent(EventCommon): - """An unstructured log event containing a simple text message. - :param type: Event type identifier set to UNSTRUCTURED_LOG - :param message: The log message text - :param severity: The severity level of the log message - """ - - type: Literal[EventType.UNSTRUCTURED_LOG] = EventType.UNSTRUCTURED_LOG - message: str - severity: LogSeverity - - -@json_schema_type -class MetricEvent(EventCommon): - """A metric event containing a measured value. 
- :param type: Event type identifier set to METRIC - :param metric: The name of the metric being measured - :param value: The numeric value of the metric measurement - :param unit: The unit of measurement for the metric value - """ - - type: Literal[EventType.METRIC] = EventType.METRIC - metric: str # this would be an enum - value: int | float - unit: str - - -@json_schema_type -class StructuredLogType(Enum): - """The type of structured log event payload. - :cvar SPAN_START: Event indicating the start of a new span - :cvar SPAN_END: Event indicating the completion of a span - """ - - SPAN_START = "span_start" - SPAN_END = "span_end" - - -@json_schema_type -class SpanStartPayload(BaseModel): - """Payload for a span start event. - :param type: Payload type identifier set to SPAN_START - :param name: Human-readable name describing the operation this span represents - :param parent_span_id: (Optional) Unique identifier for the parent span, if this is a child span - """ - - type: Literal[StructuredLogType.SPAN_START] = StructuredLogType.SPAN_START - name: str - parent_span_id: str | None = None - - -@json_schema_type -class SpanEndPayload(BaseModel): - """Payload for a span end event. - :param type: Payload type identifier set to SPAN_END - :param status: The final status of the span indicating success or failure - """ - - type: Literal[StructuredLogType.SPAN_END] = StructuredLogType.SPAN_END - status: SpanStatus - - -StructuredLogPayload = Annotated[ - SpanStartPayload | SpanEndPayload, - Field(discriminator="type"), -] -register_schema(StructuredLogPayload, name="StructuredLogPayload") - - -@json_schema_type -class StructuredLogEvent(EventCommon): - """A structured log event containing typed payload data. - :param type: Event type identifier set to STRUCTURED_LOG - :param payload: The structured payload data for the log event - """ - - type: Literal[EventType.STRUCTURED_LOG] = EventType.STRUCTURED_LOG - payload: StructuredLogPayload - - -Event = Annotated[ - UnstructuredLogEvent | MetricEvent | StructuredLogEvent, - Field(discriminator="type"), -] -register_schema(Event, name="Event") - - -@json_schema_type -class EvalTrace(BaseModel): - """A trace record for evaluation purposes. - :param session_id: Unique identifier for the evaluation session - :param step: The evaluation step or phase identifier - :param input: The input data for the evaluation - :param output: The actual output produced during evaluation - :param expected_output: The expected output for comparison during evaluation - """ - - session_id: str - step: str - input: str - output: str - expected_output: str - - -@json_schema_type -class SpanWithStatus(Span): - """A span that includes status information. - :param status: (Optional) The current status of the span - """ - - status: SpanStatus | None = None - - -@json_schema_type -class QueryConditionOp(Enum): - """Comparison operators for query conditions. - :cvar EQ: Equal to comparison - :cvar NE: Not equal to comparison - :cvar GT: Greater than comparison - :cvar LT: Less than comparison - """ - - EQ = "eq" - NE = "ne" - GT = "gt" - LT = "lt" - - -@json_schema_type -class QueryCondition(BaseModel): - """A condition for filtering query results. - :param key: The attribute key to filter on - :param op: The comparison operator to apply - :param value: The value to compare against - """ - - key: str - op: QueryConditionOp - value: Any - - -class QueryTracesResponse(BaseModel): - """Response containing a list of traces. 
- :param data: List of traces matching the query criteria - """ - - data: list[Trace] - - -class QuerySpansResponse(BaseModel): - """Response containing a list of spans. - :param data: List of spans matching the query criteria - """ - - data: list[Span] - - -class QuerySpanTreeResponse(BaseModel): - """Response containing a tree structure of spans. - :param data: Dictionary mapping span IDs to spans with status information - """ - - data: dict[str, SpanWithStatus] - - -class MetricQueryType(Enum): - """The type of metric query to perform. - :cvar RANGE: Query metrics over a time range - :cvar INSTANT: Query metrics at a specific point in time - """ - - RANGE = "range" - INSTANT = "instant" - - -class MetricLabelOperator(Enum): - """Operators for matching metric labels. - :cvar EQUALS: Label value must equal the specified value - :cvar NOT_EQUALS: Label value must not equal the specified value - :cvar REGEX_MATCH: Label value must match the specified regular expression - :cvar REGEX_NOT_MATCH: Label value must not match the specified regular expression - """ - - EQUALS = "=" - NOT_EQUALS = "!=" - REGEX_MATCH = "=~" - REGEX_NOT_MATCH = "!~" - - -class MetricLabelMatcher(BaseModel): - """A matcher for filtering metrics by label values. - :param name: The name of the label to match - :param value: The value to match against - :param operator: The comparison operator to use for matching - """ - - name: str - value: str - operator: MetricLabelOperator = MetricLabelOperator.EQUALS - - -@json_schema_type -class MetricLabel(BaseModel): - """A label associated with a metric. - :param name: The name of the label - :param value: The value of the label - """ - - name: str - value: str - - -@json_schema_type -class MetricDataPoint(BaseModel): - """A single data point in a metric time series. - :param timestamp: Unix timestamp when the metric value was recorded - :param value: The numeric value of the metric at this timestamp - """ - - timestamp: int - value: float - unit: str - - -@json_schema_type -class MetricSeries(BaseModel): - """A time series of metric data points. - :param metric: The name of the metric - :param labels: List of labels associated with this metric series - :param values: List of data points in chronological order - """ - - metric: str - labels: list[MetricLabel] - values: list[MetricDataPoint] - - -class QueryMetricsResponse(BaseModel): - """Response containing metric time series data. - :param data: List of metric series matching the query criteria - """ - - data: list[MetricSeries] - - -_GLOBAL_STORAGE: dict[str, dict[str | int, Any]] = { - "active_spans": {}, - "counters": {}, - "gauges": {}, - "up_down_counters": {}, - "histograms": {}, -} -_global_lock = threading.Lock() -_TRACER_PROVIDER = None - -logger = get_logger(name=__name__, category="telemetry") - - -def _clean_attributes(attrs: dict[str, Any] | None) -> Attributes | None: - """Remove None values from attributes dict to match OpenTelemetry's expected type.""" - if attrs is None: - return None - return {k: v for k, v in attrs.items() if v is not None} - - -def is_tracing_enabled(tracer): - with tracer.start_as_current_span("check_tracing") as span: - return span.is_recording() - - -class Telemetry: - def __init__(self) -> None: - self.meter = None - - global _TRACER_PROVIDER - # Initialize the correct span processor based on the provider state. - # This is needed since once the span processor is set, it cannot be unset. - # Recreating the telemetry adapter multiple times will result in duplicate span processors. 
- # Since the library client can be recreated multiple times in a notebook, - # the kernel will hold on to the span processor and cause duplicate spans to be written. - if os.environ.get("OTEL_EXPORTER_OTLP_ENDPOINT"): - if _TRACER_PROVIDER is None: - provider = TracerProvider() - trace.set_tracer_provider(provider) - _TRACER_PROVIDER = provider - - # Use single OTLP endpoint for all telemetry signals - - # Let OpenTelemetry SDK handle endpoint construction automatically - # The SDK will read OTEL_EXPORTER_OTLP_ENDPOINT and construct appropriate URLs - # https://opentelemetry.io/docs/languages/sdk-configuration/otlp-exporter - span_exporter = OTLPSpanExporter() - span_processor = BatchSpanProcessor(span_exporter) - cast(TracerProvider, trace.get_tracer_provider()).add_span_processor(span_processor) - - metric_reader = PeriodicExportingMetricReader(OTLPMetricExporter()) - metric_provider = MeterProvider(metric_readers=[metric_reader]) - metrics.set_meter_provider(metric_provider) - self.is_otel_endpoint_set = True - else: - logger.warning("OTEL_EXPORTER_OTLP_ENDPOINT is not set, skipping telemetry") - self.is_otel_endpoint_set = False - - self.meter = metrics.get_meter(__name__) - self._lock = _global_lock - - async def initialize(self) -> None: - pass - - async def shutdown(self) -> None: - if self.is_otel_endpoint_set: - cast(TracerProvider, trace.get_tracer_provider()).force_flush() - - async def log_event(self, event: Event, ttl_seconds: int = 604800) -> None: - if isinstance(event, UnstructuredLogEvent): - self._log_unstructured(event, ttl_seconds) - elif isinstance(event, MetricEvent): - self._log_metric(event) - elif isinstance(event, StructuredLogEvent): - self._log_structured(event, ttl_seconds) - else: - raise ValueError(f"Unknown event type: {event}") - - def _log_unstructured(self, event: UnstructuredLogEvent, ttl_seconds: int) -> None: - with self._lock: - # Use global storage instead of instance storage - span_id = int(event.span_id, 16) - span = _GLOBAL_STORAGE["active_spans"].get(span_id) - - if span: - timestamp_ns = int(event.timestamp.timestamp() * 1e9) - span.add_event( - name=event.type.value, - attributes={ - "message": event.message, - "severity": event.severity.value, - "__ttl__": ttl_seconds, - **(event.attributes or {}), - }, - timestamp=timestamp_ns, - ) - else: - print(f"Warning: No active span found for span_id {span_id}. 
Dropping event: {event}") - - def _get_or_create_counter(self, name: str, unit: str) -> metrics.Counter: - assert self.meter is not None - if name not in _GLOBAL_STORAGE["counters"]: - _GLOBAL_STORAGE["counters"][name] = self.meter.create_counter( - name=name, - unit=unit, - description=f"Counter for {name}", - ) - return cast(metrics.Counter, _GLOBAL_STORAGE["counters"][name]) - - def _get_or_create_gauge(self, name: str, unit: str) -> metrics.ObservableGauge: - assert self.meter is not None - if name not in _GLOBAL_STORAGE["gauges"]: - _GLOBAL_STORAGE["gauges"][name] = self.meter.create_gauge( - name=name, - unit=unit, - description=f"Gauge for {name}", - ) - return cast(metrics.ObservableGauge, _GLOBAL_STORAGE["gauges"][name]) - - def _get_or_create_histogram(self, name: str, unit: str) -> metrics.Histogram: - assert self.meter is not None - if name not in _GLOBAL_STORAGE["histograms"]: - _GLOBAL_STORAGE["histograms"][name] = self.meter.create_histogram( - name=name, - unit=unit, - description=f"Histogram for {name}", - ) - return cast(metrics.Histogram, _GLOBAL_STORAGE["histograms"][name]) - - def _log_metric(self, event: MetricEvent) -> None: - # Add metric as an event to the current span - try: - with self._lock: - # Only try to add to span if we have a valid span_id - if event.span_id: - try: - span_id = int(event.span_id, 16) - span = _GLOBAL_STORAGE["active_spans"].get(span_id) - - if span: - timestamp_ns = int(event.timestamp.timestamp() * 1e9) - span.add_event( - name=f"metric.{event.metric}", - attributes={ - "value": event.value, - "unit": event.unit, - **(event.attributes or {}), - }, - timestamp=timestamp_ns, - ) - except (ValueError, KeyError): - # Invalid span_id or span not found, but we already logged to console above - pass - except Exception: - # Lock acquisition failed - logger.debug("Failed to acquire lock to add metric to span") - - # Log to OpenTelemetry meter if available - if self.meter is None: - return - - # Use histograms for token-related metrics (per-request measurements) - # Use counters for other cumulative metrics - token_metrics = {"prompt_tokens", "completion_tokens", "total_tokens"} - - if event.metric in token_metrics: - # Token metrics are per-request measurements, use histogram - histogram = self._get_or_create_histogram(event.metric, event.unit) - histogram.record(event.value, attributes=_clean_attributes(event.attributes)) - elif isinstance(event.value, int): - counter = self._get_or_create_counter(event.metric, event.unit) - counter.add(event.value, attributes=_clean_attributes(event.attributes)) - elif isinstance(event.value, float): - up_down_counter = self._get_or_create_up_down_counter(event.metric, event.unit) - up_down_counter.add(event.value, attributes=_clean_attributes(event.attributes)) - - def _get_or_create_up_down_counter(self, name: str, unit: str) -> metrics.UpDownCounter: - assert self.meter is not None - if name not in _GLOBAL_STORAGE["up_down_counters"]: - _GLOBAL_STORAGE["up_down_counters"][name] = self.meter.create_up_down_counter( - name=name, - unit=unit, - description=f"UpDownCounter for {name}", - ) - return cast(metrics.UpDownCounter, _GLOBAL_STORAGE["up_down_counters"][name]) - - def _log_structured(self, event: StructuredLogEvent, ttl_seconds: int) -> None: - with self._lock: - span_id = int(event.span_id, 16) - tracer = trace.get_tracer(__name__) - if event.attributes is None: - event.attributes = {} - event.attributes["__ttl__"] = ttl_seconds - - # Extract these W3C trace context attributes so they are not written to 
- # underlying storage, as we just need them to propagate the trace context. - traceparent = event.attributes.pop("traceparent", None) - tracestate = event.attributes.pop("tracestate", None) - if traceparent: - # If we have a traceparent header value, we're not the root span. - for root_attribute in ROOT_SPAN_MARKERS: - event.attributes.pop(root_attribute, None) - - if isinstance(event.payload, SpanStartPayload): - # Check if span already exists to prevent duplicates - if span_id in _GLOBAL_STORAGE["active_spans"]: - return - - context = None - if event.payload.parent_span_id: - parent_span_id = int(event.payload.parent_span_id, 16) - parent_span = _GLOBAL_STORAGE["active_spans"].get(parent_span_id) - if parent_span: - context = trace.set_span_in_context(parent_span) - elif traceparent: - carrier = { - "traceparent": traceparent, - "tracestate": tracestate, - } - context = TraceContextTextMapPropagator().extract(carrier=carrier) - - span = tracer.start_span( - name=event.payload.name, - context=context, - attributes=_clean_attributes(event.attributes), - ) - _GLOBAL_STORAGE["active_spans"][span_id] = span - - elif isinstance(event.payload, SpanEndPayload): - span = _GLOBAL_STORAGE["active_spans"].get(span_id) # type: ignore[assignment] - if span: - if event.attributes: - cleaned_attrs = _clean_attributes(event.attributes) - if cleaned_attrs: - span.set_attributes(cleaned_attrs) - - status = ( - trace.Status(status_code=trace.StatusCode.OK) - if event.payload.status == SpanStatus.OK - else trace.Status(status_code=trace.StatusCode.ERROR) - ) - span.set_status(status) - span.end() - _GLOBAL_STORAGE["active_spans"].pop(span_id, None) - else: - raise ValueError(f"Unknown structured log event: {event}") diff --git a/src/llama_stack/core/telemetry/trace_protocol.py b/src/llama_stack/core/telemetry/trace_protocol.py deleted file mode 100644 index 95b33a4bc..000000000 --- a/src/llama_stack/core/telemetry/trace_protocol.py +++ /dev/null @@ -1,154 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -import asyncio -import inspect -import json -from collections.abc import AsyncGenerator, Callable -from functools import wraps -from typing import Any, cast - -from pydantic import BaseModel - -from llama_stack.models.llama.datatypes import Primitive - -type JSONValue = Primitive | list["JSONValue"] | dict[str, "JSONValue"] - - -def serialize_value(value: Any) -> str: - return str(_prepare_for_json(value)) - - -def _prepare_for_json(value: Any) -> JSONValue: - """Serialize a single value into JSON-compatible format.""" - if value is None: - return "" - elif isinstance(value, str | int | float | bool): - return value - elif hasattr(value, "_name_"): - return cast(str, value._name_) - elif isinstance(value, BaseModel): - return cast(JSONValue, json.loads(value.model_dump_json())) - elif isinstance(value, list | tuple | set): - return [_prepare_for_json(item) for item in value] - elif isinstance(value, dict): - return {str(k): _prepare_for_json(v) for k, v in value.items()} - else: - try: - json.dumps(value) - return cast(JSONValue, value) - except Exception: - return str(value) - - -def trace_protocol[T: type[Any]](cls: T) -> T: - """ - A class decorator that automatically traces all methods in a protocol/base class - and its inheriting classes. 
- """ - - def trace_method(method: Callable[..., Any]) -> Callable[..., Any]: - is_async = asyncio.iscoroutinefunction(method) - is_async_gen = inspect.isasyncgenfunction(method) - - def create_span_context(self: Any, *args: Any, **kwargs: Any) -> tuple[str, str, dict[str, Primitive]]: - class_name = self.__class__.__name__ - method_name = method.__name__ - span_type = "async_generator" if is_async_gen else "async" if is_async else "sync" - sig = inspect.signature(method) - param_names = list(sig.parameters.keys())[1:] # Skip 'self' - combined_args: dict[str, str] = {} - for i, arg in enumerate(args): - param_name = param_names[i] if i < len(param_names) else f"position_{i + 1}" - combined_args[param_name] = serialize_value(arg) - for k, v in kwargs.items(): - combined_args[str(k)] = serialize_value(v) - - span_attributes: dict[str, Primitive] = { - "__autotraced__": True, - "__class__": class_name, - "__method__": method_name, - "__type__": span_type, - "__args__": json.dumps(combined_args), - } - - return class_name, method_name, span_attributes - - @wraps(method) - async def async_gen_wrapper(self: Any, *args: Any, **kwargs: Any) -> AsyncGenerator[Any, None]: - from llama_stack.core.telemetry import tracing - - class_name, method_name, span_attributes = create_span_context(self, *args, **kwargs) - - with tracing.span(f"{class_name}.{method_name}", span_attributes) as span: - count = 0 - try: - async for item in method(self, *args, **kwargs): - yield item - count += 1 - finally: - span.set_attribute("chunk_count", count) - - @wraps(method) - async def async_wrapper(self: Any, *args: Any, **kwargs: Any) -> Any: - from llama_stack.core.telemetry import tracing - - class_name, method_name, span_attributes = create_span_context(self, *args, **kwargs) - - with tracing.span(f"{class_name}.{method_name}", span_attributes) as span: - try: - result = await method(self, *args, **kwargs) - span.set_attribute("output", serialize_value(result)) - return result - except Exception as e: - span.set_attribute("error", str(e)) - raise - - @wraps(method) - def sync_wrapper(self: Any, *args: Any, **kwargs: Any) -> Any: - from llama_stack.core.telemetry import tracing - - class_name, method_name, span_attributes = create_span_context(self, *args, **kwargs) - - with tracing.span(f"{class_name}.{method_name}", span_attributes) as span: - try: - result = method(self, *args, **kwargs) - span.set_attribute("output", serialize_value(result)) - return result - except Exception as e: - span.set_attribute("error", str(e)) - raise - - if is_async_gen: - return async_gen_wrapper - elif is_async: - return async_wrapper - else: - return sync_wrapper - - # Wrap methods on the class itself (for classes applied at runtime) - # Skip if already wrapped (indicated by __wrapped__ attribute) - for name, method in vars(cls).items(): - if inspect.isfunction(method) and not name.startswith("_"): - if not hasattr(method, "__wrapped__"): - wrapped = trace_method(method) - setattr(cls, name, wrapped) # noqa: B010 - - # Also set up __init_subclass__ for future subclasses - original_init_subclass = cast(Callable[..., Any] | None, getattr(cls, "__init_subclass__", None)) - - def __init_subclass__(cls_child: type[Any], **kwargs: Any) -> None: # noqa: N807 - if original_init_subclass: - cast(Callable[..., None], original_init_subclass)(**kwargs) - - for name, method in vars(cls_child).items(): - if inspect.isfunction(method) and not name.startswith("_"): - setattr(cls_child, name, trace_method(method)) # noqa: B010 - - cls_any = cast(Any, 
cls) - cls_any.__init_subclass__ = classmethod(__init_subclass__) - - return cls diff --git a/src/llama_stack/core/telemetry/tracing.py b/src/llama_stack/core/telemetry/tracing.py deleted file mode 100644 index a67cbe784..000000000 --- a/src/llama_stack/core/telemetry/tracing.py +++ /dev/null @@ -1,388 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -import asyncio -import contextvars -import logging # allow-direct-logging -import queue -import secrets -import sys -import threading -import time -from collections.abc import Callable -from datetime import UTC, datetime -from functools import wraps -from typing import Any, Self - -from llama_stack.core.telemetry.telemetry import ( - ROOT_SPAN_MARKERS, - Event, - LogSeverity, - Span, - SpanEndPayload, - SpanStartPayload, - SpanStatus, - StructuredLogEvent, - Telemetry, - UnstructuredLogEvent, -) -from llama_stack.core.telemetry.trace_protocol import serialize_value -from llama_stack.log import get_logger - -logger = get_logger(__name__, category="core") - -# Fallback logger that does NOT propagate to TelemetryHandler to avoid recursion -_fallback_logger = logging.getLogger("llama_stack.telemetry.background") -if not _fallback_logger.handlers: - _fallback_logger.propagate = False - _fallback_logger.setLevel(logging.ERROR) - _fallback_handler = logging.StreamHandler(sys.stderr) - _fallback_handler.setLevel(logging.ERROR) - _fallback_handler.setFormatter(logging.Formatter("%(asctime)s [%(levelname)s] %(name)s: %(message)s")) - _fallback_logger.addHandler(_fallback_handler) - - -INVALID_SPAN_ID = 0x0000000000000000 -INVALID_TRACE_ID = 0x00000000000000000000000000000000 - -# The logical root span may not be visible to this process if a parent context -# is passed in. The local root span is the first local span in a trace. 
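The ID helpers just below pad OpenTelemetry-style identifiers to fixed-width lowercase hex: 32 characters for a 128-bit trace ID, 16 for a 64-bit span ID. A quick illustration of the same formatting:

# format(n, "032x") / format(n, "016x") zero-pad to the wire widths
assert format(0x1F, "032x") == "0000000000000000000000000000001f"
assert format(0x1F, "016x") == "000000000000001f"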
-LOCAL_ROOT_SPAN_MARKER = "__local_root_span__" - - -def trace_id_to_str(trace_id: int) -> str: - """Convenience trace ID formatting method - Args: - trace_id: Trace ID int - - Returns: - The trace ID as 32-byte hexadecimal string - """ - return format(trace_id, "032x") - - -def span_id_to_str(span_id: int) -> str: - """Convenience span ID formatting method - Args: - span_id: Span ID int - - Returns: - The span ID as 16-byte hexadecimal string - """ - return format(span_id, "016x") - - -def generate_span_id() -> str: - span_id = secrets.randbits(64) - while span_id == INVALID_SPAN_ID: - span_id = secrets.randbits(64) - return span_id_to_str(span_id) - - -def generate_trace_id() -> str: - trace_id = secrets.randbits(128) - while trace_id == INVALID_TRACE_ID: - trace_id = secrets.randbits(128) - return trace_id_to_str(trace_id) - - -LOG_QUEUE_FULL_LOG_INTERVAL_SECONDS = 60.0 - - -class BackgroundLogger: - def __init__(self, api: Telemetry, capacity: int = 100000): - self.api = api - self.log_queue: queue.Queue[Any] = queue.Queue(maxsize=capacity) - self.worker_thread = threading.Thread(target=self._worker, daemon=True) - self.worker_thread.start() - self._last_queue_full_log_time: float = 0.0 - self._dropped_since_last_notice: int = 0 - - def log_event(self, event: Event) -> None: - try: - self.log_queue.put_nowait(event) - except queue.Full: - # Aggregate drops and emit at most once per interval via fallback logger - self._dropped_since_last_notice += 1 - current_time = time.time() - if current_time - self._last_queue_full_log_time >= LOG_QUEUE_FULL_LOG_INTERVAL_SECONDS: - _fallback_logger.error( - "Log queue is full; dropped %d events since last notice", - self._dropped_since_last_notice, - ) - self._last_queue_full_log_time = current_time - self._dropped_since_last_notice = 0 - - def _worker(self): - loop = asyncio.new_event_loop() - asyncio.set_event_loop(loop) - loop.run_until_complete(self._process_logs()) - - async def _process_logs(self): - while True: - try: - event = self.log_queue.get() - await self.api.log_event(event) - except Exception: - import traceback - - traceback.print_exc() - print("Error processing log event") - finally: - self.log_queue.task_done() - - def __del__(self) -> None: - self.log_queue.join() - - -BACKGROUND_LOGGER: BackgroundLogger | None = None - - -def enqueue_event(event: Event) -> None: - """Enqueue a telemetry event to the background logger if available. - - This provides a non-blocking path for routers and other hot paths to - submit telemetry without awaiting the Telemetry API, reducing contention - with the main event loop. 
- """ - global BACKGROUND_LOGGER - if BACKGROUND_LOGGER is None: - raise RuntimeError("Telemetry API not initialized") - BACKGROUND_LOGGER.log_event(event) - - -class TraceContext: - def __init__(self, logger: BackgroundLogger, trace_id: str): - self.logger = logger - self.trace_id = trace_id - self.spans: list[Span] = [] - - def push_span(self, name: str, attributes: dict[str, Any] | None = None) -> Span: - current_span = self.get_current_span() - span = Span( - span_id=generate_span_id(), - trace_id=self.trace_id, - name=name, - start_time=datetime.now(UTC), - parent_span_id=current_span.span_id if current_span else None, - attributes=attributes, - ) - - self.logger.log_event( - StructuredLogEvent( - trace_id=span.trace_id, - span_id=span.span_id, - timestamp=span.start_time, - attributes=span.attributes, - payload=SpanStartPayload( - name=span.name, - parent_span_id=span.parent_span_id, - ), - ) - ) - - self.spans.append(span) - return span - - def pop_span(self, status: SpanStatus = SpanStatus.OK) -> None: - span = self.spans.pop() - if span is not None: - self.logger.log_event( - StructuredLogEvent( - trace_id=span.trace_id, - span_id=span.span_id, - timestamp=span.start_time, - attributes=span.attributes, - payload=SpanEndPayload( - status=status, - ), - ) - ) - - def get_current_span(self) -> Span | None: - return self.spans[-1] if self.spans else None - - -CURRENT_TRACE_CONTEXT: contextvars.ContextVar[TraceContext | None] = contextvars.ContextVar( - "trace_context", default=None -) - - -def setup_logger(api: Telemetry, level: int = logging.INFO): - global BACKGROUND_LOGGER - - if BACKGROUND_LOGGER is None: - BACKGROUND_LOGGER = BackgroundLogger(api) - root_logger = logging.getLogger() - root_logger.setLevel(level) - root_logger.addHandler(TelemetryHandler()) - - -async def start_trace(name: str, attributes: dict[str, Any] | None = None) -> TraceContext | None: - global CURRENT_TRACE_CONTEXT, BACKGROUND_LOGGER - - if BACKGROUND_LOGGER is None: - logger.debug("No Telemetry implementation set. Skipping trace initialization...") - return None - - trace_id = generate_trace_id() - context = TraceContext(BACKGROUND_LOGGER, trace_id) - # Mark this span as the root for the trace for now. The processing of - # traceparent context if supplied comes later and will result in the - # ROOT_SPAN_MARKERS being removed. Also mark this is the 'local' root, - # i.e. the root of the spans originating in this process as this is - # needed to ensure that we insert this 'local' root span's id into - # the trace record in sqlite store. 
- attributes = dict.fromkeys(ROOT_SPAN_MARKERS, True) | {LOCAL_ROOT_SPAN_MARKER: True} | (attributes or {}) - context.push_span(name, attributes) - - CURRENT_TRACE_CONTEXT.set(context) - return context - - -async def end_trace(status: SpanStatus = SpanStatus.OK): - global CURRENT_TRACE_CONTEXT - - context = CURRENT_TRACE_CONTEXT.get() - if context is None: - logger.debug("No trace context to end") - return - - context.pop_span(status) - CURRENT_TRACE_CONTEXT.set(None) - - -def severity(levelname: str) -> LogSeverity: - if levelname == "DEBUG": - return LogSeverity.DEBUG - elif levelname == "INFO": - return LogSeverity.INFO - elif levelname == "WARNING": - return LogSeverity.WARN - elif levelname == "ERROR": - return LogSeverity.ERROR - elif levelname == "CRITICAL": - return LogSeverity.CRITICAL - else: - raise ValueError(f"Unknown log level: {levelname}") - - -# TODO: ideally, the actual emitting should be done inside a separate daemon -# process completely isolated from the server -class TelemetryHandler(logging.Handler): - def emit(self, record: logging.LogRecord) -> None: - # horrendous hack to avoid logging from asyncio and getting into an infinite loop - if record.module in ("asyncio", "selector_events"): - return - - global CURRENT_TRACE_CONTEXT - context = CURRENT_TRACE_CONTEXT.get() - if context is None: - return - - span = context.get_current_span() - if span is None: - return - - enqueue_event( - UnstructuredLogEvent( - trace_id=span.trace_id, - span_id=span.span_id, - timestamp=datetime.now(UTC), - message=self.format(record), - severity=severity(record.levelname), - ) - ) - - def close(self) -> None: - pass - - -class SpanContextManager: - def __init__(self, name: str, attributes: dict[str, Any] | None = None): - self.name = name - self.attributes = attributes - self.span: Span | None = None - - def __enter__(self) -> Self: - global CURRENT_TRACE_CONTEXT - context = CURRENT_TRACE_CONTEXT.get() - if not context: - logger.debug("No trace context to push span") - return self - - self.span = context.push_span(self.name, self.attributes) - return self - - def __exit__(self, exc_type, exc_value, traceback) -> None: - global CURRENT_TRACE_CONTEXT - context = CURRENT_TRACE_CONTEXT.get() - if not context: - logger.debug("No trace context to pop span") - return - - context.pop_span() - - def set_attribute(self, key: str, value: Any) -> None: - if self.span: - if self.span.attributes is None: - self.span.attributes = {} - self.span.attributes[key] = serialize_value(value) - - async def __aenter__(self) -> Self: - global CURRENT_TRACE_CONTEXT - context = CURRENT_TRACE_CONTEXT.get() - if not context: - logger.debug("No trace context to push span") - return self - - self.span = context.push_span(self.name, self.attributes) - return self - - async def __aexit__(self, exc_type, exc_value, traceback) -> None: - global CURRENT_TRACE_CONTEXT - context = CURRENT_TRACE_CONTEXT.get() - if not context: - logger.debug("No trace context to pop span") - return - - context.pop_span() - - def __call__(self, func: Callable[..., Any]) -> Callable[..., Any]: - @wraps(func) - def sync_wrapper(*args: Any, **kwargs: Any) -> Any: - with self: - return func(*args, **kwargs) - - @wraps(func) - async def async_wrapper(*args: Any, **kwargs: Any) -> Any: - async with self: - return await func(*args, **kwargs) - - @wraps(func) - def wrapper(*args: Any, **kwargs: Any) -> Any: - if asyncio.iscoroutinefunction(func): - return async_wrapper(*args, **kwargs) - else: - return sync_wrapper(*args, **kwargs) - - return wrapper 
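For reference, the wrapper machinery above let the span() helper defined next act as a sync context manager, an async context manager, or a decorator for either kind of function. A usage sketch with illustrative names:

with span("load_model", {"model_id": "llama-3"}) as s:
    s.set_attribute("status", "ok")  # values go through serialize_value

@span("compute_embeddings")
async def compute_embeddings(batch):
    ...  # a span opens before the call and closes when it returns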
- - -def span(name: str, attributes: dict[str, Any] | None = None) -> SpanContextManager: - return SpanContextManager(name, attributes) - - -def get_current_span() -> Span | None: - global CURRENT_TRACE_CONTEXT - if CURRENT_TRACE_CONTEXT is None: - logger.debug("No trace context to get current span") - return None - - context = CURRENT_TRACE_CONTEXT.get() - if context: - return context.get_current_span() - return None diff --git a/src/llama_stack/core/utils/context.py b/src/llama_stack/core/utils/context.py index e7c61a8ed..0c3e41f00 100644 --- a/src/llama_stack/core/utils/context.py +++ b/src/llama_stack/core/utils/context.py @@ -7,8 +7,6 @@ from collections.abc import AsyncGenerator from contextvars import ContextVar -from llama_stack.core.telemetry.tracing import CURRENT_TRACE_CONTEXT - _MISSING = object() @@ -69,16 +67,12 @@ def preserve_contexts_async_generator[T]( try: yield item # Update our tracked values with any changes made during this iteration - # Only for non-trace context vars - trace context must persist across yields - # to allow nested span tracking for telemetry + # This allows context changes to persist across generator iterations for context_var in context_vars: - if context_var is not CURRENT_TRACE_CONTEXT: - initial_context_values[context_var.name] = context_var.get() + initial_context_values[context_var.name] = context_var.get() finally: - # Restore non-trace context vars after each yield to prevent leaks between requests - # CURRENT_TRACE_CONTEXT is NOT restored here to preserve telemetry span stack + # Restore context vars after each yield to prevent leaks between requests for context_var in context_vars: - if context_var is not CURRENT_TRACE_CONTEXT: - _restore_context_var(context_var) + _restore_context_var(context_var) return wrapper() diff --git a/src/llama_stack/distributions/ci-tests/run-with-postgres-store.yaml b/src/llama_stack/distributions/ci-tests/run-with-postgres-store.yaml index 7721138c7..8414dcae5 100644 --- a/src/llama_stack/distributions/ci-tests/run-with-postgres-store.yaml +++ b/src/llama_stack/distributions/ci-tests/run-with-postgres-store.yaml @@ -281,8 +281,6 @@ registered_resources: provider_id: rag-runtime server: port: 8321 -telemetry: - enabled: true vector_stores: default_provider_id: faiss default_embedding_model: diff --git a/src/llama_stack/distributions/ci-tests/run.yaml b/src/llama_stack/distributions/ci-tests/run.yaml index b791e1488..e83fc7fb5 100644 --- a/src/llama_stack/distributions/ci-tests/run.yaml +++ b/src/llama_stack/distributions/ci-tests/run.yaml @@ -272,8 +272,6 @@ registered_resources: provider_id: rag-runtime server: port: 8321 -telemetry: - enabled: true vector_stores: default_provider_id: faiss default_embedding_model: diff --git a/src/llama_stack/distributions/dell/run-with-safety.yaml b/src/llama_stack/distributions/dell/run-with-safety.yaml index e0da8060d..63bd95168 100644 --- a/src/llama_stack/distributions/dell/run-with-safety.yaml +++ b/src/llama_stack/distributions/dell/run-with-safety.yaml @@ -140,5 +140,3 @@ registered_resources: provider_id: rag-runtime server: port: 8321 -telemetry: - enabled: true diff --git a/src/llama_stack/distributions/dell/run.yaml b/src/llama_stack/distributions/dell/run.yaml index bc3117d88..93f0c35bc 100644 --- a/src/llama_stack/distributions/dell/run.yaml +++ b/src/llama_stack/distributions/dell/run.yaml @@ -131,5 +131,3 @@ registered_resources: provider_id: rag-runtime server: port: 8321 -telemetry: - enabled: true diff --git 
a/src/llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml b/src/llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml index 2fa9d198b..63fc3b1d2 100644 --- a/src/llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +++ b/src/llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml @@ -153,5 +153,3 @@ registered_resources: provider_id: rag-runtime server: port: 8321 -telemetry: - enabled: true diff --git a/src/llama_stack/distributions/meta-reference-gpu/run.yaml b/src/llama_stack/distributions/meta-reference-gpu/run.yaml index 5c7f75ca8..ba8235398 100644 --- a/src/llama_stack/distributions/meta-reference-gpu/run.yaml +++ b/src/llama_stack/distributions/meta-reference-gpu/run.yaml @@ -138,5 +138,3 @@ registered_resources: provider_id: rag-runtime server: port: 8321 -telemetry: - enabled: true diff --git a/src/llama_stack/distributions/nvidia/run-with-safety.yaml b/src/llama_stack/distributions/nvidia/run-with-safety.yaml index d2c7dd090..7d95565e5 100644 --- a/src/llama_stack/distributions/nvidia/run-with-safety.yaml +++ b/src/llama_stack/distributions/nvidia/run-with-safety.yaml @@ -135,5 +135,3 @@ registered_resources: provider_id: rag-runtime server: port: 8321 -telemetry: - enabled: true diff --git a/src/llama_stack/distributions/nvidia/run.yaml b/src/llama_stack/distributions/nvidia/run.yaml index c267587c7..8c80b8303 100644 --- a/src/llama_stack/distributions/nvidia/run.yaml +++ b/src/llama_stack/distributions/nvidia/run.yaml @@ -114,5 +114,3 @@ registered_resources: provider_id: rag-runtime server: port: 8321 -telemetry: - enabled: true diff --git a/src/llama_stack/distributions/oci/run.yaml b/src/llama_stack/distributions/oci/run.yaml index e385ec606..ff0c818be 100644 --- a/src/llama_stack/distributions/oci/run.yaml +++ b/src/llama_stack/distributions/oci/run.yaml @@ -132,5 +132,3 @@ registered_resources: provider_id: tavily-search server: port: 8321 -telemetry: - enabled: true diff --git a/src/llama_stack/distributions/open-benchmark/run.yaml b/src/llama_stack/distributions/open-benchmark/run.yaml index 7ebc58841..43aa45b51 100644 --- a/src/llama_stack/distributions/open-benchmark/run.yaml +++ b/src/llama_stack/distributions/open-benchmark/run.yaml @@ -251,5 +251,3 @@ registered_resources: provider_id: rag-runtime server: port: 8321 -telemetry: - enabled: true diff --git a/src/llama_stack/distributions/postgres-demo/run.yaml b/src/llama_stack/distributions/postgres-demo/run.yaml index 049f519cd..c9316f923 100644 --- a/src/llama_stack/distributions/postgres-demo/run.yaml +++ b/src/llama_stack/distributions/postgres-demo/run.yaml @@ -114,5 +114,3 @@ registered_resources: provider_id: rag-runtime server: port: 8321 -telemetry: - enabled: true diff --git a/src/llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml b/src/llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml index 9c250c05a..0662986f1 100644 --- a/src/llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +++ b/src/llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml @@ -284,8 +284,6 @@ registered_resources: provider_id: rag-runtime server: port: 8321 -telemetry: - enabled: true vector_stores: default_provider_id: faiss default_embedding_model: diff --git a/src/llama_stack/distributions/starter-gpu/run.yaml b/src/llama_stack/distributions/starter-gpu/run.yaml index 65f9ae326..9ef5b3f6d 100644 --- a/src/llama_stack/distributions/starter-gpu/run.yaml +++ b/src/llama_stack/distributions/starter-gpu/run.yaml @@ -275,8 +275,6 @@ 
registered_resources: provider_id: rag-runtime server: port: 8321 -telemetry: - enabled: true vector_stores: default_provider_id: faiss default_embedding_model: diff --git a/src/llama_stack/distributions/starter/run-with-postgres-store.yaml b/src/llama_stack/distributions/starter/run-with-postgres-store.yaml index 3314bb9e9..1da4f0da7 100644 --- a/src/llama_stack/distributions/starter/run-with-postgres-store.yaml +++ b/src/llama_stack/distributions/starter/run-with-postgres-store.yaml @@ -281,8 +281,6 @@ registered_resources: provider_id: rag-runtime server: port: 8321 -telemetry: - enabled: true vector_stores: default_provider_id: faiss default_embedding_model: diff --git a/src/llama_stack/distributions/starter/run.yaml b/src/llama_stack/distributions/starter/run.yaml index e88539e6a..3e6cde13a 100644 --- a/src/llama_stack/distributions/starter/run.yaml +++ b/src/llama_stack/distributions/starter/run.yaml @@ -272,8 +272,6 @@ registered_resources: provider_id: rag-runtime server: port: 8321 -telemetry: - enabled: true vector_stores: default_provider_id: faiss default_embedding_model: diff --git a/src/llama_stack/distributions/template.py b/src/llama_stack/distributions/template.py index 90b458805..bab3211e9 100644 --- a/src/llama_stack/distributions/template.py +++ b/src/llama_stack/distributions/template.py @@ -24,7 +24,6 @@ from llama_stack.core.datatypes import ( Provider, SafetyConfig, ShieldInput, - TelemetryConfig, ToolGroupInput, VectorStoresConfig, ) @@ -189,7 +188,6 @@ class RunConfigSettings(BaseModel): default_benchmarks: list[BenchmarkInput] | None = None vector_stores_config: VectorStoresConfig | None = None safety_config: SafetyConfig | None = None - telemetry: TelemetryConfig = Field(default_factory=lambda: TelemetryConfig(enabled=True)) storage_backends: dict[str, Any] | None = None storage_stores: dict[str, Any] | None = None @@ -289,7 +287,6 @@ class RunConfigSettings(BaseModel): "server": { "port": 8321, }, - "telemetry": self.telemetry.model_dump(exclude_none=True) if self.telemetry else None, } if self.vector_stores_config: diff --git a/src/llama_stack/distributions/watsonx/run.yaml b/src/llama_stack/distributions/watsonx/run.yaml index f8c489fe3..55ea34cb6 100644 --- a/src/llama_stack/distributions/watsonx/run.yaml +++ b/src/llama_stack/distributions/watsonx/run.yaml @@ -132,5 +132,3 @@ registered_resources: provider_id: rag-runtime server: port: 8321 -telemetry: - enabled: true diff --git a/src/llama_stack/log.py b/src/llama_stack/log.py index c11c2c06f..a44a0ac26 100644 --- a/src/llama_stack/log.py +++ b/src/llama_stack/log.py @@ -37,7 +37,6 @@ CATEGORIES = [ "eval", "tools", "client", - "telemetry", "openai", "openai_responses", "openai_conversations", diff --git a/src/llama_stack/providers/inline/agents/meta_reference/__init__.py b/src/llama_stack/providers/inline/agents/meta_reference/__init__.py index 9683baf00..c9c7d348a 100644 --- a/src/llama_stack/providers/inline/agents/meta_reference/__init__.py +++ b/src/llama_stack/providers/inline/agents/meta_reference/__init__.py @@ -15,7 +15,6 @@ async def get_provider_impl( config: MetaReferenceAgentsImplConfig, deps: dict[Api, Any], policy: list[AccessRule], - telemetry_enabled: bool = False, ): from .agents import MetaReferenceAgentsImpl @@ -29,7 +28,6 @@ async def get_provider_impl( deps[Api.conversations], deps[Api.prompts], deps[Api.files], - telemetry_enabled, policy, ) await impl.initialize() diff --git a/src/llama_stack/providers/inline/agents/meta_reference/agents.py 
b/src/llama_stack/providers/inline/agents/meta_reference/agents.py index ca419a51a..1ceb34f8f 100644 --- a/src/llama_stack/providers/inline/agents/meta_reference/agents.py +++ b/src/llama_stack/providers/inline/agents/meta_reference/agents.py @@ -50,7 +50,6 @@ class MetaReferenceAgentsImpl(Agents): prompts_api: Prompts, files_api: Files, policy: list[AccessRule], - telemetry_enabled: bool = False, ): self.config = config self.inference_api = inference_api @@ -59,7 +58,6 @@ class MetaReferenceAgentsImpl(Agents): self.tool_runtime_api = tool_runtime_api self.tool_groups_api = tool_groups_api self.conversations_api = conversations_api - self.telemetry_enabled = telemetry_enabled self.prompts_api = prompts_api self.files_api = files_api self.in_memory_store = InmemoryKVStoreImpl() diff --git a/src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py b/src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py index 9e901d88b..3515e0578 100644 --- a/src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +++ b/src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py @@ -8,7 +8,8 @@ import uuid from collections.abc import AsyncIterator from typing import Any -from llama_stack.core.telemetry import tracing +from opentelemetry import trace + from llama_stack.log import get_logger from llama_stack.providers.utils.inference.prompt_adapter import interleaved_content_as_str from llama_stack_api import ( @@ -79,6 +80,7 @@ from .utils import ( ) logger = get_logger(name=__name__, category="agents::meta_reference") +tracer = trace.get_tracer(__name__) def convert_tooldef_to_chat_tool(tool_def): @@ -1106,8 +1108,10 @@ class StreamingResponseOrchestrator: "server_url": mcp_tool.server_url, "mcp_list_tools_id": list_id, } - # List MCP tools with authorization from tool config - async with tracing.span("list_mcp_tools", attributes): + + # TODO: follow semantic conventions for OpenTelemetry tool spans + # https://opentelemetry.io/docs/specs/semconv/gen-ai/gen-ai-spans/#execute-tool-span + with tracer.start_as_current_span("list_mcp_tools", attributes=attributes): tool_defs = await list_mcp_tools( endpoint=mcp_tool.server_url, headers=mcp_tool.headers, @@ -1183,9 +1187,9 @@ class StreamingResponseOrchestrator: if mcp_server.require_approval == "never": return False if isinstance(mcp_server, ApprovalFilter): - if tool_name in mcp_server.always: + if mcp_server.always and tool_name in mcp_server.always: return True - if tool_name in mcp_server.never: + if mcp_server.never and tool_name in mcp_server.never: return False return True diff --git a/src/llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py b/src/llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py index 4f294a979..d27a0f8ad 100644 --- a/src/llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +++ b/src/llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py @@ -9,7 +9,8 @@ import json from collections.abc import AsyncIterator from typing import Any -from llama_stack.core.telemetry import tracing +from opentelemetry import trace + from llama_stack.log import get_logger from llama_stack_api import ( ImageContentItem, @@ -42,6 +43,7 @@ from llama_stack_api import ( from .types import ChatCompletionContext, ToolExecutionResult logger = get_logger(name=__name__, category="agents::meta_reference") +tracer = trace.get_tracer(__name__) class ToolExecutor: @@ -296,8 +298,9 @@ class
ToolExecutor: "server_url": mcp_tool.server_url, "tool_name": function_name, } - # Invoke MCP tool with authorization from tool config - async with tracing.span("invoke_mcp_tool", attributes): + # TODO: follow semantic conventions for OpenTelemetry tool spans + # https://opentelemetry.io/docs/specs/semconv/gen-ai/gen-ai-spans/#execute-tool-span + with tracer.start_as_current_span("invoke_mcp_tool", attributes=attributes): result = await invoke_mcp_tool( endpoint=mcp_tool.server_url, tool_name=function_name, @@ -318,7 +321,7 @@ class ToolExecutor: # Use vector_stores.search API instead of knowledge_search tool # to support filters and ranking_options query = tool_kwargs.get("query", "") - async with tracing.span("knowledge_search", {}): + with tracer.start_as_current_span("knowledge_search"): result = await self._execute_knowledge_search_via_vector_store( query=query, response_file_search_tool=response_file_search_tool, @@ -327,7 +330,9 @@ class ToolExecutor: attributes = { "tool_name": function_name, } - async with tracing.span("invoke_tool", attributes): + # TODO: follow semantic conventions for OpenTelemetry tool spans + # https://opentelemetry.io/docs/specs/semconv/gen-ai/gen-ai-spans/#execute-tool-span + with tracer.start_as_current_span("invoke_tool", attributes=attributes): result = await self.tool_runtime_api.invoke_tool( tool_name=function_name, kwargs=tool_kwargs, diff --git a/src/llama_stack/providers/inline/agents/meta_reference/safety.py b/src/llama_stack/providers/inline/agents/meta_reference/safety.py index bfb557a99..123a2e283 100644 --- a/src/llama_stack/providers/inline/agents/meta_reference/safety.py +++ b/src/llama_stack/providers/inline/agents/meta_reference/safety.py @@ -6,7 +6,6 @@ import asyncio -from llama_stack.core.telemetry import tracing from llama_stack.log import get_logger from llama_stack_api import OpenAIMessageParam, Safety, SafetyViolation, ViolationLevel @@ -31,15 +30,12 @@ class ShieldRunnerMixin: self.output_shields = output_shields async def run_multiple_shields(self, messages: list[OpenAIMessageParam], identifiers: list[str]) -> None: - async def run_shield_with_span(identifier: str): - async with tracing.span(f"run_shield_{identifier}"): - return await self.safety_api.run_shield( - shield_id=identifier, - messages=messages, - params={}, - ) - - responses = await asyncio.gather(*[run_shield_with_span(identifier) for identifier in identifiers]) + responses = await asyncio.gather( + *[ + self.safety_api.run_shield(shield_id=identifier, messages=messages, params={}) + for identifier in identifiers + ] + ) for identifier, response in zip(identifiers, responses, strict=False): if not response.violation: continue diff --git a/src/llama_stack/providers/remote/inference/bedrock/bedrock.py b/src/llama_stack/providers/remote/inference/bedrock/bedrock.py index 451549db8..a890a568e 100644 --- a/src/llama_stack/providers/remote/inference/bedrock/bedrock.py +++ b/src/llama_stack/providers/remote/inference/bedrock/bedrock.py @@ -8,7 +8,6 @@ from collections.abc import AsyncIterator, Iterable from openai import AuthenticationError -from llama_stack.core.telemetry.tracing import get_current_span from llama_stack.log import get_logger from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin from llama_stack_api import ( @@ -84,7 +83,7 @@ class BedrockInferenceAdapter(OpenAIMixin): ) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]: """Override to enable streaming usage metrics and handle authentication errors.""" # Enable
streaming usage metrics when telemetry is active - if params.stream and get_current_span() is not None: + if params.stream: if params.stream_options is None: params.stream_options = {"include_usage": True} elif "include_usage" not in params.stream_options: diff --git a/src/llama_stack/providers/remote/inference/watsonx/watsonx.py b/src/llama_stack/providers/remote/inference/watsonx/watsonx.py index 5684f6c17..2fcda370a 100644 --- a/src/llama_stack/providers/remote/inference/watsonx/watsonx.py +++ b/src/llama_stack/providers/remote/inference/watsonx/watsonx.py @@ -10,7 +10,6 @@ from typing import Any import litellm import requests -from llama_stack.core.telemetry.tracing import get_current_span from llama_stack.log import get_logger from llama_stack.providers.remote.inference.watsonx.config import WatsonXConfig from llama_stack.providers.utils.inference.litellm_openai_mixin import LiteLLMOpenAIMixin @@ -59,7 +58,7 @@ class WatsonXInferenceAdapter(LiteLLMOpenAIMixin): # Add usage tracking for streaming when telemetry is active stream_options = params.stream_options - if params.stream and get_current_span() is not None: + if params.stream: if stream_options is None: stream_options = {"include_usage": True} elif "include_usage" not in stream_options: diff --git a/src/llama_stack/providers/utils/inference/litellm_openai_mixin.py b/src/llama_stack/providers/utils/inference/litellm_openai_mixin.py index c462d1aad..47c68ff0a 100644 --- a/src/llama_stack/providers/utils/inference/litellm_openai_mixin.py +++ b/src/llama_stack/providers/utils/inference/litellm_openai_mixin.py @@ -217,10 +217,9 @@ class LiteLLMOpenAIMixin( params: OpenAIChatCompletionRequestWithExtraBody, ) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]: # Add usage tracking for streaming when telemetry is active - from llama_stack.core.telemetry.tracing import get_current_span stream_options = params.stream_options - if params.stream and get_current_span() is not None: + if params.stream: if stream_options is None: stream_options = {"include_usage": True} elif "include_usage" not in stream_options: diff --git a/src/llama_stack/providers/utils/tools/mcp.py b/src/llama_stack/providers/utils/tools/mcp.py index 9c5e9cd96..05cdfa73b 100644 --- a/src/llama_stack/providers/utils/tools/mcp.py +++ b/src/llama_stack/providers/utils/tools/mcp.py @@ -89,6 +89,7 @@ async def client_wrapper(endpoint: str, headers: dict[str, str]) -> AsyncGenerat # sse_client and streamablehttp_client have different signatures, but both # are called the same way here, so we cast to Any to avoid type errors client = cast(Any, sse_client) + async with client(endpoint, headers=headers) as client_streams: async with ClientSession(read_stream=client_streams[0], write_stream=client_streams[1]) as session: await session.initialize() diff --git a/src/llama_stack/telemetry/__init__.py b/src/llama_stack/telemetry/__init__.py new file mode 100644 index 000000000..756f351d8 --- /dev/null +++ b/src/llama_stack/telemetry/__init__.py @@ -0,0 +1,5 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. diff --git a/src/llama_stack/telemetry/constants.py b/src/llama_stack/telemetry/constants.py new file mode 100644 index 000000000..1d3db0742 --- /dev/null +++ b/src/llama_stack/telemetry/constants.py @@ -0,0 +1,27 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. 
+# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +""" +This file contains constants used for naming data captured for telemetry. + +This is used to ensure that the data captured for telemetry is consistent and can be used to +identify and correlate data. If custom telemetry data is added to llama stack, please add +constants for it here. +""" + +llama_stack_prefix = "llama_stack" + +# Safety Attributes +RUN_SHIELD_OPERATION_NAME = "run_shield" + +SAFETY_REQUEST_PREFIX = f"{llama_stack_prefix}.safety.request" +SAFETY_REQUEST_SHIELD_ID_ATTRIBUTE = f"{SAFETY_REQUEST_PREFIX}.shield_id" +SAFETY_REQUEST_MESSAGES_ATTRIBUTE = f"{SAFETY_REQUEST_PREFIX}.messages" + +SAFETY_RESPONSE_PREFIX = f"{llama_stack_prefix}.safety.response" +SAFETY_RESPONSE_METADATA_ATTRIBUTE = f"{SAFETY_RESPONSE_PREFIX}.metadata" +SAFETY_RESPONSE_VIOLATION_LEVEL_ATTRIBUTE = f"{SAFETY_RESPONSE_PREFIX}.violation.level" +SAFETY_RESPONSE_USER_MESSAGE_ATTRIBUTE = f"{SAFETY_RESPONSE_PREFIX}.violation.user_message" diff --git a/src/llama_stack/telemetry/helpers.py b/src/llama_stack/telemetry/helpers.py new file mode 100644 index 000000000..2ae13c9c5 --- /dev/null +++ b/src/llama_stack/telemetry/helpers.py @@ -0,0 +1,43 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +import json + +from opentelemetry import trace + +from llama_stack_api import OpenAIMessageParam, RunShieldResponse + +from .constants import ( + RUN_SHIELD_OPERATION_NAME, + SAFETY_REQUEST_MESSAGES_ATTRIBUTE, + SAFETY_REQUEST_SHIELD_ID_ATTRIBUTE, + SAFETY_RESPONSE_METADATA_ATTRIBUTE, + SAFETY_RESPONSE_USER_MESSAGE_ATTRIBUTE, + SAFETY_RESPONSE_VIOLATION_LEVEL_ATTRIBUTE, +) + + +def safety_span_name(shield_id: str) -> str: + return f"{RUN_SHIELD_OPERATION_NAME} {shield_id}" + + +# TODO: Consider using Wrapt to automatically instrument code +# This is the industry standard way to package automatic instrumentation in Python.
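+# +# Illustrative usage (a hypothetical caller; safety_api, shield_id, and messages are assumed +# to be in scope, with safety_api.run_shield(shield_id=..., messages=..., params={}) as used by +# ShieldRunnerMixin): a safety provider could open a span named via safety_span_name() and +# attach the request/response attributes once the shield result is available, e.g.: +# +# with trace.get_tracer(__name__).start_as_current_span(safety_span_name(shield_id)): +# response = await safety_api.run_shield(shield_id=shield_id, messages=messages, params={}) +# safety_request_span_attributes(shield_id, messages, response)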
+def safety_request_span_attributes( + shield_id: str, messages: list[OpenAIMessageParam], response: RunShieldResponse +) -> None: + span = trace.get_current_span() + span.set_attribute(SAFETY_REQUEST_SHIELD_ID_ATTRIBUTE, shield_id) + messages_json = json.dumps([msg.model_dump() for msg in messages]) + span.set_attribute(SAFETY_REQUEST_MESSAGES_ATTRIBUTE, messages_json) + + if response.violation: + if response.violation.metadata: + metadata_json = json.dumps(response.violation.metadata) + span.set_attribute(SAFETY_RESPONSE_METADATA_ATTRIBUTE, metadata_json) + if response.violation.user_message: + span.set_attribute(SAFETY_RESPONSE_USER_MESSAGE_ATTRIBUTE, response.violation.user_message) + span.set_attribute(SAFETY_RESPONSE_VIOLATION_LEVEL_ATTRIBUTE, response.violation.violation_level.value) diff --git a/tests/integration/inference/test_provider_data_routing.py b/tests/integration/inference/test_provider_data_routing.py index e4a0a24b5..cf79c9f8f 100644 --- a/tests/integration/inference/test_provider_data_routing.py +++ b/tests/integration/inference/test_provider_data_routing.py @@ -17,7 +17,6 @@ from unittest.mock import AsyncMock, patch import pytest from llama_stack.core.library_client import LlamaStackAsLibraryClient -from llama_stack.core.telemetry.telemetry import MetricEvent from llama_stack_api import ( Api, OpenAIAssistantMessageParam, @@ -27,10 +26,6 @@ from llama_stack_api import ( ) -class OpenAIChatCompletionWithMetrics(OpenAIChatCompletion): - metrics: list[MetricEvent] | None = None - - def test_unregistered_model_routing_with_provider_data(client_with_models): """ Test that a model can be routed using provider_id/model_id format @@ -72,7 +67,7 @@ def test_unregistered_model_routing_with_provider_data(client_with_models): # The inference router's routing_table.impls_by_provider_id should have anthropic # Let's patch the anthropic provider's openai_chat_completion method # to avoid making real API calls - mock_response = OpenAIChatCompletionWithMetrics( + mock_response = OpenAIChatCompletion( id="chatcmpl-test-123", created=1234567890, model="claude-3-5-sonnet-20241022", diff --git a/tests/integration/telemetry/collectors/in_memory.py b/tests/integration/telemetry/collectors/in_memory.py index 7127b3816..9ff8de6f5 100644 --- a/tests/integration/telemetry/collectors/in_memory.py +++ b/tests/integration/telemetry/collectors/in_memory.py @@ -15,11 +15,10 @@ from opentelemetry.sdk.trace import TracerProvider from opentelemetry.sdk.trace.export import SimpleSpanProcessor from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter -import llama_stack.core.telemetry.telemetry as telemetry_module - from .base import BaseTelemetryCollector, MetricStub, SpanStub +# TODO: Fix this to work with Automatic Instrumentation class InMemoryTelemetryCollector(BaseTelemetryCollector): """In-memory telemetry collector for library-client tests.
@@ -75,13 +74,10 @@ class InMemoryTelemetryManager: meter_provider = MeterProvider(metric_readers=[metric_reader]) metrics.set_meter_provider(meter_provider) - telemetry_module._TRACER_PROVIDER = tracer_provider - self.collector = InMemoryTelemetryCollector(span_exporter, metric_reader) self._tracer_provider = tracer_provider self._meter_provider = meter_provider def shutdown(self) -> None: - telemetry_module._TRACER_PROVIDER = None self._tracer_provider.shutdown() self._meter_provider.shutdown() diff --git a/tests/integration/telemetry/conftest.py b/tests/integration/telemetry/conftest.py index fd9224ae4..9448e40a0 100644 --- a/tests/integration/telemetry/conftest.py +++ b/tests/integration/telemetry/conftest.py @@ -15,6 +15,7 @@ from tests.integration.fixtures.common import instantiate_llama_stack_client from tests.integration.telemetry.collectors import InMemoryTelemetryManager, OtlpHttpTestCollector +# TODO: Fix this to work with Automatic Instrumentation @pytest.fixture(scope="session") def telemetry_test_collector(): stack_mode = os.environ.get("LLAMA_STACK_TEST_STACK_CONFIG_TYPE", "library_client") @@ -48,6 +49,7 @@ def telemetry_test_collector(): manager.shutdown() +# TODO: Fix this to work with Automatic Instrumentation @pytest.fixture(scope="session") def llama_stack_client(telemetry_test_collector, request): """Ensure telemetry collector is ready before initializing the stack client.""" diff --git a/tests/unit/cli/test_stack_config.py b/tests/unit/cli/test_stack_config.py index 6aefac003..41570194c 100644 --- a/tests/unit/cli/test_stack_config.py +++ b/tests/unit/cli/test_stack_config.py @@ -155,9 +155,6 @@ def old_config(): provider_type: inline::meta-reference config: {{}} api_providers: - telemetry: - provider_type: noop - config: {{}} """ ) @@ -181,7 +178,7 @@ def test_parse_and_maybe_upgrade_config_up_to_date(up_to_date_config): def test_parse_and_maybe_upgrade_config_old_format(old_config): result = parse_and_maybe_upgrade_config(old_config) assert result.version == LLAMA_STACK_RUN_CONFIG_VERSION - assert all(api in result.providers for api in ["inference", "safety", "memory", "telemetry"]) + assert all(api in result.providers for api in ["inference", "safety", "memory"]) safety_provider = result.providers["safety"][0] assert safety_provider.provider_type == "inline::meta-reference" assert "llama_guard_shield" in safety_provider.config diff --git a/tests/unit/providers/agents/meta_reference/test_safety_optional.py b/tests/unit/providers/agents/meta_reference/test_safety_optional.py index c2311b68f..10b15b26d 100644 --- a/tests/unit/providers/agents/meta_reference/test_safety_optional.py +++ b/tests/unit/providers/agents/meta_reference/test_safety_optional.py @@ -83,7 +83,7 @@ class TestProviderInitialization: new_callable=AsyncMock, ): # Should not raise any exception - provider = await get_provider_impl(config, mock_deps, policy=[], telemetry_enabled=False) + provider = await get_provider_impl(config, mock_deps, policy=[]) assert provider is not None async def test_initialization_without_safety_api(self, mock_persistence_config, mock_deps): @@ -97,7 +97,7 @@ class TestProviderInitialization: new_callable=AsyncMock, ): # Should not raise any exception - provider = await get_provider_impl(config, mock_deps, policy=[], telemetry_enabled=False) + provider = await get_provider_impl(config, mock_deps, policy=[]) assert provider is not None assert provider.safety_api is None From 28ff6d8659e774005f75ee47cac605c3640e493a Mon Sep 17 00:00:00 2001 From: Emilio Garcia Date: Mon, 1 
Dec 2025 13:40:57 -0500 Subject: [PATCH 15/17] fix: remove telemetry_traceable (#4205) # What does this PR do? Removes stale data about the old telemetry system from llama stack. **Depends on** https://github.com/llamastack/llama-stack/pull/4127 Co-authored-by: Ashwin Bharambe --- src/llama_stack_api/common/tracing.py | 22 ---------------------- src/llama_stack_api/conversations.py | 2 -- src/llama_stack_api/files.py | 2 -- src/llama_stack_api/inference.py | 2 -- src/llama_stack_api/models.py | 2 -- src/llama_stack_api/prompts.py | 2 -- src/llama_stack_api/safety.py | 2 -- src/llama_stack_api/shields.py | 2 -- src/llama_stack_api/tools.py | 3 --- src/llama_stack_api/vector_io.py | 2 -- 10 files changed, 41 deletions(-) delete mode 100644 src/llama_stack_api/common/tracing.py diff --git a/src/llama_stack_api/common/tracing.py b/src/llama_stack_api/common/tracing.py deleted file mode 100644 index 830c2945a..000000000 --- a/src/llama_stack_api/common/tracing.py +++ /dev/null @@ -1,22 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - - -def telemetry_traceable(cls): - """ - Mark a protocol for automatic tracing when telemetry is enabled. - - This is a metadata-only decorator with no dependencies on core. - Actual tracing is applied by core routers at runtime if telemetry is enabled. - - Usage: - @runtime_checkable - @telemetry_traceable - class MyProtocol(Protocol): - ... - """ - cls.__marked_for_tracing__ = True - return cls diff --git a/src/llama_stack_api/conversations.py b/src/llama_stack_api/conversations.py index 4854181d1..81b5ab2c4 100644 --- a/src/llama_stack_api/conversations.py +++ b/src/llama_stack_api/conversations.py @@ -9,7 +9,6 @@ from typing import Annotated, Literal, Protocol, runtime_checkable from pydantic import BaseModel, Field -from llama_stack_api.common.tracing import telemetry_traceable from llama_stack_api.openai_responses import ( OpenAIResponseInputFunctionToolCallOutput, OpenAIResponseMCPApprovalRequest, @@ -157,7 +156,6 @@ class ConversationItemDeletedResource(BaseModel): @runtime_checkable -@telemetry_traceable class Conversations(Protocol): """Conversations diff --git a/src/llama_stack_api/files.py b/src/llama_stack_api/files.py index 8a75a1c39..e515fe0ae 100644 --- a/src/llama_stack_api/files.py +++ b/src/llama_stack_api/files.py @@ -11,7 +11,6 @@ from fastapi import File, Form, Response, UploadFile from pydantic import BaseModel, Field from llama_stack_api.common.responses import Order -from llama_stack_api.common.tracing import telemetry_traceable from llama_stack_api.schema_utils import json_schema_type, webmethod from llama_stack_api.version import LLAMA_STACK_API_V1 @@ -102,7 +101,6 @@ class OpenAIFileDeleteResponse(BaseModel): @runtime_checkable -@telemetry_traceable class Files(Protocol): """Files diff --git a/src/llama_stack_api/inference.py b/src/llama_stack_api/inference.py index b42de95be..4a169486a 100644 --- a/src/llama_stack_api/inference.py +++ b/src/llama_stack_api/inference.py @@ -22,7 +22,6 @@ from llama_stack_api.common.content_types import InterleavedContent from llama_stack_api.common.responses import ( Order, ) -from llama_stack_api.common.tracing import telemetry_traceable from llama_stack_api.models import Model from llama_stack_api.schema_utils import json_schema_type, register_schema, webmethod from llama_stack_api.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1ALPHA @@
-989,7 +988,6 @@ class OpenAIEmbeddingsRequestWithExtraBody(BaseModel, extra="allow"): @runtime_checkable -@telemetry_traceable class InferenceProvider(Protocol): """ This protocol defines the interface that should be implemented by all inference providers. diff --git a/src/llama_stack_api/models.py b/src/llama_stack_api/models.py index 98c16b6c2..3efdfe66b 100644 --- a/src/llama_stack_api/models.py +++ b/src/llama_stack_api/models.py @@ -9,7 +9,6 @@ from typing import Any, Literal, Protocol, runtime_checkable from pydantic import BaseModel, ConfigDict, Field, field_validator -from llama_stack_api.common.tracing import telemetry_traceable from llama_stack_api.resource import Resource, ResourceType from llama_stack_api.schema_utils import json_schema_type, webmethod from llama_stack_api.version import LLAMA_STACK_API_V1 @@ -106,7 +105,6 @@ class OpenAIListModelsResponse(BaseModel): @runtime_checkable -@telemetry_traceable class Models(Protocol): async def list_models(self) -> ListModelsResponse: """List all models. diff --git a/src/llama_stack_api/prompts.py b/src/llama_stack_api/prompts.py index 8562e4704..2054ccd30 100644 --- a/src/llama_stack_api/prompts.py +++ b/src/llama_stack_api/prompts.py @@ -10,7 +10,6 @@ from typing import Protocol, runtime_checkable from pydantic import BaseModel, Field, field_validator, model_validator -from llama_stack_api.common.tracing import telemetry_traceable from llama_stack_api.schema_utils import json_schema_type, webmethod from llama_stack_api.version import LLAMA_STACK_API_V1 @@ -93,7 +92,6 @@ class ListPromptsResponse(BaseModel): @runtime_checkable -@telemetry_traceable class Prompts(Protocol): """Prompts diff --git a/src/llama_stack_api/safety.py b/src/llama_stack_api/safety.py index ef84be2ea..7b4f2af5c 100644 --- a/src/llama_stack_api/safety.py +++ b/src/llama_stack_api/safety.py @@ -9,7 +9,6 @@ from typing import Any, Protocol, runtime_checkable from pydantic import BaseModel, Field -from llama_stack_api.common.tracing import telemetry_traceable from llama_stack_api.inference import OpenAIMessageParam from llama_stack_api.schema_utils import json_schema_type, webmethod from llama_stack_api.shields import Shield @@ -94,7 +93,6 @@ class ShieldStore(Protocol): @runtime_checkable -@telemetry_traceable class Safety(Protocol): """Safety diff --git a/src/llama_stack_api/shields.py b/src/llama_stack_api/shields.py index 19e412a5a..36ad2351b 100644 --- a/src/llama_stack_api/shields.py +++ b/src/llama_stack_api/shields.py @@ -8,7 +8,6 @@ from typing import Any, Literal, Protocol, runtime_checkable from pydantic import BaseModel -from llama_stack_api.common.tracing import telemetry_traceable from llama_stack_api.resource import Resource, ResourceType from llama_stack_api.schema_utils import json_schema_type, webmethod from llama_stack_api.version import LLAMA_STACK_API_V1 @@ -49,7 +48,6 @@ class ListShieldsResponse(BaseModel): @runtime_checkable -@telemetry_traceable class Shields(Protocol): @webmethod(route="/shields", method="GET", level=LLAMA_STACK_API_V1) async def list_shields(self) -> ListShieldsResponse: diff --git a/src/llama_stack_api/tools.py b/src/llama_stack_api/tools.py index 4dd5d55d2..94f2251b0 100644 --- a/src/llama_stack_api/tools.py +++ b/src/llama_stack_api/tools.py @@ -11,7 +11,6 @@ from pydantic import BaseModel from typing_extensions import runtime_checkable from llama_stack_api.common.content_types import URL, InterleavedContent -from llama_stack_api.common.tracing import telemetry_traceable from llama_stack_api.resource import 
Resource, ResourceType from llama_stack_api.schema_utils import json_schema_type, webmethod from llama_stack_api.version import LLAMA_STACK_API_V1 @@ -109,7 +108,6 @@ class ListToolDefsResponse(BaseModel): @runtime_checkable -@telemetry_traceable class ToolGroups(Protocol): @webmethod(route="/toolgroups", method="POST", level=LLAMA_STACK_API_V1, deprecated=True) async def register_tool_group( @@ -191,7 +189,6 @@ class SpecialToolGroup(Enum): @runtime_checkable -@telemetry_traceable class ToolRuntime(Protocol): tool_store: ToolStore | None = None diff --git a/src/llama_stack_api/vector_io.py b/src/llama_stack_api/vector_io.py index 135468d19..188ea3307 100644 --- a/src/llama_stack_api/vector_io.py +++ b/src/llama_stack_api/vector_io.py @@ -13,7 +13,6 @@ from typing import Annotated, Any, Literal, Protocol, runtime_checkable from fastapi import Body, Query from pydantic import BaseModel, Field, field_validator -from llama_stack_api.common.tracing import telemetry_traceable from llama_stack_api.inference import InterleavedContent from llama_stack_api.schema_utils import json_schema_type, register_schema, webmethod from llama_stack_api.vector_stores import VectorStore @@ -572,7 +571,6 @@ class VectorStoreTable(Protocol): @runtime_checkable -@telemetry_traceable class VectorIO(Protocol): vector_store_table: VectorStoreTable | None = None From 618c03405c91a03c91240092b35f02c9565dbb18 Mon Sep 17 00:00:00 2001 From: Abhishek Bongale Date: Mon, 1 Dec 2025 18:48:53 +0000 Subject: [PATCH 16/17] feat: Add metadata field to request and response (#4237) This change adds an optional metadata field to the OpenAI-compatible request and response objects. fixes: #3564 Signed-off-by: Abhishek Bongale Co-authored-by: Ashwin Bharambe --- client-sdks/stainless/openapi.yml | 18 ++++++++++++++++++ docs/static/deprecated-llama-stack-spec.yaml | 18 ++++++++++++++++++ docs/static/experimental-llama-stack-spec.yaml | 12 ++++++++++++ docs/static/llama-stack-spec.yaml | 18 ++++++++++++++++++ docs/static/stainless-llama-stack-spec.yaml | 18 ++++++++++++++++++ .../inline/agents/meta_reference/agents.py | 2 ++ .../responses/openai_responses.py | 4 ++++ .../meta_reference/responses/streaming.py | 4 ++++ src/llama_stack_api/agents.py | 2 ++ src/llama_stack_api/openai_responses.py | 2 ++ 10 files changed, 98 insertions(+) diff --git a/client-sdks/stainless/openapi.yml b/client-sdks/stainless/openapi.yml index 9703f94b5..da61a6385 100644 --- a/client-sdks/stainless/openapi.yml +++ b/client-sdks/stainless/openapi.yml @@ -6796,6 +6796,12 @@ components: anyOf: - type: integer - type: 'null' + metadata: + anyOf: + - additionalProperties: + type: string + type: object + - type: 'null' input: items: anyOf: @@ -7199,6 +7205,12 @@ components: anyOf: - type: integer - type: 'null' + metadata: + anyOf: + - additionalProperties: + type: string + type: object + - type: 'null' type: object required: - input @@ -7330,6 +7342,12 @@ components: anyOf: - type: integer - type: 'null' + metadata: + anyOf: + - additionalProperties: + type: string + type: object + - type: 'null' type: object required: - created_at diff --git a/docs/static/deprecated-llama-stack-spec.yaml b/docs/static/deprecated-llama-stack-spec.yaml index 70ecf9b03..d07b216b0 100644 --- a/docs/static/deprecated-llama-stack-spec.yaml +++ b/docs/static/deprecated-llama-stack-spec.yaml @@ -3639,6 +3639,12 @@ components: anyOf: - type: integer - type: 'null' + metadata: + anyOf: + - additionalProperties: + type: string + type: object + - type: 'null' input: items: anyOf: @@ -4042,6 +4048,12
@@ components: anyOf: - type: integer - type: 'null' + metadata: + anyOf: + - additionalProperties: + type: string + type: object + - type: 'null' type: object required: - input @@ -4173,6 +4185,12 @@ components: anyOf: - type: integer - type: 'null' + metadata: + anyOf: + - additionalProperties: + type: string + type: object + - type: 'null' type: object required: - created_at diff --git a/docs/static/experimental-llama-stack-spec.yaml b/docs/static/experimental-llama-stack-spec.yaml index 18cf3415f..4d5a43693 100644 --- a/docs/static/experimental-llama-stack-spec.yaml +++ b/docs/static/experimental-llama-stack-spec.yaml @@ -3336,6 +3336,12 @@ components: anyOf: - type: integer - type: 'null' + metadata: + anyOf: + - additionalProperties: + type: string + type: object + - type: 'null' input: items: anyOf: @@ -3736,6 +3742,12 @@ components: anyOf: - type: integer - type: 'null' + metadata: + anyOf: + - additionalProperties: + type: string + type: object + - type: 'null' type: object required: - created_at diff --git a/docs/static/llama-stack-spec.yaml b/docs/static/llama-stack-spec.yaml index 19239e722..0d91aeaaa 100644 --- a/docs/static/llama-stack-spec.yaml +++ b/docs/static/llama-stack-spec.yaml @@ -5817,6 +5817,12 @@ components: anyOf: - type: integer - type: 'null' + metadata: + anyOf: + - additionalProperties: + type: string + type: object + - type: 'null' input: items: anyOf: @@ -6220,6 +6226,12 @@ components: anyOf: - type: integer - type: 'null' + metadata: + anyOf: + - additionalProperties: + type: string + type: object + - type: 'null' type: object required: - input @@ -6351,6 +6363,12 @@ components: anyOf: - type: integer - type: 'null' + metadata: + anyOf: + - additionalProperties: + type: string + type: object + - type: 'null' type: object required: - created_at diff --git a/docs/static/stainless-llama-stack-spec.yaml b/docs/static/stainless-llama-stack-spec.yaml index 9703f94b5..da61a6385 100644 --- a/docs/static/stainless-llama-stack-spec.yaml +++ b/docs/static/stainless-llama-stack-spec.yaml @@ -6796,6 +6796,12 @@ components: anyOf: - type: integer - type: 'null' + metadata: + anyOf: + - additionalProperties: + type: string + type: object + - type: 'null' input: items: anyOf: @@ -7199,6 +7205,12 @@ components: anyOf: - type: integer - type: 'null' + metadata: + anyOf: + - additionalProperties: + type: string + type: object + - type: 'null' type: object required: - input @@ -7330,6 +7342,12 @@ components: anyOf: - type: integer - type: 'null' + metadata: + anyOf: + - additionalProperties: + type: string + type: object + - type: 'null' type: object required: - created_at diff --git a/src/llama_stack/providers/inline/agents/meta_reference/agents.py b/src/llama_stack/providers/inline/agents/meta_reference/agents.py index 1ceb34f8f..39cc22be7 100644 --- a/src/llama_stack/providers/inline/agents/meta_reference/agents.py +++ b/src/llama_stack/providers/inline/agents/meta_reference/agents.py @@ -109,6 +109,7 @@ class MetaReferenceAgentsImpl(Agents): max_infer_iters: int | None = 10, guardrails: list[ResponseGuardrail] | None = None, max_tool_calls: int | None = None, + metadata: dict[str, str] | None = None, ) -> OpenAIResponseObject: assert self.openai_responses_impl is not None, "OpenAI responses not initialized" result = await self.openai_responses_impl.create_openai_response( @@ -128,6 +129,7 @@ class MetaReferenceAgentsImpl(Agents): guardrails, parallel_tool_calls, max_tool_calls, + metadata, ) return result # type: ignore[no-any-return] diff --git 
a/src/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py b/src/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py index c8282df69..9cf30908c 100644 --- a/src/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +++ b/src/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py @@ -336,6 +336,7 @@ class OpenAIResponsesImpl: guardrails: list[str | ResponseGuardrailSpec] | None = None, parallel_tool_calls: bool | None = None, max_tool_calls: int | None = None, + metadata: dict[str, str] | None = None, ): stream = bool(stream) text = OpenAIResponseText(format=OpenAIResponseTextFormat(type="text")) if text is None else text @@ -390,6 +391,7 @@ class OpenAIResponsesImpl: guardrail_ids=guardrail_ids, parallel_tool_calls=parallel_tool_calls, max_tool_calls=max_tool_calls, + metadata=metadata, ) if stream: @@ -442,6 +444,7 @@ class OpenAIResponsesImpl: guardrail_ids: list[str] | None = None, parallel_tool_calls: bool | None = True, max_tool_calls: int | None = None, + metadata: dict[str, str] | None = None, ) -> AsyncIterator[OpenAIResponseObjectStream]: # These should never be None when called from create_openai_response (which sets defaults) # but we assert here to help mypy understand the types @@ -490,6 +493,7 @@ class OpenAIResponsesImpl: guardrail_ids=guardrail_ids, instructions=instructions, max_tool_calls=max_tool_calls, + metadata=metadata, ) # Stream the response diff --git a/src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py b/src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py index 3515e0578..c778d65e7 100644 --- a/src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +++ b/src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py @@ -120,6 +120,7 @@ class StreamingResponseOrchestrator: prompt: OpenAIResponsePrompt | None = None, parallel_tool_calls: bool | None = None, max_tool_calls: int | None = None, + metadata: dict[str, str] | None = None, ): self.inference_api = inference_api self.ctx = ctx @@ -137,6 +138,7 @@ class StreamingResponseOrchestrator: self.parallel_tool_calls = parallel_tool_calls # Max number of total calls to built-in tools that can be processed in a response self.max_tool_calls = max_tool_calls + self.metadata = metadata self.sequence_number = 0 # Store MCP tool mapping that gets built during tool processing self.mcp_tool_to_server: dict[str, OpenAIResponseInputToolMCP] = ( @@ -164,6 +166,7 @@ class StreamingResponseOrchestrator: model=self.ctx.model, status="completed", output=[OpenAIResponseMessage(role="assistant", content=[refusal_content], type="message")], + metadata=self.metadata, ) return OpenAIResponseObjectStreamResponseCompleted(response=refusal_response) @@ -199,6 +202,7 @@ class StreamingResponseOrchestrator: prompt=self.prompt, parallel_tool_calls=self.parallel_tool_calls, max_tool_calls=self.max_tool_calls, + metadata=self.metadata, ) async def create_response(self) -> AsyncIterator[OpenAIResponseObjectStream]: diff --git a/src/llama_stack_api/agents.py b/src/llama_stack_api/agents.py index 9b767608a..8d3b489e1 100644 --- a/src/llama_stack_api/agents.py +++ b/src/llama_stack_api/agents.py @@ -89,6 +89,7 @@ class Agents(Protocol): ), ] = None, max_tool_calls: int | None = None, + metadata: dict[str, str] | None = None, ) -> OpenAIResponseObject | AsyncIterator[OpenAIResponseObjectStream]: """Create a model response. 
@@ -100,6 +101,7 @@ class Agents(Protocol): :param include: (Optional) Additional fields to include in the response. :param guardrails: (Optional) List of guardrails to apply during response generation. Can be guardrail IDs (strings) or guardrail specifications. :param max_tool_calls: (Optional) Max number of total calls to built-in tools that can be processed in a response. + :param metadata: (Optional) Dictionary of metadata key-value pairs to attach to the response. :returns: An OpenAIResponseObject. """ ... diff --git a/src/llama_stack_api/openai_responses.py b/src/llama_stack_api/openai_responses.py index e20004487..177d2314a 100644 --- a/src/llama_stack_api/openai_responses.py +++ b/src/llama_stack_api/openai_responses.py @@ -597,6 +597,7 @@ class OpenAIResponseObject(BaseModel): :param usage: (Optional) Token usage information for the response :param instructions: (Optional) System message inserted into the model's context :param max_tool_calls: (Optional) Max number of total calls to built-in tools that can be processed in a response + :param metadata: (Optional) Dictionary of metadata key-value pairs """ created_at: int @@ -619,6 +620,7 @@ class OpenAIResponseObject(BaseModel): usage: OpenAIResponseUsage | None = None instructions: str | None = None max_tool_calls: int | None = None + metadata: dict[str, str] | None = None @json_schema_type From 89807dc11716968c2f704b0750afc660619cf122 Mon Sep 17 00:00:00 2001 From: Jaideep Rao Date: Tue, 2 Dec 2025 01:13:58 +0530 Subject: [PATCH 17/17] feat(api)!: deprecate `toolgroup` and `tool_runtime` apis (#4249) # What does this PR do? marks `toolgroup` and `tool_runtime` APIs for deprecation Closes #4233 and #4061 (partially) How long do we wait before we remove deprecated APIs? ## Test Plan Signed-off-by: Jaideep Rao --- client-sdks/stainless/openapi.yml | 6 + docs/static/deprecated-llama-stack-spec.yaml | 157 +++++++++++++ docs/static/llama-stack-spec.yaml | 228 ------------------- docs/static/stainless-llama-stack-spec.yaml | 6 + src/llama_stack_api/tools.py | 12 +- 5 files changed, 175 insertions(+), 234 deletions(-) diff --git a/client-sdks/stainless/openapi.yml b/client-sdks/stainless/openapi.yml index da61a6385..51607d92d 100644 --- a/client-sdks/stainless/openapi.yml +++ b/client-sdks/stainless/openapi.yml @@ -2091,6 +2091,7 @@ paths: schema: $ref: '#/components/schemas/InvokeToolRequest' required: true + deprecated: true /v1/tool-runtime/list-tools: get: responses: @@ -2142,6 +2143,7 @@ paths: - $ref: '#/components/schemas/URL' - type: 'null' title: Mcp Endpoint + deprecated: true /v1/toolgroups: get: responses: @@ -2168,6 +2170,7 @@ paths: summary: List Tool Groups description: List tool groups with optional provider. 
operationId: list_tool_groups_v1_toolgroups_get + deprecated: true post: responses: '400': @@ -2229,6 +2232,7 @@ paths: schema: type: string description: 'Path parameter: toolgroup_id' + deprecated: true delete: responses: '400': @@ -2293,6 +2297,7 @@ paths: - type: string - type: 'null' title: Toolgroup Id + deprecated: true /v1/tools/{tool_name}: get: responses: @@ -2326,6 +2331,7 @@ paths: schema: type: string description: 'Path parameter: tool_name' + deprecated: true /v1/vector-io/insert: post: responses: diff --git a/docs/static/deprecated-llama-stack-spec.yaml b/docs/static/deprecated-llama-stack-spec.yaml index d07b216b0..2d0ce6e08 100644 --- a/docs/static/deprecated-llama-stack-spec.yaml +++ b/docs/static/deprecated-llama-stack-spec.yaml @@ -378,6 +378,91 @@ paths: type: string description: 'Path parameter: identifier' deprecated: true + /v1/tool-runtime/invoke: + post: + responses: + '200': + description: A ToolInvocationResult. + content: + application/json: + schema: + $ref: '#/components/schemas/ToolInvocationResult' + '400': + description: Bad Request + $ref: '#/components/responses/BadRequest400' + '429': + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' + '500': + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' + default: + description: Default Response + $ref: '#/components/responses/DefaultError' + tags: + - Tool Runtime + summary: Invoke Tool + description: Run a tool with the given arguments. + operationId: invoke_tool_v1_tool_runtime_invoke_post + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/InvokeToolRequest' + required: true + deprecated: true + /v1/tool-runtime/list-tools: + get: + responses: + '200': + description: A ListToolDefsResponse. + content: + application/json: + schema: + $ref: '#/components/schemas/ListToolDefsResponse' + '400': + $ref: '#/components/responses/BadRequest400' + description: Bad Request + '429': + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests + '500': + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error + default: + $ref: '#/components/responses/DefaultError' + description: Default Response + tags: + - Tool Runtime + summary: List Runtime Tools + description: List all tools in the runtime. + operationId: list_runtime_tools_v1_tool_runtime_list_tools_get + parameters: + - name: authorization + in: query + required: false + schema: + anyOf: + - type: string + - type: 'null' + title: Authorization + - name: tool_group_id + in: query + required: false + schema: + anyOf: + - type: string + - type: 'null' + title: Tool Group Id + - name: mcp_endpoint + in: query + required: false + schema: + anyOf: + - $ref: '#/components/schemas/URL' + - type: 'null' + title: Mcp Endpoint + deprecated: true /v1/toolgroups: get: responses: @@ -404,6 +489,7 @@ paths: summary: List Tool Groups description: List tool groups with optional provider. operationId: list_tool_groups_v1_toolgroups_get + deprecated: true post: responses: '400': @@ -465,6 +551,7 @@ paths: schema: type: string description: 'Path parameter: toolgroup_id' + deprecated: true delete: responses: '400': @@ -494,6 +581,76 @@ paths: type: string description: 'Path parameter: toolgroup_id' deprecated: true + /v1/tools: + get: + responses: + '200': + description: A ListToolDefsResponse. 
+ content: + application/json: + schema: + $ref: '#/components/schemas/ListToolDefsResponse' + '400': + $ref: '#/components/responses/BadRequest400' + description: Bad Request + '429': + $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests + '500': + $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error + default: + $ref: '#/components/responses/DefaultError' + description: Default Response + tags: + - Tool Groups + summary: List Tools + description: List tools with optional tool group. + operationId: list_tools_v1_tools_get + parameters: + - name: toolgroup_id + in: query + required: false + schema: + anyOf: + - type: string + - type: 'null' + title: Toolgroup Id + deprecated: true + /v1/tools/{tool_name}: + get: + responses: + '200': + description: A ToolDef. + content: + application/json: + schema: + $ref: '#/components/schemas/ToolDef' + '400': + description: Bad Request + $ref: '#/components/responses/BadRequest400' + '429': + description: Too Many Requests + $ref: '#/components/responses/TooManyRequests429' + '500': + description: Internal Server Error + $ref: '#/components/responses/InternalServerError500' + default: + description: Default Response + $ref: '#/components/responses/DefaultError' + tags: + - Tool Groups + summary: Get Tool + description: Get a tool by its name. + operationId: get_tool_v1_tools__tool_name__get + parameters: + - name: tool_name + in: path + required: true + schema: + type: string + description: 'Path parameter: tool_name' + deprecated: true /v1beta/datasets: get: responses: diff --git a/docs/static/llama-stack-spec.yaml b/docs/static/llama-stack-spec.yaml index 0d91aeaaa..a593fef85 100644 --- a/docs/static/llama-stack-spec.yaml +++ b/docs/static/llama-stack-spec.yaml @@ -1872,216 +1872,6 @@ paths: schema: type: string description: 'Path parameter: identifier' - /v1/tool-runtime/invoke: - post: - responses: - '200': - description: A ToolInvocationResult. - content: - application/json: - schema: - $ref: '#/components/schemas/ToolInvocationResult' - '400': - description: Bad Request - $ref: '#/components/responses/BadRequest400' - '429': - description: Too Many Requests - $ref: '#/components/responses/TooManyRequests429' - '500': - description: Internal Server Error - $ref: '#/components/responses/InternalServerError500' - default: - description: Default Response - $ref: '#/components/responses/DefaultError' - tags: - - Tool Runtime - summary: Invoke Tool - description: Run a tool with the given arguments. - operationId: invoke_tool_v1_tool_runtime_invoke_post - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/InvokeToolRequest' - required: true - /v1/tool-runtime/list-tools: - get: - responses: - '200': - description: A ListToolDefsResponse. - content: - application/json: - schema: - $ref: '#/components/schemas/ListToolDefsResponse' - '400': - $ref: '#/components/responses/BadRequest400' - description: Bad Request - '429': - $ref: '#/components/responses/TooManyRequests429' - description: Too Many Requests - '500': - $ref: '#/components/responses/InternalServerError500' - description: Internal Server Error - default: - $ref: '#/components/responses/DefaultError' - description: Default Response - tags: - - Tool Runtime - summary: List Runtime Tools - description: List all tools in the runtime. 
- operationId: list_runtime_tools_v1_tool_runtime_list_tools_get - parameters: - - name: authorization - in: query - required: false - schema: - anyOf: - - type: string - - type: 'null' - title: Authorization - - name: tool_group_id - in: query - required: false - schema: - anyOf: - - type: string - - type: 'null' - title: Tool Group Id - - name: mcp_endpoint - in: query - required: false - schema: - anyOf: - - $ref: '#/components/schemas/URL' - - type: 'null' - title: Mcp Endpoint - /v1/toolgroups: - get: - responses: - '200': - description: A ListToolGroupsResponse. - content: - application/json: - schema: - $ref: '#/components/schemas/ListToolGroupsResponse' - '400': - description: Bad Request - $ref: '#/components/responses/BadRequest400' - '429': - description: Too Many Requests - $ref: '#/components/responses/TooManyRequests429' - '500': - description: Internal Server Error - $ref: '#/components/responses/InternalServerError500' - default: - description: Default Response - $ref: '#/components/responses/DefaultError' - tags: - - Tool Groups - summary: List Tool Groups - description: List tool groups with optional provider. - operationId: list_tool_groups_v1_toolgroups_get - /v1/toolgroups/{toolgroup_id}: - get: - responses: - '200': - description: A ToolGroup. - content: - application/json: - schema: - $ref: '#/components/schemas/ToolGroup' - '400': - description: Bad Request - $ref: '#/components/responses/BadRequest400' - '429': - description: Too Many Requests - $ref: '#/components/responses/TooManyRequests429' - '500': - description: Internal Server Error - $ref: '#/components/responses/InternalServerError500' - default: - description: Default Response - $ref: '#/components/responses/DefaultError' - tags: - - Tool Groups - summary: Get Tool Group - description: Get a tool group by its ID. - operationId: get_tool_group_v1_toolgroups__toolgroup_id__get - parameters: - - name: toolgroup_id - in: path - required: true - schema: - type: string - description: 'Path parameter: toolgroup_id' - /v1/tools: - get: - responses: - '200': - description: A ListToolDefsResponse. - content: - application/json: - schema: - $ref: '#/components/schemas/ListToolDefsResponse' - '400': - $ref: '#/components/responses/BadRequest400' - description: Bad Request - '429': - $ref: '#/components/responses/TooManyRequests429' - description: Too Many Requests - '500': - $ref: '#/components/responses/InternalServerError500' - description: Internal Server Error - default: - $ref: '#/components/responses/DefaultError' - description: Default Response - tags: - - Tool Groups - summary: List Tools - description: List tools with optional tool group. - operationId: list_tools_v1_tools_get - parameters: - - name: toolgroup_id - in: query - required: false - schema: - anyOf: - - type: string - - type: 'null' - title: Toolgroup Id - /v1/tools/{tool_name}: - get: - responses: - '200': - description: A ToolDef. - content: - application/json: - schema: - $ref: '#/components/schemas/ToolDef' - '400': - description: Bad Request - $ref: '#/components/responses/BadRequest400' - '429': - description: Too Many Requests - $ref: '#/components/responses/TooManyRequests429' - '500': - description: Internal Server Error - $ref: '#/components/responses/InternalServerError500' - default: - description: Default Response - $ref: '#/components/responses/DefaultError' - tags: - - Tool Groups - summary: Get Tool - description: Get a tool by its name. 
- operationId: get_tool_v1_tools__tool_name__get - parameters: - - name: tool_name - in: path - required: true - schema: - type: string - description: 'Path parameter: tool_name' /v1/vector-io/insert: post: responses: @@ -8133,24 +7923,6 @@ components: required: - data title: ListShieldsResponse - InvokeToolRequest: - properties: - tool_name: - type: string - title: Tool Name - kwargs: - additionalProperties: true - type: object - title: Kwargs - authorization: - anyOf: - - type: string - - type: 'null' - type: object - required: - - tool_name - - kwargs - title: InvokeToolRequest ImageContentItem: description: A image content item properties: diff --git a/docs/static/stainless-llama-stack-spec.yaml b/docs/static/stainless-llama-stack-spec.yaml index da61a6385..51607d92d 100644 --- a/docs/static/stainless-llama-stack-spec.yaml +++ b/docs/static/stainless-llama-stack-spec.yaml @@ -2091,6 +2091,7 @@ paths: schema: $ref: '#/components/schemas/InvokeToolRequest' required: true + deprecated: true /v1/tool-runtime/list-tools: get: responses: @@ -2142,6 +2143,7 @@ paths: - $ref: '#/components/schemas/URL' - type: 'null' title: Mcp Endpoint + deprecated: true /v1/toolgroups: get: responses: @@ -2168,6 +2170,7 @@ paths: summary: List Tool Groups description: List tool groups with optional provider. operationId: list_tool_groups_v1_toolgroups_get + deprecated: true post: responses: '400': @@ -2229,6 +2232,7 @@ paths: schema: type: string description: 'Path parameter: toolgroup_id' + deprecated: true delete: responses: '400': @@ -2293,6 +2297,7 @@ paths: - type: string - type: 'null' title: Toolgroup Id + deprecated: true /v1/tools/{tool_name}: get: responses: @@ -2326,6 +2331,7 @@ paths: schema: type: string description: 'Path parameter: tool_name' + deprecated: true /v1/vector-io/insert: post: responses: diff --git a/src/llama_stack_api/tools.py b/src/llama_stack_api/tools.py index 94f2251b0..2a2a4304c 100644 --- a/src/llama_stack_api/tools.py +++ b/src/llama_stack_api/tools.py @@ -126,7 +126,7 @@ class ToolGroups(Protocol): """ ... - @webmethod(route="/toolgroups/{toolgroup_id:path}", method="GET", level=LLAMA_STACK_API_V1) + @webmethod(route="/toolgroups/{toolgroup_id:path}", method="GET", level=LLAMA_STACK_API_V1, deprecated=True) async def get_tool_group( self, toolgroup_id: str, @@ -138,7 +138,7 @@ class ToolGroups(Protocol): """ ... - @webmethod(route="/toolgroups", method="GET", level=LLAMA_STACK_API_V1) + @webmethod(route="/toolgroups", method="GET", level=LLAMA_STACK_API_V1, deprecated=True) async def list_tool_groups(self) -> ListToolGroupsResponse: """List tool groups with optional provider. @@ -146,7 +146,7 @@ class ToolGroups(Protocol): """ ... - @webmethod(route="/tools", method="GET", level=LLAMA_STACK_API_V1) + @webmethod(route="/tools", method="GET", level=LLAMA_STACK_API_V1, deprecated=True) async def list_tools(self, toolgroup_id: str | None = None) -> ListToolDefsResponse: """List tools with optional tool group. @@ -155,7 +155,7 @@ class ToolGroups(Protocol): """ ... - @webmethod(route="/tools/{tool_name:path}", method="GET", level=LLAMA_STACK_API_V1) + @webmethod(route="/tools/{tool_name:path}", method="GET", level=LLAMA_STACK_API_V1, deprecated=True) async def get_tool( self, tool_name: str, @@ -193,7 +193,7 @@ class ToolRuntime(Protocol): tool_store: ToolStore | None = None # TODO: This needs to be renamed once OPEN API generator name conflict issue is fixed. 
- @webmethod(route="/tool-runtime/list-tools", method="GET", level=LLAMA_STACK_API_V1) + @webmethod(route="/tool-runtime/list-tools", method="GET", level=LLAMA_STACK_API_V1, deprecated=True) async def list_runtime_tools( self, tool_group_id: str | None = None, @@ -209,7 +209,7 @@ class ToolRuntime(Protocol): """ ... - @webmethod(route="/tool-runtime/invoke", method="POST", level=LLAMA_STACK_API_V1) + @webmethod(route="/tool-runtime/invoke", method="POST", level=LLAMA_STACK_API_V1, deprecated=True) async def invoke_tool( self, tool_name: str,