From 0b5a794c27949eb573b6d43108aa938805d232dd Mon Sep 17 00:00:00 2001 From: ehhuang Date: Fri, 8 Aug 2025 13:47:36 -0700 Subject: [PATCH 01/17] fix: telemetry logger spams when queue is full (#3070) # What does this PR do? ## Test Plan Ran a stress test on chat completion endpoint locally: For 10 concurrent users over 3 minutes: Before: image After: image (Will send scripts in a future PR.) --- .../providers/utils/telemetry/tracing.py | 29 +++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/llama_stack/providers/utils/telemetry/tracing.py b/llama_stack/providers/utils/telemetry/tracing.py index 75b29cdce..7080e774a 100644 --- a/llama_stack/providers/utils/telemetry/tracing.py +++ b/llama_stack/providers/utils/telemetry/tracing.py @@ -9,7 +9,9 @@ import contextvars import logging import queue import random +import sys import threading +import time from collections.abc import Callable from datetime import UTC, datetime from functools import wraps @@ -30,6 +32,16 @@ from llama_stack.providers.utils.telemetry.trace_protocol import serialize_value logger = get_logger(__name__, category="core") +# Fallback logger that does NOT propagate to TelemetryHandler to avoid recursion +_fallback_logger = logging.getLogger("llama_stack.telemetry.background") +if not _fallback_logger.handlers: + _fallback_logger.propagate = False + _fallback_logger.setLevel(logging.ERROR) + _fallback_handler = logging.StreamHandler(sys.stderr) + _fallback_handler.setLevel(logging.ERROR) + _fallback_handler.setFormatter(logging.Formatter("%(asctime)s [%(levelname)s] %(name)s: %(message)s")) + _fallback_logger.addHandler(_fallback_handler) + INVALID_SPAN_ID = 0x0000000000000000 INVALID_TRACE_ID = 0x00000000000000000000000000000000 @@ -79,19 +91,32 @@ def generate_trace_id() -> str: CURRENT_TRACE_CONTEXT = contextvars.ContextVar("trace_context", default=None) BACKGROUND_LOGGER = None +LOG_QUEUE_FULL_LOG_INTERVAL_SECONDS = 60.0 + class BackgroundLogger: def __init__(self, api: Telemetry, capacity: int = 100000): self.api = api - self.log_queue = queue.Queue(maxsize=capacity) + self.log_queue: queue.Queue[Any] = queue.Queue(maxsize=capacity) self.worker_thread = threading.Thread(target=self._process_logs, daemon=True) self.worker_thread.start() + self._last_queue_full_log_time: float = 0.0 + self._dropped_since_last_notice: int = 0 def log_event(self, event): try: self.log_queue.put_nowait(event) except queue.Full: - logger.error("Log queue is full, dropping event") + # Aggregate drops and emit at most once per interval via fallback logger + self._dropped_since_last_notice += 1 + current_time = time.time() + if current_time - self._last_queue_full_log_time >= LOG_QUEUE_FULL_LOG_INTERVAL_SECONDS: + _fallback_logger.error( + "Log queue is full; dropped %d events since last notice", + self._dropped_since_last_notice, + ) + self._last_queue_full_log_time = current_time + self._dropped_since_last_notice = 0 def _process_logs(self): while True: From 1677d6bffdf0a002abe3b827c460df4930ee83c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vlastimil=20Eli=C3=A1=C5=A1?= Date: Fri, 8 Aug 2025 22:48:15 +0200 Subject: [PATCH 02/17] feat: Flash-Lite 2.0 and 2.5 models added to Gemini inference provider (#3058) PR adds Flash-Lite 2.0 and 2.5 models to the Gemini inference provider Closes #3046 ## Test Plan I was not able to locate any existing test for this provider, so I performed manual testing. But the change is really trivial and straightforward. 
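For reference, a minimal sketch of such a manual check against a locally running stack. This is not part of the patch: it assumes the `llama_stack_client` package, a server on the default port with the Gemini provider configured, and that the new entries surface under a `gemini/` prefix — the exact identifiers may differ.

```python
# Hypothetical manual check (not part of this patch). Assumes a Llama Stack
# server running locally with the Gemini provider configured via GEMINI_API_KEY;
# the model identifier below is an assumption and may differ in your deployment.
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")

# Confirm the new Flash-Lite entries are registered
flash_lite = [m.identifier for m in client.models.list() if "flash-lite" in m.identifier]
print(flash_lite)

# Smoke-test one of the new models
response = client.inference.chat_completion(
    model_id="gemini/gemini-2.5-flash-lite",  # assumed identifier
    messages=[{"role": "user", "content": "Reply with the single word: ok"}],
)
print(response.completion_message.content)
```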
--- llama_stack/providers/remote/inference/gemini/models.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/llama_stack/providers/remote/inference/gemini/models.py b/llama_stack/providers/remote/inference/gemini/models.py index 6fda35e0f..bd696b0ac 100644 --- a/llama_stack/providers/remote/inference/gemini/models.py +++ b/llama_stack/providers/remote/inference/gemini/models.py @@ -13,7 +13,9 @@ LLM_MODEL_IDS = [ "gemini-1.5-flash", "gemini-1.5-pro", "gemini-2.0-flash", + "gemini-2.0-flash-lite", "gemini-2.5-flash", + "gemini-2.5-flash-lite", "gemini-2.5-pro", ] From ce72a2852516bbd702d202b2f4426478643faea0 Mon Sep 17 00:00:00 2001 From: Varsha Date: Sun, 10 Aug 2025 15:48:36 -0700 Subject: [PATCH 03/17] docs: Update doc on search modes for Milvus (#3078) # What does this PR do? Update Milvus doc on using search modes. ## Test Plan Signed-off-by: Varsha Prasad Narsing --- .../providers/vector_io/remote_milvus.md | 87 +++++++++++++++++++ llama_stack/providers/registry/vector_io.py | 87 +++++++++++++++++++ 2 files changed, 174 insertions(+) diff --git a/docs/source/providers/vector_io/remote_milvus.md b/docs/source/providers/vector_io/remote_milvus.md index 3646f4acc..2af64b8bb 100644 --- a/docs/source/providers/vector_io/remote_milvus.md +++ b/docs/source/providers/vector_io/remote_milvus.md @@ -11,6 +11,7 @@ That means you're not limited to storing vectors in memory or in a separate serv - Easy to use - Fully integrated with Llama Stack +- Supports all search modes: vector, keyword, and hybrid search (both inline and remote configurations) ## Usage @@ -101,6 +102,92 @@ vector_io: - **`client_pem_path`**: Path to the **client certificate** file (required for mTLS). - **`client_key_path`**: Path to the **client private key** file (required for mTLS). +## Search Modes + +Milvus supports three different search modes for both inline and remote configurations: + +### Vector Search +Vector search uses semantic similarity to find the most relevant chunks based on embedding vectors. This is the default search mode and works well for finding conceptually similar content. + +```python +# Vector search example +search_response = client.vector_stores.search( + vector_store_id=vector_store.id, + query="What is machine learning?", + search_mode="vector", + max_num_results=5, +) +``` + +### Keyword Search +Keyword search uses traditional text-based matching to find chunks containing specific terms or phrases. This is useful when you need exact term matches. + +```python +# Keyword search example +search_response = client.vector_stores.search( + vector_store_id=vector_store.id, + query="Python programming language", + search_mode="keyword", + max_num_results=5, +) +``` + +### Hybrid Search +Hybrid search combines both vector and keyword search methods to provide more comprehensive results. It leverages the strengths of both semantic similarity and exact term matching. + +#### Basic Hybrid Search +```python +# Basic hybrid search example (uses RRF ranker with default impact_factor=60.0) +search_response = client.vector_stores.search( + vector_store_id=vector_store.id, + query="neural networks in Python", + search_mode="hybrid", + max_num_results=5, +) +``` + +**Note**: The default `impact_factor` value of 60.0 was empirically determined to be optimal in the original RRF research paper: ["Reciprocal Rank Fusion outperforms Condorcet and individual Rank Learning Methods"](https://plg.uwaterloo.ca/~gvcormac/cormacksigir09-rrf.pdf) (Cormack et al., 2009). 
+ +#### Hybrid Search with RRF (Reciprocal Rank Fusion) Ranker +RRF combines rankings from vector and keyword search by using reciprocal ranks. The impact factor controls how much weight is given to higher-ranked results. + +```python +# Hybrid search with custom RRF parameters +search_response = client.vector_stores.search( + vector_store_id=vector_store.id, + query="neural networks in Python", + search_mode="hybrid", + max_num_results=5, + ranking_options={ + "ranker": { + "type": "rrf", + "impact_factor": 100.0, # Higher values give more weight to top-ranked results + } + }, +) +``` + +#### Hybrid Search with Weighted Ranker +Weighted ranker linearly combines normalized scores from vector and keyword search. The alpha parameter controls the balance between the two search methods. + +```python +# Hybrid search with weighted ranker +search_response = client.vector_stores.search( + vector_store_id=vector_store.id, + query="neural networks in Python", + search_mode="hybrid", + max_num_results=5, + ranking_options={ + "ranker": { + "type": "weighted", + "alpha": 0.7, # 70% vector search, 30% keyword search + } + }, +) +``` + +For detailed documentation on RRF and Weighted rankers, please refer to the [Milvus Reranking Guide](https://milvus.io/docs/reranking.md). + ## Documentation See the [Milvus documentation](https://milvus.io/docs/install-overview.md) for more details about Milvus in general. diff --git a/llama_stack/providers/registry/vector_io.py b/llama_stack/providers/registry/vector_io.py index 846f7b88e..b4f3ab6ac 100644 --- a/llama_stack/providers/registry/vector_io.py +++ b/llama_stack/providers/registry/vector_io.py @@ -535,6 +535,7 @@ That means you're not limited to storing vectors in memory or in a separate serv - Easy to use - Fully integrated with Llama Stack +- Supports all search modes: vector, keyword, and hybrid search (both inline and remote configurations) ## Usage @@ -625,6 +626,92 @@ vector_io: - **`client_pem_path`**: Path to the **client certificate** file (required for mTLS). - **`client_key_path`**: Path to the **client private key** file (required for mTLS). +## Search Modes + +Milvus supports three different search modes for both inline and remote configurations: + +### Vector Search +Vector search uses semantic similarity to find the most relevant chunks based on embedding vectors. This is the default search mode and works well for finding conceptually similar content. + +```python +# Vector search example +search_response = client.vector_stores.search( + vector_store_id=vector_store.id, + query="What is machine learning?", + search_mode="vector", + max_num_results=5, +) +``` + +### Keyword Search +Keyword search uses traditional text-based matching to find chunks containing specific terms or phrases. This is useful when you need exact term matches. + +```python +# Keyword search example +search_response = client.vector_stores.search( + vector_store_id=vector_store.id, + query="Python programming language", + search_mode="keyword", + max_num_results=5, +) +``` + +### Hybrid Search +Hybrid search combines both vector and keyword search methods to provide more comprehensive results. It leverages the strengths of both semantic similarity and exact term matching. 
+ +#### Basic Hybrid Search +```python +# Basic hybrid search example (uses RRF ranker with default impact_factor=60.0) +search_response = client.vector_stores.search( + vector_store_id=vector_store.id, + query="neural networks in Python", + search_mode="hybrid", + max_num_results=5, +) +``` + +**Note**: The default `impact_factor` value of 60.0 was empirically determined to be optimal in the original RRF research paper: ["Reciprocal Rank Fusion outperforms Condorcet and individual Rank Learning Methods"](https://plg.uwaterloo.ca/~gvcormac/cormacksigir09-rrf.pdf) (Cormack et al., 2009). + +#### Hybrid Search with RRF (Reciprocal Rank Fusion) Ranker +RRF combines rankings from vector and keyword search by using reciprocal ranks. The impact factor controls how much weight is given to higher-ranked results. + +```python +# Hybrid search with custom RRF parameters +search_response = client.vector_stores.search( + vector_store_id=vector_store.id, + query="neural networks in Python", + search_mode="hybrid", + max_num_results=5, + ranking_options={ + "ranker": { + "type": "rrf", + "impact_factor": 100.0, # Higher values give more weight to top-ranked results + } + }, +) +``` + +#### Hybrid Search with Weighted Ranker +Weighted ranker linearly combines normalized scores from vector and keyword search. The alpha parameter controls the balance between the two search methods. + +```python +# Hybrid search with weighted ranker +search_response = client.vector_stores.search( + vector_store_id=vector_store.id, + query="neural networks in Python", + search_mode="hybrid", + max_num_results=5, + ranking_options={ + "ranker": { + "type": "weighted", + "alpha": 0.7, # 70% vector search, 30% keyword search + } + }, +) +``` + +For detailed documentation on RRF and Weighted rankers, please refer to the [Milvus Reranking Guide](https://milvus.io/docs/reranking.md). + ## Documentation See the [Milvus documentation](https://milvus.io/docs/install-overview.md) for more details about Milvus in general. From 69dc789e15421e717d447042593cde08c61ad9e2 Mon Sep 17 00:00:00 2001 From: Varsha Date: Sun, 10 Aug 2025 16:34:34 -0700 Subject: [PATCH 04/17] docs: Add unsupported search mode info about FAISS (#3089) --- docs/source/providers/vector_io/inline_faiss.md | 12 ++++++++++++ .../providers/inline/vector_io/faiss/faiss.py | 8 ++++++-- llama_stack/providers/registry/vector_io.py | 12 ++++++++++++ 3 files changed, 30 insertions(+), 2 deletions(-) diff --git a/docs/source/providers/vector_io/inline_faiss.md b/docs/source/providers/vector_io/inline_faiss.md index bcff66f3f..cfa18a839 100644 --- a/docs/source/providers/vector_io/inline_faiss.md +++ b/docs/source/providers/vector_io/inline_faiss.md @@ -12,6 +12,18 @@ That means you'll get fast and efficient vector retrieval. - Lightweight and easy to use - Fully integrated with Llama Stack - GPU support +- **Vector search** - FAISS supports pure vector similarity search using embeddings + +## Search Modes + +**Supported:** +- **Vector Search** (`mode="vector"`): Performs vector similarity search using embeddings + +**Not Supported:** +- **Keyword Search** (`mode="keyword"`): Not supported by FAISS +- **Hybrid Search** (`mode="hybrid"`): Not supported by FAISS + +> **Note**: FAISS is designed as a pure vector similarity search library. See the [FAISS GitHub repository](https://github.com/facebookresearch/faiss) for more details about FAISS's core functionality. 
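As an illustration (an editorial sketch, not part of the patch), the following shows how the supported and unsupported modes behave against a FAISS-backed vector store; it assumes the `llama_stack_client` package, a running stack, and a placeholder vector store ID, and uses the same `vector_stores.search` call shown in the Milvus documentation above.

```python
# Hypothetical sketch (not part of this patch): FAISS serves vector search,
# while keyword and hybrid modes are rejected by the provider.
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")

# Vector search is supported
results = client.vector_stores.search(
    vector_store_id="vs_faiss_example",  # placeholder ID
    query="What is machine learning?",
    search_mode="vector",
    max_num_results=5,
)

# Keyword (and hybrid) search is not supported by FAISS and surfaces an error
try:
    client.vector_stores.search(
        vector_store_id="vs_faiss_example",
        query="machine learning",
        search_mode="keyword",
    )
except Exception as err:
    print(f"keyword search rejected: {err}")
```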
## Usage diff --git a/llama_stack/providers/inline/vector_io/faiss/faiss.py b/llama_stack/providers/inline/vector_io/faiss/faiss.py index 7a5373726..5a063592c 100644 --- a/llama_stack/providers/inline/vector_io/faiss/faiss.py +++ b/llama_stack/providers/inline/vector_io/faiss/faiss.py @@ -174,7 +174,9 @@ class FaissIndex(EmbeddingIndex): k: int, score_threshold: float, ) -> QueryChunksResponse: - raise NotImplementedError("Keyword search is not supported in FAISS") + raise NotImplementedError( + "Keyword search is not supported - underlying DB FAISS does not support this search mode" + ) async def query_hybrid( self, @@ -185,7 +187,9 @@ class FaissIndex(EmbeddingIndex): reranker_type: str, reranker_params: dict[str, Any] | None = None, ) -> QueryChunksResponse: - raise NotImplementedError("Hybrid search is not supported in FAISS") + raise NotImplementedError( + "Hybrid search is not supported - underlying DB FAISS does not support this search mode" + ) class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPrivate): diff --git a/llama_stack/providers/registry/vector_io.py b/llama_stack/providers/registry/vector_io.py index b4f3ab6ac..ed170b508 100644 --- a/llama_stack/providers/registry/vector_io.py +++ b/llama_stack/providers/registry/vector_io.py @@ -45,6 +45,18 @@ That means you'll get fast and efficient vector retrieval. - Lightweight and easy to use - Fully integrated with Llama Stack - GPU support +- **Vector search** - FAISS supports pure vector similarity search using embeddings + +## Search Modes + +**Supported:** +- **Vector Search** (`mode="vector"`): Performs vector similarity search using embeddings + +**Not Supported:** +- **Keyword Search** (`mode="keyword"`): Not supported by FAISS +- **Hybrid Search** (`mode="hybrid"`): Not supported by FAISS + +> **Note**: FAISS is designed as a pure vector similarity search library. See the [FAISS GitHub repository](https://github.com/facebookresearch/faiss) for more details about FAISS's core functionality. ## Usage From 78a59a4dbeaa50cc85d4f08f6c72b1bb51e7f721 Mon Sep 17 00:00:00 2001 From: Francisco Arceo Date: Sun, 10 Aug 2025 19:11:14 -0600 Subject: [PATCH 05/17] chore: Adding GitHub Stars, trends, and contributor shout out to README (#3079) # What does this PR do? Updates READMe to add 1. GitHub badge highlighting Llama Stack as #1 Repo of the Day 2. GitHub Star History (cumulative stars chart) 3. Contributor shout out ## Test Plan Signed-off-by: Francisco Javier Arceo --- README.md | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/README.md b/README.md index 03aa3dd50..8db4580a2 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,8 @@ # Llama Stack +meta-llama%2Fllama-stack | Trendshift + +----- [![PyPI version](https://img.shields.io/pypi/v/llama_stack.svg)](https://pypi.org/project/llama_stack/) [![PyPI - Downloads](https://img.shields.io/pypi/dm/llama-stack)](https://pypi.org/project/llama-stack/) [![License](https://img.shields.io/pypi/l/llama_stack.svg)](https://github.com/meta-llama/llama-stack/blob/main/LICENSE) @@ -9,6 +12,7 @@ [**Quick Start**](https://llama-stack.readthedocs.io/en/latest/getting_started/index.html) | [**Documentation**](https://llama-stack.readthedocs.io/en/latest/index.html) | [**Colab Notebook**](./docs/getting_started.ipynb) | [**Discord**](https://discord.gg/llama-stack) + ### ✨🎉 Llama 4 Support 🎉✨ We released [Version 0.2.0](https://github.com/meta-llama/llama-stack/releases/tag/v0.2.0) with support for the Llama 4 herd of models released by Meta. 
@@ -179,3 +183,17 @@ Please checkout our [Documentation](https://llama-stack.readthedocs.io/en/latest Check out our client SDKs for connecting to a Llama Stack server in your preferred language, you can choose from [python](https://github.com/meta-llama/llama-stack-client-python), [typescript](https://github.com/meta-llama/llama-stack-client-typescript), [swift](https://github.com/meta-llama/llama-stack-client-swift), and [kotlin](https://github.com/meta-llama/llama-stack-client-kotlin) programming languages to quickly build your applications. You can find more example scripts with client SDKs to talk with the Llama Stack server in our [llama-stack-apps](https://github.com/meta-llama/llama-stack-apps/tree/main/examples) repo. + + +## 🌟 GitHub Star History +## Star History + +[![Star History Chart](https://api.star-history.com/svg?repos=meta-llama/llama-stack&type=Date)](https://www.star-history.com/#meta-llama/llama-stack&Date) + +## ✨ Contributors + +Thanks to all of our amazing contributors! + + + + \ No newline at end of file From a4bad6c0b44dabad5fe183266960eeb52b2b27d6 Mon Sep 17 00:00:00 2001 From: Eran Cohen Date: Mon, 11 Aug 2025 15:22:04 +0300 Subject: [PATCH 06/17] feat: Add Google Vertex AI inference provider support (#2841) # What does this PR do? - Add new Vertex AI remote inference provider with litellm integration - Support for Gemini models through Google Cloud Vertex AI platform - Uses Google Cloud Application Default Credentials (ADC) for authentication - Added VertexAI models: gemini-2.5-flash, gemini-2.5-pro, gemini-2.0-flash. - Updated provider registry to include vertexai provider - Updated starter template to support Vertex AI configuration - Added comprehensive documentation and sample configuration relates to https://github.com/meta-llama/llama-stack/issues/2747 ## Test Plan Signed-off-by: Eran Cohen Co-authored-by: Francisco Arceo --- docs/source/providers/inference/index.md | 1 + .../providers/inference/remote_vertexai.md | 40 ++++++++++++++ llama_stack/distributions/ci-tests/build.yaml | 1 + llama_stack/distributions/ci-tests/run.yaml | 5 ++ llama_stack/distributions/starter/build.yaml | 1 + llama_stack/distributions/starter/run.yaml | 5 ++ llama_stack/distributions/starter/starter.py | 10 ++++ llama_stack/providers/registry/inference.py | 30 +++++++++++ .../remote/inference/vertexai/__init__.py | 15 ++++++ .../remote/inference/vertexai/config.py | 45 ++++++++++++++++ .../remote/inference/vertexai/models.py | 20 +++++++ .../remote/inference/vertexai/vertexai.py | 52 +++++++++++++++++++ .../inference/test_openai_completion.py | 1 + .../inference/test_text_inference.py | 1 + 14 files changed, 227 insertions(+) create mode 100644 docs/source/providers/inference/remote_vertexai.md create mode 100644 llama_stack/providers/remote/inference/vertexai/__init__.py create mode 100644 llama_stack/providers/remote/inference/vertexai/config.py create mode 100644 llama_stack/providers/remote/inference/vertexai/models.py create mode 100644 llama_stack/providers/remote/inference/vertexai/vertexai.py diff --git a/docs/source/providers/inference/index.md b/docs/source/providers/inference/index.md index 1c7bc86b9..38781e5eb 100644 --- a/docs/source/providers/inference/index.md +++ b/docs/source/providers/inference/index.md @@ -29,6 +29,7 @@ remote_runpod remote_sambanova remote_tgi remote_together +remote_vertexai remote_vllm remote_watsonx ``` diff --git a/docs/source/providers/inference/remote_vertexai.md b/docs/source/providers/inference/remote_vertexai.md new file mode 100644 
index 000000000..962bbd76f --- /dev/null +++ b/docs/source/providers/inference/remote_vertexai.md @@ -0,0 +1,40 @@ +# remote::vertexai + +## Description + +Google Vertex AI inference provider enables you to use Google's Gemini models through Google Cloud's Vertex AI platform, providing several advantages: + +• Enterprise-grade security: Uses Google Cloud's security controls and IAM +• Better integration: Seamless integration with other Google Cloud services +• Advanced features: Access to additional Vertex AI features like model tuning and monitoring +• Authentication: Uses Google Cloud Application Default Credentials (ADC) instead of API keys + +Configuration: +- Set VERTEX_AI_PROJECT environment variable (required) +- Set VERTEX_AI_LOCATION environment variable (optional, defaults to us-central1) +- Use Google Cloud Application Default Credentials or service account key + +Authentication Setup: +Option 1 (Recommended): gcloud auth application-default login +Option 2: Set GOOGLE_APPLICATION_CREDENTIALS to service account key path + +Available Models: +- vertex_ai/gemini-2.0-flash +- vertex_ai/gemini-2.5-flash +- vertex_ai/gemini-2.5-pro + +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `project` | `` | No | | Google Cloud project ID for Vertex AI | +| `location` | `` | No | us-central1 | Google Cloud location for Vertex AI | + +## Sample Configuration + +```yaml +project: ${env.VERTEX_AI_PROJECT:=} +location: ${env.VERTEX_AI_LOCATION:=us-central1} + +``` + diff --git a/llama_stack/distributions/ci-tests/build.yaml b/llama_stack/distributions/ci-tests/build.yaml index 2f9ae8682..e6e699b62 100644 --- a/llama_stack/distributions/ci-tests/build.yaml +++ b/llama_stack/distributions/ci-tests/build.yaml @@ -14,6 +14,7 @@ distribution_spec: - provider_type: remote::openai - provider_type: remote::anthropic - provider_type: remote::gemini + - provider_type: remote::vertexai - provider_type: remote::groq - provider_type: remote::sambanova - provider_type: inline::sentence-transformers diff --git a/llama_stack/distributions/ci-tests/run.yaml b/llama_stack/distributions/ci-tests/run.yaml index 188c66275..05e1b4576 100644 --- a/llama_stack/distributions/ci-tests/run.yaml +++ b/llama_stack/distributions/ci-tests/run.yaml @@ -65,6 +65,11 @@ providers: provider_type: remote::gemini config: api_key: ${env.GEMINI_API_KEY:=} + - provider_id: ${env.VERTEX_AI_PROJECT:+vertexai} + provider_type: remote::vertexai + config: + project: ${env.VERTEX_AI_PROJECT:=} + location: ${env.VERTEX_AI_LOCATION:=us-central1} - provider_id: groq provider_type: remote::groq config: diff --git a/llama_stack/distributions/starter/build.yaml b/llama_stack/distributions/starter/build.yaml index f95a03a9e..1a4f81d49 100644 --- a/llama_stack/distributions/starter/build.yaml +++ b/llama_stack/distributions/starter/build.yaml @@ -14,6 +14,7 @@ distribution_spec: - provider_type: remote::openai - provider_type: remote::anthropic - provider_type: remote::gemini + - provider_type: remote::vertexai - provider_type: remote::groq - provider_type: remote::sambanova - provider_type: inline::sentence-transformers diff --git a/llama_stack/distributions/starter/run.yaml b/llama_stack/distributions/starter/run.yaml index 8bd737686..46bd12956 100644 --- a/llama_stack/distributions/starter/run.yaml +++ b/llama_stack/distributions/starter/run.yaml @@ -65,6 +65,11 @@ providers: provider_type: remote::gemini config: api_key: ${env.GEMINI_API_KEY:=} + - provider_id: 
${env.VERTEX_AI_PROJECT:+vertexai} + provider_type: remote::vertexai + config: + project: ${env.VERTEX_AI_PROJECT:=} + location: ${env.VERTEX_AI_LOCATION:=us-central1} - provider_id: groq provider_type: remote::groq config: diff --git a/llama_stack/distributions/starter/starter.py b/llama_stack/distributions/starter/starter.py index a970f2d1c..0270b68ad 100644 --- a/llama_stack/distributions/starter/starter.py +++ b/llama_stack/distributions/starter/starter.py @@ -56,6 +56,7 @@ ENABLED_INFERENCE_PROVIDERS = [ "fireworks", "together", "gemini", + "vertexai", "groq", "sambanova", "anthropic", @@ -71,6 +72,7 @@ INFERENCE_PROVIDER_IDS = { "tgi": "${env.TGI_URL:+tgi}", "cerebras": "${env.CEREBRAS_API_KEY:+cerebras}", "nvidia": "${env.NVIDIA_API_KEY:+nvidia}", + "vertexai": "${env.VERTEX_AI_PROJECT:+vertexai}", } @@ -246,6 +248,14 @@ def get_distribution_template() -> DistributionTemplate: "", "Gemini API Key", ), + "VERTEX_AI_PROJECT": ( + "", + "Google Cloud Project ID for Vertex AI", + ), + "VERTEX_AI_LOCATION": ( + "us-central1", + "Google Cloud Location for Vertex AI", + ), "SAMBANOVA_API_KEY": ( "", "SambaNova API Key", diff --git a/llama_stack/providers/registry/inference.py b/llama_stack/providers/registry/inference.py index a8bc96a77..1801cdcad 100644 --- a/llama_stack/providers/registry/inference.py +++ b/llama_stack/providers/registry/inference.py @@ -213,6 +213,36 @@ def available_providers() -> list[ProviderSpec]: description="Google Gemini inference provider for accessing Gemini models and Google's AI services.", ), ), + remote_provider_spec( + api=Api.inference, + adapter=AdapterSpec( + adapter_type="vertexai", + pip_packages=["litellm", "google-cloud-aiplatform"], + module="llama_stack.providers.remote.inference.vertexai", + config_class="llama_stack.providers.remote.inference.vertexai.VertexAIConfig", + provider_data_validator="llama_stack.providers.remote.inference.vertexai.config.VertexAIProviderDataValidator", + description="""Google Vertex AI inference provider enables you to use Google's Gemini models through Google Cloud's Vertex AI platform, providing several advantages: + +• Enterprise-grade security: Uses Google Cloud's security controls and IAM +• Better integration: Seamless integration with other Google Cloud services +• Advanced features: Access to additional Vertex AI features like model tuning and monitoring +• Authentication: Uses Google Cloud Application Default Credentials (ADC) instead of API keys + +Configuration: +- Set VERTEX_AI_PROJECT environment variable (required) +- Set VERTEX_AI_LOCATION environment variable (optional, defaults to us-central1) +- Use Google Cloud Application Default Credentials or service account key + +Authentication Setup: +Option 1 (Recommended): gcloud auth application-default login +Option 2: Set GOOGLE_APPLICATION_CREDENTIALS to service account key path + +Available Models: +- vertex_ai/gemini-2.0-flash +- vertex_ai/gemini-2.5-flash +- vertex_ai/gemini-2.5-pro""", + ), + ), remote_provider_spec( api=Api.inference, adapter=AdapterSpec( diff --git a/llama_stack/providers/remote/inference/vertexai/__init__.py b/llama_stack/providers/remote/inference/vertexai/__init__.py new file mode 100644 index 000000000..d9e9419be --- /dev/null +++ b/llama_stack/providers/remote/inference/vertexai/__init__.py @@ -0,0 +1,15 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
+ +from .config import VertexAIConfig + + +async def get_adapter_impl(config: VertexAIConfig, _deps): + from .vertexai import VertexAIInferenceAdapter + + impl = VertexAIInferenceAdapter(config) + await impl.initialize() + return impl diff --git a/llama_stack/providers/remote/inference/vertexai/config.py b/llama_stack/providers/remote/inference/vertexai/config.py new file mode 100644 index 000000000..659de653e --- /dev/null +++ b/llama_stack/providers/remote/inference/vertexai/config.py @@ -0,0 +1,45 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from typing import Any + +from pydantic import BaseModel, Field + +from llama_stack.schema_utils import json_schema_type + + +class VertexAIProviderDataValidator(BaseModel): + vertex_project: str | None = Field( + default=None, + description="Google Cloud project ID for Vertex AI", + ) + vertex_location: str | None = Field( + default=None, + description="Google Cloud location for Vertex AI (e.g., us-central1)", + ) + + +@json_schema_type +class VertexAIConfig(BaseModel): + project: str = Field( + description="Google Cloud project ID for Vertex AI", + ) + location: str = Field( + default="us-central1", + description="Google Cloud location for Vertex AI", + ) + + @classmethod + def sample_run_config( + cls, + project: str = "${env.VERTEX_AI_PROJECT:=}", + location: str = "${env.VERTEX_AI_LOCATION:=us-central1}", + **kwargs, + ) -> dict[str, Any]: + return { + "project": project, + "location": location, + } diff --git a/llama_stack/providers/remote/inference/vertexai/models.py b/llama_stack/providers/remote/inference/vertexai/models.py new file mode 100644 index 000000000..e72db533d --- /dev/null +++ b/llama_stack/providers/remote/inference/vertexai/models.py @@ -0,0 +1,20 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from llama_stack.providers.utils.inference.model_registry import ( + ProviderModelEntry, +) + +# Vertex AI model IDs with vertex_ai/ prefix as required by litellm +LLM_MODEL_IDS = [ + "vertex_ai/gemini-2.0-flash", + "vertex_ai/gemini-2.5-flash", + "vertex_ai/gemini-2.5-pro", +] + +SAFETY_MODELS_ENTRIES = list[ProviderModelEntry]() + +MODEL_ENTRIES = [ProviderModelEntry(provider_model_id=m) for m in LLM_MODEL_IDS] + SAFETY_MODELS_ENTRIES diff --git a/llama_stack/providers/remote/inference/vertexai/vertexai.py b/llama_stack/providers/remote/inference/vertexai/vertexai.py new file mode 100644 index 000000000..8807fd0e6 --- /dev/null +++ b/llama_stack/providers/remote/inference/vertexai/vertexai.py @@ -0,0 +1,52 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
+ +from typing import Any + +from llama_stack.apis.inference import ChatCompletionRequest +from llama_stack.providers.utils.inference.litellm_openai_mixin import ( + LiteLLMOpenAIMixin, +) + +from .config import VertexAIConfig +from .models import MODEL_ENTRIES + + +class VertexAIInferenceAdapter(LiteLLMOpenAIMixin): + def __init__(self, config: VertexAIConfig) -> None: + LiteLLMOpenAIMixin.__init__( + self, + MODEL_ENTRIES, + litellm_provider_name="vertex_ai", + api_key_from_config=None, # Vertex AI uses ADC, not API keys + provider_data_api_key_field="vertex_project", # Use project for validation + ) + self.config = config + + def get_api_key(self) -> str: + # Vertex AI doesn't use API keys, it uses Application Default Credentials + # Return empty string to let litellm handle authentication via ADC + return "" + + async def _get_params(self, request: ChatCompletionRequest) -> dict[str, Any]: + # Get base parameters from parent + params = await super()._get_params(request) + + # Add Vertex AI specific parameters + provider_data = self.get_request_provider_data() + if provider_data: + if getattr(provider_data, "vertex_project", None): + params["vertex_project"] = provider_data.vertex_project + if getattr(provider_data, "vertex_location", None): + params["vertex_location"] = provider_data.vertex_location + else: + params["vertex_project"] = self.config.project + params["vertex_location"] = self.config.location + + # Remove api_key since Vertex AI uses ADC + params.pop("api_key", None) + + return params diff --git a/tests/integration/inference/test_openai_completion.py b/tests/integration/inference/test_openai_completion.py index 0222bfb79..72137662d 100644 --- a/tests/integration/inference/test_openai_completion.py +++ b/tests/integration/inference/test_openai_completion.py @@ -34,6 +34,7 @@ def skip_if_model_doesnt_support_openai_completion(client_with_models, model_id) "remote::runpod", "remote::sambanova", "remote::tgi", + "remote::vertexai", ): pytest.skip(f"Model {model_id} hosted by {provider.provider_type} doesn't support OpenAI completions.") diff --git a/tests/integration/inference/test_text_inference.py b/tests/integration/inference/test_text_inference.py index 08e19726e..d7ffe5929 100644 --- a/tests/integration/inference/test_text_inference.py +++ b/tests/integration/inference/test_text_inference.py @@ -29,6 +29,7 @@ def skip_if_model_doesnt_support_completion(client_with_models, model_id): "remote::openai", "remote::anthropic", "remote::gemini", + "remote::vertexai", "remote::groq", "remote::sambanova", ) From 8faff925919d034f2f3e971a2d24f64b221ed4d9 Mon Sep 17 00:00:00 2001 From: Matthew Farrellee Date: Mon, 11 Aug 2025 09:38:54 -0500 Subject: [PATCH 07/17] chore: remove redundant code in unregister_toolgroup (#3092) # What does this PR do? 
removes redundant code ## Test Plan ci --- llama_stack/core/routing_tables/toolgroups.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/llama_stack/core/routing_tables/toolgroups.py b/llama_stack/core/routing_tables/toolgroups.py index e172af991..6910b3906 100644 --- a/llama_stack/core/routing_tables/toolgroups.py +++ b/llama_stack/core/routing_tables/toolgroups.py @@ -124,10 +124,7 @@ class ToolGroupsRoutingTable(CommonRoutingTableImpl, ToolGroups): return toolgroup async def unregister_toolgroup(self, toolgroup_id: str) -> None: - tool_group = await self.get_tool_group(toolgroup_id) - if tool_group is None: - raise ToolGroupNotFoundError(toolgroup_id) - await self.unregister_object(tool_group) + await self.unregister_object(await self.get_tool_group(toolgroup_id)) async def shutdown(self) -> None: pass From 7448a4a88c71a996b7cfa980d9d55915c1ab5094 Mon Sep 17 00:00:00 2001 From: Francisco Arceo Date: Mon, 11 Aug 2025 08:39:52 -0600 Subject: [PATCH 08/17] chore: Updating UI Sidebar (#3081) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # What does this PR do? This updates the sidebar to look a little more like other popular ones. Screenshot 2025-08-08 at 11 25 31 PM
## Test Plan Signed-off-by: Francisco Javier Arceo --- llama_stack/ui/app/chat-playground/page.tsx | 2 +- .../ui/components/layout/app-sidebar.tsx | 168 ++++++++++-------- 2 files changed, 96 insertions(+), 74 deletions(-) diff --git a/llama_stack/ui/app/chat-playground/page.tsx b/llama_stack/ui/app/chat-playground/page.tsx index c31248b78..d8094af85 100644 --- a/llama_stack/ui/app/chat-playground/page.tsx +++ b/llama_stack/ui/app/chat-playground/page.tsx @@ -175,7 +175,7 @@ const handleSubmitWithContent = async (content: string) => { return (
-          Chat Playground
+          Chat Playground (Completions)