From f2b83800cc0a00b2f2ca8b3b0d246868f166a5fb Mon Sep 17 00:00:00 2001
From: Yuan Tang <terrytangyuan@gmail.com>
Date: Sat, 10 May 2025 21:32:44 -0400
Subject: [PATCH 1/5] docs: Add link to Discord to README (#2126)

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index b2b2d12d9..5dfe3577a 100644
--- a/README.md
+++ b/README.md
@@ -7,7 +7,7 @@
 [![Unit Tests](https://github.com/meta-llama/llama-stack/actions/workflows/unit-tests.yml/badge.svg?branch=main)](https://github.com/meta-llama/llama-stack/actions/workflows/unit-tests.yml?query=branch%3Amain)
 [![Integration Tests](https://github.com/meta-llama/llama-stack/actions/workflows/integration-tests.yml/badge.svg?branch=main)](https://github.com/meta-llama/llama-stack/actions/workflows/integration-tests.yml?query=branch%3Amain)
 
-[**Quick Start**](https://llama-stack.readthedocs.io/en/latest/getting_started/index.html) | [**Documentation**](https://llama-stack.readthedocs.io/en/latest/index.html) | [**Colab Notebook**](./docs/getting_started.ipynb)
+[**Quick Start**](https://llama-stack.readthedocs.io/en/latest/getting_started/index.html) | [**Documentation**](https://llama-stack.readthedocs.io/en/latest/index.html) | [**Colab Notebook**](./docs/getting_started.ipynb) | [**Discord**](https://discord.gg/llama-stack)
 
 ### ✨🎉 Llama 4 Support  🎉✨
 We released [Version 0.2.0](https://github.com/meta-llama/llama-stack/releases/tag/v0.2.0) with support for the Llama 4 herd of models released by Meta.

From dd7be274b95af4b775fef9f0ebfb40bbc1e206c9 Mon Sep 17 00:00:00 2001
From: Ilya Kolchinsky <58424190+ilya-kolchinsky@users.noreply.github.com>
Date: Mon, 12 May 2025 11:25:13 +0200
Subject: [PATCH 2/5] fix: raise an error when no vector DB IDs are provided to
 the RAG tool (#1911)

# What does this PR do?
This PR fixes the behavior of the `/tool-runtime/rag-tool/query`
endpoint when invoked with an empty `vector_db_ids` parameter.
As of now, it simply returns an empty result, which leads to a
misleading error message from the server and makes it difficult and
time-consuming to detect the problem with the input parameter.
The proposed fix is to return an indicative error message in this case.


## Test Plan
Running the following script:
```
agent = Agent(
    client,
    model=MODEL_ID,
    instructions=SYSTEM_PROMPT,
    tools=[
        dict(
            name="builtin::rag/knowledge_search",
            args={
                "vector_db_ids": [],
            },
        )
    ],
)

response = agent.create_turn(
    messages=[
        {
            "role": "user",
            "content": "How to install OpenShift?",
        }
    ],
    session_id=agent.create_session(f"rag-session")
)
```
results in the following error message in the non-patched version:
```
{"type": "function", "name": "knowledge_search", "parameters": {"query": "installing OpenShift"}}400: Invalid value: Tool call result (id: 494b8020-90bb-449b-aa76-10960d6b2cc2, name: knowledge_search) does not have any content
```
and in the following one in the patched version:
```
{"type": "function", "name": "knowledge_search", "parameters": {"query": "installing OpenShift"}}400: Invalid value: No vector DBs were provided to the RAG tool. Please provide at least one DB.
```
---
 .../inline/tool_runtime/rag/memory.py         |  4 +++-
 tests/unit/rag/test_rag_query.py              | 19 +++++++++++++++++++
 2 files changed, 22 insertions(+), 1 deletion(-)
 create mode 100644 tests/unit/rag/test_rag_query.py

diff --git a/llama_stack/providers/inline/tool_runtime/rag/memory.py b/llama_stack/providers/inline/tool_runtime/rag/memory.py
index df0257718..968f93354 100644
--- a/llama_stack/providers/inline/tool_runtime/rag/memory.py
+++ b/llama_stack/providers/inline/tool_runtime/rag/memory.py
@@ -105,7 +105,9 @@ class MemoryToolRuntimeImpl(ToolsProtocolPrivate, ToolRuntime, RAGToolRuntime):
         query_config: RAGQueryConfig | None = None,
     ) -> RAGQueryResult:
         if not vector_db_ids:
-            return RAGQueryResult(content=None)
+            raise ValueError(
+                "No vector DBs were provided to the knowledge search tool. Please provide at least one vector DB ID."
+            )
 
         query_config = query_config or RAGQueryConfig()
         query = await generate_rag_query(
diff --git a/tests/unit/rag/test_rag_query.py b/tests/unit/rag/test_rag_query.py
new file mode 100644
index 000000000..b9fd8cca4
--- /dev/null
+++ b/tests/unit/rag/test_rag_query.py
@@ -0,0 +1,19 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from unittest.mock import MagicMock
+
+import pytest
+
+from llama_stack.providers.inline.tool_runtime.rag.memory import MemoryToolRuntimeImpl
+
+
+class TestRagQuery:
+    @pytest.mark.asyncio
+    async def test_query_raises_on_empty_vector_db_ids(self):
+        rag_tool = MemoryToolRuntimeImpl(config=MagicMock(), vector_io_api=MagicMock(), inference_api=MagicMock())
+        with pytest.raises(ValueError):
+            await rag_tool.query(content=MagicMock(), vector_db_ids=[])

From db21eab713a194d404f949bd69c3f276abd0f517 Mon Sep 17 00:00:00 2001
From: Ihar Hrachyshka <ihar.hrachyshka@gmail.com>
Date: Mon, 12 May 2025 05:49:59 -0400
Subject: [PATCH 3/5] fix: catch TimeoutError in place of asyncio.TimeoutError
 (#2131)

# What does this PR do?

As per docs [1], since python 3.11 wait_for() raises TimeoutError. Since
we currently support python 3.10+, we have to catch both.

[1]:
https://docs.python.org/3.12/library/asyncio-task.html#asyncio.wait_for

[//]: # (If resolving an issue, uncomment and update the line below)
[//]: # (Closes #[issue-number])

## Test Plan

No explicit testing; just code hardening to reflect docs.

[//]: # (## Documentation)

Signed-off-by: Ihar Hrachyshka <ihar.hrachyshka@gmail.com>
---
 llama_stack/distribution/providers.py       | 2 +-
 llama_stack/distribution/routers/routers.py | 2 +-
 llama_stack/distribution/server/server.py   | 4 ++--
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/llama_stack/distribution/providers.py b/llama_stack/distribution/providers.py
index 157fd1e0e..29b7109dd 100644
--- a/llama_stack/distribution/providers.py
+++ b/llama_stack/distribution/providers.py
@@ -99,7 +99,7 @@ class ProviderImpl(Providers):
             try:
                 health = await asyncio.wait_for(impl.health(), timeout=timeout)
                 return api_name, health
-            except asyncio.TimeoutError:
+            except (asyncio.TimeoutError, TimeoutError):
                 return (
                     api_name,
                     HealthResponse(
diff --git a/llama_stack/distribution/routers/routers.py b/llama_stack/distribution/routers/routers.py
index 7e80a067f..371d34904 100644
--- a/llama_stack/distribution/routers/routers.py
+++ b/llama_stack/distribution/routers/routers.py
@@ -630,7 +630,7 @@ class InferenceRouter(Inference):
                     continue
                 health = await asyncio.wait_for(impl.health(), timeout=timeout)
                 health_statuses[provider_id] = health
-            except asyncio.TimeoutError:
+            except (asyncio.TimeoutError, TimeoutError):
                 health_statuses[provider_id] = HealthResponse(
                     status=HealthStatus.ERROR,
                     message=f"Health check timed out after {timeout} seconds",
diff --git a/llama_stack/distribution/server/server.py b/llama_stack/distribution/server/server.py
index 85c576753..e34a62b00 100644
--- a/llama_stack/distribution/server/server.py
+++ b/llama_stack/distribution/server/server.py
@@ -114,7 +114,7 @@ def translate_exception(exc: Exception) -> HTTPException | RequestValidationErro
         return HTTPException(status_code=400, detail=str(exc))
     elif isinstance(exc, PermissionError):
         return HTTPException(status_code=403, detail=f"Permission denied: {str(exc)}")
-    elif isinstance(exc, TimeoutError):
+    elif isinstance(exc, asyncio.TimeoutError | TimeoutError):
         return HTTPException(status_code=504, detail=f"Operation timed out: {str(exc)}")
     elif isinstance(exc, NotImplementedError):
         return HTTPException(status_code=501, detail=f"Not implemented: {str(exc)}")
@@ -139,7 +139,7 @@ async def shutdown(app):
                 await asyncio.wait_for(impl.shutdown(), timeout=5)
             else:
                 logger.warning("No shutdown method for %s", impl_name)
-        except asyncio.TimeoutError:
+        except (asyncio.TimeoutError, TimeoutError):
             logger.exception("Shutdown timeout for %s ", impl_name, exc_info=True)
         except (Exception, asyncio.CancelledError) as e:
             logger.exception("Failed to shutdown %s: %s", impl_name, {e})

From 9a6e91cd931cc4d7a738bb1b8c968f0631b75fbd Mon Sep 17 00:00:00 2001
From: Matthew Farrellee <matt@cs.wisc.edu>
Date: Mon, 12 May 2025 09:27:01 -0400
Subject: [PATCH 4/5] fix: chromadb type hint (#2136)

```
$ INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \
  CHROMADB_URL=http://localhost:8000 \
  llama stack build --image-type conda --image-name llama \
    --providers vector_io=remote::chromadb,inference=remote::ollama \
    --run
...
  File ".../llama_stack/providers/remote/vector_io/chroma/chroma.py", line 31, in <module>
    ChromaClientType = chromadb.AsyncHttpClient | chromadb.PersistentClient
TypeError: unsupported operand type(s) for |: 'function' and 'function'
```

issue: AsyncHttpClient and PersistentClient are functions that return
AsyncClientAPI and ClientAPI types, respectively. | cannot be used to
construct a type from functions.

previously the code was Union[AsyncHttpClient, PersistentClient], which
did not trigger an error

# What does this PR do?

Closes #2135
---
 llama_stack/providers/remote/vector_io/chroma/chroma.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/llama_stack/providers/remote/vector_io/chroma/chroma.py b/llama_stack/providers/remote/vector_io/chroma/chroma.py
index 5381a48ef..a919963ab 100644
--- a/llama_stack/providers/remote/vector_io/chroma/chroma.py
+++ b/llama_stack/providers/remote/vector_io/chroma/chroma.py
@@ -26,8 +26,7 @@ from .config import ChromaVectorIOConfig as RemoteChromaVectorIOConfig
 
 log = logging.getLogger(__name__)
 
-
-ChromaClientType = chromadb.AsyncHttpClient | chromadb.PersistentClient
+ChromaClientType = chromadb.api.AsyncClientAPI | chromadb.api.ClientAPI
 
 
 # this is a helper to allow us to use async and non-async chroma clients interchangeably

From 675f34e79dcf5d5936a28776a9d000c911769430 Mon Sep 17 00:00:00 2001
From: Krzysztof Malczuk <2000krzysztof@gmail.com>
Date: Mon, 12 May 2025 16:05:40 +0100
Subject: [PATCH 5/5] fix: Syntax error with missing stubs at the end of some
 function calls (#2116)

# What does this PR do?
This PR adds stubs to the end of functions create_agent_turn,
create_openai_response and job_result.

[//]: # (If resolving an issue, uncomment and update the line below)
[//]: # (Closes #[issue-number])

## Test Plan
Ran provided unit tests

[//]: # (## Documentation)
---
 llama_stack/apis/agents/agents.py | 2 ++
 llama_stack/apis/eval/eval.py     | 3 +++
 2 files changed, 5 insertions(+)

diff --git a/llama_stack/apis/agents/agents.py b/llama_stack/apis/agents/agents.py
index 91e57dbbe..f4367d09b 100644
--- a/llama_stack/apis/agents/agents.py
+++ b/llama_stack/apis/agents/agents.py
@@ -415,6 +415,7 @@ class Agents(Protocol):
         :returns: If stream=False, returns a Turn object.
                   If stream=True, returns an SSE event stream of AgentTurnResponseStreamChunk
         """
+        ...
 
     @webmethod(
         route="/agents/{agent_id}/session/{session_id}/turn/{turn_id}/resume",
@@ -606,3 +607,4 @@ class Agents(Protocol):
         :param model: The underlying LLM used for completions.
         :param previous_response_id: (Optional) if specified, the new response will be a continuation of the previous response. This can be used to easily fork-off new responses from existing responses.
         """
+        ...
diff --git a/llama_stack/apis/eval/eval.py b/llama_stack/apis/eval/eval.py
index 23ca89a94..38699d3f5 100644
--- a/llama_stack/apis/eval/eval.py
+++ b/llama_stack/apis/eval/eval.py
@@ -95,6 +95,7 @@ class Eval(Protocol):
         :param benchmark_config: The configuration for the benchmark.
         :return: The job that was created to run the evaluation.
         """
+        ...
 
     @webmethod(route="/eval/benchmarks/{benchmark_id}/evaluations", method="POST")
     async def evaluate_rows(
@@ -112,6 +113,7 @@ class Eval(Protocol):
         :param benchmark_config: The configuration for the benchmark.
         :return: EvaluateResponse object containing generations and scores
         """
+        ...
 
     @webmethod(route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}", method="GET")
     async def job_status(self, benchmark_id: str, job_id: str) -> Job:
@@ -140,3 +142,4 @@ class Eval(Protocol):
         :param job_id: The ID of the job to get the result of.
         :return: The result of the job.
         """
+        ...