From f2b83800cc0a00b2f2ca8b3b0d246868f166a5fb Mon Sep 17 00:00:00 2001 From: Yuan Tang Date: Sat, 10 May 2025 21:32:44 -0400 Subject: [PATCH 1/5] docs: Add link to Discord to README (#2126) --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index b2b2d12d9..5dfe3577a 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ [![Unit Tests](https://github.com/meta-llama/llama-stack/actions/workflows/unit-tests.yml/badge.svg?branch=main)](https://github.com/meta-llama/llama-stack/actions/workflows/unit-tests.yml?query=branch%3Amain) [![Integration Tests](https://github.com/meta-llama/llama-stack/actions/workflows/integration-tests.yml/badge.svg?branch=main)](https://github.com/meta-llama/llama-stack/actions/workflows/integration-tests.yml?query=branch%3Amain) -[**Quick Start**](https://llama-stack.readthedocs.io/en/latest/getting_started/index.html) | [**Documentation**](https://llama-stack.readthedocs.io/en/latest/index.html) | [**Colab Notebook**](./docs/getting_started.ipynb) +[**Quick Start**](https://llama-stack.readthedocs.io/en/latest/getting_started/index.html) | [**Documentation**](https://llama-stack.readthedocs.io/en/latest/index.html) | [**Colab Notebook**](./docs/getting_started.ipynb) | [**Discord**](https://discord.gg/llama-stack) ### ✨🎉 Llama 4 Support 🎉✨ We released [Version 0.2.0](https://github.com/meta-llama/llama-stack/releases/tag/v0.2.0) with support for the Llama 4 herd of models released by Meta. From dd7be274b95af4b775fef9f0ebfb40bbc1e206c9 Mon Sep 17 00:00:00 2001 From: Ilya Kolchinsky <58424190+ilya-kolchinsky@users.noreply.github.com> Date: Mon, 12 May 2025 11:25:13 +0200 Subject: [PATCH 2/5] fix: raise an error when no vector DB IDs are provided to the RAG tool (#1911) # What does this PR do? This PR fixes the behavior of the `/tool-runtime/rag-tool/query` endpoint when invoked with an empty `vector_db_ids` parameter. 
As of now, it simply returns an empty result, which leads to a misleading error message from the server and makes it difficult and time-consuming to detect the problem with the input parameter. The proposed fix is to return an indicative error message in this case. ## Test Plan Running the following script: ``` agent = Agent( client, model=MODEL_ID, instructions=SYSTEM_PROMPT, tools=[ dict( name="builtin::rag/knowledge_search", args={ "vector_db_ids": [], }, ) ], ) response = agent.create_turn( messages=[ { "role": "user", "content": "How to install OpenShift?", } ], session_id=agent.create_session(f"rag-session") ) ``` results in the following error message in the non-patched version: ``` {"type": "function", "name": "knowledge_search", "parameters": {"query": "installing OpenShift"}}400: Invalid value: Tool call result (id: 494b8020-90bb-449b-aa76-10960d6b2cc2, name: knowledge_search) does not have any content ``` and in the following one in the patched version: ``` {"type": "function", "name": "knowledge_search", "parameters": {"query": "installing OpenShift"}}400: Invalid value: No vector DBs were provided to the knowledge search tool. Please provide at least one vector DB ID. ``` --- .../inline/tool_runtime/rag/memory.py | 4 +++- tests/unit/rag/test_rag_query.py | 19 +++++++++++++++++++ 2 files changed, 22 insertions(+), 1 deletion(-) create mode 100644 tests/unit/rag/test_rag_query.py diff --git a/llama_stack/providers/inline/tool_runtime/rag/memory.py b/llama_stack/providers/inline/tool_runtime/rag/memory.py index df0257718..968f93354 100644 --- a/llama_stack/providers/inline/tool_runtime/rag/memory.py +++ b/llama_stack/providers/inline/tool_runtime/rag/memory.py @@ -105,7 +105,9 @@ class MemoryToolRuntimeImpl(ToolsProtocolPrivate, ToolRuntime, RAGToolRuntime): query_config: RAGQueryConfig | None = None, ) -> RAGQueryResult: if not vector_db_ids: - return RAGQueryResult(content=None) + raise ValueError( + "No vector DBs were provided to the knowledge search tool. 
Please provide at least one vector DB ID." + ) query_config = query_config or RAGQueryConfig() query = await generate_rag_query( diff --git a/tests/unit/rag/test_rag_query.py b/tests/unit/rag/test_rag_query.py new file mode 100644 index 000000000..b9fd8cca4 --- /dev/null +++ b/tests/unit/rag/test_rag_query.py @@ -0,0 +1,19 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from unittest.mock import MagicMock + +import pytest + +from llama_stack.providers.inline.tool_runtime.rag.memory import MemoryToolRuntimeImpl + + +class TestRagQuery: + @pytest.mark.asyncio + async def test_query_raises_on_empty_vector_db_ids(self): + rag_tool = MemoryToolRuntimeImpl(config=MagicMock(), vector_io_api=MagicMock(), inference_api=MagicMock()) + with pytest.raises(ValueError): + await rag_tool.query(content=MagicMock(), vector_db_ids=[]) From db21eab713a194d404f949bd69c3f276abd0f517 Mon Sep 17 00:00:00 2001 From: Ihar Hrachyshka Date: Mon, 12 May 2025 05:49:59 -0400 Subject: [PATCH 3/5] fix: catch TimeoutError in place of asyncio.TimeoutError (#2131) # What does this PR do? As per docs [1], since python 3.11 wait_for() raises TimeoutError. Since we currently support python 3.10+, we have to catch both. [1]: https://docs.python.org/3.12/library/asyncio-task.html#asyncio.wait_for [//]: # (If resolving an issue, uncomment and update the line below) [//]: # (Closes #[issue-number]) ## Test Plan No explicit testing; just code hardening to reflect docs. 
[//]: # (## Documentation) Signed-off-by: Ihar Hrachyshka --- llama_stack/distribution/providers.py | 2 +- llama_stack/distribution/routers/routers.py | 2 +- llama_stack/distribution/server/server.py | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/llama_stack/distribution/providers.py b/llama_stack/distribution/providers.py index 157fd1e0e..29b7109dd 100644 --- a/llama_stack/distribution/providers.py +++ b/llama_stack/distribution/providers.py @@ -99,7 +99,7 @@ class ProviderImpl(Providers): try: health = await asyncio.wait_for(impl.health(), timeout=timeout) return api_name, health - except asyncio.TimeoutError: + except (asyncio.TimeoutError, TimeoutError): return ( api_name, HealthResponse( diff --git a/llama_stack/distribution/routers/routers.py b/llama_stack/distribution/routers/routers.py index 7e80a067f..371d34904 100644 --- a/llama_stack/distribution/routers/routers.py +++ b/llama_stack/distribution/routers/routers.py @@ -630,7 +630,7 @@ class InferenceRouter(Inference): continue health = await asyncio.wait_for(impl.health(), timeout=timeout) health_statuses[provider_id] = health - except asyncio.TimeoutError: + except (asyncio.TimeoutError, TimeoutError): health_statuses[provider_id] = HealthResponse( status=HealthStatus.ERROR, message=f"Health check timed out after {timeout} seconds", diff --git a/llama_stack/distribution/server/server.py b/llama_stack/distribution/server/server.py index 85c576753..e34a62b00 100644 --- a/llama_stack/distribution/server/server.py +++ b/llama_stack/distribution/server/server.py @@ -114,7 +114,7 @@ def translate_exception(exc: Exception) -> HTTPException | RequestValidationErro return HTTPException(status_code=400, detail=str(exc)) elif isinstance(exc, PermissionError): return HTTPException(status_code=403, detail=f"Permission denied: {str(exc)}") - elif isinstance(exc, TimeoutError): + elif isinstance(exc, asyncio.TimeoutError | TimeoutError): return HTTPException(status_code=504, detail=f"Operation 
timed out: {str(exc)}") elif isinstance(exc, NotImplementedError): return HTTPException(status_code=501, detail=f"Not implemented: {str(exc)}") @@ -139,7 +139,7 @@ async def shutdown(app): await asyncio.wait_for(impl.shutdown(), timeout=5) else: logger.warning("No shutdown method for %s", impl_name) - except asyncio.TimeoutError: + except (asyncio.TimeoutError, TimeoutError): logger.exception("Shutdown timeout for %s ", impl_name, exc_info=True) except (Exception, asyncio.CancelledError) as e: logger.exception("Failed to shutdown %s: %s", impl_name, {e}) From 9a6e91cd931cc4d7a738bb1b8c968f0631b75fbd Mon Sep 17 00:00:00 2001 From: Matthew Farrellee Date: Mon, 12 May 2025 09:27:01 -0400 Subject: [PATCH 4/5] fix: chromadb type hint (#2136) ``` $ INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \ CHROMADB_URL=http://localhost:8000 \ llama stack build --image-type conda --image-name llama \ --providers vector_io=remote::chromadb,inference=remote::ollama \ --run ... File ".../llama_stack/providers/remote/vector_io/chroma/chroma.py", line 31, in ChromaClientType = chromadb.AsyncHttpClient | chromadb.PersistentClient TypeError: unsupported operand type(s) for |: 'function' and 'function' ``` issue: AsyncHttpClient and PersistentClient are functions that return AsyncClientAPI and ClientAPI types, respectively. | cannot be used to construct a type from functions. previously the code was Union[AsyncHttpClient, PersistentClient], which did not trigger an error # What does this PR do? 
Closes #2135 --- llama_stack/providers/remote/vector_io/chroma/chroma.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/llama_stack/providers/remote/vector_io/chroma/chroma.py b/llama_stack/providers/remote/vector_io/chroma/chroma.py index 5381a48ef..a919963ab 100644 --- a/llama_stack/providers/remote/vector_io/chroma/chroma.py +++ b/llama_stack/providers/remote/vector_io/chroma/chroma.py @@ -26,8 +26,7 @@ from .config import ChromaVectorIOConfig as RemoteChromaVectorIOConfig log = logging.getLogger(__name__) - -ChromaClientType = chromadb.AsyncHttpClient | chromadb.PersistentClient +ChromaClientType = chromadb.api.AsyncClientAPI | chromadb.api.ClientAPI # this is a helper to allow us to use async and non-async chroma clients interchangeably From 675f34e79dcf5d5936a28776a9d000c911769430 Mon Sep 17 00:00:00 2001 From: Krzysztof Malczuk <2000krzysztof@gmail.com> Date: Mon, 12 May 2025 16:05:40 +0100 Subject: [PATCH 5/5] fix: Syntax error with missing stubs at the end of some function calls (#2116) # What does this PR do? This PR adds stubs to the end of functions create_agent_turn, create_openai_response and job_result. [//]: # (If resolving an issue, uncomment and update the line below) [//]: # (Closes #[issue-number]) ## Test Plan Ran provided unit tests [//]: # (## Documentation) --- llama_stack/apis/agents/agents.py | 2 ++ llama_stack/apis/eval/eval.py | 3 +++ 2 files changed, 5 insertions(+) diff --git a/llama_stack/apis/agents/agents.py b/llama_stack/apis/agents/agents.py index 91e57dbbe..f4367d09b 100644 --- a/llama_stack/apis/agents/agents.py +++ b/llama_stack/apis/agents/agents.py @@ -415,6 +415,7 @@ class Agents(Protocol): :returns: If stream=False, returns a Turn object. If stream=True, returns an SSE event stream of AgentTurnResponseStreamChunk """ + ... @webmethod( route="/agents/{agent_id}/session/{session_id}/turn/{turn_id}/resume", @@ -606,3 +607,4 @@ class Agents(Protocol): :param model: The underlying LLM used for completions. 
:param previous_response_id: (Optional) if specified, the new response will be a continuation of the previous response. This can be used to easily fork-off new responses from existing responses. """ + ... diff --git a/llama_stack/apis/eval/eval.py b/llama_stack/apis/eval/eval.py index 23ca89a94..38699d3f5 100644 --- a/llama_stack/apis/eval/eval.py +++ b/llama_stack/apis/eval/eval.py @@ -95,6 +95,7 @@ class Eval(Protocol): :param benchmark_config: The configuration for the benchmark. :return: The job that was created to run the evaluation. """ + ... @webmethod(route="/eval/benchmarks/{benchmark_id}/evaluations", method="POST") async def evaluate_rows( @@ -112,6 +113,7 @@ class Eval(Protocol): :param benchmark_config: The configuration for the benchmark. :return: EvaluateResponse object containing generations and scores """ + ... @webmethod(route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}", method="GET") async def job_status(self, benchmark_id: str, job_id: str) -> Job: @@ -140,3 +142,4 @@ class Eval(Protocol): :param job_id: The ID of the job to get the result of. :return: The result of the job. """ + ...