From b5a9ef4c6d9dd2a6d16383107bb9765da66a3faa Mon Sep 17 00:00:00 2001 From: Daniel Alvarez Sanchez Date: Wed, 16 Apr 2025 02:31:12 +0200 Subject: [PATCH] fix: Do not send an empty 'tools' list to remote vllm (#1957) Fixes: #1955 Since 0.2.0, the vLLM gets an empty list (vs. ``None`` in 0.1.9 and before) when there are no tools configured, which causes the issue described in #1955. This patch avoids sending the 'tools' param to the vLLM altogether instead of an empty list. It also adds a small unit test to avoid regressions. The OpenAI [specification](https://platform.openai.com/docs/api-reference/chat/create) does not explicitly state that the list cannot be empty, but I found this out through experimentation, and it might depend on the actual remote vLLM. In any case, as this parameter is Optional, it is best to skip it altogether if there are no tools configured. Signed-off-by: Daniel Alvarez --- .../providers/remote/inference/vllm/vllm.py | 3 ++- .../providers/inference/test_remote_vllm.py | 18 +++++++++++++++++- 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/llama_stack/providers/remote/inference/vllm/vllm.py b/llama_stack/providers/remote/inference/vllm/vllm.py index 2b9eae1e9..d141afa86 100644 --- a/llama_stack/providers/remote/inference/vllm/vllm.py +++ b/llama_stack/providers/remote/inference/vllm/vllm.py @@ -374,7 +374,8 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate): options["max_tokens"] = self.config.max_tokens input_dict: dict[str, Any] = {} - if isinstance(request, ChatCompletionRequest) and request.tools is not None: + # Only include the 'tools' param if there is any. It can break things if an empty list is sent to the vLLM. 
+ if isinstance(request, ChatCompletionRequest) and request.tools: input_dict = {"tools": _convert_to_vllm_tools_in_request(request.tools)} if isinstance(request, ChatCompletionRequest): diff --git a/tests/unit/providers/inference/test_remote_vllm.py b/tests/unit/providers/inference/test_remote_vllm.py index 9c2281d85..88399198d 100644 --- a/tests/unit/providers/inference/test_remote_vllm.py +++ b/tests/unit/providers/inference/test_remote_vllm.py @@ -26,7 +26,12 @@ from openai.types.chat.chat_completion_chunk import ( ) from openai.types.model import Model as OpenAIModel -from llama_stack.apis.inference import ToolChoice, ToolConfig +from llama_stack.apis.inference import ( + ChatCompletionRequest, + ToolChoice, + ToolConfig, + UserMessage, +) from llama_stack.apis.models import Model from llama_stack.models.llama.datatypes import StopReason from llama_stack.providers.remote.inference.vllm.config import VLLMInferenceAdapterConfig @@ -232,3 +237,14 @@ def test_chat_completion_doesnt_block_event_loop(caplog): # above. asyncio_warnings = [record.message for record in caplog.records if record.name == "asyncio"] assert not asyncio_warnings + + +@pytest.mark.asyncio +async def test_get_params_empty_tools(vllm_inference_adapter): + request = ChatCompletionRequest( + tools=[], + model="test_model", + messages=[UserMessage(content="test")], + ) + params = await vllm_inference_adapter._get_params(request) + assert "tools" not in params