From 538d601472d805c6bceb3ca3831a3526147ce767 Mon Sep 17 00:00:00 2001
From: Daniel Alvarez
Date: Tue, 15 Apr 2025 16:06:51 +0200
Subject: [PATCH] Do not send an empty 'tools' param to remote vllm

Fixes: #1955

Since 0.2.0, vLLM receives an empty list (vs ``None`` in 0.1.9 and earlier)
when no tools are configured, which causes the issue described in #1955.

This patch avoids sending the 'tools' param to vLLM altogether instead of
sending an empty list. It also adds a small unit test to avoid regressions.

Signed-off-by: Daniel Alvarez
---
 .../providers/remote/inference/vllm/vllm.py   |  3 ++-
 .../providers/inference/test_remote_vllm.py   | 18 +++++++++++++++++-
 2 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/llama_stack/providers/remote/inference/vllm/vllm.py b/llama_stack/providers/remote/inference/vllm/vllm.py
index 2b9eae1e9..d141afa86 100644
--- a/llama_stack/providers/remote/inference/vllm/vllm.py
+++ b/llama_stack/providers/remote/inference/vllm/vllm.py
@@ -374,7 +374,8 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate):
             options["max_tokens"] = self.config.max_tokens
 
         input_dict: dict[str, Any] = {}
-        if isinstance(request, ChatCompletionRequest) and request.tools is not None:
+        # Only include the 'tools' param if there are any. Sending an empty list to vLLM can break things.
+        if isinstance(request, ChatCompletionRequest) and request.tools:
             input_dict = {"tools": _convert_to_vllm_tools_in_request(request.tools)}
 
         if isinstance(request, ChatCompletionRequest):
diff --git a/tests/unit/providers/inference/test_remote_vllm.py b/tests/unit/providers/inference/test_remote_vllm.py
index 9c2281d85..88399198d 100644
--- a/tests/unit/providers/inference/test_remote_vllm.py
+++ b/tests/unit/providers/inference/test_remote_vllm.py
@@ -26,7 +26,12 @@ from openai.types.chat.chat_completion_chunk import (
 )
 from openai.types.model import Model as OpenAIModel
 
-from llama_stack.apis.inference import ToolChoice, ToolConfig
+from llama_stack.apis.inference import (
+    ChatCompletionRequest,
+    ToolChoice,
+    ToolConfig,
+    UserMessage,
+)
 from llama_stack.apis.models import Model
 from llama_stack.models.llama.datatypes import StopReason
 from llama_stack.providers.remote.inference.vllm.config import VLLMInferenceAdapterConfig
@@ -232,3 +237,14 @@ def test_chat_completion_doesnt_block_event_loop(caplog):
     # above.
     asyncio_warnings = [record.message for record in caplog.records if record.name == "asyncio"]
     assert not asyncio_warnings
+
+
+@pytest.mark.asyncio
+async def test_get_params_empty_tools(vllm_inference_adapter):
+    request = ChatCompletionRequest(
+        tools=[],
+        model="test_model",
+        messages=[UserMessage(content="test")],
+    )
+    params = await vllm_inference_adapter._get_params(request)
+    assert "tools" not in params
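
A minimal standalone sketch of the behaviour change described above, for
illustration only (the helper name ``build_input_dict`` is hypothetical and
not part of the patch): the check moves from ``is not None`` to a plain
truthiness test, so an empty tool list is treated the same as no tools and
the 'tools' key is never forwarded to vLLM.

    from typing import Any


    def build_input_dict(tools: list[Any] | None) -> dict[str, Any]:
        # Hypothetical helper mirroring the guard in _get_params:
        # include 'tools' only when the list is non-empty.
        input_dict: dict[str, Any] = {}
        if tools:  # falsy for both None and [], unlike `tools is not None`
            input_dict["tools"] = tools
        return input_dict


    assert "tools" not in build_input_dict(None)  # pre-0.2.0 case, still omitted
    assert "tools" not in build_input_dict([])    # empty list no longer forwarded
    assert "tools" in build_input_dict([{"type": "function"}])  # real tools still sent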