Mirror of https://github.com/meta-llama/llama-stack.git
Do not send an empty 'tools' param to remote vllm
Fixes: #1955

Since 0.2.0, vLLM gets an empty list (vs. ``None`` in 0.1.9 and before) when no tools are configured, which causes the issue described in #1955. This patch avoids sending the 'tools' param to vLLM altogether instead of sending an empty list. It also adds a small unit test to guard against regressions.

Signed-off-by: Daniel Alvarez <dalvarez@redhat.com>
parent 83b5523e2d
commit 538d601472
2 changed files with 19 additions and 2 deletions
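A minimal, self-contained sketch of the behavior this commit enforces. The `build_request_params` helper below is hypothetical and exists only to illustrate the guard; in the adapter itself the check lives in `_get_params`, as shown in the diff:

from typing import Any


def build_request_params(
    model: str,
    messages: list[dict[str, Any]],
    tools: list[dict[str, Any]] | None,
) -> dict[str, Any]:
    """Hypothetical helper: attach 'tools' only when at least one tool is configured."""
    params: dict[str, Any] = {"model": model, "messages": messages}
    # A truthiness check covers both None (0.1.9 and before) and [] (0.2.0+),
    # so an empty 'tools' list is never sent to the server.
    if tools:
        params["tools"] = tools
    return params


msgs = [{"role": "user", "content": "hi"}]
assert "tools" not in build_request_params("m", msgs, None)
assert "tools" not in build_request_params("m", msgs, [])
assert "tools" in build_request_params("m", msgs, [{"type": "function"}])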
@@ -374,7 +374,8 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate):
             options["max_tokens"] = self.config.max_tokens
 
         input_dict: dict[str, Any] = {}
-        if isinstance(request, ChatCompletionRequest) and request.tools is not None:
+        # Only include the 'tools' param if there is any. It can break things if an empty list is sent to the vLLM.
+        if isinstance(request, ChatCompletionRequest) and request.tools:
             input_dict = {"tools": _convert_to_vllm_tools_in_request(request.tools)}
 
         if isinstance(request, ChatCompletionRequest):
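The heart of the adapter change is swapping an `is not None` check for a truthiness check. A small illustration of why that matters, assuming `tools` holds whatever the request carries:

tools: list = []  # what the request carries in 0.2.0+ when no tools are configured

# Old guard: an empty list is not None, so 'tools' would still be sent.
print(tools is not None)  # True

# New guard: an empty list is falsy, so the 'tools' param is omitted entirely.
print(bool(tools))  # False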
@@ -26,7 +26,12 @@ from openai.types.chat.chat_completion_chunk import (
 )
 from openai.types.model import Model as OpenAIModel
 
-from llama_stack.apis.inference import ToolChoice, ToolConfig
+from llama_stack.apis.inference import (
+    ChatCompletionRequest,
+    ToolChoice,
+    ToolConfig,
+    UserMessage,
+)
 from llama_stack.apis.models import Model
 from llama_stack.models.llama.datatypes import StopReason
 from llama_stack.providers.remote.inference.vllm.config import VLLMInferenceAdapterConfig
@@ -232,3 +237,14 @@ def test_chat_completion_doesnt_block_event_loop(caplog):
     # above.
     asyncio_warnings = [record.message for record in caplog.records if record.name == "asyncio"]
     assert not asyncio_warnings
+
+
+@pytest.mark.asyncio
+async def test_get_params_empty_tools(vllm_inference_adapter):
+    request = ChatCompletionRequest(
+        tools=[],
+        model="test_model",
+        messages=[UserMessage(content="test")],
+    )
+    params = await vllm_inference_adapter._get_params(request)
+    assert "tools" not in params
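The new test relies on the test module's existing `vllm_inference_adapter` fixture and `pytest-asyncio`. A stripped-down, self-contained sketch of the same pattern, with a hypothetical async `get_params` stand-in replacing the real adapter method:

from typing import Any

import pytest


async def get_params(tools: list[Any] | None) -> dict[str, Any]:
    """Hypothetical stand-in for the adapter's _get_params."""
    params: dict[str, Any] = {"stream": False}
    if tools:  # omit the key entirely for [] or None
        params["tools"] = tools
    return params


@pytest.mark.asyncio
async def test_get_params_empty_tools():
    params = await get_params(tools=[])
    assert "tools" not in params

With pytest-asyncio installed, the real test can be selected by name, e.g. `pytest -k test_get_params_empty_tools`.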