From b5a9ef4c6d9dd2a6d16383107bb9765da66a3faa Mon Sep 17 00:00:00 2001
From: Daniel Alvarez Sanchez <dalvarez@redhat.com>
Date: Wed, 16 Apr 2025 02:31:12 +0200
Subject: [PATCH] fix: Do not send an empty 'tools' list to remote vllm (#1957)

Fixes: #1955

Since 0.2.0, the vLLM gets an empty list (vs ``None``in 0.1.9 and
before) when there are no tools configured which causes the issue
described in #1955 p. This patch avoids sending the 'tools' param to the
vLLM altogether instead of an empty list.

It also adds a small unit test to avoid regressions.

The OpenAI
[specification](https://platform.openai.com/docs/api-reference/chat/create)
does not explicitly state that the list cannot be empty but I found this
out through experimentation and it might depend on the actual remote
vllm. In any case, as this parameter is Optional, is best to skip it
altogether if there's no tools configured.

Signed-off-by: Daniel Alvarez <dalvarez@redhat.com>
---
 .../providers/remote/inference/vllm/vllm.py    |  3 ++-
 .../providers/inference/test_remote_vllm.py    | 18 +++++++++++++++++-
 2 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/llama_stack/providers/remote/inference/vllm/vllm.py b/llama_stack/providers/remote/inference/vllm/vllm.py
index 2b9eae1e9..d141afa86 100644
--- a/llama_stack/providers/remote/inference/vllm/vllm.py
+++ b/llama_stack/providers/remote/inference/vllm/vllm.py
@@ -374,7 +374,8 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate):
             options["max_tokens"] = self.config.max_tokens
 
         input_dict: dict[str, Any] = {}
-        if isinstance(request, ChatCompletionRequest) and request.tools is not None:
+        # Only include the 'tools' param if there is any. It can break things if an empty list is sent to the vLLM.
+        if isinstance(request, ChatCompletionRequest) and request.tools:
             input_dict = {"tools": _convert_to_vllm_tools_in_request(request.tools)}
 
         if isinstance(request, ChatCompletionRequest):
diff --git a/tests/unit/providers/inference/test_remote_vllm.py b/tests/unit/providers/inference/test_remote_vllm.py
index 9c2281d85..88399198d 100644
--- a/tests/unit/providers/inference/test_remote_vllm.py
+++ b/tests/unit/providers/inference/test_remote_vllm.py
@@ -26,7 +26,12 @@ from openai.types.chat.chat_completion_chunk import (
 )
 from openai.types.model import Model as OpenAIModel
 
-from llama_stack.apis.inference import ToolChoice, ToolConfig
+from llama_stack.apis.inference import (
+    ChatCompletionRequest,
+    ToolChoice,
+    ToolConfig,
+    UserMessage,
+)
 from llama_stack.apis.models import Model
 from llama_stack.models.llama.datatypes import StopReason
 from llama_stack.providers.remote.inference.vllm.config import VLLMInferenceAdapterConfig
@@ -232,3 +237,14 @@ def test_chat_completion_doesnt_block_event_loop(caplog):
     # above.
     asyncio_warnings = [record.message for record in caplog.records if record.name == "asyncio"]
     assert not asyncio_warnings
+
+
+@pytest.mark.asyncio
+async def test_get_params_empty_tools(vllm_inference_adapter):
+    request = ChatCompletionRequest(
+        tools=[],
+        model="test_model",
+        messages=[UserMessage(content="test")],
+    )
+    params = await vllm_inference_adapter._get_params(request)
+    assert "tools" not in params