Do not send an empty 'tools' param to remote vllm

Fixes: #1955

Since 0.2.0, the remote vLLM server receives an empty list for the 'tools'
param (vs. ``None`` in 0.1.9 and earlier) when no tools are configured, which
causes the issue described in #1955. This patch omits the 'tools' param
altogether instead of sending an empty list.

It also adds a small unit test to avoid regressions.

Signed-off-by: Daniel Alvarez <dalvarez@redhat.com>
Daniel Alvarez 2025-04-15 16:06:51 +02:00
parent 83b5523e2d
commit 538d601472
2 changed files with 19 additions and 2 deletions


@@ -374,7 +374,8 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate):
         options["max_tokens"] = self.config.max_tokens

         input_dict: dict[str, Any] = {}
-        if isinstance(request, ChatCompletionRequest) and request.tools is not None:
+        # Only include the 'tools' param if there is any. It can break things if an empty list is sent to the vLLM.
+        if isinstance(request, ChatCompletionRequest) and request.tools:
             input_dict = {"tools": _convert_to_vllm_tools_in_request(request.tools)}

         if isinstance(request, ChatCompletionRequest):
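
A note on the design choice above: the new guard checks the truthiness of
request.tools rather than ``is not None``, so both the pre-0.2.0 value
(``None``) and the 0.2.0 value (an empty list) skip the 'tools' key. Below is
a minimal, self-contained sketch of that behaviour; build_input_dict is a
hypothetical stand-in for the adapter's payload construction, not its real
API, and the tool dict is illustrative only:

    # Sketch of the guard: attach "tools" only when the request actually
    # carries tool definitions; None and [] are both skipped.
    from typing import Any


    def build_input_dict(tools: list[Any] | None) -> dict[str, Any]:
        input_dict: dict[str, Any] = {}
        if tools:  # falsy for both None and []
            # the real adapter converts these via _convert_to_vllm_tools_in_request
            input_dict["tools"] = tools
        return input_dict


    assert "tools" not in build_input_dict(None)
    assert "tools" not in build_input_dict([])
    assert "tools" in build_input_dict([{"type": "function"}])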


@@ -26,7 +26,12 @@ from openai.types.chat.chat_completion_chunk import (
 )
 from openai.types.model import Model as OpenAIModel

-from llama_stack.apis.inference import ToolChoice, ToolConfig
+from llama_stack.apis.inference import (
+    ChatCompletionRequest,
+    ToolChoice,
+    ToolConfig,
+    UserMessage,
+)
 from llama_stack.apis.models import Model
 from llama_stack.models.llama.datatypes import StopReason
 from llama_stack.providers.remote.inference.vllm.config import VLLMInferenceAdapterConfig
@@ -232,3 +237,14 @@ def test_chat_completion_doesnt_block_event_loop(caplog):
     # above.
     asyncio_warnings = [record.message for record in caplog.records if record.name == "asyncio"]
     assert not asyncio_warnings
+
+
+@pytest.mark.asyncio
+async def test_get_params_empty_tools(vllm_inference_adapter):
+    request = ChatCompletionRequest(
+        tools=[],
+        model="test_model",
+        messages=[UserMessage(content="test")],
+    )
+    params = await vllm_inference_adapter._get_params(request)
+    assert "tools" not in params