Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-08-02 08:44:44 +00:00)
Added proper support for calling tools without parameters.
commit 3be85c717f
parent 7a8e398f09
2 changed files with 85 additions and 31 deletions
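
The bug this commit fixes is visible in the first hunk below: a tool call with no parameters parses to an empty dict, which is falsy in Python, so the old `if args:` gate never emitted a tool-call chunk for it. A minimal standalone sketch of the pitfall (illustration only, not llama-stack code):

    import json

    # vLLM streams an empty argument string for a tool call that takes no parameters.
    args_str = ""

    # Old parsing logic: an empty string becomes an empty dict.
    args = {} if not args_str else json.loads(args_str)

    assert args == {}   # the call is perfectly valid...
    assert not args     # ...but `if args:` is False, so no succeeded chunk was emitted
    # `elif args_str:` was also False, so the tool call was silently dropped.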
@@ -166,47 +166,52 @@ def _process_vllm_chat_completion_end_of_stream(
 ) -> list[OpenAIChatCompletionChunk]:
     chunks = []
 
-    args_str = tool_call_buf.arguments
-    args = None
-    try:
-        args = {} if not args_str else json.loads(args_str)
-    except Exception as e:
-        log.warning(f"Failed to parse tool call buffer arguments: {args_str} \nError: {e}")
-
     if finish_reason is not None:
         actual_finish_reason = _convert_to_vllm_finish_reason(finish_reason)
     else:
         actual_finish_reason = StopReason.end_of_message
 
-    if args:
-        chunks.append(
-            ChatCompletionResponseStreamChunk(
-                event=ChatCompletionResponseEvent(
-                    event_type=current_event_type,
-                    delta=ToolCallDelta(
-                        tool_call=ToolCall(
-                            call_id=tool_call_buf.call_id,
-                            tool_name=tool_call_buf.tool_name,
-                            arguments=args,
-                            arguments_json=args_str,
-                        ),
-                        parse_status=ToolCallParseStatus.succeeded,
-                    ),
-                )
-            )
-        )
-    elif args_str:
-        chunks.append(
-            ChatCompletionResponseStreamChunk(
-                event=ChatCompletionResponseEvent(
-                    event_type=ChatCompletionResponseEventType.progress,
-                    delta=ToolCallDelta(
-                        tool_call=str(tool_call_buf),
-                        parse_status=ToolCallParseStatus.failed,
-                    ),
-                )
-            )
-        )
+    if tool_call_buf.tool_name:
+        # at least one tool call request is received
+
+        args_str = tool_call_buf.arguments or "{}"
+        args = {}
+        args_parsed_successfully = True
+        try:
+            args = json.loads(args_str)
+        except Exception as e:
+            args_parsed_successfully = False
+            log.warning(f"Failed to parse tool call buffer arguments: {args_str} \nError: {e}")
+
+        if args_parsed_successfully:
+            chunks.append(
+                ChatCompletionResponseStreamChunk(
+                    event=ChatCompletionResponseEvent(
+                        event_type=current_event_type,
+                        delta=ToolCallDelta(
+                            tool_call=ToolCall(
+                                call_id=tool_call_buf.call_id,
+                                tool_name=tool_call_buf.tool_name,
+                                arguments=args,
+                                arguments_json=args_str,
+                            ),
+                            parse_status=ToolCallParseStatus.succeeded,
+                        ),
+                    )
+                )
+            )
+        else:
+            chunks.append(
+                ChatCompletionResponseStreamChunk(
+                    event=ChatCompletionResponseEvent(
+                        event_type=ChatCompletionResponseEventType.progress,
+                        delta=ToolCallDelta(
+                            tool_call=str(tool_call_buf),
+                            parse_status=ToolCallParseStatus.failed,
+                        ),
+                    )
+                )
+            )
 
     chunks.append(
         ChatCompletionResponseStreamChunk(
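
A condensed sketch of the rewritten parsing flow; the helper name parse_tool_args is hypothetical (the real code inlines this logic), and note that the branch is now entered whenever tool_call_buf.tool_name is set rather than when the parsed arguments happen to be truthy:

    import json
    import logging

    log = logging.getLogger(__name__)

    def parse_tool_args(args_str: str | None) -> tuple[dict, bool]:
        # Default a missing/empty argument string to "{}" and report whether
        # parsing succeeded, so the caller can choose between
        # ToolCallParseStatus.succeeded and ToolCallParseStatus.failed.
        args_str = args_str or "{}"
        try:
            return json.loads(args_str), True
        except Exception as e:
            log.warning(f"Failed to parse tool call buffer arguments: {args_str} \nError: {e}")
            return {}, False

    assert parse_tool_args("") == ({}, True)               # no-arg call now succeeds
    assert parse_tool_args('{"city": "Oslo"}') == ({"city": "Oslo"}, True)
    assert parse_tool_args("{broken")[1] is False          # malformed JSON -> failed chunk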
@@ -348,3 +348,52 @@ async def test_process_vllm_chat_completion_stream_response_no_finish_reason():
     assert chunks[-2].event.delta.type == "tool_call"
     assert chunks[-2].event.delta.tool_call.tool_name == mock_tool_name
     assert chunks[-2].event.delta.tool_call.arguments == mock_tool_arguments
+
+
+@pytest.mark.asyncio
+async def test_process_vllm_chat_completion_stream_response_tool_without_args():
+    """
+    Tests the edge case where no arguments are provided for the tool call.
+    Tool calls with no arguments should be treated as regular tool calls, which was not the case until now.
+    """
+    mock_tool_name = "mock_tool"
+
+    async def mock_stream():
+        mock_chunks = [
+            OpenAIChatCompletionChunk(
+                id="chunk-1",
+                created=1,
+                model="foo",
+                object="chat.completion.chunk",
+                choices=[
+                    {
+                        "delta": {
+                            "content": None,
+                            "tool_calls": [
+                                {
+                                    "index": 0,
+                                    "id": "mock_id",
+                                    "type": "function",
+                                    "function": {
+                                        "name": mock_tool_name,
+                                        "arguments": "",
+                                    },
+                                }
+                            ],
+                        },
+                        "finish_reason": None,
+                        "logprobs": None,
+                        "index": 0,
+                    }
+                ],
+            ),
+        ]
+        for chunk in mock_chunks:
+            yield chunk
+
+    chunks = [chunk async for chunk in _process_vllm_chat_completion_stream_response(mock_stream())]
+    assert len(chunks) == 2
+    assert chunks[-1].event.event_type == ChatCompletionResponseEventType.complete
+    assert chunks[-2].event.delta.type == "tool_call"
+    assert chunks[-2].event.delta.tool_call.tool_name == mock_tool_name
+    assert chunks[-2].event.delta.tool_call.arguments == {}
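
Because the mock stream ends without a finish_reason, the processor emits exactly two chunks: the succeeded tool-call delta and a final complete chunk whose stop reason falls back to StopReason.end_of_message (the else branch in the first hunk), which is what the trailing assertions check. The test is async, so running it requires the pytest-asyncio plugin; an illustrative invocation (the test file path is omitted here, as it is not shown in this diff):

    pytest -k test_process_vllm_chat_completion_stream_response_tool_without_args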