From 4aa586d7af218f79f9986d7d9908fe05c721c1f9 Mon Sep 17 00:00:00 2001
From: Jaideep Rao
Date: Fri, 26 Sep 2025 08:17:33 -0400
Subject: [PATCH] Fix: Ensure that tool calls with no arguments get handled
 correctly #3560

When a model decides to use an MCP tool call that requires no arguments, it
sets the arguments field to None. This causes validation errors, because the
field gets dropped when the request is parsed by an OpenAI-compatible
inference provider such as vLLM.

This PR ensures that, as soon as the tool call arguments have been
accumulated while streaming, any function arguments still set to None are
replaced with "{}".

Closes #3456

Added a new unit test to verify that tool calls with function arguments set
to None get handled correctly.

Signed-off-by: Jaideep Rao
---
 .../meta_reference/responses/streaming.py     |   5 +-
 .../agents/test_openai_responses.py           |  33 +++++
 .../meta_reference/test_openai_responses.py   | 126 ++++++++++++++++++
 3 files changed, 163 insertions(+), 1 deletion(-)

diff --git a/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py b/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py
index 2f45ad2a3..82c72fafc 100644
--- a/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py
+++ b/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py
@@ -328,8 +328,11 @@ class StreamingResponseOrchestrator:
 
         # Emit arguments.done events for completed tool calls (differentiate between MCP and function calls)
         for tool_call_index in sorted(chat_response_tool_calls.keys()):
+            tool_call = chat_response_tool_calls[tool_call_index]
+            # Ensure that arguments, if sent back to the inference provider, are not None
+            tool_call.function.arguments = tool_call.function.arguments or "{}"
             tool_call_item_id = tool_call_item_ids[tool_call_index]
-            final_arguments = chat_response_tool_calls[tool_call_index].function.arguments or ""
+            final_arguments = tool_call.function.arguments
             tool_call_name = chat_response_tool_calls[tool_call_index].function.name
 
             # Check if this is an MCP tool call
diff --git a/tests/integration/agents/test_openai_responses.py b/tests/integration/agents/test_openai_responses.py
index c783cf99b..6648257e6 100644
--- a/tests/integration/agents/test_openai_responses.py
+++ b/tests/integration/agents/test_openai_responses.py
@@ -264,3 +264,36 @@ def test_function_call_output_response(openai_client, client_with_models, text_m
     assert (
         "sunny" in response2.output[0].content[0].text.lower() or "warm" in response2.output[0].content[0].text.lower()
     )
+
+
+def test_function_call_output_response_with_none_arguments(openai_client, client_with_models, text_model_id):
+    """Test handling of function call outputs in responses when the function accepts no arguments."""
+    if isinstance(client_with_models, LlamaStackAsLibraryClient):
+        pytest.skip("OpenAI responses are not supported when testing with library client yet.")
+
+    client = openai_client
+
+    # First create a response that triggers a function call
+    response = client.responses.create(
+        model=text_model_id,
+        input=[
+            {
+                "role": "user",
+                "content": "what's the current time? You MUST call the `get_current_time` function to find out.",
+            }
+        ],
+        tools=[
+            {
+                "type": "function",
+                "name": "get_current_time",
+                "description": "Get the current time",
+                "parameters": {},
+            }
+        ],
+        stream=False,
+    )
+
+    # Verify we got a function call
+    assert response.output[0].type == "function_call"
+    assert response.output[0].arguments == "{}"
+    _ = response.output[0].call_id
diff --git a/tests/unit/providers/agents/meta_reference/test_openai_responses.py b/tests/unit/providers/agents/meta_reference/test_openai_responses.py
index 38ce365c1..0e6c5245a 100644
--- a/tests/unit/providers/agents/meta_reference/test_openai_responses.py
+++ b/tests/unit/providers/agents/meta_reference/test_openai_responses.py
@@ -328,6 +328,132 @@ async def test_create_openai_response_with_tool_call_type_none(openai_responses_
     assert chunks[5].response.output[0].name == "get_weather"
 
 
+async def test_create_openai_response_with_tool_call_function_arguments_none(openai_responses_impl, mock_inference_api):
+    """Test creating an OpenAI response with a tool call whose function arguments are None, either because the function accepts no arguments or because its optional arguments were omitted."""
+    # Setup
+    input_text = "What is the time right now?"
+    model = "meta-llama/Llama-3.1-8B-Instruct"
+
+    async def fake_stream_toolcall():
+        yield ChatCompletionChunk(
+            id="123",
+            choices=[
+                Choice(
+                    index=0,
+                    delta=ChoiceDelta(
+                        tool_calls=[
+                            ChoiceDeltaToolCall(
+                                index=0,
+                                id="tc_123",
+                                function=ChoiceDeltaToolCallFunction(name="get_current_time", arguments=None),
+                                type=None,
+                            )
+                        ]
+                    ),
+                ),
+            ],
+            created=1,
+            model=model,
+            object="chat.completion.chunk",
+        )
+
+    mock_inference_api.openai_chat_completion.return_value = fake_stream_toolcall()
+
+    # Function does not accept arguments
+    result = await openai_responses_impl.create_openai_response(
+        input=input_text,
+        model=model,
+        stream=True,
+        temperature=0.1,
+        tools=[
+            OpenAIResponseInputToolFunction(
+                name="get_current_time",
+                description="Get current time for system's timezone",
+                parameters={},
+            )
+        ],
+    )
+
+    # Collect the streamed chunks from our mocked inference result
+    chunks = [chunk async for chunk in result]
+
+    # Verify event types
+    # Should have: response.created, output_item.added, function_call_arguments.done,
+    # output_item.done, response.completed (no arguments.delta, since arguments were None)
+    assert len(chunks) == 5
+
+    # Verify inference API was called correctly (after iterating over result)
+    first_call = mock_inference_api.openai_chat_completion.call_args_list[0]
+    assert first_call.kwargs["messages"][0].content == input_text
+    assert first_call.kwargs["tools"] is not None
+    assert first_call.kwargs["temperature"] == 0.1
+
+    # Check response.created event (should have empty output)
+    assert chunks[0].type == "response.created"
+    assert len(chunks[0].response.output) == 0
+
+    # Check streaming events
+    assert chunks[1].type == "response.output_item.added"
+    assert chunks[2].type == "response.function_call_arguments.done"
+    assert chunks[3].type == "response.output_item.done"
+
+    # Check response.completed event (should have the tool call with arguments set to "{}")
+    assert chunks[4].type == "response.completed"
+    assert len(chunks[4].response.output) == 1
+    assert chunks[4].response.output[0].type == "function_call"
+    assert chunks[4].response.output[0].name == "get_current_time"
+    assert chunks[4].response.output[0].arguments == "{}"
+
+    mock_inference_api.openai_chat_completion.return_value = fake_stream_toolcall()
+
+    # Function accepts optional arguments
+    result = await openai_responses_impl.create_openai_response(
+        input=input_text,
+        model=model,
+        stream=True,
+        temperature=0.1,
+        tools=[
+            OpenAIResponseInputToolFunction(
+                name="get_current_time",
+                description="Get current time for system's timezone",
+                parameters={
+                    "timezone": "string",
+                },
+            )
+        ],
+    )
+
+    # Collect the streamed chunks from our mocked inference result
+    chunks = [chunk async for chunk in result]
+
+    # Verify event types
+    # Should have: response.created, output_item.added, function_call_arguments.done,
+    # output_item.done, response.completed (no arguments.delta, since arguments were None)
+    assert len(chunks) == 5
+
+    # Verify inference API was called correctly (after iterating over result)
+    first_call = mock_inference_api.openai_chat_completion.call_args_list[0]
+    assert first_call.kwargs["messages"][0].content == input_text
+    assert first_call.kwargs["tools"] is not None
+    assert first_call.kwargs["temperature"] == 0.1
+
+    # Check response.created event (should have empty output)
+    assert chunks[0].type == "response.created"
+    assert len(chunks[0].response.output) == 0
+
+    # Check streaming events
+    assert chunks[1].type == "response.output_item.added"
+    assert chunks[2].type == "response.function_call_arguments.done"
+    assert chunks[3].type == "response.output_item.done"
+
+    # Check response.completed event (should have the tool call with arguments set to "{}")
+    assert chunks[4].type == "response.completed"
+    assert len(chunks[4].response.output) == 1
+    assert chunks[4].response.output[0].type == "function_call"
+    assert chunks[4].response.output[0].name == "get_current_time"
+    assert chunks[4].response.output[0].arguments == "{}"
+
+
 async def test_create_openai_response_with_multiple_messages(openai_responses_impl, mock_inference_api):
     """Test creating an OpenAI response with multiple messages."""
     # Setup
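
For context, a minimal standalone sketch of the normalization the streaming.py hunk applies. `SimpleNamespace` and the literal dict below are illustrative stand-ins (not part of the patch) for the accumulated `chat_response_tool_calls` objects:

from types import SimpleNamespace

# Illustrative stand-ins for accumulated streaming tool calls, keyed by index;
# the real objects are OpenAI-compatible tool call deltas.
chat_response_tool_calls = {
    0: SimpleNamespace(function=SimpleNamespace(name="get_current_time", arguments=None)),
    1: SimpleNamespace(function=SimpleNamespace(name="get_weather", arguments='{"city": "Paris"}')),
}

for tool_call_index in sorted(chat_response_tool_calls.keys()):
    tool_call = chat_response_tool_calls[tool_call_index]
    # None arguments would fail validation if echoed back to an OpenAI-compatible
    # provider such as vLLM, so normalize them to an empty JSON object string.
    tool_call.function.arguments = tool_call.function.arguments or "{}"

assert chat_response_tool_calls[0].function.arguments == "{}"
assert chat_response_tool_calls[1].function.arguments == '{"city": "Paris"}'

Note that `or "{}"` also rewrites an empty-string arguments value to "{}", which matches the intent here: both None and "" are invalid JSON payloads for a function call's arguments.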