# What does this PR do?
When a model decides to use an MCP tool call that requires no arguments,
it sets the `arguments` field to `None`. This surfaces to the user as a
`400 Bad Request` error, caused by validation failures further down the
stack: the field gets dropped when the request is parsed by an
OpenAI-compatible inference provider such as vLLM.

This PR ensures that, as soon as the tool call arguments have been
accumulated during streaming, no tool call's function arguments are left
as `None`; any that are get replaced with `"{}"`.
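For illustration, a minimal sketch of the guard (the
`ensure_tool_call_arguments` helper name is hypothetical, and the
tool-call shape assumes OpenAI-style `function.arguments` fields; this is
not the exact code in the PR):

```python
def ensure_tool_call_arguments(tool_calls):
    """Replace None function arguments with "{}" so that
    OpenAI-compatible providers (e.g. vLLM) don't reject the
    request with a 400 validation error."""
    for tool_call in tool_calls or []:
        if tool_call.function.arguments is None:
            tool_call.function.arguments = "{}"
    return tool_calls
```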
Closes #3456
## Test Plan
Added a new unit test to verify that tool calls with function arguments
set to `None` are handled correctly.
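The test is roughly along these lines (illustrative only; it reuses the
hypothetical `ensure_tool_call_arguments` helper from the sketch above
rather than the actual names in the PR):

```python
from types import SimpleNamespace

def test_none_tool_call_arguments_become_empty_json():
    # Build a tool call whose function arguments are None, run it
    # through the guard, and check they were replaced with "{}".
    tool_call = SimpleNamespace(function=SimpleNamespace(arguments=None))
    fixed = ensure_tool_call_arguments([tool_call])
    assert fixed[0].function.arguments == "{}"
```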
---------
Signed-off-by: Jaideep Rao <jrao@redhat.com>
Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
Co-authored-by: Ashwin Bharambe <ashwin.bharambe@gmail.com>
# What does this PR do?
Fireworks doesn't allow `response_format` together with tool use. The
default response format is `'text'` anyway, so we can safely omit it.
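A sketch of the omission, assuming the request is assembled as a plain
params dict (the `build_completion_params` name is illustrative, not the
provider's actual function):

```python
def build_completion_params(params: dict) -> dict:
    # Fireworks rejects response_format alongside tool use, and
    # "text" is the default anyway, so drop it when it carries no
    # information beyond the default.
    response_format = params.get("response_format")
    if response_format is None or response_format.get("type") == "text":
        params.pop("response_format", None)
    return params
```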
## Test Plan
The script below failed without the change and runs successfully after it.
```python
#!/usr/bin/env python3
"""
Script to test Responses API with kubernetes-mcp-server.
This script:
1. Connects to the llama stack server
2. Uses the Responses API with MCP tools
3. Asks for the list of Kubernetes namespaces using the kubernetes-mcp-server
"""
from openai import OpenAI
# Connect to the llama stack server
base_url = "http://localhost:8321/v1"
client = OpenAI(base_url=base_url, api_key="fake")
# Define the MCP tool pointing to the kubernetes-mcp-server
# The kubernetes-mcp-server is running on port 3000 with SSE endpoint at /sse
mcp_server_url = "http://localhost:3000/sse"
tools = [
{
"type": "mcp",
"server_label": "k8s",
"server_url": mcp_server_url,
}
]
# Create a response request asking for k8s namespaces
print("Sending request to list Kubernetes namespaces...")
print(f"Using MCP server at: {mcp_server_url}")
print("Available tools will be listed automatically by the MCP server.")
print()
response = client.responses.create(
# model="meta-llama/Llama-3.2-3B-Instruct", # Using the vllm model
model="fireworks/accounts/fireworks/models/llama4-scout-instruct-basic",
# model="openai/gpt-4o",
input="what are all the Kubernetes namespaces? Use tool call to `namespaces_list`. make sure to adhere to the tool calling format UNDER ALL CIRCUMSTANCES.",
tools=tools,
stream=False,
)
print("\n" + "=" * 80)
print("RESPONSE OUTPUT:")
print("=" * 80)
# Print the output
for i, output in enumerate(response.output):
print(f"\n[Output {i + 1}] Type: {output.type}")
if output.type == "mcp_list_tools":
print(f" Server: {output.server_label}")
print(f" Tools available: {[t.name for t in output.tools]}")
elif output.type == "mcp_call":
print(f" Tool called: {output.name}")
print(f" Arguments: {output.arguments}")
print(f" Result: {output.output}")
if output.error:
print(f" Error: {output.error}")
elif output.type == "message":
print(f" Role: {output.role}")
print(f" Content: {output.content}")
print("\n" + "=" * 80)
print("FINAL RESPONSE TEXT:")
print("=" * 80)
print(response.output_text)
```