fix: don't pass default response format in Responses (#3614)

# What does this PR do? Fireworks doesn't allow repsonse_format with tool use. The default response format is 'text' anyway, so we can safely omit. ## Test Plan Below script failed without the change, runs after. ``` #!/usr/bin/env python3 """ Script to test Responses API with kubernetes-mcp-server. This script: 1. Connects to the llama stack server 2. Uses the Responses API with MCP tools 3. Asks for the list of Kubernetes namespaces using the kubernetes-mcp-server """ import json from openai import OpenAI # Connect to the llama stack server base_url = "http://localhost:8321/v1" client = OpenAI(base_url=base_url, api_key="fake") # Define the MCP tool pointing to the kubernetes-mcp-server # The kubernetes-mcp-server is running on port 3000 with SSE endpoint at /sse mcp_server_url = "http://localhost:3000/sse" tools = [ { "type": "mcp", "server_label": "k8s", "server_url": mcp_server_url, } ] # Create a response request asking for k8s namespaces print("Sending request to list Kubernetes namespaces...") print(f"Using MCP server at: {mcp_server_url}") print("Available tools will be listed automatically by the MCP server.") print() response = client.responses.create( # model="meta-llama/Llama-3.2-3B-Instruct", # Using the vllm model model="fireworks/accounts/fireworks/models/llama4-scout-instruct-basic", # model="openai/gpt-4o", input="what are all the Kubernetes namespaces? Use tool call to `namespaces_list`. make sure to adhere to the tool calling format UNDER ALL CIRCUMSTANCES.", tools=tools, stream=False, ) print("\n" + "=" * 80) print("RESPONSE OUTPUT:") print("=" * 80) # Print the output for i, output in enumerate(response.output): print(f"\n[Output {i + 1}] Type: {output.type}") if output.type == "mcp_list_tools": print(f" Server: {output.server_label}") print(f" Tools available: {[t.name for t in output.tools]}") elif output.type == "mcp_call": print(f" Tool called: {output.name}") print(f" Arguments: {output.arguments}") print(f" Result: {output.output}") if output.error: print(f" Error: {output.error}") elif output.type == "message": print(f" Role: {output.role}") print(f" Content: {output.content}") print("\n" + "=" * 80) print("FINAL RESPONSE TEXT:") print("=" * 80) print(response.output_text) ```
2025-12-05 18:27:22 +00:00 · 2025-09-30 14:52:24 -07:00 · 2025-09-30 14:52:24 -07:00 · ac7c35fbe6
commit ac7c35fbe6
parent d350e3662b
10 changed files with 7573 additions and 89 deletions
--- a/tests/unit/providers/agents/meta_reference/test_openai_responses.py
+++ b/tests/unit/providers/agents/meta_reference/test_openai_responses.py
@ -37,7 +37,6 @@ from llama_stack.apis.inference import (
    OpenAIJSONSchema,
    OpenAIResponseFormatJSONObject,
    OpenAIResponseFormatJSONSchema,
-    OpenAIResponseFormatText,
    OpenAIUserMessageParam,
 )
 from llama_stack.apis.tools.tools import Tool, ToolGroups, ToolInvocationResult, ToolParameter, ToolRuntime
@ -148,7 +147,7 @@ async def test_create_openai_response_with_string_input(openai_responses_impl, m
    mock_inference_api.openai_chat_completion.assert_called_once_with(
        model=model,
        messages=[OpenAIUserMessageParam(role="user", content="What is the capital of Ireland?", name=None)],
-        response_format=OpenAIResponseFormatText(),
+        response_format=None,
        tools=None,
        stream=True,
        temperature=0.1,
@ -823,16 +822,16 @@ async def test_store_response_uses_rehydrated_input_with_previous_response(
@pytest.mark.parametrize(
    "text_format, response_format",
    [
-        (OpenAIResponseText(format=OpenAIResponseTextFormat(type="text")), OpenAIResponseFormatText()),
+        (OpenAIResponseText(format=OpenAIResponseTextFormat(type="text")), None),
        (
            OpenAIResponseText(format=OpenAIResponseTextFormat(name="Test", schema={"foo": "bar"}, type="json_schema")),
            OpenAIResponseFormatJSONSchema(json_schema=OpenAIJSONSchema(name="Test", schema={"foo": "bar"})),
        ),
        (OpenAIResponseText(format=OpenAIResponseTextFormat(type="json_object")), OpenAIResponseFormatJSONObject()),
-        # ensure text param with no format specified defaults to text
-        (OpenAIResponseText(format=None), OpenAIResponseFormatText()),
-        # ensure text param of None defaults to text
-        (None, OpenAIResponseFormatText()),
+        # ensure text param with no format specified defaults to None
+        (OpenAIResponseText(format=None), None),
+        # ensure text param of None defaults to None
+        (None, None),
    ],
 )
 async def test_create_openai_response_with_text_format(
@ -855,7 +854,6 @@ async def test_create_openai_response_with_text_format(
    # Verify
    first_call = mock_inference_api.openai_chat_completion.call_args_list[0]
    assert first_call.kwargs["messages"][0].content == input_text
-    assert first_call.kwargs["response_format"] is not None
    assert first_call.kwargs["response_format"] == response_format