fix: don't pass default response format in Responses

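# What does this PR do?

Text is the default response format for chat completion, so the Responses
streaming orchestrator now passes `response_format=None` instead of an explicit
text format (some providers don't support a non-empty `response_format` when
tools are present). Non-text formats (`json_schema`, `json_object`) are still
passed through unchanged.

## Test Plan

Updated the unit tests in
`tests/unit/providers/agents/meta_reference/test_openai_responses.py` to expect
`response_format=None` for the text and unspecified-format cases.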
2025-12-05 10:23:44 +00:00 · 2025-09-30 11:33:57 -07:00 · 2025-09-30 11:33:57 -07:00 · f387e4023f
commit f387e4023f
parent 6cce553c93
2 changed files with 10 additions and 9 deletions
--- a/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py
+++ b/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py
@ -127,13 +127,16 @@ class StreamingResponseOrchestrator:
        messages = self.ctx.messages.copy()

        while True:
+            # Text is the default response format for chat completion, so we don't need to pass it
+            # explicitly (some providers don't support a non-empty response_format when tools are present).
+            response_format = None if self.ctx.response_format.type == "text" else self.ctx.response_format
            completion_result = await self.inference_api.openai_chat_completion(
                model=self.ctx.model,
                messages=messages,
                tools=self.ctx.chat_tools,
                stream=True,
                temperature=self.ctx.temperature,
-                response_format=self.ctx.response_format,
+                response_format=response_format,
            )

            # Process streaming chunks and build complete response
--- a/tests/unit/providers/agents/meta_reference/test_openai_responses.py
+++ b/tests/unit/providers/agents/meta_reference/test_openai_responses.py
@ -37,7 +37,6 @@ from llama_stack.apis.inference import (
    OpenAIJSONSchema,
    OpenAIResponseFormatJSONObject,
    OpenAIResponseFormatJSONSchema,
-    OpenAIResponseFormatText,
    OpenAIUserMessageParam,
 )
 from llama_stack.apis.tools.tools import Tool, ToolGroups, ToolInvocationResult, ToolParameter, ToolRuntime
@ -148,7 +147,7 @@ async def test_create_openai_response_with_string_input(openai_responses_impl, m
    mock_inference_api.openai_chat_completion.assert_called_once_with(
        model=model,
        messages=[OpenAIUserMessageParam(role="user", content="What is the capital of Ireland?", name=None)],
-        response_format=OpenAIResponseFormatText(),
+        response_format=None,
        tools=None,
        stream=True,
        temperature=0.1,
@ -823,16 +822,16 @@ async def test_store_response_uses_rehydrated_input_with_previous_response(
@pytest.mark.parametrize(
    "text_format, response_format",
    [
-        (OpenAIResponseText(format=OpenAIResponseTextFormat(type="text")), OpenAIResponseFormatText()),
+        (OpenAIResponseText(format=OpenAIResponseTextFormat(type="text")), None),
        (
            OpenAIResponseText(format=OpenAIResponseTextFormat(name="Test", schema={"foo": "bar"}, type="json_schema")),
            OpenAIResponseFormatJSONSchema(json_schema=OpenAIJSONSchema(name="Test", schema={"foo": "bar"})),
        ),
        (OpenAIResponseText(format=OpenAIResponseTextFormat(type="json_object")), OpenAIResponseFormatJSONObject()),
-        # ensure text param with no format specified defaults to text
-        (OpenAIResponseText(format=None), OpenAIResponseFormatText()),
-        # ensure text param of None defaults to text
-        (None, OpenAIResponseFormatText()),
+        # ensure text param with no format specified defaults to None
+        (OpenAIResponseText(format=None), None),
+        # ensure text param of None defaults to None
+        (None, None),
    ],
 )
 async def test_create_openai_response_with_text_format(
@ -855,7 +854,6 @@ async def test_create_openai_response_with_text_format(
    # Verify
    first_call = mock_inference_api.openai_chat_completion.call_args_list[0]
    assert first_call.kwargs["messages"][0].content == input_text
-    assert first_call.kwargs["response_format"] is not None
    assert first_call.kwargs["response_format"] == response_format