From a03f0cabfd1c17c4b30af84fcd337dda668414ee Mon Sep 17 00:00:00 2001
From: Eric Huang <erichuang@meta.com>
Date: Tue, 30 Sep 2025 11:28:31 -0700
Subject: [PATCH] fix: don't pass default response format in Responses

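# What does this PR do?

Text is the default response format for chat completions, so the Responses
streaming orchestrator now passes `response_format=None` instead of an explicit
text format. Some providers do not support a non-empty `response_format` when
tools are present.
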
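## Test Plan

Unit tests in `tests/unit/providers/agents/meta_reference/test_openai_responses.py`
are updated to expect `response_format=None` when no text format is requested.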
---
 .../inline/agents/meta_reference/responses/streaming.py    | 5 ++++-
 .../agents/meta_reference/test_openai_responses.py         | 7 +++----
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py b/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py
index 2f45ad2a3..179f7f023 100644
--- a/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py
+++ b/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py
@@ -127,13 +127,16 @@ class StreamingResponseOrchestrator:
         messages = self.ctx.messages.copy()
 
         while True:
+            # Text is the default response format for chat completions, so we don't need to pass it
+            # (some providers don't support a non-empty response_format when tools are present).
+            response_format = None if self.ctx.response_format.type == "text" else self.ctx.response_format
             completion_result = await self.inference_api.openai_chat_completion(
                 model=self.ctx.model,
                 messages=messages,
                 tools=self.ctx.chat_tools,
                 stream=True,
                 temperature=self.ctx.temperature,
-                response_format=self.ctx.response_format,
+                response_format=response_format,
             )
 
             # Process streaming chunks and build complete response
diff --git a/tests/unit/providers/agents/meta_reference/test_openai_responses.py b/tests/unit/providers/agents/meta_reference/test_openai_responses.py
index 38ce365c1..eb77c2dbe 100644
--- a/tests/unit/providers/agents/meta_reference/test_openai_responses.py
+++ b/tests/unit/providers/agents/meta_reference/test_openai_responses.py
@@ -148,7 +148,7 @@ async def test_create_openai_response_with_string_input(openai_responses_impl, m
     mock_inference_api.openai_chat_completion.assert_called_once_with(
         model=model,
         messages=[OpenAIUserMessageParam(role="user", content="What is the capital of Ireland?", name=None)],
-        response_format=OpenAIResponseFormatText(),
+        response_format=None,
         tools=None,
         stream=True,
         temperature=0.1,
@@ -831,8 +831,8 @@ async def test_store_response_uses_rehydrated_input_with_previous_response(
         (OpenAIResponseText(format=OpenAIResponseTextFormat(type="json_object")), OpenAIResponseFormatJSONObject()),
         # ensure text param with no format specified defaults to text
         (OpenAIResponseText(format=None), OpenAIResponseFormatText()),
-        # ensure text param of None defaults to text
-        (None, OpenAIResponseFormatText()),
+        # ensure text param of None defaults to None
+        (None, None),
     ],
 )
 async def test_create_openai_response_with_text_format(
@@ -855,7 +855,6 @@ async def test_create_openai_response_with_text_format(
     # Verify
     first_call = mock_inference_api.openai_chat_completion.call_args_list[0]
     assert first_call.kwargs["messages"][0].content == input_text
-    assert first_call.kwargs["response_format"] is not None
     assert first_call.kwargs["response_format"] == response_format