From 96886afacaf16e45c74cdb341280c98fa0ce74f0 Mon Sep 17 00:00:00 2001
From: grs
Date: Wed, 8 Oct 2025 15:47:17 +0100
Subject: [PATCH] fix(responses): fix regression in support for mcp tool
 require_approval argument (#3731)

# What does this PR do?
It prevents a tool call message from being added to the chat completion
messages without a corresponding tool call result, which is needed when an
approval is required first or when the approval request is denied. In both
cases the tool call message is popped off the next-turn messages.

Closes #3728

## Test Plan
Ran the integration tests.

Manual check of both approval and denial against gpt-4o.

Signed-off-by: Gordon Sim
---
 .../agents/meta_reference/responses/openai_responses.py | 2 +-
 .../inline/agents/meta_reference/responses/streaming.py | 8 +++++---
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py b/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py
index 8ccdcb0e1..245203f10 100644
--- a/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py
+++ b/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py
@@ -269,7 +269,7 @@ class OpenAIResponsesImpl:
             response_tools=tools,
             temperature=temperature,
             response_format=response_format,
-            inputs=input,
+            inputs=all_input,
         )
 
         # Create orchestrator and delegate streaming logic
diff --git a/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py b/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py
index 8a662e6db..895d13a7f 100644
--- a/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py
+++ b/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py
@@ -175,6 +175,8 @@ class StreamingResponseOrchestrator:
             ):
                 yield stream_event
 
+            messages = next_turn_messages
+
             if not function_tool_calls and not non_function_tool_calls:
                 break
 
@@ -187,9 +189,7 @@ class StreamingResponseOrchestrator:
                 logger.info(f"Exiting inference loop since iteration count({n_iter}) exceeds {self.max_infer_iters=}")
                 break
 
-            messages = next_turn_messages
-
-        self.final_messages = messages.copy() + [current_response.choices[0].message]
+        self.final_messages = messages.copy()
 
         # Create final response
         final_response = OpenAIResponseObject(
@@ -232,9 +232,11 @@ class StreamingResponseOrchestrator:
                                 non_function_tool_calls.append(tool_call)
                             else:
                                 logger.info(f"Approval denied for {tool_call.id} on {tool_call.function.name}")
+                                next_turn_messages.pop()
                         else:
                             logger.info(f"Requesting approval for {tool_call.id} on {tool_call.function.name}")
                             approvals.append(tool_call)
+                            next_turn_messages.pop()
                     else:
                         non_function_tool_calls.append(tool_call)
 
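
For context, here is a minimal sketch of the invariant the patch restores. It is illustrative only: the function name `apply_approval_policy`, the plain-dict message shape, and the tri-state `approval` flag are assumptions for the sketch, not the actual `StreamingResponseOrchestrator` API. The point is that an assistant message carrying `tool_calls` must be paired with a tool result before the next inference turn, so a call that is awaiting approval or was denied has its tool-call message popped back off the next-turn messages.

```python
from typing import Any

Message = dict[str, Any]

def apply_approval_policy(next_turn_messages: list[Message], approval: bool | None) -> bool:
    """Return True if the pending tool call may execute this turn.

    Assumes the caller has already appended the assistant message that
    carries the tool call (the optimistic append in streaming.py). When the
    call cannot run -- approval pending (None) or denied (False) -- that
    message is popped so the history never contains a tool call without a
    matching tool result.
    """
    if approval:
        return True  # executing the call will append the tool-result message
    next_turn_messages.pop()  # drop the dangling tool-call message
    return False

# Usage: a denied call leaves the next-turn history unchanged.
history: list[Message] = [{"role": "user", "content": "list the files"}]
history.append({"role": "assistant", "tool_calls": [{"id": "c1", "function": {"name": "ls"}}]})
assert apply_approval_policy(history, approval=False) is False
assert history == [{"role": "user", "content": "list the files"}]
```

Popping rather than rebuilding the list keeps the optimistic append in place for the common approved path, which mirrors what the diff does in streaming.py.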