From 96886afacaf16e45c74cdb341280c98fa0ce74f0 Mon Sep 17 00:00:00 2001
From: grs
Date: Wed, 8 Oct 2025 15:47:17 +0100
Subject: [PATCH] fix(responses): fix regression in support for mcp tool
 require_approval argument (#3731)

# What does this PR do?
It prevents a tool call message from being added to the chat completion
messages without a corresponding tool call result, which is needed when an
approval is required first or when the approval request is denied. In both
cases the tool call message is popped off the next-turn messages.

Closes #3728

## Test Plan
Ran the integration tests.

Manual check of both approval and denial against gpt-4o.

Signed-off-by: Gordon Sim
---
 .../agents/meta_reference/responses/openai_responses.py | 2 +-
 .../inline/agents/meta_reference/responses/streaming.py | 8 +++++---
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py b/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py
index 8ccdcb0e1..245203f10 100644
--- a/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py
+++ b/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py
@@ -269,7 +269,7 @@ class OpenAIResponsesImpl:
             response_tools=tools,
             temperature=temperature,
             response_format=response_format,
-            inputs=input,
+            inputs=all_input,
         )
 
         # Create orchestrator and delegate streaming logic
diff --git a/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py b/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py
index 8a662e6db..895d13a7f 100644
--- a/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py
+++ b/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py
@@ -175,6 +175,8 @@ class StreamingResponseOrchestrator:
             ):
                 yield stream_event
 
+            messages = next_turn_messages
+
             if not function_tool_calls and not non_function_tool_calls:
                 break
 
@@ -187,9 +189,7 @@ class StreamingResponseOrchestrator:
                 logger.info(f"Exiting inference loop since iteration count({n_iter}) exceeds {self.max_infer_iters=}")
                 break
 
-            messages = next_turn_messages
-
-        self.final_messages = messages.copy() + [current_response.choices[0].message]
+        self.final_messages = messages.copy()
 
         # Create final response
         final_response = OpenAIResponseObject(
@@ -232,9 +232,11 @@ class StreamingResponseOrchestrator:
                                 non_function_tool_calls.append(tool_call)
                             else:
                                 logger.info(f"Approval denied for {tool_call.id} on {tool_call.function.name}")
+                                next_turn_messages.pop()
                         else:
                             logger.info(f"Requesting approval for {tool_call.id} on {tool_call.function.name}")
                             approvals.append(tool_call)
+                            next_turn_messages.pop()
                     else:
                         non_function_tool_calls.append(tool_call)
 
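
For context, here is a minimal sketch of the invariant the patch restores. It is illustrative only: the function name `apply_approval_policy`, the plain-dict message shape, and the tri-state `approval` flag are assumptions for the sketch, not the actual `StreamingResponseOrchestrator` API. The point is that an assistant message carrying `tool_calls` must be paired with a tool result before the next inference turn, so a call that is awaiting approval or was denied has its tool-call message popped back off the next-turn messages.

```python
from typing import Any

Message = dict[str, Any]

def apply_approval_policy(next_turn_messages: list[Message], approval: bool | None) -> bool:
    """Return True if the pending tool call may execute this turn.

    Assumes the caller has already appended the assistant message that
    carries the tool call (the optimistic append in streaming.py). When the
    call cannot run -- approval pending (None) or denied (False) -- that
    message is popped so the history never contains a tool call without a
    matching tool result.
    """
    if approval:
        return True  # executing the call will append the tool-result message
    next_turn_messages.pop()  # drop the dangling tool-call message
    return False

# Usage: a denied call leaves the next-turn history unchanged.
history: list[Message] = [{"role": "user", "content": "list the files"}]
history.append({"role": "assistant", "tool_calls": [{"id": "c1", "function": {"name": "ls"}}]})
assert apply_approval_policy(history, approval=False) is False
assert history == [{"role": "user", "content": "list the files"}]
```

Popping rather than rebuilding the list keeps the optimistic append in place for the common approved path, which mirrors what the diff does in streaming.py.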