feat(responses): stream progress of tool calls (#3135)

# What does this PR do?
Enhances tool execution streaming by adding support for real-time progress events during tool calls. This implementation adds streaming events for MCP and web-search tools, including in-progress, searching, completed, and failed states.

The refactored `_execute_tool_call` method now returns an async iterator that yields streaming events throughout the tool execution lifecycle.

## Test Plan
Updated the integration test `test_response_streaming_multi_turn_tool_execution` to verify the presence and structure of new streaming events, including:
- Checking for MCP in-progress and completed events
- Verifying that progress events contain the required fields (`item_id`, `output_index`, `sequence_number`)
- Ensuring completed events have the required `sequence_number` field
This commit is contained in:
Ashwin Bharambe 2025-08-13 16:31:25 -07:00 committed by GitHub
parent 5b312a80b9
commit 8638537d14
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 141 additions and 18 deletions

View file

@ -598,6 +598,10 @@ def test_response_streaming_multi_turn_tool_execution(compat_client, text_model_
item_added_events = [chunk for chunk in chunks if chunk.type == "response.output_item.added"]
item_done_events = [chunk for chunk in chunks if chunk.type == "response.output_item.done"]
# Should have tool execution progress events
mcp_in_progress_events = [chunk for chunk in chunks if chunk.type == "response.mcp_call.in_progress"]
mcp_completed_events = [chunk for chunk in chunks if chunk.type == "response.mcp_call.completed"]
# Verify we have substantial streaming activity (not just batch events)
assert len(chunks) > 10, f"Expected rich streaming with many events, got only {len(chunks)} chunks"
@ -609,6 +613,24 @@ def test_response_streaming_multi_turn_tool_execution(compat_client, text_model_
assert len(item_added_events) > 0, f"Expected response.output_item.added events, got chunk types: {chunk_types}"
assert len(item_done_events) > 0, f"Expected response.output_item.done events, got chunk types: {chunk_types}"
# Should have tool execution progress events
assert len(mcp_in_progress_events) > 0, (
f"Expected response.mcp_call.in_progress events, got chunk types: {chunk_types}"
)
assert len(mcp_completed_events) > 0, (
f"Expected response.mcp_call.completed events, got chunk types: {chunk_types}"
)
# MCP failed events are optional (only if errors occur)
# Verify progress events have proper structure
for progress_event in mcp_in_progress_events:
assert hasattr(progress_event, "item_id"), "Progress event should have 'item_id' field"
assert hasattr(progress_event, "output_index"), "Progress event should have 'output_index' field"
assert hasattr(progress_event, "sequence_number"), "Progress event should have 'sequence_number' field"
for completed_event in mcp_completed_events:
assert hasattr(completed_event, "sequence_number"), "Completed event should have 'sequence_number' field"
# Verify delta events have proper structure
for delta_event in delta_events:
assert hasattr(delta_event, "delta"), "Delta event should have 'delta' field"