Merge branch 'main' into milvus/search-modes

This commit is contained in:
Francisco Arceo 2025-08-14 07:36:48 -06:00 committed by GitHub
commit 2d0d13b826
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 621 additions and 56 deletions

View file

@@ -590,25 +590,59 @@ def test_response_streaming_multi_turn_tool_execution(compat_client, text_model_
# Verify tool call streaming events are present
chunk_types = [chunk.type for chunk in chunks]
# Should have function call arguments delta events for tool calls
delta_events = [chunk for chunk in chunks if chunk.type == "response.function_call_arguments.delta"]
done_events = [chunk for chunk in chunks if chunk.type == "response.function_call_arguments.done"]
# Should have function call or MCP arguments delta/done events for tool calls
delta_events = [
chunk
for chunk in chunks
if chunk.type in ["response.function_call_arguments.delta", "response.mcp_call.arguments.delta"]
]
done_events = [
chunk
for chunk in chunks
if chunk.type in ["response.function_call_arguments.done", "response.mcp_call.arguments.done"]
]
# Should have output item events for tool calls
item_added_events = [chunk for chunk in chunks if chunk.type == "response.output_item.added"]
item_done_events = [chunk for chunk in chunks if chunk.type == "response.output_item.done"]
# Should have tool execution progress events
mcp_in_progress_events = [chunk for chunk in chunks if chunk.type == "response.mcp_call.in_progress"]
mcp_completed_events = [chunk for chunk in chunks if chunk.type == "response.mcp_call.completed"]
# Verify we have substantial streaming activity (not just batch events)
assert len(chunks) > 10, f"Expected rich streaming with many events, got only {len(chunks)} chunks"
# Since this test involves MCP tool calls, we should see streaming events
assert len(delta_events) > 0, f"Expected function_call_arguments.delta events, got chunk types: {chunk_types}"
assert len(done_events) > 0, f"Expected function_call_arguments.done events, got chunk types: {chunk_types}"
assert len(delta_events) > 0, (
f"Expected function_call_arguments.delta or mcp_call.arguments.delta events, got chunk types: {chunk_types}"
)
assert len(done_events) > 0, (
f"Expected function_call_arguments.done or mcp_call.arguments.done events, got chunk types: {chunk_types}"
)
# Should have output item events for function calls
assert len(item_added_events) > 0, f"Expected response.output_item.added events, got chunk types: {chunk_types}"
assert len(item_done_events) > 0, f"Expected response.output_item.done events, got chunk types: {chunk_types}"
# Should have tool execution progress events
assert len(mcp_in_progress_events) > 0, (
f"Expected response.mcp_call.in_progress events, got chunk types: {chunk_types}"
)
assert len(mcp_completed_events) > 0, (
f"Expected response.mcp_call.completed events, got chunk types: {chunk_types}"
)
# MCP failed events are optional (only if errors occur)
# Verify progress events have proper structure
for progress_event in mcp_in_progress_events:
assert hasattr(progress_event, "item_id"), "Progress event should have 'item_id' field"
assert hasattr(progress_event, "output_index"), "Progress event should have 'output_index' field"
assert hasattr(progress_event, "sequence_number"), "Progress event should have 'sequence_number' field"
for completed_event in mcp_completed_events:
assert hasattr(completed_event, "sequence_number"), "Completed event should have 'sequence_number' field"
# Verify delta events have proper structure
for delta_event in delta_events:
assert hasattr(delta_event, "delta"), "Delta event should have 'delta' field"
@@ -648,22 +682,32 @@ def test_response_streaming_multi_turn_tool_execution(compat_client, text_model_
assert isinstance(done_event.output_index, int), "Output index should be integer"
assert done_event.output_index >= 0, "Output index should be non-negative"
# Group function call argument events by item_id (these should have proper tracking)
function_call_events_by_item_id = {}
# Group function call and MCP argument events by item_id (these should have proper tracking)
argument_events_by_item_id = {}
for chunk in chunks:
if hasattr(chunk, "item_id") and chunk.type in [
"response.function_call_arguments.delta",
"response.function_call_arguments.done",
"response.mcp_call.arguments.delta",
"response.mcp_call.arguments.done",
]:
item_id = chunk.item_id
if item_id not in function_call_events_by_item_id:
function_call_events_by_item_id[item_id] = []
function_call_events_by_item_id[item_id].append(chunk)
if item_id not in argument_events_by_item_id:
argument_events_by_item_id[item_id] = []
argument_events_by_item_id[item_id].append(chunk)
for item_id, related_events in function_call_events_by_item_id.items():
# Should have at least one delta and one done event for a complete function call
delta_events = [e for e in related_events if e.type == "response.function_call_arguments.delta"]
done_events = [e for e in related_events if e.type == "response.function_call_arguments.done"]
for item_id, related_events in argument_events_by_item_id.items():
# Should have at least one delta and one done event for a complete tool call
delta_events = [
e
for e in related_events
if e.type in ["response.function_call_arguments.delta", "response.mcp_call.arguments.delta"]
]
done_events = [
e
for e in related_events
if e.type in ["response.function_call_arguments.done", "response.mcp_call.arguments.done"]
]
assert len(delta_events) > 0, f"Item {item_id} should have at least one delta event"
assert len(done_events) == 1, f"Item {item_id} should have exactly one done event"
@@ -672,6 +716,33 @@ def test_response_streaming_multi_turn_tool_execution(compat_client, text_model_
for event in related_events:
assert event.item_id == item_id, f"Event should have consistent item_id {item_id}, got {event.item_id}"
# Verify content part events if they exist (for text streaming)
content_part_added_events = [chunk for chunk in chunks if chunk.type == "response.content_part.added"]
content_part_done_events = [chunk for chunk in chunks if chunk.type == "response.content_part.done"]
# Content part events should be paired (if any exist)
if len(content_part_added_events) > 0:
assert len(content_part_done_events) > 0, (
"Should have content_part.done events if content_part.added events exist"
)
# Verify content part event structure
for added_event in content_part_added_events:
assert hasattr(added_event, "response_id"), "Content part added event should have response_id"
assert hasattr(added_event, "item_id"), "Content part added event should have item_id"
assert hasattr(added_event, "part"), "Content part added event should have part"
# TODO: enable this after the client types are updated
# assert added_event.part.type == "output_text", "Content part should be an output_text"
for done_event in content_part_done_events:
assert hasattr(done_event, "response_id"), "Content part done event should have response_id"
assert hasattr(done_event, "item_id"), "Content part done event should have item_id"
assert hasattr(done_event, "part"), "Content part done event should have part"
# TODO: enable this after the client types are updated
# assert len(done_event.part.text) > 0, "Content part should have text when done"
# Basic pairing check: each output_item.added should be followed by some activity
# (but we can't enforce strict 1:1 pairing due to the complexity of multi-turn scenarios)
assert len(item_added_events) > 0, "Should have at least one output_item.added event"