Mirror of https://github.com/meta-llama/llama-stack.git, synced 2025-10-11 13:44:38 +00:00.
feat(responses)!: add in_progress, failed, content part events (#3765)
## Summary

- add schema + runtime support for response.in_progress / response.failed / response.incomplete
- stream content parts with proper indexes and reasoning slots
- align tests + docs with the richer event payloads

## Testing

- uv run pytest tests/unit/providers/agents/meta_reference/test_openai_responses.py::test_create_openai_response_with_string_input
- uv run pytest tests/unit/providers/agents/meta_reference/test_response_conversion_utils.py
Commit e039b61d26 (parent a548169b99).
12 changed files with 1431 additions and 221 deletions
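The core change in this commit is that a stream now advertises progress and can end in one of three terminal states instead of always `response.completed`. Here is a hedged sketch of what that means for a consumer; the event shapes below are assumptions inferred from the tests in this diff rather than a verbatim client API: a `.type` string, a `.response` carrying `.status` and `.error`, and a `.delta` on text events.

```python
# Hypothetical consumer of the richer Responses event stream.
TERMINAL_EVENTS = {"response.completed", "response.incomplete", "response.failed"}


async def consume(stream) -> str:
    """Collect streamed text, surfacing failures and incomplete runs."""
    status = "in_progress"
    text_parts: list[str] = []
    async for event in stream:
        if event.type == "response.output_text.delta":
            text_parts.append(event.delta)  # incremental text for the current content part
        elif event.type == "response.failed":
            raise RuntimeError(event.response.error)  # failed events carry an error payload
        elif event.type in TERMINAL_EVENTS:
            status = event.response.status  # "completed" or "incomplete"
    assert status != "in_progress", "stream ended without a terminal event"
    return "".join(text_parts)
```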
@@ -16,18 +16,19 @@ class StreamingValidator:

     def assert_basic_event_sequence(self):
         """Verify basic created -> completed event sequence."""
-        assert len(self.chunks) >= 2, f"Expected at least 2 chunks (created + completed), got {len(self.chunks)}"
+        assert len(self.chunks) >= 2, f"Expected at least 2 chunks (created + terminal), got {len(self.chunks)}"
         assert self.chunks[0].type == "response.created", (
             f"First chunk should be response.created, got {self.chunks[0].type}"
         )
-        assert self.chunks[-1].type == "response.completed", (
-            f"Last chunk should be response.completed, got {self.chunks[-1].type}"
+        assert any(t in self.event_types for t in ["response.completed", "response.incomplete", "response.failed"]), (
+            "Expected a terminal response event (completed, incomplete, or failed)"
         )

         # Verify event order
+        terminal_types = ["response.completed", "response.incomplete", "response.failed"]
+        terminal_indices = [self.event_types.index(t) for t in terminal_types if t in self.event_types]
+        assert terminal_indices, "Expected at least one terminal event index"
         created_index = self.event_types.index("response.created")
-        completed_index = self.event_types.index("response.completed")
-        assert created_index < completed_index, "response.created should come before response.completed"
+        assert created_index < min(terminal_indices), "response.created should precede terminal events"

     def assert_response_consistency(self):
         """Verify response ID consistency across events."""
@@ -137,8 +138,23 @@ class StreamingValidator:
         for chunk in self.chunks:
             if chunk.type == "response.created":
                 assert chunk.response.status == "in_progress"
+            elif chunk.type == "response.in_progress":
+                assert chunk.response.status == "in_progress"
+                assert isinstance(chunk.sequence_number, int)
+            elif chunk.type == "response.incomplete":
+                assert chunk.response.status == "incomplete"
+                assert isinstance(chunk.sequence_number, int)
+            elif chunk.type == "response.failed":
+                assert chunk.response.status == "failed"
+                assert isinstance(chunk.sequence_number, int)
+                assert chunk.response.error is not None
             elif chunk.type == "response.completed":
                 assert chunk.response.status == "completed"
+            elif chunk.type in {"response.content_part.added", "response.content_part.done"}:
+                assert chunk.item_id, "Content part events should have non-empty item_id"
+                assert isinstance(chunk.content_index, int)
+                assert isinstance(chunk.output_index, int)
+                assert chunk.response_id, "Content part events should include response_id"
             elif hasattr(chunk, "item_id"):
                 assert chunk.item_id, "Events with item_id should have non-empty item_id"
             elif hasattr(chunk, "sequence_number"):
@@ -156,9 +156,10 @@ async def test_create_openai_response_with_string_input(openai_responses_impl, mock_inference_api):
     )

     # Should have content part events for text streaming
-    # Expected: response.created, content_part.added, output_text.delta, content_part.done, response.completed
-    assert len(chunks) >= 4
+    # Expected: response.created, response.in_progress, content_part.added, output_text.delta, content_part.done, response.completed
+    assert len(chunks) >= 5
     assert chunks[0].type == "response.created"
+    assert any(chunk.type == "response.in_progress" for chunk in chunks)

     # Check for content part events
     content_part_added_events = [c for c in chunks if c.type == "response.content_part.added"]
@@ -169,6 +170,14 @@ async def test_create_openai_response_with_string_input(openai_responses_impl, mock_inference_api):
     assert len(content_part_done_events) >= 1, "Should have content_part.done event for text"
     assert len(text_delta_events) >= 1, "Should have text delta events"

+    added_event = content_part_added_events[0]
+    done_event = content_part_done_events[0]
+    assert added_event.content_index == 0
+    assert done_event.content_index == 0
+    assert added_event.output_index == done_event.output_index == 0
+    assert added_event.item_id == done_event.item_id
+    assert added_event.response_id == done_event.response_id
+
     # Verify final event is completion
     assert chunks[-1].type == "response.completed"
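The index and id assertions generalize to a pairing rule: every `content_part.added` must be closed by a `content_part.done` for the same `(item_id, content_index)`. A hedged sketch of that check (hypothetical helper, not part of the patch):

```python
from collections import defaultdict


def assert_content_parts_paired(chunks) -> None:
    """Every content_part.added must be closed by a matching content_part.done."""
    states = defaultdict(list)  # (item_id, content_index) -> observed events
    for c in chunks:
        key = (getattr(c, "item_id", None), getattr(c, "content_index", None))
        if c.type == "response.content_part.added":
            states[key].append("added")
        elif c.type == "response.content_part.done":
            assert "added" in states[key], f"content_part.done without added for {key}"
            states[key].append("done")
    for key, seen in states.items():
        assert seen[-1] == "done", f"content part {key} was never closed"
```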
@@ -177,6 +186,8 @@ async def test_create_openai_response_with_string_input(openai_responses_impl, mock_inference_api):
     assert final_response.model == model
     assert len(final_response.output) == 1
     assert isinstance(final_response.output[0], OpenAIResponseMessage)
+    assert final_response.output[0].id == added_event.item_id
+    assert final_response.id == added_event.response_id

     openai_responses_impl.responses_store.store_response_object.assert_called_once()
     assert final_response.output[0].content[0].text == "Dublin"
@@ -303,9 +314,20 @@ async def test_create_openai_response_with_tool_call_type_none(openai_responses_impl, mock_inference_api):
     chunks = [chunk async for chunk in result]

     # Verify event types
-    # Should have: response.created, output_item.added, function_call_arguments.delta,
-    # function_call_arguments.done, output_item.done, response.completed
-    assert len(chunks) == 6
+    # Should have: response.created, response.in_progress, output_item.added,
+    # function_call_arguments.delta, function_call_arguments.done, output_item.done, response.completed
+    assert len(chunks) == 7
+
+    event_types = [chunk.type for chunk in chunks]
+    assert event_types == [
+        "response.created",
+        "response.in_progress",
+        "response.output_item.added",
+        "response.function_call_arguments.delta",
+        "response.function_call_arguments.done",
+        "response.output_item.done",
+        "response.completed",
+    ]

     # Verify inference API was called correctly (after iterating over result)
     first_call = mock_inference_api.openai_chat_completion.call_args_list[0]
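Pinning the full seven-event sequence works here because the mocked stream emits exactly one arguments delta. Against a real model the delta count varies, so a tolerant comparison could collapse consecutive deltas first (hypothetical helper, not in the patch):

```python
def collapse_deltas(event_types: list[str]) -> list[str]:
    """Fold runs of identical .delta events so variable-length streams compare equal."""
    collapsed: list[str] = []
    for t in event_types:
        if t.endswith(".delta") and collapsed and collapsed[-1] == t:
            continue  # a second, third, ... consecutive delta adds no ordering information
        collapsed.append(t)
    return collapsed


assert collapse_deltas(
    [
        "response.created",
        "response.in_progress",
        "response.output_item.added",
        "response.function_call_arguments.delta",
        "response.function_call_arguments.delta",
        "response.function_call_arguments.done",
        "response.output_item.done",
        "response.completed",
    ]
) == [
    "response.created",
    "response.in_progress",
    "response.output_item.added",
    "response.function_call_arguments.delta",
    "response.function_call_arguments.done",
    "response.output_item.done",
    "response.completed",
]
```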
@@ -314,25 +336,19 @@ async def test_create_openai_response_with_tool_call_type_none(openai_responses_impl, mock_inference_api):
     assert first_call.kwargs["temperature"] == 0.1

     # Check response.created event (should have empty output)
     assert chunks[0].type == "response.created"
     assert len(chunks[0].response.output) == 0

-    # Check streaming events
-    assert chunks[1].type == "response.output_item.added"
-    assert chunks[2].type == "response.function_call_arguments.delta"
-    assert chunks[3].type == "response.function_call_arguments.done"
-    assert chunks[4].type == "response.output_item.done"
-
     # Check response.completed event (should have the tool call)
-    assert chunks[5].type == "response.completed"
-    assert len(chunks[5].response.output) == 1
-    assert chunks[5].response.output[0].type == "function_call"
-    assert chunks[5].response.output[0].name == "get_weather"
+    completed_chunk = chunks[-1]
+    assert completed_chunk.type == "response.completed"
+    assert len(completed_chunk.response.output) == 1
+    assert completed_chunk.response.output[0].type == "function_call"
+    assert completed_chunk.response.output[0].name == "get_weather"


 async def test_create_openai_response_with_tool_call_function_arguments_none(openai_responses_impl, mock_inference_api):
-    """Test creating an OpenAI response with a tool call response that has a function that does not accept arguments, or arguments set to None when they are not mandatory."""
-    # Setup
+    """Test creating an OpenAI response with tool calls that omit arguments."""

     input_text = "What is the time right now?"
     model = "meta-llama/Llama-3.1-8B-Instruct"
@@ -359,9 +375,21 @@ async def test_create_openai_response_with_tool_call_function_arguments_none(openai_responses_impl, mock_inference_api):
             object="chat.completion.chunk",
         )

-    mock_inference_api.openai_chat_completion.return_value = fake_stream_toolcall()
+    def assert_common_expectations(chunks) -> None:
+        first_call = mock_inference_api.openai_chat_completion.call_args_list[0]
+        assert first_call.kwargs["messages"][0].content == input_text
+        assert first_call.kwargs["tools"] is not None
+        assert first_call.kwargs["temperature"] == 0.1
+        assert len(chunks[0].response.output) == 0
+        completed_chunk = chunks[-1]
+        assert completed_chunk.type == "response.completed"
+        assert len(completed_chunk.response.output) == 1
+        assert completed_chunk.response.output[0].type == "function_call"
+        assert completed_chunk.response.output[0].name == "get_current_time"
+        assert completed_chunk.response.output[0].arguments == "{}"
+
+    # Function does not accept arguments
+    mock_inference_api.openai_chat_completion.return_value = fake_stream_toolcall()
     result = await openai_responses_impl.create_openai_response(
         input=input_text,
         model=model,
@@ -369,46 +397,23 @@ async def test_create_openai_response_with_tool_call_function_arguments_none(openai_responses_impl, mock_inference_api):
         temperature=0.1,
         tools=[
             OpenAIResponseInputToolFunction(
-                name="get_current_time",
-                description="Get current time for system's timezone",
-                parameters={},
+                name="get_current_time", description="Get current time for system's timezone", parameters={}
             )
         ],
     )
-
-    # Check that we got the content from our mocked tool execution result
     chunks = [chunk async for chunk in result]
-
-    # Verify event types
-    # Should have: response.created, output_item.added, function_call_arguments.delta,
-    # function_call_arguments.done, output_item.done, response.completed
-    assert len(chunks) == 5
-
-    # Verify inference API was called correctly (after iterating over result)
-    first_call = mock_inference_api.openai_chat_completion.call_args_list[0]
-    assert first_call.kwargs["messages"][0].content == input_text
-    assert first_call.kwargs["tools"] is not None
-    assert first_call.kwargs["temperature"] == 0.1
-
-    # Check response.created event (should have empty output)
-    assert chunks[0].type == "response.created"
-    assert len(chunks[0].response.output) == 0
-
-    # Check streaming events
-    assert chunks[1].type == "response.output_item.added"
-    assert chunks[2].type == "response.function_call_arguments.done"
-    assert chunks[3].type == "response.output_item.done"
-
-    # Check response.completed event (should have the tool call with arguments set to "{}")
-    assert chunks[4].type == "response.completed"
-    assert len(chunks[4].response.output) == 1
-    assert chunks[4].response.output[0].type == "function_call"
-    assert chunks[4].response.output[0].name == "get_current_time"
-    assert chunks[4].response.output[0].arguments == "{}"
-
-    mock_inference_api.openai_chat_completion.return_value = fake_stream_toolcall()
+    assert [chunk.type for chunk in chunks] == [
+        "response.created",
+        "response.in_progress",
+        "response.output_item.added",
+        "response.function_call_arguments.done",
+        "response.output_item.done",
+        "response.completed",
+    ]
+    assert_common_expectations(chunks)
+
+    # Function accepts optional arguments
+    mock_inference_api.openai_chat_completion.return_value = fake_stream_toolcall()
     result = await openai_responses_impl.create_openai_response(
         input=input_text,
         model=model,
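Note that `mock_inference_api.openai_chat_completion.return_value` is re-assigned with a fresh `fake_stream_toolcall()` before every `create_openai_response` call. That is required, not stylistic: an async generator is exhausted after one full iteration, so reusing the first one would make the second scenario stream nothing. A minimal illustration of the underlying Python behavior:

```python
import asyncio


async def gen():
    yield "chunk"


async def main() -> None:
    g = gen()
    assert [c async for c in g] == ["chunk"]
    assert [c async for c in g] == []  # exhausted: async generators are single-use


asyncio.run(main())
```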
@@ -418,42 +423,47 @@ async def test_create_openai_response_with_tool_call_function_arguments_none(openai_responses_impl, mock_inference_api):
             OpenAIResponseInputToolFunction(
                 name="get_current_time",
                 description="Get current time for system's timezone",
-                parameters={
-                    "timezone": "string",
-                },
+                parameters={"timezone": "string"},
             )
         ],
     )
-
-    # Check that we got the content from our mocked tool execution result
     chunks = [chunk async for chunk in result]
+    assert [chunk.type for chunk in chunks] == [
+        "response.created",
+        "response.in_progress",
+        "response.output_item.added",
+        "response.function_call_arguments.done",
+        "response.output_item.done",
+        "response.completed",
+    ]
+    assert_common_expectations(chunks)

-    # Verify event types
-    # Should have: response.created, output_item.added, function_call_arguments.delta,
-    # function_call_arguments.done, output_item.done, response.completed
-    assert len(chunks) == 5
-
-    # Verify inference API was called correctly (after iterating over result)
-    first_call = mock_inference_api.openai_chat_completion.call_args_list[0]
-    assert first_call.kwargs["messages"][0].content == input_text
-    assert first_call.kwargs["tools"] is not None
-    assert first_call.kwargs["temperature"] == 0.1
-
-    # Check response.created event (should have empty output)
-    assert chunks[0].type == "response.created"
-    assert len(chunks[0].response.output) == 0
-
-    # Check streaming events
-    assert chunks[1].type == "response.output_item.added"
-    assert chunks[2].type == "response.function_call_arguments.done"
-    assert chunks[3].type == "response.output_item.done"
-
-    # Check response.completed event (should have the tool call with arguments set to "{}")
-    assert chunks[4].type == "response.completed"
-    assert len(chunks[4].response.output) == 1
-    assert chunks[4].response.output[0].type == "function_call"
-    assert chunks[4].response.output[0].name == "get_current_time"
-    assert chunks[4].response.output[0].arguments == "{}"
+    # Function accepts optional arguments with additional optional fields
+    mock_inference_api.openai_chat_completion.return_value = fake_stream_toolcall()
+    result = await openai_responses_impl.create_openai_response(
+        input=input_text,
+        model=model,
+        stream=True,
+        temperature=0.1,
+        tools=[
+            OpenAIResponseInputToolFunction(
+                name="get_current_time",
+                description="Get current time for system's timezone",
+                parameters={"timezone": "string", "location": "string"},
+            )
+        ],
+    )
+    chunks = [chunk async for chunk in result]
+    assert [chunk.type for chunk in chunks] == [
+        "response.created",
+        "response.in_progress",
+        "response.output_item.added",
+        "response.function_call_arguments.done",
+        "response.output_item.done",
+        "response.completed",
+    ]
+    assert_common_expectations(chunks)
+    mock_inference_api.openai_chat_completion.return_value = fake_stream_toolcall()


 async def test_create_openai_response_with_multiple_messages(openai_responses_impl, mock_inference_api):
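For context, `fake_stream_toolcall` is defined earlier in this test module and does not appear in the diff. Judging from the `object="chat.completion.chunk"` kwarg visible in the hunk above, it is an async generator yielding chat-completion chunks that carry a tool call without arguments. A rough stand-in using `SimpleNamespace` (field names follow the OpenAI chat-completion-chunk schema; the real test builds the project's typed models, so treat this as an approximation):

```python
import time
from types import SimpleNamespace


async def fake_stream_toolcall_approx():
    """Approximation: one chunk carrying a single tool call without arguments, then end of stream."""
    tool_call = SimpleNamespace(
        index=0,
        id="call_123",
        type="function",
        function=SimpleNamespace(name="get_current_time", arguments=None),
    )
    yield SimpleNamespace(
        id="chunk-1",
        object="chat.completion.chunk",
        created=int(time.time()),
        model="meta-llama/Llama-3.1-8B-Instruct",
        choices=[
            SimpleNamespace(
                index=0,
                delta=SimpleNamespace(content=None, tool_calls=[tool_call]),
                finish_reason="tool_calls",
            )
        ],
    )
```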