diff --git a/src/llama_stack/apis/agents/openai_responses.py b/src/llama_stack/apis/agents/openai_responses.py
index 972b03c94..69e2b2012 100644
--- a/src/llama_stack/apis/agents/openai_responses.py
+++ b/src/llama_stack/apis/agents/openai_responses.py
@@ -4,6 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
+from collections.abc import Sequence
 from typing import Annotated, Any, Literal
 
 from pydantic import BaseModel, Field, model_validator
@@ -202,7 +203,7 @@ class OpenAIResponseMessage(BaseModel):
     scenarios.
     """
 
-    content: str | list[OpenAIResponseInputMessageContent] | list[OpenAIResponseOutputMessageContent]
+    content: str | Sequence[OpenAIResponseInputMessageContent] | Sequence[OpenAIResponseOutputMessageContent]
     role: Literal["system"] | Literal["developer"] | Literal["user"] | Literal["assistant"]
     type: Literal["message"] = "message"
 
@@ -254,10 +255,10 @@ class OpenAIResponseOutputMessageFileSearchToolCall(BaseModel):
     """
 
     id: str
-    queries: list[str]
+    queries: Sequence[str]
     status: str
     type: Literal["file_search_call"] = "file_search_call"
-    results: list[OpenAIResponseOutputMessageFileSearchToolCallResults] | None = None
+    results: Sequence[OpenAIResponseOutputMessageFileSearchToolCallResults] | None = None
 
 
 @json_schema_type
@@ -597,7 +598,7 @@ class OpenAIResponseObject(BaseModel):
     id: str
     model: str
     object: Literal["response"] = "response"
-    output: list[OpenAIResponseOutput]
+    output: Sequence[OpenAIResponseOutput]
     parallel_tool_calls: bool = False
     previous_response_id: str | None = None
     prompt: OpenAIResponsePrompt | None = None
@@ -607,7 +608,7 @@
     # before the field was added. New responses will have this set always.
     text: OpenAIResponseText = OpenAIResponseText(format=OpenAIResponseTextFormat(type="text"))
     top_p: float | None = None
-    tools: list[OpenAIResponseTool] | None = None
+    tools: Sequence[OpenAIResponseTool] | None = None
     truncation: str | None = None
     usage: OpenAIResponseUsage | None = None
     instructions: str | None = None
@@ -1315,7 +1316,7 @@ class ListOpenAIResponseInputItem(BaseModel):
     :param object: Object type identifier, always "list"
     """
 
-    data: list[OpenAIResponseInput]
+    data: Sequence[OpenAIResponseInput]
     object: Literal["list"] = "list"
 
 
@@ -1326,7 +1327,7 @@ class OpenAIResponseObjectWithInput(OpenAIResponseObject):
     :param input: List of input items that led to this response
     """
 
-    input: list[OpenAIResponseInput]
+    input: Sequence[OpenAIResponseInput]
 
     def to_response_object(self) -> OpenAIResponseObject:
         """Convert to OpenAIResponseObject by excluding input field."""
@@ -1344,7 +1345,7 @@ class ListOpenAIResponseObject(BaseModel):
     :param object: Object type identifier, always "list"
     """
 
-    data: list[OpenAIResponseObjectWithInput]
+    data: Sequence[OpenAIResponseObjectWithInput]
     has_more: bool
     first_id: str
     last_id: str
diff --git a/src/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py b/src/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py
index 524ca1b0e..44749aeb2 100644
--- a/src/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py
+++ b/src/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py
@@ -289,16 +289,19 @@ class OpenAIResponsesImpl:
         failed_response = None
 
         async for stream_chunk in stream_gen:
-            if stream_chunk.type in {"response.completed", "response.incomplete"}:
-                if final_response is not None:
-                    raise ValueError(
-                        "The response stream produced multiple terminal responses! "
-                        f"Earlier response from {final_event_type}"
-                    )
-                final_response = stream_chunk.response
-                final_event_type = stream_chunk.type
-            elif stream_chunk.type == "response.failed":
-                failed_response = stream_chunk.response
+            match stream_chunk.type:
+                case "response.completed" | "response.incomplete":
+                    if final_response is not None:
+                        raise ValueError(
+                            "The response stream produced multiple terminal responses! "
+                            f"Earlier response from {final_event_type}"
+                        )
+                    final_response = stream_chunk.response
+                    final_event_type = stream_chunk.type
+                case "response.failed":
+                    failed_response = stream_chunk.response
+                case _:
+                    pass  # Other event types don't have .response
 
         if failed_response is not None:
             error_message = (
@@ -370,14 +373,16 @@
         output_items = []
 
         async for stream_chunk in orchestrator.create_response():
-            if stream_chunk.type in {"response.completed", "response.incomplete"}:
-                final_response = stream_chunk.response
-            elif stream_chunk.type == "response.failed":
-                failed_response = stream_chunk.response
-
-            if stream_chunk.type == "response.output_item.done":
-                item = stream_chunk.item
-                output_items.append(item)
+            match stream_chunk.type:
+                case "response.completed" | "response.incomplete":
+                    final_response = stream_chunk.response
+                case "response.failed":
+                    failed_response = stream_chunk.response
+                case "response.output_item.done":
+                    item = stream_chunk.item
+                    output_items.append(item)
+                case _:
+                    pass  # Other event types
 
             # Store and sync before yielding terminal events
             # This ensures the storage/syncing happens even if the consumer breaks after receiving the event
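
The `list` → `Sequence` change in the API models is motivated by variance: `list` is invariant, so a `list` of one concrete member type does not satisfy a `list` of the union under strict type checking, whereas `Sequence` is covariant and does, while Pydantic still validates the items at runtime. Below is a minimal sketch of that effect under assumed conditions; the `TextContent`, `ImageContent`, and `Message` models are hypothetical stand-ins, not types from this patch.

```python
from collections.abc import Sequence

from pydantic import BaseModel


class TextContent(BaseModel):
    text: str


class ImageContent(BaseModel):
    url: str


# Hypothetical message model mirroring the annotation style used in the patch.
class Message(BaseModel):
    # With `list[TextContent | ImageContent]`, passing a `list[TextContent]`
    # is flagged by strict type checkers because list is invariant;
    # `Sequence[...]` is covariant, so the same call type-checks cleanly.
    content: Sequence[TextContent | ImageContent]


texts: list[TextContent] = [TextContent(text="hello")]
msg = Message(content=texts)  # accepted by the type checker, validated by Pydantic
print(msg.content)
```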