diff --git a/src/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py b/src/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py
index 44749aeb2..f6769e838 100644
--- a/src/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py
+++ b/src/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py
@@ -91,7 +91,8 @@ class OpenAIResponsesImpl:
         input: str | list[OpenAIResponseInput],
         previous_response: _OpenAIResponseObjectWithInputAndMessages,
     ):
-        new_input_items = previous_response.input.copy()
+        # Convert Sequence to list for mutation
+        new_input_items = list(previous_response.input)
         new_input_items.extend(previous_response.output)
 
         if isinstance(input, str):
@@ -107,7 +108,7 @@ class OpenAIResponsesImpl:
         tools: list[OpenAIResponseInputTool] | None,
         previous_response_id: str | None,
         conversation: str | None,
-    ) -> tuple[str | list[OpenAIResponseInput], list[OpenAIMessageParam]]:
+    ) -> tuple[str | list[OpenAIResponseInput], list[OpenAIMessageParam], ToolContext]:
         """Process input with optional previous response context.
 
         Returns:
@@ -208,6 +209,9 @@ class OpenAIResponsesImpl:
         messages: list[OpenAIMessageParam],
     ) -> None:
         new_input_id = f"msg_{uuid.uuid4()}"
+        # Type input_items_data as the full OpenAIResponseInput union to avoid list invariance issues
+        input_items_data: list[OpenAIResponseInput] = []
+
         if isinstance(input, str):
             # synthesize a message from the input string
             input_content = OpenAIResponseInputMessageContentText(text=input)
@@ -219,7 +223,6 @@ class OpenAIResponsesImpl:
             input_items_data = [input_content_item]
         else:
             # we already have a list of messages
-            input_items_data = []
             for input_item in input:
                 if isinstance(input_item, OpenAIResponseMessage):
                     # These may or may not already have an id, so dump to dict, check for id, and add if missing
@@ -329,6 +332,11 @@ class OpenAIResponsesImpl:
         max_infer_iters: int | None = 10,
         guardrail_ids: list[str] | None = None,
     ) -> AsyncIterator[OpenAIResponseObjectStream]:
+        # These should never be None when called from create_openai_response (which sets defaults)
+        # but we assert here to help mypy understand the types
+        assert text is not None, "text must not be None"
+        assert max_infer_iters is not None, "max_infer_iters must not be None"
+
         # Input preprocessing
         all_input, messages, tool_context = await self._process_input_with_previous_response(
             input, tools, previous_response_id, conversation
@@ -371,7 +379,8 @@ class OpenAIResponsesImpl:
         final_response = None
         failed_response = None
 
-        output_items = []
+        # Type as ConversationItem to avoid list invariance issues
+        output_items: list[ConversationItem] = []
         async for stream_chunk in orchestrator.create_response():
             match stream_chunk.type:
                 case "response.completed" | "response.incomplete":
@@ -415,7 +424,8 @@ class OpenAIResponsesImpl:
         self, conversation_id: str, input: str | list[OpenAIResponseInput] | None, output_items: list[ConversationItem]
     ) -> None:
         """Sync content and response messages to the conversation."""
-        conversation_items = []
+        # Type as ConversationItem union to avoid list invariance issues
+        conversation_items: list[ConversationItem] = []
 
         if isinstance(input, str):
             conversation_items.append(
diff --git a/src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py b/src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py
index 2cbfead40..bba5e60af 100644
--- a/src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py
+++ b/src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py
@@ -111,7 +111,7 @@ class StreamingResponseOrchestrator:
         text: OpenAIResponseText,
         max_infer_iters: int,
         tool_executor,  # Will be the tool execution logic from the main class
-        instructions: str,
+        instructions: str | None,
         safety_api,
         guardrail_ids: list[str] | None = None,
         prompt: OpenAIResponsePrompt | None = None,
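Note on the recurring "list invariance" comments: mypy treats list as invariant in its element type, so a value inferred as list[SomeMember] is rejected where list[UnionType] is expected, even though each element would be accepted individually. The sketch below reproduces the three patterns the patch relies on: annotating an empty list at its declaration, copying a Sequence with list(...), and narrowing an Optional parameter with assert. It is a minimal illustration only; TextItem, ToolItem, Item, sync_items, and run_with_defaults are hypothetical stand-ins, not llama-stack APIs.

    # Minimal sketch of the mypy patterns used in the patch; all names are hypothetical.
    from collections.abc import Sequence


    class TextItem: ...
    class ToolItem: ...

    # Analogous to the ConversationItem / OpenAIResponseInput unions.
    Item = TextItem | ToolItem


    def sync_items(items: list[Item]) -> None: ...


    def invariance_demo() -> None:
        inferred = [TextItem()]               # mypy infers list[TextItem]
        # sync_items(inferred)                # error: list[TextItem] is not list[Item]
        annotated: list[Item] = [TextItem()]  # annotate up front, as the patch does
        sync_items(annotated)                 # ok


    def copy_for_mutation(prev: Sequence[Item]) -> list[Item]:
        # Sequence has no .copy(); list(...) both copies and yields a mutable list.
        return list(prev)


    def run_with_defaults(max_infer_iters: int | None = 10) -> int:
        # assert narrows int | None to int, mirroring the asserts in the patch.
        assert max_infer_iters is not None, "max_infer_iters must not be None"
        return max_infer_iters

Annotating the empty list where it is declared keeps the widening visible on one line and lets mypy check every subsequent append; the asserts do the same job for parameters whose None defaults are filled in by the caller.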