From 65c56d0ee8b72cc89613a68018299ad121aa7c19 Mon Sep 17 00:00:00 2001
From: Ben Browning
Date: Thu, 8 May 2025 06:47:53 -0400
Subject: [PATCH] chore: Address review feedback with minor code cleanups

The bulk of the change here clarifies the naming and contents of the two
conversions: from Responses API inputs to Chat Completion API messages,
and from Chat Completion API choices to Responses API outputs. It does
this with some code comments, method renames, and slight refactoring.

There are also a few other minor changes, like moving a pydantic model
from api/ into the implementation, since it's not actually exposed via
the API, and making some if/else usage clearer.

Signed-off-by: Ben Browning
---
 llama_stack/apis/agents/openai_responses.py   |   5 -
 .../agents/meta_reference/openai_responses.py | 156 ++++++++++--------
 .../meta_reference/fixtures/__init__.py       |  14 +-
 .../meta_reference/test_openai_responses.py   |   2 +-
 4 files changed, 93 insertions(+), 84 deletions(-)

diff --git a/llama_stack/apis/agents/openai_responses.py b/llama_stack/apis/agents/openai_responses.py
index 2eb56575b..511cf4f86 100644
--- a/llama_stack/apis/agents/openai_responses.py
+++ b/llama_stack/apis/agents/openai_responses.py
@@ -181,8 +181,3 @@ register_schema(OpenAIResponseInputTool, name="OpenAIResponseInputTool")
 class OpenAIResponseInputItemList(BaseModel):
     data: list[OpenAIResponseInput]
     object: Literal["list"] = "list"
-
-
-class OpenAIResponsePreviousResponseWithInputItems(BaseModel):
-    input_items: OpenAIResponseInputItemList
-    response: OpenAIResponseObject
diff --git a/llama_stack/providers/inline/agents/meta_reference/openai_responses.py b/llama_stack/providers/inline/agents/meta_reference/openai_responses.py
index 681b47ca4..4d2f40226 100644
--- a/llama_stack/providers/inline/agents/meta_reference/openai_responses.py
+++ b/llama_stack/providers/inline/agents/meta_reference/openai_responses.py
@@ -10,10 +10,12 @@ from collections.abc import AsyncIterator
 from typing import cast
 
 from openai.types.chat import ChatCompletionToolParam
+from pydantic import BaseModel
 
 from llama_stack.apis.agents.openai_responses import (
     OpenAIResponseInput,
     OpenAIResponseInputItemList,
+    OpenAIResponseInputMessageContent,
     OpenAIResponseInputMessageContentImage,
     OpenAIResponseInputMessageContentText,
     OpenAIResponseInputTool,
@@ -24,10 +26,10 @@ from llama_stack.apis.agents.openai_responses import (
     OpenAIResponseObjectStreamResponseCompleted,
     OpenAIResponseObjectStreamResponseCreated,
     OpenAIResponseOutput,
+    OpenAIResponseOutputMessageContent,
     OpenAIResponseOutputMessageContentOutputText,
     OpenAIResponseOutputMessageFunctionToolCall,
     OpenAIResponseOutputMessageWebSearchToolCall,
-    OpenAIResponsePreviousResponseWithInputItems,
 )
 from llama_stack.apis.inference.inference import (
     Inference,
@@ -56,34 +58,46 @@ logger = get_logger(name=__name__, category="openai_responses")
 OPENAI_RESPONSES_PREFIX = "openai_responses:"
 
 
+async def _convert_response_content_to_chat_content(
+    content: str | list[OpenAIResponseInputMessageContent] | list[OpenAIResponseOutputMessageContent],
+) -> str | list[OpenAIChatCompletionContentPartParam]:
+    """
+    Convert the content parts from an OpenAI Response API request into OpenAI Chat Completion content parts.
+
+    The content schemas of each API look similar, but are not exactly the same.
+ """ + if isinstance(content, str): + return content + + converted_parts = [] + for content_part in content: + if isinstance(content_part, OpenAIResponseInputMessageContentText): + converted_parts.append(OpenAIChatCompletionContentPartTextParam(text=content_part.text)) + elif isinstance(content_part, OpenAIResponseOutputMessageContentOutputText): + converted_parts.append(OpenAIChatCompletionContentPartTextParam(text=content_part.text)) + elif isinstance(content_part, OpenAIResponseInputMessageContentImage): + if content_part.image_url: + image_url = OpenAIImageURL(url=content_part.image_url, detail=content_part.detail) + converted_parts.append(OpenAIChatCompletionContentPartImageParam(image_url=image_url)) + elif isinstance(content_part, str): + converted_parts.append(OpenAIChatCompletionContentPartTextParam(text=content_part)) + else: + raise ValueError( + f"Llama Stack OpenAI Responses does not yet support content type '{type(content_part)}' in this context" + ) + return converted_parts + + async def _convert_response_input_to_chat_messages( input: str | list[OpenAIResponseInput], ) -> list[OpenAIMessageParam]: + """ + Convert the input from an OpenAI Response API request into OpenAI Chat Completion messages. + """ messages: list[OpenAIMessageParam] = [] - content: str | list[OpenAIChatCompletionContentPartParam] = "" if isinstance(input, list): for input_message in input: - if isinstance(input_message.content, list): - content = [] - for input_message_content in input_message.content: - if isinstance(input_message_content, OpenAIResponseInputMessageContentText): - content.append(OpenAIChatCompletionContentPartTextParam(text=input_message_content.text)) - elif isinstance(input_message_content, OpenAIResponseOutputMessageContentOutputText): - content.append(OpenAIChatCompletionContentPartTextParam(text=input_message_content.text)) - elif isinstance(input_message_content, OpenAIResponseInputMessageContentImage): - if input_message_content.image_url: - image_url = OpenAIImageURL( - url=input_message_content.image_url, detail=input_message_content.detail - ) - content.append(OpenAIChatCompletionContentPartImageParam(image_url=image_url)) - elif isinstance(input_message_content, str): - content.append(OpenAIChatCompletionContentPartTextParam(text=input_message_content)) - else: - raise ValueError( - f"Llama Stack OpenAI Responses does not yet support content type '{type(input_message_content)}' in this context" - ) - else: - content = input_message.content + content = await _convert_response_content_to_chat_content(input_message.content) message_type = await _get_message_type_by_role(input_message.role) if message_type is None: raise ValueError( @@ -95,24 +109,26 @@ async def _convert_response_input_to_chat_messages( return messages -async def _openai_choices_to_output_messages(choices: list[OpenAIChoice]) -> list[OpenAIResponseMessage]: - output_messages = [] - for choice in choices: - output_content = "" - if isinstance(choice.message.content, str): - output_content = choice.message.content - elif isinstance(choice.message.content, OpenAIChatCompletionContentPartTextParam): - output_content = choice.message.content.text - # TODO: handle image content - output_messages.append( - OpenAIResponseMessage( - id=f"msg_{uuid.uuid4()}", - content=[OpenAIResponseOutputMessageContentOutputText(text=output_content)], - status="completed", - role="assistant", - ) +async def _convert_chat_choice_to_response_message(choice: OpenAIChoice) -> OpenAIResponseMessage: + """ + Convert an OpenAI Chat Completion 
+    """
+    output_content = ""
+    if isinstance(choice.message.content, str):
+        output_content = choice.message.content
+    elif isinstance(choice.message.content, OpenAIChatCompletionContentPartTextParam):
+        output_content = choice.message.content.text
+    else:
+        raise ValueError(
+            f"Llama Stack OpenAI Responses does not yet support output content type: {type(choice.message.content)}"
         )
-    return output_messages
+
+    return OpenAIResponseMessage(
+        id=f"msg_{uuid.uuid4()}",
+        content=[OpenAIResponseOutputMessageContentOutputText(text=output_content)],
+        status="completed",
+        role="assistant",
+    )
 
 
 async def _get_message_type_by_role(role: str):
@@ -125,6 +141,11 @@ async def _get_message_type_by_role(role: str):
     return role_to_type.get(role)
 
 
+class OpenAIResponsePreviousResponseWithInputItems(BaseModel):
+    input_items: OpenAIResponseInputItemList
+    response: OpenAIResponseObject
+
+
 class OpenAIResponsesImpl:
     def __init__(
         self,
@@ -159,9 +180,10 @@ class OpenAIResponsesImpl:
 
             # new input items from the current request
             if isinstance(input, str):
-                # Normalize input to a list of OpenAIResponseInputMessage objects
-                input = [OpenAIResponseMessage(content=input, role="user")]
-            new_input_items.extend(input)
+                new_input_items.append(OpenAIResponseMessage(content=input, role="user"))
+            else:
+                new_input_items.extend(input)
 
+            input = new_input_items
 
         return input
@@ -231,30 +253,26 @@
         chat_response = OpenAIChatCompletion(**chat_response.model_dump())
 
         output_messages: list[OpenAIResponseOutput] = []
-        # TODO: should we check more than choices[0] here?
-        if chat_response.choices[0].message.tool_calls and tools:
-            # TODO: Should we support a mix of custom and builtin tools?
-            # in other words, should we check for more than tools[0]?
-            if isinstance(tools[0], OpenAIResponseInputToolFunction):
-                choice = chat_response.choices[0]
-                for tool_call in choice.message.tool_calls:
-                    output_messages.append(
-                        OpenAIResponseOutputMessageFunctionToolCall(
-                            arguments=tool_call.function.arguments or "",
-                            call_id=tool_call.id,
-                            name=tool_call.function.name or "",
-                            id=f"fc_{uuid.uuid4()}",
-                            status="completed",
+        for choice in chat_response.choices:
+            if choice.message.tool_calls and tools:
+                # Assume if the first tool is a function, all tools are functions
+                if isinstance(tools[0], OpenAIResponseInputToolFunction):
+                    for tool_call in choice.message.tool_calls:
+                        output_messages.append(
+                            OpenAIResponseOutputMessageFunctionToolCall(
+                                arguments=tool_call.function.arguments or "",
+                                call_id=tool_call.id,
+                                name=tool_call.function.name or "",
+                                id=f"fc_{uuid.uuid4()}",
+                                status="completed",
+                            )
                         )
+                else:
+                    output_messages.extend(
+                        await self._execute_tool_and_return_final_output(model, stream, choice, messages, temperature)
                     )
             else:
-                output_messages.extend(
-                    await self._execute_tool_and_return_final_output(
-                        model, stream, chat_response, messages, temperature
-                    )
-                )
-        else:
-            output_messages.extend(await _openai_choices_to_output_messages(chat_response.choices))
+                output_messages.append(await _convert_chat_choice_to_response_message(choice))
         response = OpenAIResponseObject(
             created_at=chat_response.created,
             id=f"resp-{uuid.uuid4()}",
@@ -347,12 +365,11 @@
         self,
         model_id: str,
         stream: bool,
-        chat_response: OpenAIChatCompletion,
+        choice: OpenAIChoice,
         messages: list[OpenAIMessageParam],
         temperature: float,
     ) -> list[OpenAIResponseOutput]:
         output_messages: list[OpenAIResponseOutput] = []
-        choice = chat_response.choices[0]
 
         # If the choice is not an assistant message, we don't need to execute any tools
         if not isinstance(choice.message, OpenAIAssistantMessageParam):
@@ -362,6 +379,9 @@
         if not choice.message.tool_calls:
             return output_messages
 
+        # Copy the messages list to avoid mutating the original list
+        messages = messages.copy()
+
         # Add the assistant message with tool_calls response to the messages list
         messages.append(choice.message)
 
@@ -407,7 +427,9 @@
         )
         # type cast to appease mypy
         tool_results_chat_response = cast(OpenAIChatCompletion, tool_results_chat_response)
-        tool_final_outputs = await _openai_choices_to_output_messages(tool_results_chat_response.choices)
+        tool_final_outputs = [
+            await _convert_chat_choice_to_response_message(choice) for choice in tool_results_chat_response.choices
+        ]
         # TODO: Wire in annotations with URLs, titles, etc to these output messages
         output_messages.extend(tool_final_outputs)
         return output_messages
diff --git a/tests/unit/providers/agents/meta_reference/fixtures/__init__.py b/tests/unit/providers/agents/meta_reference/fixtures/__init__.py
index 06da285eb..e112bb6e5 100644
--- a/tests/unit/providers/agents/meta_reference/fixtures/__init__.py
+++ b/tests/unit/providers/agents/meta_reference/fixtures/__init__.py
@@ -12,19 +12,11 @@ from llama_stack.apis.inference.inference import (
     OpenAIChatCompletion,
 )
 
+FIXTURES_DIR = os.path.dirname(os.path.abspath(__file__))
+
 
 def load_chat_completion_fixture(filename: str) -> OpenAIChatCompletion:
-    """
-    Load a YAML fixture file and convert it to an OpenAIChatCompletion object.
-
-    Args:
-        filename: Name of the YAML file (without path)
-
-    Returns:
-        OpenAIChatCompletion object
-    """
-    fixtures_dir = os.path.dirname(os.path.abspath(__file__))
-    fixture_path = os.path.join(fixtures_dir, filename)
+    fixture_path = os.path.join(FIXTURES_DIR, filename)
     with open(fixture_path) as f:
         data = yaml.safe_load(f)
 
diff --git a/tests/unit/providers/agents/meta_reference/test_openai_responses.py b/tests/unit/providers/agents/meta_reference/test_openai_responses.py
index 6cba7401e..3fe68cb9d 100644
--- a/tests/unit/providers/agents/meta_reference/test_openai_responses.py
+++ b/tests/unit/providers/agents/meta_reference/test_openai_responses.py
@@ -16,7 +16,6 @@ from llama_stack.apis.agents.openai_responses import (
     OpenAIResponseObject,
     OpenAIResponseOutputMessageContentOutputText,
     OpenAIResponseOutputMessageWebSearchToolCall,
-    OpenAIResponsePreviousResponseWithInputItems,
 )
 from llama_stack.apis.inference.inference import (
     OpenAIAssistantMessageParam,
@@ -26,6 +25,7 @@ from llama_stack.apis.inference.inference import (
 )
 from llama_stack.apis.tools.tools import Tool, ToolGroups, ToolInvocationResult, ToolParameter, ToolRuntime
 from llama_stack.providers.inline.agents.meta_reference.openai_responses import (
+    OpenAIResponsePreviousResponseWithInputItems,
     OpenAIResponsesImpl,
 )
 from llama_stack.providers.utils.kvstore import KVStore
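
Note on the behavioral change above: the choices-handling code now converts
every chat completion choice into Responses API output items, not just
choices[0]. Below is a minimal, self-contained sketch of that dispatch,
using hypothetical stand-in types (ToolCall, Choice, to_outputs) rather than
the real OpenAI and Llama Stack models, so it runs on its own; the real code
also delegates tool execution for non-function tools:

    from dataclasses import dataclass, field

    @dataclass
    class ToolCall:
        name: str
        arguments: str

    @dataclass
    class Choice:
        content: str = ""
        tool_calls: list[ToolCall] = field(default_factory=list)

    def to_outputs(choices: list[Choice], tools_are_functions: bool) -> list[str]:
        outputs: list[str] = []
        for choice in choices:  # every choice is considered, not just choices[0]
            if choice.tool_calls and tools_are_functions:
                # one function-call output item per tool call on this choice
                for tc in choice.tool_calls:
                    outputs.append(f"function_call:{tc.name}({tc.arguments})")
            else:
                # plain assistant content becomes a single output message
                outputs.append(f"message:{choice.content}")
        return outputs

    print(to_outputs([Choice(content="hi"), Choice(tool_calls=[ToolCall("f", "{}")])], True))
    # ['message:hi', 'function_call:f({})']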