chore: Address review feedback with minor code cleanups

The bulk of the change here is making the conversion from Responses API
inputs to Chat Completion API messages, and from Chat Completion API
choices back to Responses API outputs, clearer through code comments,
method renaming, and slight refactoring.

There are also some other minor changes, like moving a Pydantic model
from api/ to the implementation since it's not actually exposed via the
API, as well as making some if/else usage clearer.

Signed-off-by: Ben Browning <bbrownin@redhat.com>
Author: Ben Browning <bbrownin@redhat.com>
Date:   2025-05-08 06:47:53 -04:00
Parent: 9166baa716
Commit: 65c56d0ee8

4 changed files with 93 additions and 84 deletions
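For orientation, the renamed helpers in the diff below compose roughly as follows. This is a minimal sketch, not code from the commit; it assumes llama-stack is importable and that `inference` is some implementation of the Inference API exposing an `openai_chat_completion` method:

    from llama_stack.providers.inline.agents.meta_reference.openai_responses import (
        _convert_chat_choice_to_response_message,
        _convert_response_input_to_chat_messages,
    )

    async def respond(inference, model: str, input):
        # Responses API input -> Chat Completion messages
        messages = await _convert_response_input_to_chat_messages(input)
        # Run the actual chat completion against the inference provider
        chat_response = await inference.openai_chat_completion(model=model, messages=messages)
        # Chat Completion choices -> Responses API output messages (one per choice)
        return [await _convert_chat_choice_to_response_message(c) for c in chat_response.choices]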

@@ -181,8 +181,3 @@ register_schema(OpenAIResponseInputTool, name="OpenAIResponseInputTool")
 class OpenAIResponseInputItemList(BaseModel):
     data: list[OpenAIResponseInput]
     object: Literal["list"] = "list"
-
-
-class OpenAIResponsePreviousResponseWithInputItems(BaseModel):
-    input_items: OpenAIResponseInputItemList
-    response: OpenAIResponseObject

@@ -10,10 +10,12 @@ from collections.abc import AsyncIterator
 from typing import cast

 from openai.types.chat import ChatCompletionToolParam
+from pydantic import BaseModel

 from llama_stack.apis.agents.openai_responses import (
     OpenAIResponseInput,
     OpenAIResponseInputItemList,
+    OpenAIResponseInputMessageContent,
     OpenAIResponseInputMessageContentImage,
     OpenAIResponseInputMessageContentText,
     OpenAIResponseInputTool,
@@ -24,10 +26,10 @@ from llama_stack.apis.agents.openai_responses import (
     OpenAIResponseObjectStreamResponseCompleted,
     OpenAIResponseObjectStreamResponseCreated,
     OpenAIResponseOutput,
+    OpenAIResponseOutputMessageContent,
     OpenAIResponseOutputMessageContentOutputText,
     OpenAIResponseOutputMessageFunctionToolCall,
     OpenAIResponseOutputMessageWebSearchToolCall,
-    OpenAIResponsePreviousResponseWithInputItems,
 )
 from llama_stack.apis.inference.inference import (
     Inference,
@@ -56,34 +58,46 @@ logger = get_logger(name=__name__, category="openai_responses")
 OPENAI_RESPONSES_PREFIX = "openai_responses:"


+async def _convert_response_content_to_chat_content(
+    content: str | list[OpenAIResponseInputMessageContent] | list[OpenAIResponseOutputMessageContent],
+) -> str | list[OpenAIChatCompletionContentPartParam]:
+    """
+    Convert the content parts from an OpenAI Response API request into OpenAI Chat Completion content parts.
+
+    The content schemas of each API look similar, but are not exactly the same.
+    """
+    if isinstance(content, str):
+        return content
+
+    converted_parts = []
+    for content_part in content:
+        if isinstance(content_part, OpenAIResponseInputMessageContentText):
+            converted_parts.append(OpenAIChatCompletionContentPartTextParam(text=content_part.text))
+        elif isinstance(content_part, OpenAIResponseOutputMessageContentOutputText):
+            converted_parts.append(OpenAIChatCompletionContentPartTextParam(text=content_part.text))
+        elif isinstance(content_part, OpenAIResponseInputMessageContentImage):
+            if content_part.image_url:
+                image_url = OpenAIImageURL(url=content_part.image_url, detail=content_part.detail)
+                converted_parts.append(OpenAIChatCompletionContentPartImageParam(image_url=image_url))
+        elif isinstance(content_part, str):
+            converted_parts.append(OpenAIChatCompletionContentPartTextParam(text=content_part))
+        else:
+            raise ValueError(
+                f"Llama Stack OpenAI Responses does not yet support content type '{type(content_part)}' in this context"
+            )
+    return converted_parts
+
+
 async def _convert_response_input_to_chat_messages(
     input: str | list[OpenAIResponseInput],
 ) -> list[OpenAIMessageParam]:
+    """
+    Convert the input from an OpenAI Response API request into OpenAI Chat Completion messages.
+    """
     messages: list[OpenAIMessageParam] = []
-    content: str | list[OpenAIChatCompletionContentPartParam] = ""
     if isinstance(input, list):
         for input_message in input:
-            if isinstance(input_message.content, list):
-                content = []
-                for input_message_content in input_message.content:
-                    if isinstance(input_message_content, OpenAIResponseInputMessageContentText):
-                        content.append(OpenAIChatCompletionContentPartTextParam(text=input_message_content.text))
-                    elif isinstance(input_message_content, OpenAIResponseOutputMessageContentOutputText):
-                        content.append(OpenAIChatCompletionContentPartTextParam(text=input_message_content.text))
-                    elif isinstance(input_message_content, OpenAIResponseInputMessageContentImage):
-                        if input_message_content.image_url:
-                            image_url = OpenAIImageURL(
-                                url=input_message_content.image_url, detail=input_message_content.detail
-                            )
-                            content.append(OpenAIChatCompletionContentPartImageParam(image_url=image_url))
-                    elif isinstance(input_message_content, str):
-                        content.append(OpenAIChatCompletionContentPartTextParam(text=input_message_content))
-                    else:
-                        raise ValueError(
-                            f"Llama Stack OpenAI Responses does not yet support content type '{type(input_message_content)}' in this context"
-                        )
-            else:
-                content = input_message.content
+            content = await _convert_response_content_to_chat_content(input_message.content)
             message_type = await _get_message_type_by_role(input_message.role)
             if message_type is None:
                 raise ValueError(
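As a quick illustration of the new content helper above: a plain string passes through unchanged, while typed content parts are converted one by one. A sketch, assuming llama-stack is installed; the expected types in the comments follow from the code in this hunk:

    import asyncio

    from llama_stack.apis.agents.openai_responses import OpenAIResponseInputMessageContentText
    from llama_stack.providers.inline.agents.meta_reference.openai_responses import (
        _convert_response_content_to_chat_content,
    )

    async def main():
        # Strings pass through untouched.
        assert await _convert_response_content_to_chat_content("hello") == "hello"
        # A Responses text part becomes a Chat Completion text part.
        parts = await _convert_response_content_to_chat_content(
            [OpenAIResponseInputMessageContentText(text="hello")]
        )
        print(type(parts[0]).__name__)  # OpenAIChatCompletionContentPartTextParam

    asyncio.run(main())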
@@ -95,24 +109,26 @@ async def _convert_response_input_to_chat_messages(
     return messages


-async def _openai_choices_to_output_messages(choices: list[OpenAIChoice]) -> list[OpenAIResponseMessage]:
-    output_messages = []
-    for choice in choices:
-        output_content = ""
-        if isinstance(choice.message.content, str):
-            output_content = choice.message.content
-        elif isinstance(choice.message.content, OpenAIChatCompletionContentPartTextParam):
-            output_content = choice.message.content.text
-        # TODO: handle image content
-        output_messages.append(
-            OpenAIResponseMessage(
-                id=f"msg_{uuid.uuid4()}",
-                content=[OpenAIResponseOutputMessageContentOutputText(text=output_content)],
-                status="completed",
-                role="assistant",
-            )
-        )
-    return output_messages
+async def _convert_chat_choice_to_response_message(choice: OpenAIChoice) -> OpenAIResponseMessage:
+    """
+    Convert an OpenAI Chat Completion choice into an OpenAI Response output message.
+    """
+    output_content = ""
+    if isinstance(choice.message.content, str):
+        output_content = choice.message.content
+    elif isinstance(choice.message.content, OpenAIChatCompletionContentPartTextParam):
+        output_content = choice.message.content.text
+    else:
+        raise ValueError(
+            f"Llama Stack OpenAI Responses does not yet support output content type: {type(choice.message.content)}"
+        )
+
+    return OpenAIResponseMessage(
+        id=f"msg_{uuid.uuid4()}",
+        content=[OpenAIResponseOutputMessageContentOutputText(text=output_content)],
+        status="completed",
+        role="assistant",
+    )


 async def _get_message_type_by_role(role: str):
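The renamed single-choice converter can likewise be exercised directly. A sketch: the `OpenAIChoice` construction below assumes the field names used elsewhere in this diff (`choice.message`) plus the standard `finish_reason`/`index` fields, so treat the exact constructor arguments as illustrative:

    import asyncio

    from llama_stack.apis.inference.inference import OpenAIAssistantMessageParam, OpenAIChoice
    from llama_stack.providers.inline.agents.meta_reference.openai_responses import (
        _convert_chat_choice_to_response_message,
    )

    async def main():
        choice = OpenAIChoice(
            index=0,
            finish_reason="stop",
            message=OpenAIAssistantMessageParam(content="Paris"),
        )
        message = await _convert_chat_choice_to_response_message(choice)
        print(message.role, message.status)   # assistant completed
        print(message.content[0].text)        # Paris

    asyncio.run(main())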
@@ -125,6 +141,11 @@ async def _get_message_type_by_role(role: str):
     return role_to_type.get(role)


+class OpenAIResponsePreviousResponseWithInputItems(BaseModel):
+    input_items: OpenAIResponseInputItemList
+    response: OpenAIResponseObject
+
+
 class OpenAIResponsesImpl:
     def __init__(
         self,
@@ -159,9 +180,10 @@ class OpenAIResponsesImpl:
         # new input items from the current request
         if isinstance(input, str):
-            # Normalize input to a list of OpenAIResponseInputMessage objects
-            input = [OpenAIResponseMessage(content=input, role="user")]
-        new_input_items.extend(input)
+            new_input_items.append(OpenAIResponseMessage(content=input, role="user"))
+        else:
+            new_input_items.extend(input)
+
         input = new_input_items

         return input
@@ -231,30 +253,26 @@
         chat_response = OpenAIChatCompletion(**chat_response.model_dump())

         output_messages: list[OpenAIResponseOutput] = []
-        # TODO: should we check more than choices[0] here?
-        if chat_response.choices[0].message.tool_calls and tools:
-            # TODO: Should we support a mix of custom and builtin tools?
-            # in other words, should we check for more than tools[0]?
-            if isinstance(tools[0], OpenAIResponseInputToolFunction):
-                choice = chat_response.choices[0]
-                for tool_call in choice.message.tool_calls:
-                    output_messages.append(
-                        OpenAIResponseOutputMessageFunctionToolCall(
-                            arguments=tool_call.function.arguments or "",
-                            call_id=tool_call.id,
-                            name=tool_call.function.name or "",
-                            id=f"fc_{uuid.uuid4()}",
-                            status="completed",
+        for choice in chat_response.choices:
+            if choice.message.tool_calls and tools:
+                # Assume if the first tool is a function, all tools are functions
+                if isinstance(tools[0], OpenAIResponseInputToolFunction):
+                    for tool_call in choice.message.tool_calls:
+                        output_messages.append(
+                            OpenAIResponseOutputMessageFunctionToolCall(
+                                arguments=tool_call.function.arguments or "",
+                                call_id=tool_call.id,
+                                name=tool_call.function.name or "",
+                                id=f"fc_{uuid.uuid4()}",
+                                status="completed",
+                            )
                         )
-                    )
-            else:
-                output_messages.extend(
-                    await self._execute_tool_and_return_final_output(
-                        model, stream, chat_response, messages, temperature
-                    )
-                )
-        else:
-            output_messages.extend(await _openai_choices_to_output_messages(chat_response.choices))
+                else:
+                    output_messages.extend(
+                        await self._execute_tool_and_return_final_output(model, stream, choice, messages, temperature)
+                    )
+            else:
+                output_messages.append(await _convert_chat_choice_to_response_message(choice))
         response = OpenAIResponseObject(
             created_at=chat_response.created,
             id=f"resp-{uuid.uuid4()}",
@@ -347,12 +365,11 @@
         self,
         model_id: str,
         stream: bool,
-        chat_response: OpenAIChatCompletion,
+        choice: OpenAIChoice,
         messages: list[OpenAIMessageParam],
         temperature: float,
     ) -> list[OpenAIResponseOutput]:
         output_messages: list[OpenAIResponseOutput] = []
-        choice = chat_response.choices[0]

         # If the choice is not an assistant message, we don't need to execute any tools
         if not isinstance(choice.message, OpenAIAssistantMessageParam):
@@ -362,6 +379,9 @@
         if not choice.message.tool_calls:
             return output_messages

+        # Copy the messages list to avoid mutating the original list
+        messages = messages.copy()
+
         # Add the assistant message with tool_calls response to the messages list
         messages.append(choice.message)
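The `messages.copy()` added above keeps the helper from mutating the caller's list, since Python passes lists by reference. Plain Python illustrates the difference:

    def extend_without_copy(msgs: list[str]) -> list[str]:
        msgs.append("assistant reply")  # mutates the caller's list
        return msgs

    def extend_with_copy(msgs: list[str]) -> list[str]:
        msgs = msgs.copy()  # shallow copy; the caller's list is untouched
        msgs.append("assistant reply")
        return msgs

    original = ["user prompt"]
    extend_without_copy(original)
    print(original)  # ['user prompt', 'assistant reply']

    original = ["user prompt"]
    extend_with_copy(original)
    print(original)  # ['user prompt']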
@@ -407,7 +427,9 @@
             )
             # type cast to appease mypy
             tool_results_chat_response = cast(OpenAIChatCompletion, tool_results_chat_response)
-            tool_final_outputs = await _openai_choices_to_output_messages(tool_results_chat_response.choices)
+            tool_final_outputs = [
+                await _convert_chat_choice_to_response_message(choice) for choice in tool_results_chat_response.choices
+            ]
             # TODO: Wire in annotations with URLs, titles, etc to these output messages
             output_messages.extend(tool_final_outputs)
             return output_messages

@@ -12,19 +12,11 @@ from llama_stack.apis.inference.inference import (
     OpenAIChatCompletion,
 )

+FIXTURES_DIR = os.path.dirname(os.path.abspath(__file__))
+

 def load_chat_completion_fixture(filename: str) -> OpenAIChatCompletion:
-    """
-    Load a YAML fixture file and convert it to an OpenAIChatCompletion object.
-
-    Args:
-        filename: Name of the YAML file (without path)
-
-    Returns:
-        OpenAIChatCompletion object
-    """
-    fixtures_dir = os.path.dirname(os.path.abspath(__file__))
-    fixture_path = os.path.join(fixtures_dir, filename)
+    fixture_path = os.path.join(FIXTURES_DIR, filename)
     with open(fixture_path) as f:
         data = yaml.safe_load(f)
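Call sites are unaffected by hoisting the directory lookup into `FIXTURES_DIR`. A hypothetical usage in a test (the fixture filename below is illustrative, not necessarily one that exists in the repo):

    completion = load_chat_completion_fixture("simple_chat_completion.yaml")
    assert completion.choices  # an OpenAIChatCompletion parsed from the YAML fixture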

@@ -16,7 +16,6 @@ from llama_stack.apis.agents.openai_responses import (
     OpenAIResponseObject,
     OpenAIResponseOutputMessageContentOutputText,
     OpenAIResponseOutputMessageWebSearchToolCall,
-    OpenAIResponsePreviousResponseWithInputItems,
 )
 from llama_stack.apis.inference.inference import (
     OpenAIAssistantMessageParam,
@@ -26,6 +25,7 @@ from llama_stack.apis.inference.inference import (
 )
 from llama_stack.apis.tools.tools import Tool, ToolGroups, ToolInvocationResult, ToolParameter, ToolRuntime
 from llama_stack.providers.inline.agents.meta_reference.openai_responses import (
+    OpenAIResponsePreviousResponseWithInputItems,
     OpenAIResponsesImpl,
 )
 from llama_stack.providers.utils.kvstore import KVStore