Responses API: Finish wiring up function tool calls

This finishes the plumbing for function tool call and adds a basic
verification test (that passes for me locally against Llama 4 Scout in
vllm).

Signed-off-by: Ben Browning <bbrownin@redhat.com>
This commit is contained in:
Ben Browning 2025-04-30 17:01:00 -04:00
parent 1990df2c50
commit 924213a689
6 changed files with 148 additions and 6 deletions

View file

@ -77,8 +77,20 @@ class OpenAIResponseOutputMessageWebSearchToolCall(BaseModel):
type: Literal["web_search_call"] = "web_search_call"
@json_schema_type
class OpenAIResponseOutputMessageFunctionToolCall(BaseModel):
arguments: str
call_id: str
name: str
type: Literal["function_call"] = "function_call"
id: str
status: str
OpenAIResponseOutput = Annotated[
OpenAIResponseMessage | OpenAIResponseOutputMessageWebSearchToolCall,
OpenAIResponseMessage
| OpenAIResponseOutputMessageWebSearchToolCall
| OpenAIResponseOutputMessageFunctionToolCall,
Field(discriminator="type"),
]
register_schema(OpenAIResponseOutput, name="OpenAIResponseOutput")

View file

@ -17,6 +17,7 @@ from llama_stack.apis.agents.openai_responses import (
OpenAIResponseInputMessageContentImage,
OpenAIResponseInputMessageContentText,
OpenAIResponseInputTool,
OpenAIResponseInputToolFunction,
OpenAIResponseMessage,
OpenAIResponseObject,
OpenAIResponseObjectStream,
@ -24,6 +25,7 @@ from llama_stack.apis.agents.openai_responses import (
OpenAIResponseObjectStreamResponseCreated,
OpenAIResponseOutput,
OpenAIResponseOutputMessageContentOutputText,
OpenAIResponseOutputMessageFunctionToolCall,
OpenAIResponseOutputMessageWebSearchToolCall,
OpenAIResponsePreviousResponseWithInputItems,
)
@ -221,10 +223,28 @@ class OpenAIResponsesImpl:
chat_response = OpenAIChatCompletion(**chat_response.model_dump())
output_messages: list[OpenAIResponseOutput] = []
if chat_response.choices[0].message.tool_calls:
output_messages.extend(
await self._execute_tool_and_return_final_output(model, stream, chat_response, messages, temperature)
)
# TODO: should we check more than choices[0] here?
if chat_response.choices[0].message.tool_calls and tools:
# TODO: Should we support a mix of custom and builtin tools?
# in other words, should we check for more than tools[0]?
if isinstance(tools[0], OpenAIResponseInputToolFunction):
choice = chat_response.choices[0]
for tool_call in choice.message.tool_calls:
output_messages.append(
OpenAIResponseOutputMessageFunctionToolCall(
arguments=tool_call.function.arguments or "",
call_id=tool_call.id,
name=tool_call.function.name or "",
id=f"fc_{uuid.uuid4()}",
status="completed",
)
)
else:
output_messages.extend(
await self._execute_tool_and_return_final_output(
model, stream, chat_response, messages, temperature
)
)
else:
output_messages.extend(await _openai_choices_to_output_messages(chat_response.choices))
response = OpenAIResponseObject(