diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html
index 15342de86..f1bde880b 100644
--- a/docs/_static/llama-stack-spec.html
+++ b/docs/_static/llama-stack-spec.html
@@ -6471,11 +6471,47 @@
{
"$ref": "#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall"
},
+ {
+ "$ref": "#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall"
+ },
+ {
+ "$ref": "#/components/schemas/OpenAIResponseInputFunctionToolCallOutput"
+ },
{
"$ref": "#/components/schemas/OpenAIResponseMessage"
}
]
},
+ "OpenAIResponseInputFunctionToolCallOutput": {
+ "type": "object",
+ "properties": {
+ "call_id": {
+ "type": "string"
+ },
+ "output": {
+ "type": "string"
+ },
+ "type": {
+ "type": "string",
+ "const": "function_call_output",
+ "default": "function_call_output"
+ },
+ "id": {
+ "type": "string"
+ },
+ "status": {
+ "type": "string"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "call_id",
+ "output",
+ "type"
+ ],
+ "title": "OpenAIResponseInputFunctionToolCallOutput",
+ "description": "This represents the output of a function call that gets passed back to the model."
+ },
"OpenAIResponseInputMessageContent": {
"oneOf": [
{
@@ -6764,6 +6800,41 @@
],
"title": "OpenAIResponseOutputMessageContentOutputText"
},
+ "OpenAIResponseOutputMessageFunctionToolCall": {
+ "type": "object",
+ "properties": {
+ "arguments": {
+ "type": "string"
+ },
+ "call_id": {
+ "type": "string"
+ },
+ "name": {
+ "type": "string"
+ },
+ "type": {
+ "type": "string",
+ "const": "function_call",
+ "default": "function_call"
+ },
+ "id": {
+ "type": "string"
+ },
+ "status": {
+ "type": "string"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "arguments",
+ "call_id",
+ "name",
+ "type",
+ "id",
+ "status"
+ ],
+ "title": "OpenAIResponseOutputMessageFunctionToolCall"
+ },
"OpenAIResponseOutputMessageWebSearchToolCall": {
"type": "object",
"properties": {
@@ -6934,41 +7005,6 @@
}
}
},
- "OpenAIResponseOutputMessageFunctionToolCall": {
- "type": "object",
- "properties": {
- "arguments": {
- "type": "string"
- },
- "call_id": {
- "type": "string"
- },
- "name": {
- "type": "string"
- },
- "type": {
- "type": "string",
- "const": "function_call",
- "default": "function_call"
- },
- "id": {
- "type": "string"
- },
- "status": {
- "type": "string"
- }
- },
- "additionalProperties": false,
- "required": [
- "arguments",
- "call_id",
- "name",
- "type",
- "id",
- "status"
- ],
- "title": "OpenAIResponseOutputMessageFunctionToolCall"
- },
"OpenAIResponseObjectStream": {
"oneOf": [
{
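The two schemas added to the `OpenAIResponseInput` union above mirror the function-calling items of the OpenAI Responses API: the model emits a `function_call` item, and on the next request the client echoes it back alongside a `function_call_output` item that shares the same `call_id`. As a rough illustration of the wire shape these schemas describe (every identifier and value below is invented for the example), a follow-up `input` array could look like:

```python
# Illustrative only: ids, function name, arguments, and output are invented.
follow_up_input = [
    {
        # the function_call item the model produced in the previous response
        "type": "function_call",
        "id": "fc_abc123",
        "call_id": "call_abc123",
        "name": "get_weather",
        "arguments": '{"city": "Tokyo"}',
        "status": "completed",
    },
    {
        # the tool result computed client-side, linked back via call_id
        "type": "function_call_output",
        "call_id": "call_abc123",
        "output": '{"temperature_c": 21}',
    },
]
```

The `const` values on `type` (`function_call` / `function_call_output`) are what let a validator route each item to the right branch of the `oneOf`.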
diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml
index bc71ce915..10b5deec2 100644
--- a/docs/_static/llama-stack-spec.yaml
+++ b/docs/_static/llama-stack-spec.yaml
@@ -4537,7 +4537,34 @@ components:
OpenAIResponseInput:
oneOf:
- $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
+ - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
+ - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput'
- $ref: '#/components/schemas/OpenAIResponseMessage'
+ "OpenAIResponseInputFunctionToolCallOutput":
+ type: object
+ properties:
+ call_id:
+ type: string
+ output:
+ type: string
+ type:
+ type: string
+ const: function_call_output
+ default: function_call_output
+ id:
+ type: string
+ status:
+ type: string
+ additionalProperties: false
+ required:
+ - call_id
+ - output
+ - type
+ title: >-
+ OpenAIResponseInputFunctionToolCallOutput
+ description: >-
+ This represents the output of a function call that gets passed back to the
+ model.
OpenAIResponseInputMessageContent:
oneOf:
- $ref: '#/components/schemas/OpenAIResponseInputMessageContentText'
@@ -4721,6 +4748,33 @@ components:
- type
title: >-
OpenAIResponseOutputMessageContentOutputText
+ "OpenAIResponseOutputMessageFunctionToolCall":
+ type: object
+ properties:
+ arguments:
+ type: string
+ call_id:
+ type: string
+ name:
+ type: string
+ type:
+ type: string
+ const: function_call
+ default: function_call
+ id:
+ type: string
+ status:
+ type: string
+ additionalProperties: false
+ required:
+ - arguments
+ - call_id
+ - name
+ - type
+ - id
+ - status
+ title: >-
+ OpenAIResponseOutputMessageFunctionToolCall
"OpenAIResponseOutputMessageWebSearchToolCall":
type: object
properties:
@@ -4840,33 +4894,6 @@ components:
message: '#/components/schemas/OpenAIResponseMessage'
web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
- "OpenAIResponseOutputMessageFunctionToolCall":
- type: object
- properties:
- arguments:
- type: string
- call_id:
- type: string
- name:
- type: string
- type:
- type: string
- const: function_call
- default: function_call
- id:
- type: string
- status:
- type: string
- additionalProperties: false
- required:
- - arguments
- - call_id
- - name
- - type
- - id
- - status
- title: >-
- OpenAIResponseOutputMessageFunctionToolCall
OpenAIResponseObjectStream:
oneOf:
- $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseCreated'
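The YAML spec mirrors the JSON spec above; both are generated from the pydantic models added in `llama_stack/apis/agents/openai_responses.py`. As a quick, hedged sanity check (assuming a checkout with this change installed), serializing the new model should yield exactly the keys the schema marks as required once the optional `id` and `status` are left unset:

```python
from llama_stack.apis.agents.openai_responses import (
    OpenAIResponseInputFunctionToolCallOutput,
)

# call_id and output values are invented for the example
item = OpenAIResponseInputFunctionToolCallOutput(
    call_id="call_abc123",
    output='{"temperature_c": 21}',
)

# Only the schema's required fields remain once unset optionals are excluded.
assert item.model_dump(mode="json", exclude_none=True) == {
    "call_id": "call_abc123",
    "output": '{"temperature_c": 21}',
    "type": "function_call_output",
}
```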
diff --git a/llama_stack/apis/agents/openai_responses.py b/llama_stack/apis/agents/openai_responses.py
index 511cf4f86..dcf0c7f9c 100644
--- a/llama_stack/apis/agents/openai_responses.py
+++ b/llama_stack/apis/agents/openai_responses.py
@@ -130,9 +130,24 @@ OpenAIResponseObjectStream = Annotated[
register_schema(OpenAIResponseObjectStream, name="OpenAIResponseObjectStream")
+@json_schema_type
+class OpenAIResponseInputFunctionToolCallOutput(BaseModel):
+ """
+ This represents the output of a function call that gets passed back to the model.
+ """
+
+ call_id: str
+ output: str
+ type: Literal["function_call_output"] = "function_call_output"
+ id: str | None = None
+ status: str | None = None
+
+
OpenAIResponseInput = Annotated[
# Responses API allows output messages to be passed in as input
OpenAIResponseOutputMessageWebSearchToolCall
+ | OpenAIResponseOutputMessageFunctionToolCall
+ | OpenAIResponseInputFunctionToolCallOutput
|
# Fallback to the generic message type as a last resort
OpenAIResponseMessage,
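With these two members in the `OpenAIResponseInput` union, a client can hand tool results back to the Responses API on the next turn. A sketch of how that pairing might be assembled client-side (`build_follow_up_input` is a hypothetical helper, not part of this change, and `previous_output` is assumed to hold the parsed output items of the prior response):

```python
from llama_stack.apis.agents.openai_responses import (
    OpenAIResponseInput,
    OpenAIResponseInputFunctionToolCallOutput,
    OpenAIResponseOutputMessageFunctionToolCall,
)


def build_follow_up_input(
    previous_output: list,
    tool_results: dict[str, str],  # maps call_id -> serialized tool output
) -> list[OpenAIResponseInput]:
    """Echo each function_call item and append its matching function_call_output."""
    items: list[OpenAIResponseInput] = []
    for item in previous_output:
        if isinstance(item, OpenAIResponseOutputMessageFunctionToolCall):
            items.append(item)
            items.append(
                OpenAIResponseInputFunctionToolCallOutput(
                    call_id=item.call_id,
                    output=tool_results[item.call_id],
                )
            )
    return items
```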
diff --git a/llama_stack/providers/inline/agents/meta_reference/openai_responses.py b/llama_stack/providers/inline/agents/meta_reference/openai_responses.py
index 4d2f40226..b2853e2c3 100644
--- a/llama_stack/providers/inline/agents/meta_reference/openai_responses.py
+++ b/llama_stack/providers/inline/agents/meta_reference/openai_responses.py
@@ -14,6 +14,7 @@ from pydantic import BaseModel
from llama_stack.apis.agents.openai_responses import (
OpenAIResponseInput,
+ OpenAIResponseInputFunctionToolCallOutput,
OpenAIResponseInputItemList,
OpenAIResponseInputMessageContent,
OpenAIResponseInputMessageContentImage,
@@ -38,6 +39,7 @@ from llama_stack.apis.inference.inference import (
OpenAIChatCompletionContentPartImageParam,
OpenAIChatCompletionContentPartParam,
OpenAIChatCompletionContentPartTextParam,
+ OpenAIChatCompletionToolCall,
OpenAIChatCompletionToolCallFunction,
OpenAIChoice,
OpenAIDeveloperMessageParam,
@@ -97,13 +99,31 @@ async def _convert_response_input_to_chat_messages(
messages: list[OpenAIMessageParam] = []
if isinstance(input, list):
for input_message in input:
- content = await _convert_response_content_to_chat_content(input_message.content)
- message_type = await _get_message_type_by_role(input_message.role)
- if message_type is None:
- raise ValueError(
- f"Llama Stack OpenAI Responses does not yet support message role '{input_message.role}' in this context"
+ if isinstance(input_message, OpenAIResponseInputFunctionToolCallOutput):
+ messages.append(
+ OpenAIToolMessageParam(
+ content=input_message.output,
+ tool_call_id=input_message.call_id,
+ )
)
- messages.append(message_type(content=content))
+ elif isinstance(input_message, OpenAIResponseOutputMessageFunctionToolCall):
+ tool_call = OpenAIChatCompletionToolCall(
+ index=0,
+ id=input_message.call_id,
+ function=OpenAIChatCompletionToolCallFunction(
+ name=input_message.name,
+ arguments=input_message.arguments,
+ ),
+ )
+ messages.append(OpenAIAssistantMessageParam(tool_calls=[tool_call]))
+ else:
+ content = await _convert_response_content_to_chat_content(input_message.content)
+ message_type = await _get_message_type_by_role(input_message.role)
+ if message_type is None:
+ raise ValueError(
+ f"Llama Stack OpenAI Responses does not yet support message role '{input_message.role}' in this context"
+ )
+ messages.append(message_type(content=content))
else:
messages.append(OpenAIUserMessageParam(content=input))
return messages
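With the branches added above, a `function_call` input item is converted into an assistant message carrying the tool call, and a `function_call_output` item into the matching tool message, which is the pairing chat-completion providers expect. A hedged sketch of exercising the converter directly (the function is module-private, its exact signature is assumed from the hunk above, and all identifiers are invented):

```python
import asyncio

from llama_stack.apis.agents.openai_responses import (
    OpenAIResponseInputFunctionToolCallOutput,
    OpenAIResponseOutputMessageFunctionToolCall,
)
from llama_stack.providers.inline.agents.meta_reference.openai_responses import (
    _convert_response_input_to_chat_messages,
)


async def main() -> None:
    input_items = [
        OpenAIResponseOutputMessageFunctionToolCall(
            id="fc_abc123",  # invented identifiers for illustration
            call_id="call_abc123",
            name="get_weather",
            arguments='{"city": "Tokyo"}',
            status="completed",
        ),
        OpenAIResponseInputFunctionToolCallOutput(
            call_id="call_abc123",
            output='{"temperature_c": 21}',
        ),
    ]
    messages = await _convert_response_input_to_chat_messages(input_items)
    # Expected: an assistant message with one tool_call, followed by a tool
    # message whose tool_call_id is "call_abc123".
    for message in messages:
        print(type(message).__name__, message.model_dump(exclude_none=True))


asyncio.run(main())
```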
@@ -222,6 +242,7 @@ class OpenAIResponsesImpl:
# TODO: refactor this into a separate method that handles streaming
chat_response_id = ""
chat_response_content = []
+ chat_response_tool_calls: dict[int, OpenAIChatCompletionToolCall] = {}
# TODO: these chunk_ fields are hacky and only take the last chunk into account
chunk_created = 0
chunk_model = ""
@@ -235,7 +256,26 @@ class OpenAIResponsesImpl:
chat_response_content.append(chunk_choice.delta.content or "")
if chunk_choice.finish_reason:
chunk_finish_reason = chunk_choice.finish_reason
- assistant_message = OpenAIAssistantMessageParam(content="".join(chat_response_content))
+
+                    if chunk_choice.delta.tool_calls:
+                        for tool_call in chunk_choice.delta.tool_calls:
+                            if tool_call.index not in chat_response_tool_calls:
+                                # first delta for this index: keep the whole call (id, name, any initial arguments)
+                                chat_response_tool_calls[tool_call.index] = OpenAIChatCompletionToolCall(
+                                    **tool_call.model_dump()
+                                )
+                            else:
+                                # later deltas only stream more argument text; append it
+                                chat_response_tool_calls[tool_call.index].function.arguments += tool_call.function.arguments
+
+ if chat_response_tool_calls:
+ tool_calls = [chat_response_tool_calls[i] for i in sorted(chat_response_tool_calls.keys())]
+ else:
+ tool_calls = None
+ assistant_message = OpenAIAssistantMessageParam(
+ content="".join(chat_response_content),
+ tool_calls=tool_calls,
+ )
chat_response = OpenAIChatCompletion(
id=chat_response_id,
choices=[