diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html
index 15342de86..f1bde880b 100644
--- a/docs/_static/llama-stack-spec.html
+++ b/docs/_static/llama-stack-spec.html
@@ -6471,11 +6471,47 @@
                     {
                         "$ref": "#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall"
                     },
+                    {
+                        "$ref": "#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall"
+                    },
+                    {
+                        "$ref": "#/components/schemas/OpenAIResponseInputFunctionToolCallOutput"
+                    },
                     {
                         "$ref": "#/components/schemas/OpenAIResponseMessage"
                     }
                 ]
             },
+            "OpenAIResponseInputFunctionToolCallOutput": {
+                "type": "object",
+                "properties": {
+                    "call_id": {
+                        "type": "string"
+                    },
+                    "output": {
+                        "type": "string"
+                    },
+                    "type": {
+                        "type": "string",
+                        "const": "function_call_output",
+                        "default": "function_call_output"
+                    },
+                    "id": {
+                        "type": "string"
+                    },
+                    "status": {
+                        "type": "string"
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "call_id",
+                    "output",
+                    "type"
+                ],
+                "title": "OpenAIResponseInputFunctionToolCallOutput",
+                "description": "This represents the output of a function call that gets passed back to the model."
+            },
             "OpenAIResponseInputMessageContent": {
                 "oneOf": [
                     {
@@ -6764,6 +6800,41 @@
                 ],
                 "title": "OpenAIResponseOutputMessageContentOutputText"
             },
+            "OpenAIResponseOutputMessageFunctionToolCall": {
+                "type": "object",
+                "properties": {
+                    "arguments": {
+                        "type": "string"
+                    },
+                    "call_id": {
+                        "type": "string"
+                    },
+                    "name": {
+                        "type": "string"
+                    },
+                    "type": {
+                        "type": "string",
+                        "const": "function_call",
+                        "default": "function_call"
+                    },
+                    "id": {
+                        "type": "string"
+                    },
+                    "status": {
+                        "type": "string"
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "arguments",
+                    "call_id",
+                    "name",
+                    "type",
+                    "id",
+                    "status"
+                ],
+                "title": "OpenAIResponseOutputMessageFunctionToolCall"
+            },
             "OpenAIResponseOutputMessageWebSearchToolCall": {
                 "type": "object",
                 "properties": {
@@ -6934,41 +7005,6 @@
                     }
                 }
             },
-            "OpenAIResponseOutputMessageFunctionToolCall": {
-                "type": "object",
-                "properties": {
-                    "arguments": {
-                        "type": "string"
-                    },
-                    "call_id": {
-                        "type": "string"
-                    },
-                    "name": {
-                        "type": "string"
-                    },
-                    "type": {
-                        "type": "string",
-                        "const": "function_call",
-                        "default": "function_call"
-                    },
-                    "id": {
-                        "type": "string"
-                    },
-                    "status": {
-                        "type": "string"
-                    }
-                },
-                "additionalProperties": false,
-                "required": [
-                    "arguments",
-                    "call_id",
-                    "name",
-                    "type",
-                    "id",
-                    "status"
-                ],
-                "title": "OpenAIResponseOutputMessageFunctionToolCall"
-            },
             "OpenAIResponseObjectStream": {
                 "oneOf": [
                     {
diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml
index bc71ce915..10b5deec2 100644
--- a/docs/_static/llama-stack-spec.yaml
+++ b/docs/_static/llama-stack-spec.yaml
@@ -4537,7 +4537,34 @@ components:
     OpenAIResponseInput:
       oneOf:
         - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
+        - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
+        - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput'
         - $ref: '#/components/schemas/OpenAIResponseMessage'
+    "OpenAIResponseInputFunctionToolCallOutput":
+      type: object
+      properties:
+        call_id:
+          type: string
+        output:
+          type: string
+        type:
+          type: string
+          const: function_call_output
+          default: function_call_output
+        id:
+          type: string
+        status:
+          type: string
+      additionalProperties: false
+      required:
+        - call_id
+        - output
+        - type
+      title: >-
+        OpenAIResponseInputFunctionToolCallOutput
+      description: >-
+        This represents the output of a function call that gets passed back to the
+        model.
     OpenAIResponseInputMessageContent:
       oneOf:
         - $ref: '#/components/schemas/OpenAIResponseInputMessageContentText'
@@ -4721,6 +4748,33 @@ components:
         - type
       title: >-
         OpenAIResponseOutputMessageContentOutputText
+    "OpenAIResponseOutputMessageFunctionToolCall":
+      type: object
+      properties:
+        arguments:
+          type: string
+        call_id:
+          type: string
+        name:
+          type: string
+        type:
+          type: string
+          const: function_call
+          default: function_call
+        id:
+          type: string
+        status:
+          type: string
+      additionalProperties: false
+      required:
+        - arguments
+        - call_id
+        - name
+        - type
+        - id
+        - status
+      title: >-
+        OpenAIResponseOutputMessageFunctionToolCall
     "OpenAIResponseOutputMessageWebSearchToolCall":
       type: object
      properties:
@@ -4840,33 +4894,6 @@ components:
           message: '#/components/schemas/OpenAIResponseMessage'
           web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
           function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
-    "OpenAIResponseOutputMessageFunctionToolCall":
-      type: object
-      properties:
-        arguments:
-          type: string
-        call_id:
-          type: string
-        name:
-          type: string
-        type:
-          type: string
-          const: function_call
-          default: function_call
-        id:
-          type: string
-        status:
-          type: string
-      additionalProperties: false
-      required:
-        - arguments
-        - call_id
-        - name
-        - type
-        - id
-        - status
-      title: >-
-        OpenAIResponseOutputMessageFunctionToolCall
     OpenAIResponseObjectStream:
       oneOf:
         - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseCreated'
diff --git a/llama_stack/apis/agents/openai_responses.py b/llama_stack/apis/agents/openai_responses.py
index 511cf4f86..dcf0c7f9c 100644
--- a/llama_stack/apis/agents/openai_responses.py
+++ b/llama_stack/apis/agents/openai_responses.py
@@ -130,9 +130,24 @@ OpenAIResponseObjectStream = Annotated[
 register_schema(OpenAIResponseObjectStream, name="OpenAIResponseObjectStream")
 
 
+@json_schema_type
+class OpenAIResponseInputFunctionToolCallOutput(BaseModel):
+    """
+    This represents the output of a function call that gets passed back to the model.
+    """
+
+    call_id: str
+    output: str
+    type: Literal["function_call_output"] = "function_call_output"
+    id: str | None = None
+    status: str | None = None
+
+
 OpenAIResponseInput = Annotated[
     # Responses API allows output messages to be passed in as input
     OpenAIResponseOutputMessageWebSearchToolCall
+    | OpenAIResponseOutputMessageFunctionToolCall
+    | OpenAIResponseInputFunctionToolCallOutput
     |
     # Fallback to the generic message type as a last resort
     OpenAIResponseMessage,
diff --git a/llama_stack/providers/inline/agents/meta_reference/openai_responses.py b/llama_stack/providers/inline/agents/meta_reference/openai_responses.py
index 4d2f40226..b2853e2c3 100644
--- a/llama_stack/providers/inline/agents/meta_reference/openai_responses.py
+++ b/llama_stack/providers/inline/agents/meta_reference/openai_responses.py
@@ -14,6 +14,7 @@ from pydantic import BaseModel
 
 from llama_stack.apis.agents.openai_responses import (
     OpenAIResponseInput,
+    OpenAIResponseInputFunctionToolCallOutput,
     OpenAIResponseInputItemList,
     OpenAIResponseInputMessageContent,
     OpenAIResponseInputMessageContentImage,
@@ -38,6 +39,7 @@ from llama_stack.apis.inference.inference import (
     OpenAIChatCompletionContentPartImageParam,
     OpenAIChatCompletionContentPartParam,
     OpenAIChatCompletionContentPartTextParam,
+    OpenAIChatCompletionToolCall,
     OpenAIChatCompletionToolCallFunction,
     OpenAIChoice,
     OpenAIDeveloperMessageParam,
@@ -97,13 +99,31 @@ async def _convert_response_input_to_chat_messages(
     messages: list[OpenAIMessageParam] = []
     if isinstance(input, list):
         for input_message in input:
-            content = await _convert_response_content_to_chat_content(input_message.content)
-            message_type = await _get_message_type_by_role(input_message.role)
-            if message_type is None:
-                raise ValueError(
-                    f"Llama Stack OpenAI Responses does not yet support message role '{input_message.role}' in this context"
+            if isinstance(input_message, OpenAIResponseInputFunctionToolCallOutput):
+                messages.append(
+                    OpenAIToolMessageParam(
+                        content=input_message.output,
+                        tool_call_id=input_message.call_id,
+                    )
                 )
-            messages.append(message_type(content=content))
+            elif isinstance(input_message, OpenAIResponseOutputMessageFunctionToolCall):
+                tool_call = OpenAIChatCompletionToolCall(
+                    index=0,
+                    id=input_message.call_id,
+                    function=OpenAIChatCompletionToolCallFunction(
+                        name=input_message.name,
+                        arguments=input_message.arguments,
+                    ),
+                )
+                messages.append(OpenAIAssistantMessageParam(tool_calls=[tool_call]))
+            else:
+                content = await _convert_response_content_to_chat_content(input_message.content)
+                message_type = await _get_message_type_by_role(input_message.role)
+                if message_type is None:
+                    raise ValueError(
+                        f"Llama Stack OpenAI Responses does not yet support message role '{input_message.role}' in this context"
+                    )
+                messages.append(message_type(content=content))
     else:
         messages.append(OpenAIUserMessageParam(content=input))
     return messages
@@ -222,6 +242,7 @@ class OpenAIResponsesImpl:
         # TODO: refactor this into a separate method that handles streaming
         chat_response_id = ""
         chat_response_content = []
+        chat_response_tool_calls: dict[int, OpenAIChatCompletionToolCall] = {}
         # TODO: these chunk_ fields are hacky and only take the last chunk into account
         chunk_created = 0
         chunk_model = ""
@@ -235,7 +256,26 @@
                     chat_response_content.append(chunk_choice.delta.content or "")
                     if chunk_choice.finish_reason:
                         chunk_finish_reason = chunk_choice.finish_reason
-            assistant_message = OpenAIAssistantMessageParam(content="".join(chat_response_content))
+
+                    if chunk_choice.delta.tool_calls:  # accumulate tool call argument deltas, keyed by index
+                        for tool_call in chunk_choice.delta.tool_calls:
+                            if tool_call.index not in chat_response_tool_calls:
+                                chat_response_tool_calls[tool_call.index] = OpenAIChatCompletionToolCall(
+                                    **tool_call.model_dump()
+                                )
+                            else:
+                                chat_response_tool_calls[tool_call.index].function.arguments += (
+                                    tool_call.function.arguments
+                                )
+
+        if chat_response_tool_calls:
+            tool_calls = [chat_response_tool_calls[i] for i in sorted(chat_response_tool_calls.keys())]
+        else:
+            tool_calls = None
+        assistant_message = OpenAIAssistantMessageParam(
+            content="".join(chat_response_content),
+            tool_calls=tool_calls,
+        )
         chat_response = OpenAIChatCompletion(
             id=chat_response_id,
             choices=[