From 4df8caab41cfe1a4a5769864aaaf35a2a052da65 Mon Sep 17 00:00:00 2001
From: Ben Browning
Date: Thu, 8 May 2025 16:21:15 -0400
Subject: [PATCH] Fixes for multi-turn tool calls in Responses API

Testing with Codex locally, I found another issue in how we plumb tool
calls through multi-turn scenarios: tool call inputs and outputs from
previous turns were not being passed back into subsequent turns
correctly. This led me to realize we were missing the function tool
call output type in the Responses API entirely, so this adds that type
and plumbs handling of it through the Responses API to chat completion
conversion code.

Signed-off-by: Ben Browning
---
 docs/_static/llama-stack-spec.html            | 106 ++++++++++++------
 docs/_static/llama-stack-spec.yaml            |  81 ++++++++-----
 llama_stack/apis/agents/openai_responses.py   |  15 +++
 .../agents/meta_reference/openai_responses.py |  54 +++++++--
 4 files changed, 187 insertions(+), 69 deletions(-)

diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html
index 15342de86..f1bde880b 100644
--- a/docs/_static/llama-stack-spec.html
+++ b/docs/_static/llama-stack-spec.html
@@ -6471,11 +6471,47 @@
             {
                 "$ref": "#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall"
             },
+            {
+                "$ref": "#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall"
+            },
+            {
+                "$ref": "#/components/schemas/OpenAIResponseInputFunctionToolCallOutput"
+            },
             {
                 "$ref": "#/components/schemas/OpenAIResponseMessage"
             }
         ]
     },
+    "OpenAIResponseInputFunctionToolCallOutput": {
+        "type": "object",
+        "properties": {
+            "call_id": {
+                "type": "string"
+            },
+            "output": {
+                "type": "string"
+            },
+            "type": {
+                "type": "string",
+                "const": "function_call_output",
+                "default": "function_call_output"
+            },
+            "id": {
+                "type": "string"
+            },
+            "status": {
+                "type": "string"
+            }
+        },
+        "additionalProperties": false,
+        "required": [
+            "call_id",
+            "output",
+            "type"
+        ],
+        "title": "OpenAIResponseInputFunctionToolCallOutput",
+        "description": "This represents the output of a function call that gets passed back to the model."
+    },
     "OpenAIResponseInputMessageContent": {
         "oneOf": [
             {
@@ -6764,6 +6800,41 @@
         ],
         "title": "OpenAIResponseOutputMessageContentOutputText"
     },
+    "OpenAIResponseOutputMessageFunctionToolCall": {
+        "type": "object",
+        "properties": {
+            "arguments": {
+                "type": "string"
+            },
+            "call_id": {
+                "type": "string"
+            },
+            "name": {
+                "type": "string"
+            },
+            "type": {
+                "type": "string",
+                "const": "function_call",
+                "default": "function_call"
+            },
+            "id": {
+                "type": "string"
+            },
+            "status": {
+                "type": "string"
+            }
+        },
+        "additionalProperties": false,
+        "required": [
+            "arguments",
+            "call_id",
+            "name",
+            "type",
+            "id",
+            "status"
+        ],
+        "title": "OpenAIResponseOutputMessageFunctionToolCall"
+    },
     "OpenAIResponseOutputMessageWebSearchToolCall": {
         "type": "object",
         "properties": {
@@ -6934,41 +7005,6 @@
             }
         }
     },
-    "OpenAIResponseOutputMessageFunctionToolCall": {
-        "type": "object",
-        "properties": {
-            "arguments": {
-                "type": "string"
-            },
-            "call_id": {
-                "type": "string"
-            },
-            "name": {
-                "type": "string"
-            },
-            "type": {
-                "type": "string",
-                "const": "function_call",
-                "default": "function_call"
-            },
-            "id": {
-                "type": "string"
-            },
-            "status": {
-                "type": "string"
-            }
-        },
-        "additionalProperties": false,
-        "required": [
-            "arguments",
-            "call_id",
-            "name",
-            "type",
-            "id",
-            "status"
-        ],
-        "title": "OpenAIResponseOutputMessageFunctionToolCall"
-    },
     "OpenAIResponseObjectStream": {
         "oneOf": [
             {
diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml
index bc71ce915..10b5deec2 100644
--- a/docs/_static/llama-stack-spec.yaml
+++ b/docs/_static/llama-stack-spec.yaml
@@ -4537,7 +4537,34 @@ components:
     OpenAIResponseInput:
       oneOf:
         - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
+        - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
+        - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput'
         - $ref: '#/components/schemas/OpenAIResponseMessage'
+    "OpenAIResponseInputFunctionToolCallOutput":
+      type: object
+      properties:
+        call_id:
+          type: string
+        output:
+          type: string
+        type:
+          type: string
+          const: function_call_output
+          default: function_call_output
+        id:
+          type: string
+        status:
+          type: string
+      additionalProperties: false
+      required:
+        - call_id
+        - output
+        - type
+      title: >-
+        OpenAIResponseInputFunctionToolCallOutput
+      description: >-
+        This represents the output of a function call that gets passed back to the
+        model.
     OpenAIResponseInputMessageContent:
       oneOf:
         - $ref: '#/components/schemas/OpenAIResponseInputMessageContentText'
@@ -4721,6 +4748,33 @@ components:
         - type
       title: >-
         OpenAIResponseOutputMessageContentOutputText
+    "OpenAIResponseOutputMessageFunctionToolCall":
+      type: object
+      properties:
+        arguments:
+          type: string
+        call_id:
+          type: string
+        name:
+          type: string
+        type:
+          type: string
+          const: function_call
+          default: function_call
+        id:
+          type: string
+        status:
+          type: string
+      additionalProperties: false
+      required:
+        - arguments
+        - call_id
+        - name
+        - type
+        - id
+        - status
+      title: >-
+        OpenAIResponseOutputMessageFunctionToolCall
     "OpenAIResponseOutputMessageWebSearchToolCall":
       type: object
       properties:
@@ -4840,33 +4894,6 @@ components:
         message: '#/components/schemas/OpenAIResponseMessage'
         web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
         function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
-    "OpenAIResponseOutputMessageFunctionToolCall":
-      type: object
-      properties:
-        arguments:
-          type: string
-        call_id:
-          type: string
-        name:
-          type: string
-        type:
-          type: string
-          const: function_call
-          default: function_call
-        id:
-          type: string
-        status:
-          type: string
-      additionalProperties: false
-      required:
-        - arguments
-        - call_id
-        - name
-        - type
-        - id
-        - status
-      title: >-
-        OpenAIResponseOutputMessageFunctionToolCall
     OpenAIResponseObjectStream:
       oneOf:
         - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseCreated'
diff --git a/llama_stack/apis/agents/openai_responses.py b/llama_stack/apis/agents/openai_responses.py
index 511cf4f86..dcf0c7f9c 100644
--- a/llama_stack/apis/agents/openai_responses.py
+++ b/llama_stack/apis/agents/openai_responses.py
@@ -130,9 +130,24 @@ OpenAIResponseObjectStream = Annotated[
 register_schema(OpenAIResponseObjectStream, name="OpenAIResponseObjectStream")
 
 
+@json_schema_type
+class OpenAIResponseInputFunctionToolCallOutput(BaseModel):
+    """
+    This represents the output of a function call that gets passed back to the model.
+ """ + + call_id: str + output: str + type: Literal["function_call_output"] = "function_call_output" + id: str | None = None + status: str | None = None + + OpenAIResponseInput = Annotated[ # Responses API allows output messages to be passed in as input OpenAIResponseOutputMessageWebSearchToolCall + | OpenAIResponseOutputMessageFunctionToolCall + | OpenAIResponseInputFunctionToolCallOutput | # Fallback to the generic message type as a last resort OpenAIResponseMessage, diff --git a/llama_stack/providers/inline/agents/meta_reference/openai_responses.py b/llama_stack/providers/inline/agents/meta_reference/openai_responses.py index 4d2f40226..b2853e2c3 100644 --- a/llama_stack/providers/inline/agents/meta_reference/openai_responses.py +++ b/llama_stack/providers/inline/agents/meta_reference/openai_responses.py @@ -14,6 +14,7 @@ from pydantic import BaseModel from llama_stack.apis.agents.openai_responses import ( OpenAIResponseInput, + OpenAIResponseInputFunctionToolCallOutput, OpenAIResponseInputItemList, OpenAIResponseInputMessageContent, OpenAIResponseInputMessageContentImage, @@ -38,6 +39,7 @@ from llama_stack.apis.inference.inference import ( OpenAIChatCompletionContentPartImageParam, OpenAIChatCompletionContentPartParam, OpenAIChatCompletionContentPartTextParam, + OpenAIChatCompletionToolCall, OpenAIChatCompletionToolCallFunction, OpenAIChoice, OpenAIDeveloperMessageParam, @@ -97,13 +99,31 @@ async def _convert_response_input_to_chat_messages( messages: list[OpenAIMessageParam] = [] if isinstance(input, list): for input_message in input: - content = await _convert_response_content_to_chat_content(input_message.content) - message_type = await _get_message_type_by_role(input_message.role) - if message_type is None: - raise ValueError( - f"Llama Stack OpenAI Responses does not yet support message role '{input_message.role}' in this context" + if isinstance(input_message, OpenAIResponseInputFunctionToolCallOutput): + messages.append( + OpenAIToolMessageParam( + content=input_message.output, + tool_call_id=input_message.call_id, + ) ) - messages.append(message_type(content=content)) + elif isinstance(input_message, OpenAIResponseOutputMessageFunctionToolCall): + tool_call = OpenAIChatCompletionToolCall( + index=0, + id=input_message.call_id, + function=OpenAIChatCompletionToolCallFunction( + name=input_message.name, + arguments=input_message.arguments, + ), + ) + messages.append(OpenAIAssistantMessageParam(tool_calls=[tool_call])) + else: + content = await _convert_response_content_to_chat_content(input_message.content) + message_type = await _get_message_type_by_role(input_message.role) + if message_type is None: + raise ValueError( + f"Llama Stack OpenAI Responses does not yet support message role '{input_message.role}' in this context" + ) + messages.append(message_type(content=content)) else: messages.append(OpenAIUserMessageParam(content=input)) return messages @@ -222,6 +242,7 @@ class OpenAIResponsesImpl: # TODO: refactor this into a separate method that handles streaming chat_response_id = "" chat_response_content = [] + chat_response_tool_calls: dict[int, OpenAIChatCompletionToolCall] = {} # TODO: these chunk_ fields are hacky and only take the last chunk into account chunk_created = 0 chunk_model = "" @@ -235,7 +256,26 @@ class OpenAIResponsesImpl: chat_response_content.append(chunk_choice.delta.content or "") if chunk_choice.finish_reason: chunk_finish_reason = chunk_choice.finish_reason - assistant_message = OpenAIAssistantMessageParam(content="".join(chat_response_content)) + + 
+                if chunk_choice.delta.tool_calls:
+                    for tool_call in chunk_choice.delta.tool_calls:
+                        if tool_call.index not in chat_response_tool_calls:
+                            chat_response_tool_calls[tool_call.index] = OpenAIChatCompletionToolCall(
+                                **tool_call.model_dump()
+                            )
+                        else:
+                            chat_response_tool_calls[tool_call.index].function.arguments = (
+                                chat_response_tool_calls[tool_call.index].function.arguments or ""
+                            ) + (tool_call.function.arguments or "")
+
+        if chat_response_tool_calls:
+            tool_calls = [chat_response_tool_calls[i] for i in sorted(chat_response_tool_calls.keys())]
+        else:
+            tool_calls = None
+        assistant_message = OpenAIAssistantMessageParam(
+            content="".join(chat_response_content),
+            tool_calls=tool_calls,
+        )
         chat_response = OpenAIChatCompletion(
             id=chat_response_id,
             choices=[
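
For illustration, the multi-turn flow this patch targets looks roughly like
the following, using the OpenAI Python client against a Llama Stack server.
This is a sketch: the base URL, API key, model name, and tool definition are
assumed placeholders, not values taken from the patch.

# Hypothetical two-turn exchange over the Responses API; the endpoint,
# model, and tool here are made-up placeholders.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8321/v1/openai/v1", api_key="none")

tools = [
    {
        "type": "function",
        "name": "list_files",
        "description": "List files in a directory",
        "parameters": {
            "type": "object",
            "properties": {"path": {"type": "string"}},
            "required": ["path"],
        },
    }
]

# Turn 1: the model emits a function_call output item.
first = client.responses.create(
    model="llama-3.2-3b", input="What files are here?", tools=tools
)
call = next(item for item in first.output if item.type == "function_call")

# Turn 2: the client runs the tool, then passes the original call plus a
# function_call_output item (the input type this patch adds) back as input.
second = client.responses.create(
    model="llama-3.2-3b",
    input=[
        {
            "type": "function_call",
            "call_id": call.call_id,
            "name": call.name,
            "arguments": call.arguments,
        },
        {
            "type": "function_call_output",
            "call_id": call.call_id,
            "output": '["README.md", "pyproject.toml"]',
        },
    ],
    tools=tools,
)
print(second.output_text)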
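
The heart of the change in _convert_response_input_to_chat_messages can be
shown standalone. Here is a minimal sketch with plain dicts standing in for
the Pydantic models; only the item type strings and field names come from the
patch, while the helper name and sample data are invented.

def convert_input_items(items: list[dict]) -> list[dict]:
    # Map Responses API input items to chat-completion-style messages.
    messages = []
    for item in items:
        if item["type"] == "function_call_output":
            # Tool output from a previous turn becomes a "tool" role message.
            messages.append(
                {"role": "tool", "tool_call_id": item["call_id"], "content": item["output"]}
            )
        elif item["type"] == "function_call":
            # A tool call the model made in a previous turn becomes an
            # assistant message carrying that tool call.
            messages.append(
                {
                    "role": "assistant",
                    "tool_calls": [
                        {
                            "id": item["call_id"],
                            "type": "function",
                            "function": {"name": item["name"], "arguments": item["arguments"]},
                        }
                    ],
                }
            )
        else:
            # Plain messages still convert by role, as before this patch.
            messages.append({"role": item["role"], "content": item["content"]})
    return messages


items = [
    {"type": "message", "role": "user", "content": "What files are here?"},
    {"type": "function_call", "call_id": "call_1", "name": "list_files", "arguments": '{"path": "."}'},
    {"type": "function_call_output", "call_id": "call_1", "output": '["README.md"]'},
]
for message in convert_input_items(items):
    print(message)

Run as-is, this prints a user message, an assistant message carrying the
original tool call, and a tool message with that call's output, which is the
chat-completion shape the inference providers expect.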
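
On the streaming side, tool-call arguments arrive as fragments spread across
chunks, keyed by index. A dict-based sketch of the accumulation pattern used
above, with invented delta data:

# Two stream chunks for the same tool call (index 0): the first carries the
# id, name, and opening argument fragment; the second only more text.
deltas = [
    {"index": 0, "id": "call_1", "name": "list_files", "arguments": '{"pa'},
    {"index": 0, "id": None, "name": None, "arguments": 'th": "."}'},
]

accumulated: dict[int, dict] = {}
for delta in deltas:
    if delta["index"] not in accumulated:
        # First fragment seeds the entry, including any initial arguments.
        accumulated[delta["index"]] = dict(delta)
    else:
        # Later fragments only extend the arguments string.
        accumulated[delta["index"]]["arguments"] += delta["arguments"] or ""

tool_calls = [accumulated[i] for i in sorted(accumulated)]
print(tool_calls[0]["arguments"])  # -> {"path": "."}

Seeding on the first fragment and appending only on later ones is what keeps
the opening fragment from being counted twice, which is why the hunk above
appends through an else branch rather than unconditionally.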