From d47f2c0ba85006e8ae382beff8c854193d62fc00 Mon Sep 17 00:00:00 2001
From: Ashwin Bharambe
Date: Tue, 14 Oct 2025 14:23:22 -0700
Subject: [PATCH] feat(responses)!: improve responses + conversations implementations

This PR updates the ConversationItem-related types and improves a couple of
critical parts of the implementation:

- It creates a streaming output item for the final assistant message output
  by the model. Until now we only added content parts and included that
  message in the final response.
- It rewrites the conversation update code completely to account for items
  other than messages (tool calls, outputs, etc.).
---
 docs/static/deprecated-llama-stack-spec.html |   6 +
 docs/static/deprecated-llama-stack-spec.yaml |   2 +
 docs/static/llama-stack-spec.html            | 210 ++++++++++--------
 docs/static/llama-stack-spec.yaml            | 160 ++++++-------
 docs/static/stainless-llama-stack-spec.html  | 210 ++++++++++--------
 docs/static/stainless-llama-stack-spec.yaml  | 160 ++++++-------
 llama_stack/apis/agents/openai_responses.py  |   6 +-
 .../apis/conversations/conversations.py      |  12 +-
 .../core/conversations/conversations.py      |   6 +
 .../responses/openai_responses.py            | 134 ++++-------
 .../meta_reference/responses/streaming.py    |  46 ++++
 11 files changed, 511 insertions(+), 441 deletions(-)

diff --git a/docs/static/deprecated-llama-stack-spec.html b/docs/static/deprecated-llama-stack-spec.html
index 0fa8c9adc..59a663543 100644
--- a/docs/static/deprecated-llama-stack-spec.html
+++ b/docs/static/deprecated-llama-stack-spec.html
@@ -8523,6 +8523,12 @@
         {
           "$ref": "#/components/schemas/OpenAIResponseMCPApprovalResponse"
         },
+        {
+          "$ref": "#/components/schemas/OpenAIResponseOutputMessageMCPCall"
+        },
+        {
+          "$ref": "#/components/schemas/OpenAIResponseOutputMessageMCPListTools"
+        },
         {
           "$ref": "#/components/schemas/OpenAIResponseMessage"
         }
diff --git a/docs/static/deprecated-llama-stack-spec.yaml b/docs/static/deprecated-llama-stack-spec.yaml
index ef388e5d1..b3a68e1ba 100644
--- a/docs/static/deprecated-llama-stack-spec.yaml
+++ b/docs/static/deprecated-llama-stack-spec.yaml
@@ -6358,6 +6358,8 @@ components:
       - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput'
       - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
       - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse'
+      - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
+      - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
       - $ref: '#/components/schemas/OpenAIResponseMessage'
     "OpenAIResponseInputFunctionToolCallOutput":
       type: object
diff --git a/docs/static/llama-stack-spec.html b/docs/static/llama-stack-spec.html
index 9887a5778..12e71d05d 100644
--- a/docs/static/llama-stack-spec.html
+++ b/docs/static/llama-stack-spec.html
@@ -5479,13 +5479,22 @@
           "$ref": "#/components/schemas/OpenAIResponseMessage"
         },
         {
-          "$ref": "#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall"
+          "$ref": "#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall"
         },
         {
           "$ref": "#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall"
         },
         {
-          "$ref": "#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall"
+          "$ref": "#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall"
+        },
+        {
+          "$ref": "#/components/schemas/OpenAIResponseInputFunctionToolCallOutput"
+        },
+        {
+          "$ref": "#/components/schemas/OpenAIResponseMCPApprovalRequest"
+        },
+        {
+          "$ref": "#/components/schemas/OpenAIResponseMCPApprovalResponse"
         },
         {
           "$ref": "#/components/schemas/OpenAIResponseOutputMessageMCPCall"
@@ -5498,9
+5507,12 @@ "propertyName": "type", "mapping": { "message": "#/components/schemas/OpenAIResponseMessage", - "function_call": "#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall", - "file_search_call": "#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall", "web_search_call": "#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall", + "file_search_call": "#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall", + "function_call": "#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall", + "function_call_output": "#/components/schemas/OpenAIResponseInputFunctionToolCallOutput", + "mcp_approval_request": "#/components/schemas/OpenAIResponseMCPApprovalRequest", + "mcp_approval_response": "#/components/schemas/OpenAIResponseMCPApprovalResponse", "mcp_call": "#/components/schemas/OpenAIResponseOutputMessageMCPCall", "mcp_list_tools": "#/components/schemas/OpenAIResponseOutputMessageMCPListTools" } @@ -5658,6 +5670,36 @@ } } }, + "OpenAIResponseInputFunctionToolCallOutput": { + "type": "object", + "properties": { + "call_id": { + "type": "string" + }, + "output": { + "type": "string" + }, + "type": { + "type": "string", + "const": "function_call_output", + "default": "function_call_output" + }, + "id": { + "type": "string" + }, + "status": { + "type": "string" + } + }, + "additionalProperties": false, + "required": [ + "call_id", + "output", + "type" + ], + "title": "OpenAIResponseInputFunctionToolCallOutput", + "description": "This represents the output of a function call that gets passed back to the model." + }, "OpenAIResponseInputMessageContent": { "oneOf": [ { @@ -5737,6 +5779,68 @@ "title": "OpenAIResponseInputMessageContentText", "description": "Text content for input messages in OpenAI response format." }, + "OpenAIResponseMCPApprovalRequest": { + "type": "object", + "properties": { + "arguments": { + "type": "string" + }, + "id": { + "type": "string" + }, + "name": { + "type": "string" + }, + "server_label": { + "type": "string" + }, + "type": { + "type": "string", + "const": "mcp_approval_request", + "default": "mcp_approval_request" + } + }, + "additionalProperties": false, + "required": [ + "arguments", + "id", + "name", + "server_label", + "type" + ], + "title": "OpenAIResponseMCPApprovalRequest", + "description": "A request for human approval of a tool invocation." + }, + "OpenAIResponseMCPApprovalResponse": { + "type": "object", + "properties": { + "approval_request_id": { + "type": "string" + }, + "approve": { + "type": "boolean" + }, + "type": { + "type": "string", + "const": "mcp_approval_response", + "default": "mcp_approval_response" + }, + "id": { + "type": "string" + }, + "reason": { + "type": "string" + } + }, + "additionalProperties": false, + "required": [ + "approval_request_id", + "approve", + "type" + ], + "title": "OpenAIResponseMCPApprovalResponse", + "description": "A response to an MCP approval request." 
+ }, "OpenAIResponseMessage": { "type": "object", "properties": { @@ -7212,41 +7316,17 @@ { "$ref": "#/components/schemas/OpenAIResponseMCPApprovalResponse" }, + { + "$ref": "#/components/schemas/OpenAIResponseOutputMessageMCPCall" + }, + { + "$ref": "#/components/schemas/OpenAIResponseOutputMessageMCPListTools" + }, { "$ref": "#/components/schemas/OpenAIResponseMessage" } ] }, - "OpenAIResponseInputFunctionToolCallOutput": { - "type": "object", - "properties": { - "call_id": { - "type": "string" - }, - "output": { - "type": "string" - }, - "type": { - "type": "string", - "const": "function_call_output", - "default": "function_call_output" - }, - "id": { - "type": "string" - }, - "status": { - "type": "string" - } - }, - "additionalProperties": false, - "required": [ - "call_id", - "output", - "type" - ], - "title": "OpenAIResponseInputFunctionToolCallOutput", - "description": "This represents the output of a function call that gets passed back to the model." - }, "OpenAIResponseInputToolFileSearch": { "type": "object", "properties": { @@ -7409,68 +7489,6 @@ "title": "OpenAIResponseInputToolWebSearch", "description": "Web search tool configuration for OpenAI response inputs." }, - "OpenAIResponseMCPApprovalRequest": { - "type": "object", - "properties": { - "arguments": { - "type": "string" - }, - "id": { - "type": "string" - }, - "name": { - "type": "string" - }, - "server_label": { - "type": "string" - }, - "type": { - "type": "string", - "const": "mcp_approval_request", - "default": "mcp_approval_request" - } - }, - "additionalProperties": false, - "required": [ - "arguments", - "id", - "name", - "server_label", - "type" - ], - "title": "OpenAIResponseMCPApprovalRequest", - "description": "A request for human approval of a tool invocation." - }, - "OpenAIResponseMCPApprovalResponse": { - "type": "object", - "properties": { - "approval_request_id": { - "type": "string" - }, - "approve": { - "type": "boolean" - }, - "type": { - "type": "string", - "const": "mcp_approval_response", - "default": "mcp_approval_response" - }, - "id": { - "type": "string" - }, - "reason": { - "type": "string" - } - }, - "additionalProperties": false, - "required": [ - "approval_request_id", - "approve", - "type" - ], - "title": "OpenAIResponseMCPApprovalResponse", - "description": "A response to an MCP approval request." 
- }, "OpenAIResponseObjectWithInput": { "type": "object", "properties": { diff --git a/docs/static/llama-stack-spec.yaml b/docs/static/llama-stack-spec.yaml index 98e5f9cbb..b589729bf 100644 --- a/docs/static/llama-stack-spec.yaml +++ b/docs/static/llama-stack-spec.yaml @@ -4153,18 +4153,24 @@ components: ConversationItem: oneOf: - $ref: '#/components/schemas/OpenAIResponseMessage' - - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' - - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse' - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' discriminator: propertyName: type mapping: message: '#/components/schemas/OpenAIResponseMessage' - function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' - file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + function_call_output: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' + mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + mcp_approval_response: '#/components/schemas/OpenAIResponseMCPApprovalResponse' mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' OpenAIResponseAnnotationCitation: @@ -4285,6 +4291,31 @@ components: url_citation: '#/components/schemas/OpenAIResponseAnnotationCitation' container_file_citation: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation' file_path: '#/components/schemas/OpenAIResponseAnnotationFilePath' + "OpenAIResponseInputFunctionToolCallOutput": + type: object + properties: + call_id: + type: string + output: + type: string + type: + type: string + const: function_call_output + default: function_call_output + id: + type: string + status: + type: string + additionalProperties: false + required: + - call_id + - output + - type + title: >- + OpenAIResponseInputFunctionToolCallOutput + description: >- + This represents the output of a function call that gets passed back to the + model. OpenAIResponseInputMessageContent: oneOf: - $ref: '#/components/schemas/OpenAIResponseInputMessageContentText' @@ -4343,6 +4374,53 @@ components: title: OpenAIResponseInputMessageContentText description: >- Text content for input messages in OpenAI response format. 
+ OpenAIResponseMCPApprovalRequest: + type: object + properties: + arguments: + type: string + id: + type: string + name: + type: string + server_label: + type: string + type: + type: string + const: mcp_approval_request + default: mcp_approval_request + additionalProperties: false + required: + - arguments + - id + - name + - server_label + - type + title: OpenAIResponseMCPApprovalRequest + description: >- + A request for human approval of a tool invocation. + OpenAIResponseMCPApprovalResponse: + type: object + properties: + approval_request_id: + type: string + approve: + type: boolean + type: + type: string + const: mcp_approval_response + default: mcp_approval_response + id: + type: string + reason: + type: string + additionalProperties: false + required: + - approval_request_id + - approve + - type + title: OpenAIResponseMCPApprovalResponse + description: A response to an MCP approval request. OpenAIResponseMessage: type: object properties: @@ -5500,32 +5578,9 @@ components: - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse' + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' - $ref: '#/components/schemas/OpenAIResponseMessage' - "OpenAIResponseInputFunctionToolCallOutput": - type: object - properties: - call_id: - type: string - output: - type: string - type: - type: string - const: function_call_output - default: function_call_output - id: - type: string - status: - type: string - additionalProperties: false - required: - - call_id - - output - - type - title: >- - OpenAIResponseInputFunctionToolCallOutput - description: >- - This represents the output of a function call that gets passed back to the - model. OpenAIResponseInputToolFileSearch: type: object properties: @@ -5642,53 +5697,6 @@ components: title: OpenAIResponseInputToolWebSearch description: >- Web search tool configuration for OpenAI response inputs. - OpenAIResponseMCPApprovalRequest: - type: object - properties: - arguments: - type: string - id: - type: string - name: - type: string - server_label: - type: string - type: - type: string - const: mcp_approval_request - default: mcp_approval_request - additionalProperties: false - required: - - arguments - - id - - name - - server_label - - type - title: OpenAIResponseMCPApprovalRequest - description: >- - A request for human approval of a tool invocation. - OpenAIResponseMCPApprovalResponse: - type: object - properties: - approval_request_id: - type: string - approve: - type: boolean - type: - type: string - const: mcp_approval_response - default: mcp_approval_response - id: - type: string - reason: - type: string - additionalProperties: false - required: - - approval_request_id - - approve - - type - title: OpenAIResponseMCPApprovalResponse - description: A response to an MCP approval request. 
OpenAIResponseObjectWithInput: type: object properties: diff --git a/docs/static/stainless-llama-stack-spec.html b/docs/static/stainless-llama-stack-spec.html index 932d9fd96..0562ac235 100644 --- a/docs/static/stainless-llama-stack-spec.html +++ b/docs/static/stainless-llama-stack-spec.html @@ -7151,13 +7151,22 @@ "$ref": "#/components/schemas/OpenAIResponseMessage" }, { - "$ref": "#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall" + "$ref": "#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall" }, { "$ref": "#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall" }, { - "$ref": "#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall" + "$ref": "#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall" + }, + { + "$ref": "#/components/schemas/OpenAIResponseInputFunctionToolCallOutput" + }, + { + "$ref": "#/components/schemas/OpenAIResponseMCPApprovalRequest" + }, + { + "$ref": "#/components/schemas/OpenAIResponseMCPApprovalResponse" }, { "$ref": "#/components/schemas/OpenAIResponseOutputMessageMCPCall" @@ -7170,9 +7179,12 @@ "propertyName": "type", "mapping": { "message": "#/components/schemas/OpenAIResponseMessage", - "function_call": "#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall", - "file_search_call": "#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall", "web_search_call": "#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall", + "file_search_call": "#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall", + "function_call": "#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall", + "function_call_output": "#/components/schemas/OpenAIResponseInputFunctionToolCallOutput", + "mcp_approval_request": "#/components/schemas/OpenAIResponseMCPApprovalRequest", + "mcp_approval_response": "#/components/schemas/OpenAIResponseMCPApprovalResponse", "mcp_call": "#/components/schemas/OpenAIResponseOutputMessageMCPCall", "mcp_list_tools": "#/components/schemas/OpenAIResponseOutputMessageMCPListTools" } @@ -7330,6 +7342,36 @@ } } }, + "OpenAIResponseInputFunctionToolCallOutput": { + "type": "object", + "properties": { + "call_id": { + "type": "string" + }, + "output": { + "type": "string" + }, + "type": { + "type": "string", + "const": "function_call_output", + "default": "function_call_output" + }, + "id": { + "type": "string" + }, + "status": { + "type": "string" + } + }, + "additionalProperties": false, + "required": [ + "call_id", + "output", + "type" + ], + "title": "OpenAIResponseInputFunctionToolCallOutput", + "description": "This represents the output of a function call that gets passed back to the model." + }, "OpenAIResponseInputMessageContent": { "oneOf": [ { @@ -7409,6 +7451,68 @@ "title": "OpenAIResponseInputMessageContentText", "description": "Text content for input messages in OpenAI response format." }, + "OpenAIResponseMCPApprovalRequest": { + "type": "object", + "properties": { + "arguments": { + "type": "string" + }, + "id": { + "type": "string" + }, + "name": { + "type": "string" + }, + "server_label": { + "type": "string" + }, + "type": { + "type": "string", + "const": "mcp_approval_request", + "default": "mcp_approval_request" + } + }, + "additionalProperties": false, + "required": [ + "arguments", + "id", + "name", + "server_label", + "type" + ], + "title": "OpenAIResponseMCPApprovalRequest", + "description": "A request for human approval of a tool invocation." 
+ }, + "OpenAIResponseMCPApprovalResponse": { + "type": "object", + "properties": { + "approval_request_id": { + "type": "string" + }, + "approve": { + "type": "boolean" + }, + "type": { + "type": "string", + "const": "mcp_approval_response", + "default": "mcp_approval_response" + }, + "id": { + "type": "string" + }, + "reason": { + "type": "string" + } + }, + "additionalProperties": false, + "required": [ + "approval_request_id", + "approve", + "type" + ], + "title": "OpenAIResponseMCPApprovalResponse", + "description": "A response to an MCP approval request." + }, "OpenAIResponseMessage": { "type": "object", "properties": { @@ -8884,41 +8988,17 @@ { "$ref": "#/components/schemas/OpenAIResponseMCPApprovalResponse" }, + { + "$ref": "#/components/schemas/OpenAIResponseOutputMessageMCPCall" + }, + { + "$ref": "#/components/schemas/OpenAIResponseOutputMessageMCPListTools" + }, { "$ref": "#/components/schemas/OpenAIResponseMessage" } ] }, - "OpenAIResponseInputFunctionToolCallOutput": { - "type": "object", - "properties": { - "call_id": { - "type": "string" - }, - "output": { - "type": "string" - }, - "type": { - "type": "string", - "const": "function_call_output", - "default": "function_call_output" - }, - "id": { - "type": "string" - }, - "status": { - "type": "string" - } - }, - "additionalProperties": false, - "required": [ - "call_id", - "output", - "type" - ], - "title": "OpenAIResponseInputFunctionToolCallOutput", - "description": "This represents the output of a function call that gets passed back to the model." - }, "OpenAIResponseInputToolFileSearch": { "type": "object", "properties": { @@ -9081,68 +9161,6 @@ "title": "OpenAIResponseInputToolWebSearch", "description": "Web search tool configuration for OpenAI response inputs." }, - "OpenAIResponseMCPApprovalRequest": { - "type": "object", - "properties": { - "arguments": { - "type": "string" - }, - "id": { - "type": "string" - }, - "name": { - "type": "string" - }, - "server_label": { - "type": "string" - }, - "type": { - "type": "string", - "const": "mcp_approval_request", - "default": "mcp_approval_request" - } - }, - "additionalProperties": false, - "required": [ - "arguments", - "id", - "name", - "server_label", - "type" - ], - "title": "OpenAIResponseMCPApprovalRequest", - "description": "A request for human approval of a tool invocation." - }, - "OpenAIResponseMCPApprovalResponse": { - "type": "object", - "properties": { - "approval_request_id": { - "type": "string" - }, - "approve": { - "type": "boolean" - }, - "type": { - "type": "string", - "const": "mcp_approval_response", - "default": "mcp_approval_response" - }, - "id": { - "type": "string" - }, - "reason": { - "type": "string" - } - }, - "additionalProperties": false, - "required": [ - "approval_request_id", - "approve", - "type" - ], - "title": "OpenAIResponseMCPApprovalResponse", - "description": "A response to an MCP approval request." 
- }, "OpenAIResponseObjectWithInput": { "type": "object", "properties": { diff --git a/docs/static/stainless-llama-stack-spec.yaml b/docs/static/stainless-llama-stack-spec.yaml index b28b3d8f7..e7819c47a 100644 --- a/docs/static/stainless-llama-stack-spec.yaml +++ b/docs/static/stainless-llama-stack-spec.yaml @@ -5366,18 +5366,24 @@ components: ConversationItem: oneOf: - $ref: '#/components/schemas/OpenAIResponseMessage' - - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' - - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse' - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' discriminator: propertyName: type mapping: message: '#/components/schemas/OpenAIResponseMessage' - function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' - file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' + file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' + function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' + function_call_output: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' + mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + mcp_approval_response: '#/components/schemas/OpenAIResponseMCPApprovalResponse' mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' OpenAIResponseAnnotationCitation: @@ -5498,6 +5504,31 @@ components: url_citation: '#/components/schemas/OpenAIResponseAnnotationCitation' container_file_citation: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation' file_path: '#/components/schemas/OpenAIResponseAnnotationFilePath' + "OpenAIResponseInputFunctionToolCallOutput": + type: object + properties: + call_id: + type: string + output: + type: string + type: + type: string + const: function_call_output + default: function_call_output + id: + type: string + status: + type: string + additionalProperties: false + required: + - call_id + - output + - type + title: >- + OpenAIResponseInputFunctionToolCallOutput + description: >- + This represents the output of a function call that gets passed back to the + model. OpenAIResponseInputMessageContent: oneOf: - $ref: '#/components/schemas/OpenAIResponseInputMessageContentText' @@ -5556,6 +5587,53 @@ components: title: OpenAIResponseInputMessageContentText description: >- Text content for input messages in OpenAI response format. 
+ OpenAIResponseMCPApprovalRequest: + type: object + properties: + arguments: + type: string + id: + type: string + name: + type: string + server_label: + type: string + type: + type: string + const: mcp_approval_request + default: mcp_approval_request + additionalProperties: false + required: + - arguments + - id + - name + - server_label + - type + title: OpenAIResponseMCPApprovalRequest + description: >- + A request for human approval of a tool invocation. + OpenAIResponseMCPApprovalResponse: + type: object + properties: + approval_request_id: + type: string + approve: + type: boolean + type: + type: string + const: mcp_approval_response + default: mcp_approval_response + id: + type: string + reason: + type: string + additionalProperties: false + required: + - approval_request_id + - approve + - type + title: OpenAIResponseMCPApprovalResponse + description: A response to an MCP approval request. OpenAIResponseMessage: type: object properties: @@ -6713,32 +6791,9 @@ components: - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse' + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' + - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' - $ref: '#/components/schemas/OpenAIResponseMessage' - "OpenAIResponseInputFunctionToolCallOutput": - type: object - properties: - call_id: - type: string - output: - type: string - type: - type: string - const: function_call_output - default: function_call_output - id: - type: string - status: - type: string - additionalProperties: false - required: - - call_id - - output - - type - title: >- - OpenAIResponseInputFunctionToolCallOutput - description: >- - This represents the output of a function call that gets passed back to the - model. OpenAIResponseInputToolFileSearch: type: object properties: @@ -6855,53 +6910,6 @@ components: title: OpenAIResponseInputToolWebSearch description: >- Web search tool configuration for OpenAI response inputs. - OpenAIResponseMCPApprovalRequest: - type: object - properties: - arguments: - type: string - id: - type: string - name: - type: string - server_label: - type: string - type: - type: string - const: mcp_approval_request - default: mcp_approval_request - additionalProperties: false - required: - - arguments - - id - - name - - server_label - - type - title: OpenAIResponseMCPApprovalRequest - description: >- - A request for human approval of a tool invocation. - OpenAIResponseMCPApprovalResponse: - type: object - properties: - approval_request_id: - type: string - approve: - type: boolean - type: - type: string - const: mcp_approval_response - default: mcp_approval_response - id: - type: string - reason: - type: string - additionalProperties: false - required: - - approval_request_id - - approve - - type - title: OpenAIResponseMCPApprovalResponse - description: A response to an MCP approval request. 
OpenAIResponseObjectWithInput:
      type: object
      properties:
diff --git a/llama_stack/apis/agents/openai_responses.py b/llama_stack/apis/agents/openai_responses.py
index 18176f00f..46f73db48 100644
--- a/llama_stack/apis/agents/openai_responses.py
+++ b/llama_stack/apis/agents/openai_responses.py
@@ -1258,9 +1258,9 @@ OpenAIResponseInput = Annotated[
     | OpenAIResponseInputFunctionToolCallOutput
     | OpenAIResponseMCPApprovalRequest
     | OpenAIResponseMCPApprovalResponse
-    |
-    # Fallback to the generic message type as a last resort
-    OpenAIResponseMessage,
+    | OpenAIResponseOutputMessageMCPCall
+    | OpenAIResponseOutputMessageMCPListTools
+    | OpenAIResponseMessage,
     Field(union_mode="left_to_right"),
 ]
 register_schema(OpenAIResponseInput, name="OpenAIResponseInput")
diff --git a/llama_stack/apis/conversations/conversations.py b/llama_stack/apis/conversations/conversations.py
index 58ae9c35a..3fa51f0fb 100644
--- a/llama_stack/apis/conversations/conversations.py
+++ b/llama_stack/apis/conversations/conversations.py
@@ -12,6 +12,9 @@ from openai.types.responses.response_includable import ResponseIncludable
 from pydantic import BaseModel, Field
 
 from llama_stack.apis.agents.openai_responses import (
+    OpenAIResponseInputFunctionToolCallOutput,
+    OpenAIResponseMCPApprovalRequest,
+    OpenAIResponseMCPApprovalResponse,
     OpenAIResponseMessage,
     OpenAIResponseOutputMessageFileSearchToolCall,
     OpenAIResponseOutputMessageFunctionToolCall,
@@ -61,9 +64,12 @@ class ConversationMessage(BaseModel):
 
 ConversationItem = Annotated[
     OpenAIResponseMessage
-    | OpenAIResponseOutputMessageFunctionToolCall
-    | OpenAIResponseOutputMessageFileSearchToolCall
     | OpenAIResponseOutputMessageWebSearchToolCall
+    | OpenAIResponseOutputMessageFileSearchToolCall
+    | OpenAIResponseOutputMessageFunctionToolCall
+    | OpenAIResponseInputFunctionToolCallOutput
+    | OpenAIResponseMCPApprovalRequest
+    | OpenAIResponseMCPApprovalResponse
     | OpenAIResponseOutputMessageMCPCall
     | OpenAIResponseOutputMessageMCPListTools,
     Field(discriminator="type"),
diff --git a/llama_stack/core/conversations/conversations.py b/llama_stack/core/conversations/conversations.py
index 04441054d..043d1e0da 100644
--- a/llama_stack/core/conversations/conversations.py
+++ b/llama_stack/core/conversations/conversations.py
@@ -255,6 +255,12 @@ class ConversationServiceImpl(Conversations):
 
     async def list(self, conversation_id: str, after=NOT_GIVEN, include=NOT_GIVEN, limit=NOT_GIVEN, order=NOT_GIVEN):
         """List items in the conversation."""
+        if not conversation_id:
+            raise ValueError(f"Expected a non-empty value for `conversation_id` but received {conversation_id!r}")
+
+        # Check that the conversation exists (raises ConversationNotFoundError if it does not)
+        await self.get_conversation(conversation_id)
+
         result = await self.sql_store.fetch_all(table="conversation_items", where={"conversation_id": conversation_id})
         records = result.data
diff --git a/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py b/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py
index e459b0232..3f8da5d2a 100644
--- a/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py
+++ b/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py
@@ -100,6 +100,7 @@ class OpenAIResponsesImpl:
         input: str | list[OpenAIResponseInput],
         tools: list[OpenAIResponseInputTool] | None,
         previous_response_id: str | None,
+        conversation: str | None,
     ) -> tuple[str | list[OpenAIResponseInput],
list[OpenAIMessageParam]]: """Process input with optional previous response context. @@ -125,15 +126,20 @@ class OpenAIResponsesImpl: tool_context.recover_tools_from_previous_response(previous_response) else: - all_input = input - messages = await convert_response_input_to_chat_messages(input) + if conversation is not None: + conversation_items = await self.conversations_api.list(conversation, order="asc") + all_input = conversation_items.data + if isinstance(input, str): + all_input.append(OpenAIResponseMessage(role="user", content=input)) + elif isinstance(input, list): + all_input.extend(input) + else: + all_input = input + + messages = await convert_response_input_to_chat_messages(all_input) return all_input, messages, tool_context - async def _prepend_instructions(self, messages, instructions): - if instructions: - messages.insert(0, OpenAISystemMessageParam(content=instructions)) - async def get_openai_response( self, response_id: str, @@ -229,27 +235,21 @@ class OpenAIResponsesImpl: if shields is not None: raise NotImplementedError("Shields parameter is not yet implemented in the meta-reference provider") - if conversation is not None and previous_response_id is not None: - raise ValueError( - "Mutually exclusive parameters: 'previous_response_id' and 'conversation'. Ensure you are only providing one of these parameters." - ) - - original_input = input # needed for syncing to Conversations if conversation is not None: + if previous_response_id is not None: + raise ValueError( + "Mutually exclusive parameters: 'previous_response_id' and 'conversation'. Ensure you are only providing one of these parameters." + ) + if not conversation.startswith("conv_"): raise InvalidConversationIdError(conversation) - # Check conversation exists (raises ConversationNotFoundError if not) - _ = await self.conversations_api.get_conversation(conversation) - input = await self._load_conversation_context(conversation, input) - stream_gen = self._create_streaming_response( input=input, - original_input=original_input, + conversation=conversation, model=model, instructions=instructions, previous_response_id=previous_response_id, - conversation=conversation, store=store, temperature=temperature, text=text, @@ -292,7 +292,6 @@ class OpenAIResponsesImpl: self, input: str | list[OpenAIResponseInput], model: str, - original_input: str | list[OpenAIResponseInput] | None = None, instructions: str | None = None, previous_response_id: str | None = None, conversation: str | None = None, @@ -304,9 +303,11 @@ class OpenAIResponsesImpl: ) -> AsyncIterator[OpenAIResponseObjectStream]: # Input preprocessing all_input, messages, tool_context = await self._process_input_with_previous_response( - input, tools, previous_response_id + input, tools, previous_response_id, conversation ) - await self._prepend_instructions(messages, instructions) + + if instructions: + messages.insert(0, OpenAISystemMessageParam(content=instructions)) # Structured outputs response_format = await convert_response_text_to_chat_response_format(text) @@ -338,6 +339,8 @@ class OpenAIResponsesImpl: # Stream the response final_response = None failed_response = None + + output_items = [] async for stream_chunk in orchestrator.create_response(): if stream_chunk.type in {"response.completed", "response.incomplete"}: final_response = stream_chunk.response @@ -345,6 +348,10 @@ class OpenAIResponsesImpl: failed_response = stream_chunk.response yield stream_chunk + if stream_chunk.type == "response.output_item.done": + item = stream_chunk.item + 
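                    # Collect every completed output item (assistant message, tool call,
+                    # tool output, MCP listing, ...) so it can be synced to an attached
+                    # conversation once streaming finishes.
+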
output_items.append(item)
+
             # Store and sync immediately after yielding terminal events
             # This ensures the storage/syncing happens even if the consumer breaks early
             if (
@@ -353,6 +360,7 @@
                 and final_response
                 and failed_response is None
             ):
+                # TODO: we really should work off of output_items instead of "final_messages"
                 await self._store_response(
                     response=final_response,
                     input=all_input,
@@ -360,87 +368,29 @@
                 )
 
             if stream_chunk.type in {"response.completed", "response.incomplete"} and conversation and final_response:
-                # for Conversations, we need to use the original_input if it's available, otherwise use input
-                sync_input = original_input if original_input is not None else input
-                await self._sync_response_to_conversation(conversation, sync_input, final_response)
+                # When this response starts the conversation, the raw "input" is the only
+                # record of the user's turn, so persist it; everything else is captured
+                # via output_items.
+                await self._sync_response_to_conversation(conversation, input, output_items)
 
     async def delete_openai_response(self, response_id: str) -> OpenAIDeleteResponseObject:
         return await self.responses_store.delete_response_object(response_id)
 
-    async def _load_conversation_context(
-        self, conversation_id: str, content: str | list[OpenAIResponseInput]
-    ) -> list[OpenAIResponseInput]:
-        """Load conversation history and merge with provided content."""
-        conversation_items = await self.conversations_api.list(conversation_id, order="asc")
-
-        context_messages = []
-        for item in conversation_items.data:
-            if isinstance(item, OpenAIResponseMessage):
-                if item.role == "user":
-                    context_messages.append(
-                        OpenAIResponseMessage(
-                            role="user", content=item.content, id=item.id if hasattr(item, "id") else None
-                        )
-                    )
-                elif item.role == "assistant":
-                    context_messages.append(
-                        OpenAIResponseMessage(
-                            role="assistant", content=item.content, id=item.id if hasattr(item, "id") else None
-                        )
-                    )
-
-        # add new content to context
-        if isinstance(content, str):
-            context_messages.append(OpenAIResponseMessage(role="user", content=content))
-        elif isinstance(content, list):
-            context_messages.extend(content)
-
-        return context_messages
-
     async def _sync_response_to_conversation(
-        self, conversation_id: str, content: str | list[OpenAIResponseInput], response: OpenAIResponseObject
+        self, conversation_id: str, input: str | list[OpenAIResponseInput] | None, output_items: list[ConversationItem]
    ) -> None:
        """Sync content and response messages to the conversation."""
        conversation_items = []
 
-        # add user content message(s)
-        if isinstance(content, str):
+        # TODO: persisting the user turn this way only covers str and list inputs; revisit for full coverage
+        if isinstance(input, str):
             conversation_items.append(
-                {"type": "message", "role": "user", "content": [{"type": "input_text", "text": content}]}
+                OpenAIResponseMessage(role="user", content=[OpenAIResponseInputMessageContentText(text=input)])
             )
-        elif isinstance(content, list):
-            for item in content:
-                if not isinstance(item, OpenAIResponseMessage):
-                    raise NotImplementedError(f"Unsupported input item type: {type(item)}")
+        elif isinstance(input, list):
+            conversation_items.extend(input)
-                if item.role == "user":
-                    if isinstance(item.content, str):
-                        conversation_items.append(
-                            {
-                                "type": "message",
-                                "role": "user",
-                                "content": [{"type": "input_text", "text": item.content}],
-                            }
-                        )
-                    elif isinstance(item.content, list):
-                        conversation_items.append({"type": "message", "role": "user", "content": item.content})
else: - raise NotImplementedError(f"Unsupported user message content type: {type(item.content)}") - elif item.role == "assistant": - if isinstance(item.content, list): - conversation_items.append({"type": "message", "role": "assistant", "content": item.content}) - else: - raise NotImplementedError(f"Unsupported assistant message content type: {type(item.content)}") - else: - raise NotImplementedError(f"Unsupported message role: {item.role}") + conversation_items.extend(output_items) - # add assistant response message - for output_item in response.output: - if isinstance(output_item, OpenAIResponseMessage) and output_item.role == "assistant": - if hasattr(output_item, "content") and isinstance(output_item.content, list): - conversation_items.append({"type": "message", "role": "assistant", "content": output_item.content}) - - if conversation_items: - adapter = TypeAdapter(list[ConversationItem]) - validated_items = adapter.validate_python(conversation_items) - await self.conversations_api.add_items(conversation_id, validated_items) + adapter = TypeAdapter(list[ConversationItem]) + validated_items = adapter.validate_python(conversation_items) + await self.conversations_api.add_items(conversation_id, validated_items) diff --git a/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py b/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py index 6582e459a..cfdd09baa 100644 --- a/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +++ b/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py @@ -19,6 +19,7 @@ from llama_stack.apis.agents.openai_responses import ( OpenAIResponseInputTool, OpenAIResponseInputToolMCP, OpenAIResponseMCPApprovalRequest, + OpenAIResponseMessage, OpenAIResponseObject, OpenAIResponseObjectStream, OpenAIResponseObjectStreamResponseCompleted, @@ -42,6 +43,7 @@ from llama_stack.apis.agents.openai_responses import ( OpenAIResponseObjectStreamResponseRefusalDelta, OpenAIResponseObjectStreamResponseRefusalDone, OpenAIResponseOutput, + OpenAIResponseOutputMessageContentOutputText, OpenAIResponseOutputMessageFunctionToolCall, OpenAIResponseOutputMessageMCPListTools, OpenAIResponseText, @@ -500,6 +502,7 @@ class StreamingResponseOrchestrator: # Track tool call items for streaming events tool_call_item_ids: dict[int, str] = {} # Track content parts for streaming events + message_item_added_emitted = False content_part_emitted = False reasoning_part_emitted = False refusal_part_emitted = False @@ -521,6 +524,23 @@ class StreamingResponseOrchestrator: for chunk_choice in chunk.choices: # Emit incremental text content as delta events if chunk_choice.delta.content: + # Emit output_item.added for the message on first content + if not message_item_added_emitted: + message_item_added_emitted = True + self.sequence_number += 1 + message_item = OpenAIResponseMessage( + id=message_item_id, + content=[], + role="assistant", + status="in_progress", + ) + yield OpenAIResponseObjectStreamResponseOutputItemAdded( + response_id=self.response_id, + item=message_item, + output_index=message_output_index, + sequence_number=self.sequence_number, + ) + # Emit content_part.added event for first text chunk if not content_part_emitted: content_part_emitted = True @@ -700,6 +720,32 @@ class StreamingResponseOrchestrator: if chat_response_tool_calls: chat_response_content = [] + # Emit output_item.done for message when we have content and no tool calls + if message_item_added_emitted and not chat_response_tool_calls: + 
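            # Rebuild the completed assistant message from the accumulated text deltas
+            # so the output_item.done event carries the same final content as the
+            # message placed in the final response.
+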
content_parts = [] + if content_part_emitted: + final_text = "".join(chat_response_content) + content_parts.append( + OpenAIResponseOutputMessageContentOutputText( + text=final_text, + annotations=[], + ) + ) + + self.sequence_number += 1 + message_item = OpenAIResponseMessage( + id=message_item_id, + content=content_parts, + role="assistant", + status="completed", + ) + yield OpenAIResponseObjectStreamResponseOutputItemDone( + response_id=self.response_id, + item=message_item, + output_index=message_output_index, + sequence_number=self.sequence_number, + ) + yield ChatCompletionResult( response_id=chat_response_id, content=chat_response_content,