From ed69c1b3cc482e60c7011614b14d486c17334f73 Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Tue, 3 Jun 2025 15:48:41 -0700 Subject: [PATCH] feat(responses): add more streaming response types (#2375) --- docs/_static/llama-stack-spec.html | 488 ++++++++++++++++++ docs/_static/llama-stack-spec.yaml | 362 +++++++++++++ llama_stack/apis/agents/openai_responses.py | 148 +++++- .../agents/meta_reference/openai_responses.py | 19 +- 4 files changed, 1003 insertions(+), 14 deletions(-) diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html index 6b858eecf..775eb93b3 100644 --- a/docs/_static/llama-stack-spec.html +++ b/docs/_static/llama-stack-spec.html @@ -7593,9 +7593,57 @@ { "$ref": "#/components/schemas/OpenAIResponseObjectStreamResponseCreated" }, + { + "$ref": "#/components/schemas/OpenAIResponseObjectStreamResponseOutputItemAdded" + }, + { + "$ref": "#/components/schemas/OpenAIResponseObjectStreamResponseOutputItemDone" + }, { "$ref": "#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDelta" }, + { + "$ref": "#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDone" + }, + { + "$ref": "#/components/schemas/OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta" + }, + { + "$ref": "#/components/schemas/OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone" + }, + { + "$ref": "#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallInProgress" + }, + { + "$ref": "#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallSearching" + }, + { + "$ref": "#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallCompleted" + }, + { + "$ref": "#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsInProgress" + }, + { + "$ref": "#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsFailed" + }, + { + "$ref": "#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsCompleted" + }, + { + "$ref": "#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta" + }, + { + "$ref": "#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallArgumentsDone" + }, + { + "$ref": "#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallInProgress" + }, + { + "$ref": "#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallFailed" + }, + { + "$ref": "#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallCompleted" + }, { "$ref": "#/components/schemas/OpenAIResponseObjectStreamResponseCompleted" } @@ -7604,7 +7652,23 @@ "propertyName": "type", "mapping": { "response.created": "#/components/schemas/OpenAIResponseObjectStreamResponseCreated", + "response.output_item.added": "#/components/schemas/OpenAIResponseObjectStreamResponseOutputItemAdded", + "response.output_item.done": "#/components/schemas/OpenAIResponseObjectStreamResponseOutputItemDone", "response.output_text.delta": "#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDelta", + "response.output_text.done": "#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDone", + "response.function_call_arguments.delta": "#/components/schemas/OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta", + "response.function_call_arguments.done": "#/components/schemas/OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone", + "response.web_search_call.in_progress": "#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallInProgress", + "response.web_search_call.searching": 
"#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallSearching", + "response.web_search_call.completed": "#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallCompleted", + "response.mcp_list_tools.in_progress": "#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsInProgress", + "response.mcp_list_tools.failed": "#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsFailed", + "response.mcp_list_tools.completed": "#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsCompleted", + "response.mcp_call.arguments.delta": "#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta", + "response.mcp_call.arguments.done": "#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallArgumentsDone", + "response.mcp_call.in_progress": "#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallInProgress", + "response.mcp_call.failed": "#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallFailed", + "response.mcp_call.completed": "#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallCompleted", "response.completed": "#/components/schemas/OpenAIResponseObjectStreamResponseCompleted" } } @@ -7647,6 +7711,314 @@ ], "title": "OpenAIResponseObjectStreamResponseCreated" }, + "OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta": { + "type": "object", + "properties": { + "delta": { + "type": "string" + }, + "item_id": { + "type": "string" + }, + "output_index": { + "type": "integer" + }, + "sequence_number": { + "type": "integer" + }, + "type": { + "type": "string", + "const": "response.function_call_arguments.delta", + "default": "response.function_call_arguments.delta" + } + }, + "additionalProperties": false, + "required": [ + "delta", + "item_id", + "output_index", + "sequence_number", + "type" + ], + "title": "OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta" + }, + "OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone": { + "type": "object", + "properties": { + "arguments": { + "type": "string" + }, + "item_id": { + "type": "string" + }, + "output_index": { + "type": "integer" + }, + "sequence_number": { + "type": "integer" + }, + "type": { + "type": "string", + "const": "response.function_call_arguments.done", + "default": "response.function_call_arguments.done" + } + }, + "additionalProperties": false, + "required": [ + "arguments", + "item_id", + "output_index", + "sequence_number", + "type" + ], + "title": "OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone" + }, + "OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta": { + "type": "object", + "properties": { + "delta": { + "type": "string" + }, + "item_id": { + "type": "string" + }, + "output_index": { + "type": "integer" + }, + "sequence_number": { + "type": "integer" + }, + "type": { + "type": "string", + "const": "response.mcp_call.arguments.delta", + "default": "response.mcp_call.arguments.delta" + } + }, + "additionalProperties": false, + "required": [ + "delta", + "item_id", + "output_index", + "sequence_number", + "type" + ], + "title": "OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta" + }, + "OpenAIResponseObjectStreamResponseMcpCallArgumentsDone": { + "type": "object", + "properties": { + "arguments": { + "type": "string" + }, + "item_id": { + "type": "string" + }, + "output_index": { + "type": "integer" + }, + "sequence_number": { + "type": "integer" + }, + "type": { + "type": "string", + "const": "response.mcp_call.arguments.done", + "default": 
"response.mcp_call.arguments.done" + } + }, + "additionalProperties": false, + "required": [ + "arguments", + "item_id", + "output_index", + "sequence_number", + "type" + ], + "title": "OpenAIResponseObjectStreamResponseMcpCallArgumentsDone" + }, + "OpenAIResponseObjectStreamResponseMcpCallCompleted": { + "type": "object", + "properties": { + "sequence_number": { + "type": "integer" + }, + "type": { + "type": "string", + "const": "response.mcp_call.completed", + "default": "response.mcp_call.completed" + } + }, + "additionalProperties": false, + "required": [ + "sequence_number", + "type" + ], + "title": "OpenAIResponseObjectStreamResponseMcpCallCompleted" + }, + "OpenAIResponseObjectStreamResponseMcpCallFailed": { + "type": "object", + "properties": { + "sequence_number": { + "type": "integer" + }, + "type": { + "type": "string", + "const": "response.mcp_call.failed", + "default": "response.mcp_call.failed" + } + }, + "additionalProperties": false, + "required": [ + "sequence_number", + "type" + ], + "title": "OpenAIResponseObjectStreamResponseMcpCallFailed" + }, + "OpenAIResponseObjectStreamResponseMcpCallInProgress": { + "type": "object", + "properties": { + "item_id": { + "type": "string" + }, + "output_index": { + "type": "integer" + }, + "sequence_number": { + "type": "integer" + }, + "type": { + "type": "string", + "const": "response.mcp_call.in_progress", + "default": "response.mcp_call.in_progress" + } + }, + "additionalProperties": false, + "required": [ + "item_id", + "output_index", + "sequence_number", + "type" + ], + "title": "OpenAIResponseObjectStreamResponseMcpCallInProgress" + }, + "OpenAIResponseObjectStreamResponseMcpListToolsCompleted": { + "type": "object", + "properties": { + "sequence_number": { + "type": "integer" + }, + "type": { + "type": "string", + "const": "response.mcp_list_tools.completed", + "default": "response.mcp_list_tools.completed" + } + }, + "additionalProperties": false, + "required": [ + "sequence_number", + "type" + ], + "title": "OpenAIResponseObjectStreamResponseMcpListToolsCompleted" + }, + "OpenAIResponseObjectStreamResponseMcpListToolsFailed": { + "type": "object", + "properties": { + "sequence_number": { + "type": "integer" + }, + "type": { + "type": "string", + "const": "response.mcp_list_tools.failed", + "default": "response.mcp_list_tools.failed" + } + }, + "additionalProperties": false, + "required": [ + "sequence_number", + "type" + ], + "title": "OpenAIResponseObjectStreamResponseMcpListToolsFailed" + }, + "OpenAIResponseObjectStreamResponseMcpListToolsInProgress": { + "type": "object", + "properties": { + "sequence_number": { + "type": "integer" + }, + "type": { + "type": "string", + "const": "response.mcp_list_tools.in_progress", + "default": "response.mcp_list_tools.in_progress" + } + }, + "additionalProperties": false, + "required": [ + "sequence_number", + "type" + ], + "title": "OpenAIResponseObjectStreamResponseMcpListToolsInProgress" + }, + "OpenAIResponseObjectStreamResponseOutputItemAdded": { + "type": "object", + "properties": { + "response_id": { + "type": "string" + }, + "item": { + "$ref": "#/components/schemas/OpenAIResponseOutput" + }, + "output_index": { + "type": "integer" + }, + "sequence_number": { + "type": "integer" + }, + "type": { + "type": "string", + "const": "response.output_item.added", + "default": "response.output_item.added" + } + }, + "additionalProperties": false, + "required": [ + "response_id", + "item", + "output_index", + "sequence_number", + "type" + ], + "title": 
"OpenAIResponseObjectStreamResponseOutputItemAdded" + }, + "OpenAIResponseObjectStreamResponseOutputItemDone": { + "type": "object", + "properties": { + "response_id": { + "type": "string" + }, + "item": { + "$ref": "#/components/schemas/OpenAIResponseOutput" + }, + "output_index": { + "type": "integer" + }, + "sequence_number": { + "type": "integer" + }, + "type": { + "type": "string", + "const": "response.output_item.done", + "default": "response.output_item.done" + } + }, + "additionalProperties": false, + "required": [ + "response_id", + "item", + "output_index", + "sequence_number", + "type" + ], + "title": "OpenAIResponseObjectStreamResponseOutputItemDone" + }, "OpenAIResponseObjectStreamResponseOutputTextDelta": { "type": "object", "properties": { @@ -7682,6 +8054,122 @@ ], "title": "OpenAIResponseObjectStreamResponseOutputTextDelta" }, + "OpenAIResponseObjectStreamResponseOutputTextDone": { + "type": "object", + "properties": { + "content_index": { + "type": "integer" + }, + "text": { + "type": "string" + }, + "item_id": { + "type": "string" + }, + "output_index": { + "type": "integer" + }, + "sequence_number": { + "type": "integer" + }, + "type": { + "type": "string", + "const": "response.output_text.done", + "default": "response.output_text.done" + } + }, + "additionalProperties": false, + "required": [ + "content_index", + "text", + "item_id", + "output_index", + "sequence_number", + "type" + ], + "title": "OpenAIResponseObjectStreamResponseOutputTextDone" + }, + "OpenAIResponseObjectStreamResponseWebSearchCallCompleted": { + "type": "object", + "properties": { + "item_id": { + "type": "string" + }, + "output_index": { + "type": "integer" + }, + "sequence_number": { + "type": "integer" + }, + "type": { + "type": "string", + "const": "response.web_search_call.completed", + "default": "response.web_search_call.completed" + } + }, + "additionalProperties": false, + "required": [ + "item_id", + "output_index", + "sequence_number", + "type" + ], + "title": "OpenAIResponseObjectStreamResponseWebSearchCallCompleted" + }, + "OpenAIResponseObjectStreamResponseWebSearchCallInProgress": { + "type": "object", + "properties": { + "item_id": { + "type": "string" + }, + "output_index": { + "type": "integer" + }, + "sequence_number": { + "type": "integer" + }, + "type": { + "type": "string", + "const": "response.web_search_call.in_progress", + "default": "response.web_search_call.in_progress" + } + }, + "additionalProperties": false, + "required": [ + "item_id", + "output_index", + "sequence_number", + "type" + ], + "title": "OpenAIResponseObjectStreamResponseWebSearchCallInProgress" + }, + "OpenAIResponseObjectStreamResponseWebSearchCallSearching": { + "type": "object", + "properties": { + "item_id": { + "type": "string" + }, + "output_index": { + "type": "integer" + }, + "sequence_number": { + "type": "integer" + }, + "type": { + "type": "string", + "const": "response.web_search_call.searching", + "default": "response.web_search_call.searching" + } + }, + "additionalProperties": false, + "required": [ + "item_id", + "output_index", + "sequence_number", + "type" + ], + "title": "OpenAIResponseObjectStreamResponseWebSearchCallSearching" + }, "EmbeddingsRequest": { "type": "object", "properties": { diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml index b5172e947..ef6b5d70a 100644 --- a/docs/_static/llama-stack-spec.yaml +++ b/docs/_static/llama-stack-spec.yaml @@ -5355,13 +5355,45 @@ components: OpenAIResponseObjectStream: oneOf: - $ref: 
'#/components/schemas/OpenAIResponseObjectStreamResponseCreated' + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputItemAdded' + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputItemDone' - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDelta' + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDone' + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta' + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone' + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallInProgress' + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallSearching' + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallCompleted' + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsInProgress' + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsFailed' + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsCompleted' + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta' + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallArgumentsDone' + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallInProgress' + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallFailed' + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallCompleted' - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseCompleted' discriminator: propertyName: type mapping: response.created: '#/components/schemas/OpenAIResponseObjectStreamResponseCreated' + response.output_item.added: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputItemAdded' + response.output_item.done: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputItemDone' response.output_text.delta: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDelta' + response.output_text.done: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDone' + response.function_call_arguments.delta: '#/components/schemas/OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta' + response.function_call_arguments.done: '#/components/schemas/OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone' + response.web_search_call.in_progress: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallInProgress' + response.web_search_call.searching: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallSearching' + response.web_search_call.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallCompleted' + response.mcp_list_tools.in_progress: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsInProgress' + response.mcp_list_tools.failed: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsFailed' + response.mcp_list_tools.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsCompleted' + response.mcp_call.arguments.delta: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta' + response.mcp_call.arguments.done: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallArgumentsDone' + response.mcp_call.in_progress: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallInProgress' + response.mcp_call.failed: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallFailed' + 
response.mcp_call.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallCompleted' response.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseCompleted' "OpenAIResponseObjectStreamResponseCompleted": type: object @@ -5393,6 +5425,246 @@ components: - type title: >- OpenAIResponseObjectStreamResponseCreated + "OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta": + type: object + properties: + delta: + type: string + item_id: + type: string + output_index: + type: integer + sequence_number: + type: integer + type: + type: string + const: response.function_call_arguments.delta + default: response.function_call_arguments.delta + additionalProperties: false + required: + - delta + - item_id + - output_index + - sequence_number + - type + title: >- + OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta + "OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone": + type: object + properties: + arguments: + type: string + item_id: + type: string + output_index: + type: integer + sequence_number: + type: integer + type: + type: string + const: response.function_call_arguments.done + default: response.function_call_arguments.done + additionalProperties: false + required: + - arguments + - item_id + - output_index + - sequence_number + - type + title: >- + OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone + "OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta": + type: object + properties: + delta: + type: string + item_id: + type: string + output_index: + type: integer + sequence_number: + type: integer + type: + type: string + const: response.mcp_call.arguments.delta + default: response.mcp_call.arguments.delta + additionalProperties: false + required: + - delta + - item_id + - output_index + - sequence_number + - type + title: >- + OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta + "OpenAIResponseObjectStreamResponseMcpCallArgumentsDone": + type: object + properties: + arguments: + type: string + item_id: + type: string + output_index: + type: integer + sequence_number: + type: integer + type: + type: string + const: response.mcp_call.arguments.done + default: response.mcp_call.arguments.done + additionalProperties: false + required: + - arguments + - item_id + - output_index + - sequence_number + - type + title: >- + OpenAIResponseObjectStreamResponseMcpCallArgumentsDone + "OpenAIResponseObjectStreamResponseMcpCallCompleted": + type: object + properties: + sequence_number: + type: integer + type: + type: string + const: response.mcp_call.completed + default: response.mcp_call.completed + additionalProperties: false + required: + - sequence_number + - type + title: >- + OpenAIResponseObjectStreamResponseMcpCallCompleted + "OpenAIResponseObjectStreamResponseMcpCallFailed": + type: object + properties: + sequence_number: + type: integer + type: + type: string + const: response.mcp_call.failed + default: response.mcp_call.failed + additionalProperties: false + required: + - sequence_number + - type + title: >- + OpenAIResponseObjectStreamResponseMcpCallFailed + "OpenAIResponseObjectStreamResponseMcpCallInProgress": + type: object + properties: + item_id: + type: string + output_index: + type: integer + sequence_number: + type: integer + type: + type: string + const: response.mcp_call.in_progress + default: response.mcp_call.in_progress + additionalProperties: false + required: + - item_id + - output_index + - sequence_number + - type + title: >- + OpenAIResponseObjectStreamResponseMcpCallInProgress + 
"OpenAIResponseObjectStreamResponseMcpListToolsCompleted": + type: object + properties: + sequence_number: + type: integer + type: + type: string + const: response.mcp_list_tools.completed + default: response.mcp_list_tools.completed + additionalProperties: false + required: + - sequence_number + - type + title: >- + OpenAIResponseObjectStreamResponseMcpListToolsCompleted + "OpenAIResponseObjectStreamResponseMcpListToolsFailed": + type: object + properties: + sequence_number: + type: integer + type: + type: string + const: response.mcp_list_tools.failed + default: response.mcp_list_tools.failed + additionalProperties: false + required: + - sequence_number + - type + title: >- + OpenAIResponseObjectStreamResponseMcpListToolsFailed + "OpenAIResponseObjectStreamResponseMcpListToolsInProgress": + type: object + properties: + sequence_number: + type: integer + type: + type: string + const: response.mcp_list_tools.in_progress + default: response.mcp_list_tools.in_progress + additionalProperties: false + required: + - sequence_number + - type + title: >- + OpenAIResponseObjectStreamResponseMcpListToolsInProgress + "OpenAIResponseObjectStreamResponseOutputItemAdded": + type: object + properties: + response_id: + type: string + item: + $ref: '#/components/schemas/OpenAIResponseOutput' + output_index: + type: integer + sequence_number: + type: integer + type: + type: string + const: response.output_item.added + default: response.output_item.added + additionalProperties: false + required: + - response_id + - item + - output_index + - sequence_number + - type + title: >- + OpenAIResponseObjectStreamResponseOutputItemAdded + "OpenAIResponseObjectStreamResponseOutputItemDone": + type: object + properties: + response_id: + type: string + item: + $ref: '#/components/schemas/OpenAIResponseOutput' + output_index: + type: integer + sequence_number: + type: integer + type: + type: string + const: response.output_item.done + default: response.output_item.done + additionalProperties: false + required: + - response_id + - item + - output_index + - sequence_number + - type + title: >- + OpenAIResponseObjectStreamResponseOutputItemDone "OpenAIResponseObjectStreamResponseOutputTextDelta": type: object properties: @@ -5420,6 +5692,96 @@ components: - type title: >- OpenAIResponseObjectStreamResponseOutputTextDelta + "OpenAIResponseObjectStreamResponseOutputTextDone": + type: object + properties: + content_index: + type: integer + text: + type: string + item_id: + type: string + output_index: + type: integer + sequence_number: + type: integer + type: + type: string + const: response.output_text.done + default: response.output_text.done + additionalProperties: false + required: + - content_index + - text + - item_id + - output_index + - sequence_number + - type + title: >- + OpenAIResponseObjectStreamResponseOutputTextDone + "OpenAIResponseObjectStreamResponseWebSearchCallCompleted": + type: object + properties: + item_id: + type: string + output_index: + type: integer + sequence_number: + type: integer + type: + type: string + const: response.web_search_call.completed + default: response.web_search_call.completed + additionalProperties: false + required: + - item_id + - output_index + - sequence_number + - type + title: >- + OpenAIResponseObjectStreamResponseWebSearchCallCompleted + "OpenAIResponseObjectStreamResponseWebSearchCallInProgress": + type: object + properties: + item_id: + type: string + output_index: + type: integer + sequence_number: + type: integer + type: + type: string + const: 
response.web_search_call.in_progress + default: response.web_search_call.in_progress + additionalProperties: false + required: + - item_id + - output_index + - sequence_number + - type + title: >- + OpenAIResponseObjectStreamResponseWebSearchCallInProgress + "OpenAIResponseObjectStreamResponseWebSearchCallSearching": + type: object + properties: + item_id: + type: string + output_index: + type: integer + sequence_number: + type: integer + type: + type: string + const: response.web_search_call.searching + default: response.web_search_call.searching + additionalProperties: false + required: + - item_id + - output_index + - sequence_number + - type + title: >- + OpenAIResponseObjectStreamResponseWebSearchCallSearching EmbeddingsRequest: type: object properties: diff --git a/llama_stack/apis/agents/openai_responses.py b/llama_stack/apis/agents/openai_responses.py index 6fa18b115..35b3d5ace 100644 --- a/llama_stack/apis/agents/openai_responses.py +++ b/llama_stack/apis/agents/openai_responses.py @@ -179,6 +179,30 @@ class OpenAIResponseObjectStreamResponseCreated(BaseModel): type: Literal["response.created"] = "response.created" +@json_schema_type +class OpenAIResponseObjectStreamResponseCompleted(BaseModel): + response: OpenAIResponseObject + type: Literal["response.completed"] = "response.completed" + + +@json_schema_type +class OpenAIResponseObjectStreamResponseOutputItemAdded(BaseModel): + response_id: str + item: OpenAIResponseOutput + output_index: int + sequence_number: int + type: Literal["response.output_item.added"] = "response.output_item.added" + + +@json_schema_type +class OpenAIResponseObjectStreamResponseOutputItemDone(BaseModel): + response_id: str + item: OpenAIResponseOutput + output_index: int + sequence_number: int + type: Literal["response.output_item.done"] = "response.output_item.done" + + @json_schema_type class OpenAIResponseObjectStreamResponseOutputTextDelta(BaseModel): content_index: int @@ -190,14 +214,132 @@ class OpenAIResponseObjectStreamResponseOutputTextDelta(BaseModel): @json_schema_type -class OpenAIResponseObjectStreamResponseCompleted(BaseModel): - response: OpenAIResponseObject - type: Literal["response.completed"] = "response.completed" +class OpenAIResponseObjectStreamResponseOutputTextDone(BaseModel): + content_index: int + text: str # final text of the output item + item_id: str + output_index: int + sequence_number: int + type: Literal["response.output_text.done"] = "response.output_text.done" + + +@json_schema_type +class OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta(BaseModel): + delta: str + item_id: str + output_index: int + sequence_number: int + type: Literal["response.function_call_arguments.delta"] = "response.function_call_arguments.delta" + + +@json_schema_type +class OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone(BaseModel): + arguments: str # final arguments of the function call + item_id: str + output_index: int + sequence_number: int + type: Literal["response.function_call_arguments.done"] = "response.function_call_arguments.done" + + +@json_schema_type +class OpenAIResponseObjectStreamResponseWebSearchCallInProgress(BaseModel): + item_id: str + output_index: int + sequence_number: int + type: Literal["response.web_search_call.in_progress"] = "response.web_search_call.in_progress" + + +@json_schema_type +class OpenAIResponseObjectStreamResponseWebSearchCallSearching(BaseModel): + item_id: str + output_index: int + sequence_number: int + type: Literal["response.web_search_call.searching"] = 
"response.web_search_call.searching" + + +@json_schema_type +class OpenAIResponseObjectStreamResponseWebSearchCallCompleted(BaseModel): + item_id: str + output_index: int + sequence_number: int + type: Literal["response.web_search_call.completed"] = "response.web_search_call.completed" + + +@json_schema_type +class OpenAIResponseObjectStreamResponseMcpListToolsInProgress(BaseModel): + sequence_number: int + type: Literal["response.mcp_list_tools.in_progress"] = "response.mcp_list_tools.in_progress" + + +@json_schema_type +class OpenAIResponseObjectStreamResponseMcpListToolsFailed(BaseModel): + sequence_number: int + type: Literal["response.mcp_list_tools.failed"] = "response.mcp_list_tools.failed" + + +@json_schema_type +class OpenAIResponseObjectStreamResponseMcpListToolsCompleted(BaseModel): + sequence_number: int + type: Literal["response.mcp_list_tools.completed"] = "response.mcp_list_tools.completed" + + +@json_schema_type +class OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta(BaseModel): + delta: str + item_id: str + output_index: int + sequence_number: int + type: Literal["response.mcp_call.arguments.delta"] = "response.mcp_call.arguments.delta" + + +@json_schema_type +class OpenAIResponseObjectStreamResponseMcpCallArgumentsDone(BaseModel): + arguments: str # final arguments of the MCP call + item_id: str + output_index: int + sequence_number: int + type: Literal["response.mcp_call.arguments.done"] = "response.mcp_call.arguments.done" + + +@json_schema_type +class OpenAIResponseObjectStreamResponseMcpCallInProgress(BaseModel): + item_id: str + output_index: int + sequence_number: int + type: Literal["response.mcp_call.in_progress"] = "response.mcp_call.in_progress" + + +@json_schema_type +class OpenAIResponseObjectStreamResponseMcpCallFailed(BaseModel): + sequence_number: int + type: Literal["response.mcp_call.failed"] = "response.mcp_call.failed" + + +@json_schema_type +class OpenAIResponseObjectStreamResponseMcpCallCompleted(BaseModel): + sequence_number: int + type: Literal["response.mcp_call.completed"] = "response.mcp_call.completed" OpenAIResponseObjectStream = Annotated[ OpenAIResponseObjectStreamResponseCreated + | OpenAIResponseObjectStreamResponseOutputItemAdded + | OpenAIResponseObjectStreamResponseOutputItemDone | OpenAIResponseObjectStreamResponseOutputTextDelta + | OpenAIResponseObjectStreamResponseOutputTextDone + | OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta + | OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone + | OpenAIResponseObjectStreamResponseWebSearchCallInProgress + | OpenAIResponseObjectStreamResponseWebSearchCallSearching + | OpenAIResponseObjectStreamResponseWebSearchCallCompleted + | OpenAIResponseObjectStreamResponseMcpListToolsInProgress + | OpenAIResponseObjectStreamResponseMcpListToolsFailed + | OpenAIResponseObjectStreamResponseMcpListToolsCompleted + | OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta + | OpenAIResponseObjectStreamResponseMcpCallArgumentsDone + | OpenAIResponseObjectStreamResponseMcpCallInProgress + | OpenAIResponseObjectStreamResponseMcpCallFailed + | OpenAIResponseObjectStreamResponseMcpCallCompleted | OpenAIResponseObjectStreamResponseCompleted, Field(discriminator="type"), ] diff --git a/llama_stack/providers/inline/agents/meta_reference/openai_responses.py b/llama_stack/providers/inline/agents/meta_reference/openai_responses.py index 661f04ef1..06f445c18 100644 --- a/llama_stack/providers/inline/agents/meta_reference/openai_responses.py +++ 
b/llama_stack/providers/inline/agents/meta_reference/openai_responses.py @@ -433,12 +433,10 @@ class OpenAIResponsesImpl: store: bool | None, text: OpenAIResponseText, tools: list[OpenAIResponseInputTool] | None, - max_infer_iters: int | None, + max_infer_iters: int, ) -> OpenAIResponseObject: - # Implement tool execution loop - handle ALL inference rounds including the first n_iter = 0 messages = ctx.messages.copy() - current_response = None while True: # Do inference (including the first one) @@ -450,13 +448,13 @@ class OpenAIResponsesImpl: temperature=ctx.temperature, response_format=ctx.response_format, ) - current_response = OpenAIChatCompletion(**inference_result.model_dump()) + completion = OpenAIChatCompletion(**inference_result.model_dump()) # Separate function vs non-function tool calls function_tool_calls = [] non_function_tool_calls = [] - for choice in current_response.choices: + for choice in completion.choices: if choice.message.tool_calls and tools: for tool_call in choice.message.tool_calls: if self._is_function_tool_call(tool_call, tools): @@ -468,7 +466,7 @@ class OpenAIResponsesImpl: if function_tool_calls: # For function tool calls, use existing logic and return immediately current_output_messages = await self._process_response_choices( - chat_response=current_response, + chat_response=completion, ctx=ctx, tools=tools, ) @@ -476,7 +474,7 @@ class OpenAIResponsesImpl: break elif non_function_tool_calls: # For non-function tool calls, execute them and continue loop - for choice in current_response.choices: + for choice in completion.choices: tool_outputs, tool_response_messages = await self._execute_tool_calls_only(choice, ctx) output_messages.extend(tool_outputs) @@ -485,19 +483,19 @@ class OpenAIResponsesImpl: messages.extend(tool_response_messages) n_iter += 1 - if n_iter >= (max_infer_iters or 10): + if n_iter >= max_infer_iters: break # Continue with next iteration of the loop continue else: # No tool calls - convert response to message and we're done - for choice in current_response.choices: + for choice in completion.choices: output_messages.append(await _convert_chat_choice_to_response_message(choice)) break response = OpenAIResponseObject( - created_at=current_response.created, + created_at=completion.created, id=f"resp-{uuid.uuid4()}", model=model, object="response", @@ -549,7 +547,6 @@ class OpenAIResponsesImpl: messages = ctx.messages.copy() while True: - # Do inference (including the first one) - streaming current_inference_result = await self.inference_api.openai_chat_completion( model=ctx.model, messages=messages,
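Note: the sketch below is illustrative only and is not part of the patch. It assumes you already hold an async iterator of the `OpenAIResponseObjectStream` events defined above (for example, from a streaming Responses call); the `consume` helper and the way the stream is obtained are hypothetical. It only shows how the discriminated `type` field lets a client dispatch on the new event classes added in this change.

# Not part of the patch: a minimal, assumed consumer of the stream events
# defined in llama_stack/apis/agents/openai_responses.py.
from collections.abc import AsyncIterator

from llama_stack.apis.agents.openai_responses import (
    OpenAIResponseObjectStream,
    OpenAIResponseObjectStreamResponseCompleted,
    OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta,
    OpenAIResponseObjectStreamResponseOutputItemAdded,
    OpenAIResponseObjectStreamResponseOutputTextDelta,
)


async def consume(stream: AsyncIterator[OpenAIResponseObjectStream]) -> None:
    """Dispatch on the discriminated union of streamed response events."""
    async for event in stream:
        if isinstance(event, OpenAIResponseObjectStreamResponseOutputTextDelta):
            # Incremental assistant text for output item `event.item_id`.
            print(event.delta, end="", flush=True)
        elif isinstance(event, OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta):
            # Partial arguments for an in-flight MCP tool call.
            print(f"[mcp args chunk for {event.item_id}] {event.delta}")
        elif isinstance(event, OpenAIResponseObjectStreamResponseOutputItemAdded):
            # A new output item started at position `event.output_index`.
            print(f"[new output item #{event.output_index} in {event.response_id}]")
        elif isinstance(event, OpenAIResponseObjectStreamResponseCompleted):
            # Final event carries the full response object.
            print(f"\n[done] response id: {event.response.id}")
        # The remaining event types (web_search_call.*, mcp_list_tools.*, *.done, ...)
        # can be handled the same way; unrecognized types are simply ignored here.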