diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html
index 6b858eecf..775eb93b3 100644
--- a/docs/_static/llama-stack-spec.html
+++ b/docs/_static/llama-stack-spec.html
@@ -7593,9 +7593,57 @@
{
"$ref": "#/components/schemas/OpenAIResponseObjectStreamResponseCreated"
},
+ {
+ "$ref": "#/components/schemas/OpenAIResponseObjectStreamResponseOutputItemAdded"
+ },
+ {
+ "$ref": "#/components/schemas/OpenAIResponseObjectStreamResponseOutputItemDone"
+ },
{
"$ref": "#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDelta"
},
+ {
+ "$ref": "#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDone"
+ },
+ {
+ "$ref": "#/components/schemas/OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta"
+ },
+ {
+ "$ref": "#/components/schemas/OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone"
+ },
+ {
+ "$ref": "#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallInProgress"
+ },
+ {
+ "$ref": "#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallSearching"
+ },
+ {
+ "$ref": "#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallCompleted"
+ },
+ {
+ "$ref": "#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsInProgress"
+ },
+ {
+ "$ref": "#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsFailed"
+ },
+ {
+ "$ref": "#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsCompleted"
+ },
+ {
+ "$ref": "#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta"
+ },
+ {
+ "$ref": "#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallArgumentsDone"
+ },
+ {
+ "$ref": "#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallInProgress"
+ },
+ {
+ "$ref": "#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallFailed"
+ },
+ {
+ "$ref": "#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallCompleted"
+ },
{
"$ref": "#/components/schemas/OpenAIResponseObjectStreamResponseCompleted"
}
@@ -7604,7 +7652,23 @@
"propertyName": "type",
"mapping": {
"response.created": "#/components/schemas/OpenAIResponseObjectStreamResponseCreated",
+ "response.output_item.added": "#/components/schemas/OpenAIResponseObjectStreamResponseOutputItemAdded",
+ "response.output_item.done": "#/components/schemas/OpenAIResponseObjectStreamResponseOutputItemDone",
"response.output_text.delta": "#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDelta",
+ "response.output_text.done": "#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDone",
+ "response.function_call_arguments.delta": "#/components/schemas/OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta",
+ "response.function_call_arguments.done": "#/components/schemas/OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone",
+ "response.web_search_call.in_progress": "#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallInProgress",
+ "response.web_search_call.searching": "#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallSearching",
+ "response.web_search_call.completed": "#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallCompleted",
+ "response.mcp_list_tools.in_progress": "#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsInProgress",
+ "response.mcp_list_tools.failed": "#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsFailed",
+ "response.mcp_list_tools.completed": "#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsCompleted",
+ "response.mcp_call.arguments.delta": "#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta",
+ "response.mcp_call.arguments.done": "#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallArgumentsDone",
+ "response.mcp_call.in_progress": "#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallInProgress",
+ "response.mcp_call.failed": "#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallFailed",
+ "response.mcp_call.completed": "#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallCompleted",
"response.completed": "#/components/schemas/OpenAIResponseObjectStreamResponseCompleted"
}
}
@@ -7647,6 +7711,314 @@
],
"title": "OpenAIResponseObjectStreamResponseCreated"
},
+ "OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta": {
+ "type": "object",
+ "properties": {
+ "delta": {
+ "type": "string"
+ },
+ "item_id": {
+ "type": "string"
+ },
+ "output_index": {
+ "type": "integer"
+ },
+ "sequence_number": {
+ "type": "integer"
+ },
+ "type": {
+ "type": "string",
+ "const": "response.function_call_arguments.delta",
+ "default": "response.function_call_arguments.delta"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "delta",
+ "item_id",
+ "output_index",
+ "sequence_number",
+ "type"
+ ],
+ "title": "OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta"
+ },
+ "OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone": {
+ "type": "object",
+ "properties": {
+ "arguments": {
+ "type": "string"
+ },
+ "item_id": {
+ "type": "string"
+ },
+ "output_index": {
+ "type": "integer"
+ },
+ "sequence_number": {
+ "type": "integer"
+ },
+ "type": {
+ "type": "string",
+ "const": "response.function_call_arguments.done",
+ "default": "response.function_call_arguments.done"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "arguments",
+ "item_id",
+ "output_index",
+ "sequence_number",
+ "type"
+ ],
+ "title": "OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone"
+ },
+ "OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta": {
+ "type": "object",
+ "properties": {
+ "delta": {
+ "type": "string"
+ },
+ "item_id": {
+ "type": "string"
+ },
+ "output_index": {
+ "type": "integer"
+ },
+ "sequence_number": {
+ "type": "integer"
+ },
+ "type": {
+ "type": "string",
+ "const": "response.mcp_call.arguments.delta",
+ "default": "response.mcp_call.arguments.delta"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "delta",
+ "item_id",
+ "output_index",
+ "sequence_number",
+ "type"
+ ],
+ "title": "OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta"
+ },
+ "OpenAIResponseObjectStreamResponseMcpCallArgumentsDone": {
+ "type": "object",
+ "properties": {
+ "arguments": {
+ "type": "string"
+ },
+ "item_id": {
+ "type": "string"
+ },
+ "output_index": {
+ "type": "integer"
+ },
+ "sequence_number": {
+ "type": "integer"
+ },
+ "type": {
+ "type": "string",
+ "const": "response.mcp_call.arguments.done",
+ "default": "response.mcp_call.arguments.done"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "arguments",
+ "item_id",
+ "output_index",
+ "sequence_number",
+ "type"
+ ],
+ "title": "OpenAIResponseObjectStreamResponseMcpCallArgumentsDone"
+ },
+ "OpenAIResponseObjectStreamResponseMcpCallCompleted": {
+ "type": "object",
+ "properties": {
+ "sequence_number": {
+ "type": "integer"
+ },
+ "type": {
+ "type": "string",
+ "const": "response.mcp_call.completed",
+ "default": "response.mcp_call.completed"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "sequence_number",
+ "type"
+ ],
+ "title": "OpenAIResponseObjectStreamResponseMcpCallCompleted"
+ },
+ "OpenAIResponseObjectStreamResponseMcpCallFailed": {
+ "type": "object",
+ "properties": {
+ "sequence_number": {
+ "type": "integer"
+ },
+ "type": {
+ "type": "string",
+ "const": "response.mcp_call.failed",
+ "default": "response.mcp_call.failed"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "sequence_number",
+ "type"
+ ],
+ "title": "OpenAIResponseObjectStreamResponseMcpCallFailed"
+ },
+ "OpenAIResponseObjectStreamResponseMcpCallInProgress": {
+ "type": "object",
+ "properties": {
+ "item_id": {
+ "type": "string"
+ },
+ "output_index": {
+ "type": "integer"
+ },
+ "sequence_number": {
+ "type": "integer"
+ },
+ "type": {
+ "type": "string",
+ "const": "response.mcp_call.in_progress",
+ "default": "response.mcp_call.in_progress"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "item_id",
+ "output_index",
+ "sequence_number",
+ "type"
+ ],
+ "title": "OpenAIResponseObjectStreamResponseMcpCallInProgress"
+ },
+ "OpenAIResponseObjectStreamResponseMcpListToolsCompleted": {
+ "type": "object",
+ "properties": {
+ "sequence_number": {
+ "type": "integer"
+ },
+ "type": {
+ "type": "string",
+ "const": "response.mcp_list_tools.completed",
+ "default": "response.mcp_list_tools.completed"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "sequence_number",
+ "type"
+ ],
+ "title": "OpenAIResponseObjectStreamResponseMcpListToolsCompleted"
+ },
+ "OpenAIResponseObjectStreamResponseMcpListToolsFailed": {
+ "type": "object",
+ "properties": {
+ "sequence_number": {
+ "type": "integer"
+ },
+ "type": {
+ "type": "string",
+ "const": "response.mcp_list_tools.failed",
+ "default": "response.mcp_list_tools.failed"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "sequence_number",
+ "type"
+ ],
+ "title": "OpenAIResponseObjectStreamResponseMcpListToolsFailed"
+ },
+ "OpenAIResponseObjectStreamResponseMcpListToolsInProgress": {
+ "type": "object",
+ "properties": {
+ "sequence_number": {
+ "type": "integer"
+ },
+ "type": {
+ "type": "string",
+ "const": "response.mcp_list_tools.in_progress",
+ "default": "response.mcp_list_tools.in_progress"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "sequence_number",
+ "type"
+ ],
+ "title": "OpenAIResponseObjectStreamResponseMcpListToolsInProgress"
+ },
+ "OpenAIResponseObjectStreamResponseOutputItemAdded": {
+ "type": "object",
+ "properties": {
+ "response_id": {
+ "type": "string"
+ },
+ "item": {
+ "$ref": "#/components/schemas/OpenAIResponseOutput"
+ },
+ "output_index": {
+ "type": "integer"
+ },
+ "sequence_number": {
+ "type": "integer"
+ },
+ "type": {
+ "type": "string",
+ "const": "response.output_item.added",
+ "default": "response.output_item.added"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "response_id",
+ "item",
+ "output_index",
+ "sequence_number",
+ "type"
+ ],
+ "title": "OpenAIResponseObjectStreamResponseOutputItemAdded"
+ },
+ "OpenAIResponseObjectStreamResponseOutputItemDone": {
+ "type": "object",
+ "properties": {
+ "response_id": {
+ "type": "string"
+ },
+ "item": {
+ "$ref": "#/components/schemas/OpenAIResponseOutput"
+ },
+ "output_index": {
+ "type": "integer"
+ },
+ "sequence_number": {
+ "type": "integer"
+ },
+ "type": {
+ "type": "string",
+ "const": "response.output_item.done",
+ "default": "response.output_item.done"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "response_id",
+ "item",
+ "output_index",
+ "sequence_number",
+ "type"
+ ],
+ "title": "OpenAIResponseObjectStreamResponseOutputItemDone"
+ },
"OpenAIResponseObjectStreamResponseOutputTextDelta": {
"type": "object",
"properties": {
@@ -7682,6 +8054,122 @@
],
"title": "OpenAIResponseObjectStreamResponseOutputTextDelta"
},
+ "OpenAIResponseObjectStreamResponseOutputTextDone": {
+ "type": "object",
+ "properties": {
+ "content_index": {
+ "type": "integer"
+ },
+ "text": {
+ "type": "string"
+ },
+ "item_id": {
+ "type": "string"
+ },
+ "output_index": {
+ "type": "integer"
+ },
+ "sequence_number": {
+ "type": "integer"
+ },
+ "type": {
+ "type": "string",
+ "const": "response.output_text.done",
+ "default": "response.output_text.done"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "content_index",
+ "text",
+ "item_id",
+ "output_index",
+ "sequence_number",
+ "type"
+ ],
+ "title": "OpenAIResponseObjectStreamResponseOutputTextDone"
+ },
+ "OpenAIResponseObjectStreamResponseWebSearchCallCompleted": {
+ "type": "object",
+ "properties": {
+ "item_id": {
+ "type": "string"
+ },
+ "output_index": {
+ "type": "integer"
+ },
+ "sequence_number": {
+ "type": "integer"
+ },
+ "type": {
+ "type": "string",
+ "const": "response.web_search_call.completed",
+ "default": "response.web_search_call.completed"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "item_id",
+ "output_index",
+ "sequence_number",
+ "type"
+ ],
+ "title": "OpenAIResponseObjectStreamResponseWebSearchCallCompleted"
+ },
+ "OpenAIResponseObjectStreamResponseWebSearchCallInProgress": {
+ "type": "object",
+ "properties": {
+ "item_id": {
+ "type": "string"
+ },
+ "output_index": {
+ "type": "integer"
+ },
+ "sequence_number": {
+ "type": "integer"
+ },
+ "type": {
+ "type": "string",
+ "const": "response.web_search_call.in_progress",
+ "default": "response.web_search_call.in_progress"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "item_id",
+ "output_index",
+ "sequence_number",
+ "type"
+ ],
+ "title": "OpenAIResponseObjectStreamResponseWebSearchCallInProgress"
+ },
+ "OpenAIResponseObjectStreamResponseWebSearchCallSearching": {
+ "type": "object",
+ "properties": {
+ "item_id": {
+ "type": "string"
+ },
+ "output_index": {
+ "type": "integer"
+ },
+ "sequence_number": {
+ "type": "integer"
+ },
+ "type": {
+ "type": "string",
+ "const": "response.web_search_call.searching",
+ "default": "response.web_search_call.searching"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "item_id",
+ "output_index",
+ "sequence_number",
+ "type"
+ ],
+ "title": "OpenAIResponseObjectStreamResponseWebSearchCallSearching"
+ },
"EmbeddingsRequest": {
"type": "object",
"properties": {
diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml
index b5172e947..ef6b5d70a 100644
--- a/docs/_static/llama-stack-spec.yaml
+++ b/docs/_static/llama-stack-spec.yaml
@@ -5355,13 +5355,45 @@ components:
OpenAIResponseObjectStream:
oneOf:
- $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseCreated'
+ - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputItemAdded'
+ - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputItemDone'
- $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDelta'
+ - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDone'
+ - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta'
+ - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone'
+ - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallInProgress'
+ - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallSearching'
+ - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallCompleted'
+ - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsInProgress'
+ - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsFailed'
+ - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsCompleted'
+ - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta'
+ - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallArgumentsDone'
+ - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallInProgress'
+ - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallFailed'
+ - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallCompleted'
- $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseCompleted'
discriminator:
propertyName: type
mapping:
response.created: '#/components/schemas/OpenAIResponseObjectStreamResponseCreated'
+ response.output_item.added: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputItemAdded'
+ response.output_item.done: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputItemDone'
response.output_text.delta: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDelta'
+ response.output_text.done: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDone'
+ response.function_call_arguments.delta: '#/components/schemas/OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta'
+ response.function_call_arguments.done: '#/components/schemas/OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone'
+ response.web_search_call.in_progress: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallInProgress'
+ response.web_search_call.searching: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallSearching'
+ response.web_search_call.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseWebSearchCallCompleted'
+ response.mcp_list_tools.in_progress: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsInProgress'
+ response.mcp_list_tools.failed: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsFailed'
+ response.mcp_list_tools.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpListToolsCompleted'
+ response.mcp_call.arguments.delta: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta'
+ response.mcp_call.arguments.done: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallArgumentsDone'
+ response.mcp_call.in_progress: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallInProgress'
+ response.mcp_call.failed: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallFailed'
+ response.mcp_call.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallCompleted'
response.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseCompleted'
"OpenAIResponseObjectStreamResponseCompleted":
type: object
@@ -5393,6 +5425,246 @@ components:
- type
title: >-
OpenAIResponseObjectStreamResponseCreated
+ "OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta":
+ type: object
+ properties:
+ delta:
+ type: string
+ item_id:
+ type: string
+ output_index:
+ type: integer
+ sequence_number:
+ type: integer
+ type:
+ type: string
+ const: response.function_call_arguments.delta
+ default: response.function_call_arguments.delta
+ additionalProperties: false
+ required:
+ - delta
+ - item_id
+ - output_index
+ - sequence_number
+ - type
+ title: >-
+ OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta
+ "OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone":
+ type: object
+ properties:
+ arguments:
+ type: string
+ item_id:
+ type: string
+ output_index:
+ type: integer
+ sequence_number:
+ type: integer
+ type:
+ type: string
+ const: response.function_call_arguments.done
+ default: response.function_call_arguments.done
+ additionalProperties: false
+ required:
+ - arguments
+ - item_id
+ - output_index
+ - sequence_number
+ - type
+ title: >-
+ OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone
+ "OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta":
+ type: object
+ properties:
+ delta:
+ type: string
+ item_id:
+ type: string
+ output_index:
+ type: integer
+ sequence_number:
+ type: integer
+ type:
+ type: string
+ const: response.mcp_call.arguments.delta
+ default: response.mcp_call.arguments.delta
+ additionalProperties: false
+ required:
+ - delta
+ - item_id
+ - output_index
+ - sequence_number
+ - type
+ title: >-
+ OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta
+ "OpenAIResponseObjectStreamResponseMcpCallArgumentsDone":
+ type: object
+ properties:
+ arguments:
+ type: string
+ item_id:
+ type: string
+ output_index:
+ type: integer
+ sequence_number:
+ type: integer
+ type:
+ type: string
+ const: response.mcp_call.arguments.done
+ default: response.mcp_call.arguments.done
+ additionalProperties: false
+ required:
+ - arguments
+ - item_id
+ - output_index
+ - sequence_number
+ - type
+ title: >-
+ OpenAIResponseObjectStreamResponseMcpCallArgumentsDone
+ "OpenAIResponseObjectStreamResponseMcpCallCompleted":
+ type: object
+ properties:
+ sequence_number:
+ type: integer
+ type:
+ type: string
+ const: response.mcp_call.completed
+ default: response.mcp_call.completed
+ additionalProperties: false
+ required:
+ - sequence_number
+ - type
+ title: >-
+ OpenAIResponseObjectStreamResponseMcpCallCompleted
+ "OpenAIResponseObjectStreamResponseMcpCallFailed":
+ type: object
+ properties:
+ sequence_number:
+ type: integer
+ type:
+ type: string
+ const: response.mcp_call.failed
+ default: response.mcp_call.failed
+ additionalProperties: false
+ required:
+ - sequence_number
+ - type
+ title: >-
+ OpenAIResponseObjectStreamResponseMcpCallFailed
+ "OpenAIResponseObjectStreamResponseMcpCallInProgress":
+ type: object
+ properties:
+ item_id:
+ type: string
+ output_index:
+ type: integer
+ sequence_number:
+ type: integer
+ type:
+ type: string
+ const: response.mcp_call.in_progress
+ default: response.mcp_call.in_progress
+ additionalProperties: false
+ required:
+ - item_id
+ - output_index
+ - sequence_number
+ - type
+ title: >-
+ OpenAIResponseObjectStreamResponseMcpCallInProgress
+ "OpenAIResponseObjectStreamResponseMcpListToolsCompleted":
+ type: object
+ properties:
+ sequence_number:
+ type: integer
+ type:
+ type: string
+ const: response.mcp_list_tools.completed
+ default: response.mcp_list_tools.completed
+ additionalProperties: false
+ required:
+ - sequence_number
+ - type
+ title: >-
+ OpenAIResponseObjectStreamResponseMcpListToolsCompleted
+ "OpenAIResponseObjectStreamResponseMcpListToolsFailed":
+ type: object
+ properties:
+ sequence_number:
+ type: integer
+ type:
+ type: string
+ const: response.mcp_list_tools.failed
+ default: response.mcp_list_tools.failed
+ additionalProperties: false
+ required:
+ - sequence_number
+ - type
+ title: >-
+ OpenAIResponseObjectStreamResponseMcpListToolsFailed
+ "OpenAIResponseObjectStreamResponseMcpListToolsInProgress":
+ type: object
+ properties:
+ sequence_number:
+ type: integer
+ type:
+ type: string
+ const: response.mcp_list_tools.in_progress
+ default: response.mcp_list_tools.in_progress
+ additionalProperties: false
+ required:
+ - sequence_number
+ - type
+ title: >-
+ OpenAIResponseObjectStreamResponseMcpListToolsInProgress
+ "OpenAIResponseObjectStreamResponseOutputItemAdded":
+ type: object
+ properties:
+ response_id:
+ type: string
+ item:
+ $ref: '#/components/schemas/OpenAIResponseOutput'
+ output_index:
+ type: integer
+ sequence_number:
+ type: integer
+ type:
+ type: string
+ const: response.output_item.added
+ default: response.output_item.added
+ additionalProperties: false
+ required:
+ - response_id
+ - item
+ - output_index
+ - sequence_number
+ - type
+ title: >-
+ OpenAIResponseObjectStreamResponseOutputItemAdded
+ "OpenAIResponseObjectStreamResponseOutputItemDone":
+ type: object
+ properties:
+ response_id:
+ type: string
+ item:
+ $ref: '#/components/schemas/OpenAIResponseOutput'
+ output_index:
+ type: integer
+ sequence_number:
+ type: integer
+ type:
+ type: string
+ const: response.output_item.done
+ default: response.output_item.done
+ additionalProperties: false
+ required:
+ - response_id
+ - item
+ - output_index
+ - sequence_number
+ - type
+ title: >-
+ OpenAIResponseObjectStreamResponseOutputItemDone
"OpenAIResponseObjectStreamResponseOutputTextDelta":
type: object
properties:
@@ -5420,6 +5692,96 @@ components:
- type
title: >-
OpenAIResponseObjectStreamResponseOutputTextDelta
+ "OpenAIResponseObjectStreamResponseOutputTextDone":
+ type: object
+ properties:
+ content_index:
+ type: integer
+ text:
+ type: string
+ item_id:
+ type: string
+ output_index:
+ type: integer
+ sequence_number:
+ type: integer
+ type:
+ type: string
+ const: response.output_text.done
+ default: response.output_text.done
+ additionalProperties: false
+ required:
+ - content_index
+ - text
+ - item_id
+ - output_index
+ - sequence_number
+ - type
+ title: >-
+ OpenAIResponseObjectStreamResponseOutputTextDone
+ "OpenAIResponseObjectStreamResponseWebSearchCallCompleted":
+ type: object
+ properties:
+ item_id:
+ type: string
+ output_index:
+ type: integer
+ sequence_number:
+ type: integer
+ type:
+ type: string
+ const: response.web_search_call.completed
+ default: response.web_search_call.completed
+ additionalProperties: false
+ required:
+ - item_id
+ - output_index
+ - sequence_number
+ - type
+ title: >-
+ OpenAIResponseObjectStreamResponseWebSearchCallCompleted
+ "OpenAIResponseObjectStreamResponseWebSearchCallInProgress":
+ type: object
+ properties:
+ item_id:
+ type: string
+ output_index:
+ type: integer
+ sequence_number:
+ type: integer
+ type:
+ type: string
+ const: response.web_search_call.in_progress
+ default: response.web_search_call.in_progress
+ additionalProperties: false
+ required:
+ - item_id
+ - output_index
+ - sequence_number
+ - type
+ title: >-
+ OpenAIResponseObjectStreamResponseWebSearchCallInProgress
+ "OpenAIResponseObjectStreamResponseWebSearchCallSearching":
+ type: object
+ properties:
+ item_id:
+ type: string
+ output_index:
+ type: integer
+ sequence_number:
+ type: integer
+ type:
+ type: string
+ const: response.web_search_call.searching
+ default: response.web_search_call.searching
+ additionalProperties: false
+ required:
+ - item_id
+ - output_index
+ - sequence_number
+ - type
+ title: >-
+ OpenAIResponseObjectStreamResponseWebSearchCallSearching
EmbeddingsRequest:
type: object
properties:
diff --git a/llama_stack/apis/agents/openai_responses.py b/llama_stack/apis/agents/openai_responses.py
index 6fa18b115..35b3d5ace 100644
--- a/llama_stack/apis/agents/openai_responses.py
+++ b/llama_stack/apis/agents/openai_responses.py
@@ -179,6 +179,30 @@ class OpenAIResponseObjectStreamResponseCreated(BaseModel):
type: Literal["response.created"] = "response.created"
+@json_schema_type  # member of the OpenAIResponseObjectStream union; picked by its `type` tag
+class OpenAIResponseObjectStreamResponseCompleted(BaseModel):
+ response: OpenAIResponseObject  # NOTE(review): no sequence_number here, unlike the events added alongside it
+ type: Literal["response.completed"] = "response.completed"
+
+
+@json_schema_type  # member of the OpenAIResponseObjectStream union; picked by its `type` tag
+class OpenAIResponseObjectStreamResponseOutputItemAdded(BaseModel):
+ response_id: str  # presumably the id of the response being streamed -- confirm against the emitter
+ item: OpenAIResponseOutput  # the output item carried by this event
+ output_index: int  # presumably the item's index in the response output list -- confirm
+ sequence_number: int  # presumably orders events within one stream -- confirm
+ type: Literal["response.output_item.added"] = "response.output_item.added"
+
+
+@json_schema_type  # member of the OpenAIResponseObjectStream union; picked by its `type` tag
+class OpenAIResponseObjectStreamResponseOutputItemDone(BaseModel):
+ response_id: str  # presumably the id of the response being streamed -- confirm against the emitter
+ item: OpenAIResponseOutput  # the output item carried by this event; same fields as the "added" event
+ output_index: int  # presumably the item's index in the response output list -- confirm
+ sequence_number: int  # presumably orders events within one stream -- confirm
+ type: Literal["response.output_item.done"] = "response.output_item.done"
+
+
@json_schema_type
class OpenAIResponseObjectStreamResponseOutputTextDelta(BaseModel):
content_index: int
@@ -190,14 +214,132 @@ class OpenAIResponseObjectStreamResponseOutputTextDelta(BaseModel):
@json_schema_type
-class OpenAIResponseObjectStreamResponseCompleted(BaseModel):
- response: OpenAIResponseObject
- type: Literal["response.completed"] = "response.completed"
+class OpenAIResponseObjectStreamResponseOutputTextDone(BaseModel):  # OpenAIResponseObjectStream union member
+ content_index: int  # presumably the index of the content part inside the item -- confirm
+ text: str # final text of the output item
+ item_id: str  # presumably the id of the output item this text belongs to -- confirm
+ output_index: int  # presumably the item's index in the response output list -- confirm
+ sequence_number: int  # presumably orders events within one stream -- confirm
+ type: Literal["response.output_text.done"] = "response.output_text.done"
+
+
+@json_schema_type  # member of the OpenAIResponseObjectStream union; picked by its `type` tag
+class OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta(BaseModel):
+ delta: str  # presumably an incremental fragment of the arguments string -- confirm
+ item_id: str  # presumably the id of the function-call output item -- confirm
+ output_index: int  # presumably the item's index in the response output list -- confirm
+ sequence_number: int  # presumably orders events within one stream -- confirm
+ type: Literal["response.function_call_arguments.delta"] = "response.function_call_arguments.delta"
+
+
+@json_schema_type  # member of the OpenAIResponseObjectStream union; picked by its `type` tag
+class OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone(BaseModel):
+ arguments: str # final arguments of the function call
+ item_id: str  # presumably the id of the function-call output item -- confirm
+ output_index: int  # presumably the item's index in the response output list -- confirm
+ sequence_number: int  # presumably orders events within one stream -- confirm
+ type: Literal["response.function_call_arguments.done"] = "response.function_call_arguments.done"
+
+
+@json_schema_type  # member of the OpenAIResponseObjectStream union; picked by its `type` tag
+class OpenAIResponseObjectStreamResponseWebSearchCallInProgress(BaseModel):
+ item_id: str  # presumably the id of the web-search call output item -- confirm
+ output_index: int  # presumably the item's index in the response output list -- confirm
+ sequence_number: int  # presumably orders events within one stream -- confirm
+ type: Literal["response.web_search_call.in_progress"] = "response.web_search_call.in_progress"
+
+
+@json_schema_type  # member of the OpenAIResponseObjectStream union; picked by its `type` tag
+class OpenAIResponseObjectStreamResponseWebSearchCallSearching(BaseModel):
+ item_id: str  # presumably the id of the web-search call output item -- confirm
+ output_index: int  # presumably the item's index in the response output list -- confirm
+ sequence_number: int  # presumably orders events within one stream -- confirm
+ type: Literal["response.web_search_call.searching"] = "response.web_search_call.searching"
+
+
+@json_schema_type  # member of the OpenAIResponseObjectStream union; picked by its `type` tag
+class OpenAIResponseObjectStreamResponseWebSearchCallCompleted(BaseModel):
+ item_id: str  # presumably the id of the web-search call output item -- confirm
+ output_index: int  # presumably the item's index in the response output list -- confirm
+ sequence_number: int  # presumably orders events within one stream -- confirm
+ type: Literal["response.web_search_call.completed"] = "response.web_search_call.completed"
+
+
+@json_schema_type  # member of the OpenAIResponseObjectStream union; picked by its `type` tag
+class OpenAIResponseObjectStreamResponseMcpListToolsInProgress(BaseModel):
+ sequence_number: int  # presumably orders events within one stream -- confirm; no item_id/output_index here
+ type: Literal["response.mcp_list_tools.in_progress"] = "response.mcp_list_tools.in_progress"
+
+
+@json_schema_type  # member of the OpenAIResponseObjectStream union; picked by its `type` tag
+class OpenAIResponseObjectStreamResponseMcpListToolsFailed(BaseModel):
+ sequence_number: int  # presumably orders events within one stream -- confirm; no error detail field here
+ type: Literal["response.mcp_list_tools.failed"] = "response.mcp_list_tools.failed"
+
+
+@json_schema_type  # member of the OpenAIResponseObjectStream union; picked by its `type` tag
+class OpenAIResponseObjectStreamResponseMcpListToolsCompleted(BaseModel):
+ sequence_number: int  # presumably orders events within one stream -- confirm; no item_id/output_index here
+ type: Literal["response.mcp_list_tools.completed"] = "response.mcp_list_tools.completed"
+
+
+@json_schema_type  # member of the OpenAIResponseObjectStream union; picked by its `type` tag
+class OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta(BaseModel):
+ delta: str  # presumably an incremental fragment of the arguments string -- confirm
+ item_id: str  # presumably the id of the MCP call output item -- confirm
+ output_index: int  # presumably the item's index in the response output list -- confirm
+ sequence_number: int  # presumably orders events within one stream -- confirm
+ type: Literal["response.mcp_call.arguments.delta"] = "response.mcp_call.arguments.delta"
+
+
+@json_schema_type  # member of the OpenAIResponseObjectStream union; picked by its `type` tag
+class OpenAIResponseObjectStreamResponseMcpCallArgumentsDone(BaseModel):
+ arguments: str # final arguments of the MCP call
+ item_id: str  # presumably the id of the MCP call output item -- confirm
+ output_index: int  # presumably the item's index in the response output list -- confirm
+ sequence_number: int  # presumably orders events within one stream -- confirm
+ type: Literal["response.mcp_call.arguments.done"] = "response.mcp_call.arguments.done"
+
+
+@json_schema_type  # member of the OpenAIResponseObjectStream union; picked by its `type` tag
+class OpenAIResponseObjectStreamResponseMcpCallInProgress(BaseModel):
+ item_id: str  # presumably the id of the MCP call output item -- confirm
+ output_index: int  # presumably the item's index in the response output list -- confirm
+ sequence_number: int  # presumably orders events within one stream -- confirm
+ type: Literal["response.mcp_call.in_progress"] = "response.mcp_call.in_progress"
+
+
+@json_schema_type  # member of the OpenAIResponseObjectStream union; picked by its `type` tag
+class OpenAIResponseObjectStreamResponseMcpCallFailed(BaseModel):
+ sequence_number: int  # NOTE(review): no item_id/output_index, unlike the mcp_call.in_progress event
+ type: Literal["response.mcp_call.failed"] = "response.mcp_call.failed"
+
+
+@json_schema_type  # member of the OpenAIResponseObjectStream union; picked by its `type` tag
+class OpenAIResponseObjectStreamResponseMcpCallCompleted(BaseModel):
+ sequence_number: int  # NOTE(review): no item_id/output_index, unlike the mcp_call.in_progress event
+ type: Literal["response.mcp_call.completed"] = "response.mcp_call.completed"
OpenAIResponseObjectStream = Annotated[
OpenAIResponseObjectStreamResponseCreated
+ | OpenAIResponseObjectStreamResponseOutputItemAdded
+ | OpenAIResponseObjectStreamResponseOutputItemDone
| OpenAIResponseObjectStreamResponseOutputTextDelta
+ | OpenAIResponseObjectStreamResponseOutputTextDone
+ | OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta
+ | OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone
+ | OpenAIResponseObjectStreamResponseWebSearchCallInProgress
+ | OpenAIResponseObjectStreamResponseWebSearchCallSearching
+ | OpenAIResponseObjectStreamResponseWebSearchCallCompleted
+ | OpenAIResponseObjectStreamResponseMcpListToolsInProgress
+ | OpenAIResponseObjectStreamResponseMcpListToolsFailed
+ | OpenAIResponseObjectStreamResponseMcpListToolsCompleted
+ | OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta
+ | OpenAIResponseObjectStreamResponseMcpCallArgumentsDone
+ | OpenAIResponseObjectStreamResponseMcpCallInProgress
+ | OpenAIResponseObjectStreamResponseMcpCallFailed
+ | OpenAIResponseObjectStreamResponseMcpCallCompleted
| OpenAIResponseObjectStreamResponseCompleted,
Field(discriminator="type"),
]
diff --git a/llama_stack/providers/inline/agents/meta_reference/openai_responses.py b/llama_stack/providers/inline/agents/meta_reference/openai_responses.py
index 661f04ef1..06f445c18 100644
--- a/llama_stack/providers/inline/agents/meta_reference/openai_responses.py
+++ b/llama_stack/providers/inline/agents/meta_reference/openai_responses.py
@@ -433,12 +433,10 @@ class OpenAIResponsesImpl:
store: bool | None,
text: OpenAIResponseText,
tools: list[OpenAIResponseInputTool] | None,
- max_infer_iters: int | None,
+ max_infer_iters: int,
) -> OpenAIResponseObject:
- # Implement tool execution loop - handle ALL inference rounds including the first
n_iter = 0
messages = ctx.messages.copy()
- current_response = None
while True:
# Do inference (including the first one)
@@ -450,13 +448,13 @@ class OpenAIResponsesImpl:
temperature=ctx.temperature,
response_format=ctx.response_format,
)
- current_response = OpenAIChatCompletion(**inference_result.model_dump())
+ completion = OpenAIChatCompletion(**inference_result.model_dump())
# Separate function vs non-function tool calls
function_tool_calls = []
non_function_tool_calls = []
- for choice in current_response.choices:
+ for choice in completion.choices:
if choice.message.tool_calls and tools:
for tool_call in choice.message.tool_calls:
if self._is_function_tool_call(tool_call, tools):
@@ -468,7 +466,7 @@ class OpenAIResponsesImpl:
if function_tool_calls:
# For function tool calls, use existing logic and return immediately
current_output_messages = await self._process_response_choices(
- chat_response=current_response,
+ chat_response=completion,
ctx=ctx,
tools=tools,
)
@@ -476,7 +474,7 @@ class OpenAIResponsesImpl:
break
elif non_function_tool_calls:
# For non-function tool calls, execute them and continue loop
- for choice in current_response.choices:
+ for choice in completion.choices:
tool_outputs, tool_response_messages = await self._execute_tool_calls_only(choice, ctx)
output_messages.extend(tool_outputs)
@@ -485,19 +483,19 @@ class OpenAIResponsesImpl:
messages.extend(tool_response_messages)
n_iter += 1
- if n_iter >= (max_infer_iters or 10):
+ if n_iter >= max_infer_iters:
break
# Continue with next iteration of the loop
continue
else:
# No tool calls - convert response to message and we're done
- for choice in current_response.choices:
+ for choice in completion.choices:
output_messages.append(await _convert_chat_choice_to_response_message(choice))
break
response = OpenAIResponseObject(
- created_at=current_response.created,
+ created_at=completion.created,
id=f"resp-{uuid.uuid4()}",
model=model,
object="response",
@@ -549,7 +547,6 @@ class OpenAIResponsesImpl:
messages = ctx.messages.copy()
while True:
- # Do inference (including the first one) - streaming
current_inference_result = await self.inference_api.openai_chat_completion(
model=ctx.model,
messages=messages,