diff --git a/docs/static/deprecated-llama-stack-spec.html b/docs/static/deprecated-llama-stack-spec.html index 463837141..8d1b05742 100644 --- a/docs/static/deprecated-llama-stack-spec.html +++ b/docs/static/deprecated-llama-stack-spec.html @@ -10125,10 +10125,71 @@ "type": { "type": "string", "const": "output_text", - "default": "output_text" + "default": "output_text", + "description": "Content part type identifier, always \"output_text\"" }, "text": { - "type": "string" + "type": "string", + "description": "Text emitted for this content part" + }, + "annotations": { + "type": "array", + "items": { + "$ref": "#/components/schemas/OpenAIResponseAnnotations" + }, + "description": "Structured annotations associated with the text" + }, + "logprobs": { + "type": "array", + "items": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + } + }, + "description": "(Optional) Token log probability details" + } + }, + "additionalProperties": false, + "required": [ + "type", + "text", + "annotations" + ], + "title": "OpenAIResponseContentPartOutputText", + "description": "Text content within a streamed response part." + }, + "OpenAIResponseContentPartReasoningText": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "reasoning_text", + "default": "reasoning_text", + "description": "Content part type identifier, always \"reasoning_text\"" + }, + "text": { + "type": "string", + "description": "Reasoning text supplied by the model" } }, "additionalProperties": false, @@ -10136,7 +10197,8 @@ "type", "text" ], - "title": "OpenAIResponseContentPartOutputText" + "title": "OpenAIResponseContentPartReasoningText", + "description": "Reasoning text emitted as part of a streamed response." }, "OpenAIResponseContentPartRefusal": { "type": "object", @@ -10144,10 +10206,12 @@ "type": { "type": "string", "const": "refusal", - "default": "refusal" + "default": "refusal", + "description": "Content part type identifier, always \"refusal\"" }, "refusal": { - "type": "string" + "type": "string", + "description": "Refusal text supplied by the model" } }, "additionalProperties": false, @@ -10155,13 +10219,17 @@ "type", "refusal" ], - "title": "OpenAIResponseContentPartRefusal" + "title": "OpenAIResponseContentPartRefusal", + "description": "Refusal content within a streamed response part." 
}, "OpenAIResponseObjectStream": { "oneOf": [ { "$ref": "#/components/schemas/OpenAIResponseObjectStreamResponseCreated" }, + { + "$ref": "#/components/schemas/OpenAIResponseObjectStreamResponseInProgress" + }, { "$ref": "#/components/schemas/OpenAIResponseObjectStreamResponseOutputItemAdded" }, @@ -10219,6 +10287,12 @@ { "$ref": "#/components/schemas/OpenAIResponseObjectStreamResponseContentPartDone" }, + { + "$ref": "#/components/schemas/OpenAIResponseObjectStreamResponseIncomplete" + }, + { + "$ref": "#/components/schemas/OpenAIResponseObjectStreamResponseFailed" + }, { "$ref": "#/components/schemas/OpenAIResponseObjectStreamResponseCompleted" } @@ -10227,6 +10301,7 @@ "propertyName": "type", "mapping": { "response.created": "#/components/schemas/OpenAIResponseObjectStreamResponseCreated", + "response.in_progress": "#/components/schemas/OpenAIResponseObjectStreamResponseInProgress", "response.output_item.added": "#/components/schemas/OpenAIResponseObjectStreamResponseOutputItemAdded", "response.output_item.done": "#/components/schemas/OpenAIResponseObjectStreamResponseOutputItemDone", "response.output_text.delta": "#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDelta", @@ -10246,6 +10321,8 @@ "response.mcp_call.completed": "#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallCompleted", "response.content_part.added": "#/components/schemas/OpenAIResponseObjectStreamResponseContentPartAdded", "response.content_part.done": "#/components/schemas/OpenAIResponseObjectStreamResponseContentPartDone", + "response.incomplete": "#/components/schemas/OpenAIResponseObjectStreamResponseIncomplete", + "response.failed": "#/components/schemas/OpenAIResponseObjectStreamResponseFailed", "response.completed": "#/components/schemas/OpenAIResponseObjectStreamResponseCompleted" } } @@ -10255,7 +10332,7 @@ "properties": { "response": { "$ref": "#/components/schemas/OpenAIResponseObject", - "description": "The completed response object" + "description": "Completed response object" }, "type": { "type": "string", @@ -10275,6 +10352,10 @@ "OpenAIResponseObjectStreamResponseContentPartAdded": { "type": "object", "properties": { + "content_index": { + "type": "integer", + "description": "Index position of the part within the content array" + }, "response_id": { "type": "string", "description": "Unique identifier of the response containing this content" @@ -10283,6 +10364,10 @@ "type": "string", "description": "Unique identifier of the output item containing this content part" }, + "output_index": { + "type": "integer", + "description": "Index position of the output item in the response" + }, "part": { "oneOf": [ { @@ -10290,13 +10375,17 @@ }, { "$ref": "#/components/schemas/OpenAIResponseContentPartRefusal" + }, + { + "$ref": "#/components/schemas/OpenAIResponseContentPartReasoningText" } ], "discriminator": { "propertyName": "type", "mapping": { "output_text": "#/components/schemas/OpenAIResponseContentPartOutputText", - "refusal": "#/components/schemas/OpenAIResponseContentPartRefusal" + "refusal": "#/components/schemas/OpenAIResponseContentPartRefusal", + "reasoning_text": "#/components/schemas/OpenAIResponseContentPartReasoningText" } }, "description": "The content part that was added" @@ -10314,8 +10403,10 @@ }, "additionalProperties": false, "required": [ + "content_index", "response_id", "item_id", + "output_index", "part", "sequence_number", "type" @@ -10326,6 +10417,10 @@ "OpenAIResponseObjectStreamResponseContentPartDone": { "type": "object", "properties": { + "content_index": 
{ + "type": "integer", + "description": "Index position of the part within the content array" + }, "response_id": { "type": "string", "description": "Unique identifier of the response containing this content" @@ -10334,6 +10429,10 @@ "type": "string", "description": "Unique identifier of the output item containing this content part" }, + "output_index": { + "type": "integer", + "description": "Index position of the output item in the response" + }, "part": { "oneOf": [ { @@ -10341,13 +10440,17 @@ }, { "$ref": "#/components/schemas/OpenAIResponseContentPartRefusal" + }, + { + "$ref": "#/components/schemas/OpenAIResponseContentPartReasoningText" } ], "discriminator": { "propertyName": "type", "mapping": { "output_text": "#/components/schemas/OpenAIResponseContentPartOutputText", - "refusal": "#/components/schemas/OpenAIResponseContentPartRefusal" + "refusal": "#/components/schemas/OpenAIResponseContentPartRefusal", + "reasoning_text": "#/components/schemas/OpenAIResponseContentPartReasoningText" } }, "description": "The completed content part" @@ -10365,8 +10468,10 @@ }, "additionalProperties": false, "required": [ + "content_index", "response_id", "item_id", + "output_index", "part", "sequence_number", "type" @@ -10379,7 +10484,7 @@ "properties": { "response": { "$ref": "#/components/schemas/OpenAIResponseObject", - "description": "The newly created response object" + "description": "The response object that was created" }, "type": { "type": "string", @@ -10396,6 +10501,33 @@ "title": "OpenAIResponseObjectStreamResponseCreated", "description": "Streaming event indicating a new response has been created." }, + "OpenAIResponseObjectStreamResponseFailed": { + "type": "object", + "properties": { + "response": { + "$ref": "#/components/schemas/OpenAIResponseObject", + "description": "Response object describing the failure" + }, + "sequence_number": { + "type": "integer", + "description": "Sequential number for ordering streaming events" + }, + "type": { + "type": "string", + "const": "response.failed", + "default": "response.failed", + "description": "Event type identifier, always \"response.failed\"" + } + }, + "additionalProperties": false, + "required": [ + "response", + "sequence_number", + "type" + ], + "title": "OpenAIResponseObjectStreamResponseFailed", + "description": "Streaming event emitted when a response fails." + }, "OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta": { "type": "object", "properties": { @@ -10470,6 +10602,60 @@ "title": "OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone", "description": "Streaming event for when function call arguments are completed." }, + "OpenAIResponseObjectStreamResponseInProgress": { + "type": "object", + "properties": { + "response": { + "$ref": "#/components/schemas/OpenAIResponseObject", + "description": "Current response state while in progress" + }, + "sequence_number": { + "type": "integer", + "description": "Sequential number for ordering streaming events" + }, + "type": { + "type": "string", + "const": "response.in_progress", + "default": "response.in_progress", + "description": "Event type identifier, always \"response.in_progress\"" + } + }, + "additionalProperties": false, + "required": [ + "response", + "sequence_number", + "type" + ], + "title": "OpenAIResponseObjectStreamResponseInProgress", + "description": "Streaming event indicating the response remains in progress." 
+ }, + "OpenAIResponseObjectStreamResponseIncomplete": { + "type": "object", + "properties": { + "response": { + "$ref": "#/components/schemas/OpenAIResponseObject", + "description": "Response object describing the incomplete state" + }, + "sequence_number": { + "type": "integer", + "description": "Sequential number for ordering streaming events" + }, + "type": { + "type": "string", + "const": "response.incomplete", + "default": "response.incomplete", + "description": "Event type identifier, always \"response.incomplete\"" + } + }, + "additionalProperties": false, + "required": [ + "response", + "sequence_number", + "type" + ], + "title": "OpenAIResponseObjectStreamResponseIncomplete", + "description": "Streaming event emitted when a response ends in an incomplete state." + }, "OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta": { "type": "object", "properties": { diff --git a/docs/static/deprecated-llama-stack-spec.yaml b/docs/static/deprecated-llama-stack-spec.yaml index e4871e12a..2278cea7e 100644 --- a/docs/static/deprecated-llama-stack-spec.yaml +++ b/docs/static/deprecated-llama-stack-spec.yaml @@ -7544,13 +7544,57 @@ components: type: string const: output_text default: output_text + description: >- + Content part type identifier, always "output_text" text: type: string + description: Text emitted for this content part + annotations: + type: array + items: + $ref: '#/components/schemas/OpenAIResponseAnnotations' + description: >- + Structured annotations associated with the text + logprobs: + type: array + items: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: (Optional) Token log probability details additionalProperties: false required: - type - text + - annotations title: OpenAIResponseContentPartOutputText + description: >- + Text content within a streamed response part. + OpenAIResponseContentPartReasoningText: + type: object + properties: + type: + type: string + const: reasoning_text + default: reasoning_text + description: >- + Content part type identifier, always "reasoning_text" + text: + type: string + description: Reasoning text supplied by the model + additionalProperties: false + required: + - type + - text + title: OpenAIResponseContentPartReasoningText + description: >- + Reasoning text emitted as part of a streamed response. OpenAIResponseContentPartRefusal: type: object properties: @@ -7558,16 +7602,22 @@ components: type: string const: refusal default: refusal + description: >- + Content part type identifier, always "refusal" refusal: type: string + description: Refusal text supplied by the model additionalProperties: false required: - type - refusal title: OpenAIResponseContentPartRefusal + description: >- + Refusal content within a streamed response part. 
OpenAIResponseObjectStream: oneOf: - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseCreated' + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseInProgress' - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputItemAdded' - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputItemDone' - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDelta' @@ -7587,11 +7637,14 @@ components: - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallCompleted' - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseContentPartAdded' - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseContentPartDone' + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseIncomplete' + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseFailed' - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseCompleted' discriminator: propertyName: type mapping: response.created: '#/components/schemas/OpenAIResponseObjectStreamResponseCreated' + response.in_progress: '#/components/schemas/OpenAIResponseObjectStreamResponseInProgress' response.output_item.added: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputItemAdded' response.output_item.done: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputItemDone' response.output_text.delta: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDelta' @@ -7611,13 +7664,15 @@ components: response.mcp_call.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallCompleted' response.content_part.added: '#/components/schemas/OpenAIResponseObjectStreamResponseContentPartAdded' response.content_part.done: '#/components/schemas/OpenAIResponseObjectStreamResponseContentPartDone' + response.incomplete: '#/components/schemas/OpenAIResponseObjectStreamResponseIncomplete' + response.failed: '#/components/schemas/OpenAIResponseObjectStreamResponseFailed' response.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseCompleted' "OpenAIResponseObjectStreamResponseCompleted": type: object properties: response: $ref: '#/components/schemas/OpenAIResponseObject' - description: The completed response object + description: Completed response object type: type: string const: response.completed @@ -7635,6 +7690,10 @@ components: "OpenAIResponseObjectStreamResponseContentPartAdded": type: object properties: + content_index: + type: integer + description: >- + Index position of the part within the content array response_id: type: string description: >- @@ -7643,15 +7702,21 @@ components: type: string description: >- Unique identifier of the output item containing this content part + output_index: + type: integer + description: >- + Index position of the output item in the response part: oneOf: - $ref: '#/components/schemas/OpenAIResponseContentPartOutputText' - $ref: '#/components/schemas/OpenAIResponseContentPartRefusal' + - $ref: '#/components/schemas/OpenAIResponseContentPartReasoningText' discriminator: propertyName: type mapping: output_text: '#/components/schemas/OpenAIResponseContentPartOutputText' refusal: '#/components/schemas/OpenAIResponseContentPartRefusal' + reasoning_text: '#/components/schemas/OpenAIResponseContentPartReasoningText' description: The content part that was added sequence_number: type: integer @@ -7665,8 +7730,10 @@ components: Event type identifier, always "response.content_part.added" additionalProperties: false required: + - content_index - response_id - item_id + - 
output_index - part - sequence_number - type @@ -7677,6 +7744,10 @@ components: "OpenAIResponseObjectStreamResponseContentPartDone": type: object properties: + content_index: + type: integer + description: >- + Index position of the part within the content array response_id: type: string description: >- @@ -7685,15 +7756,21 @@ components: type: string description: >- Unique identifier of the output item containing this content part + output_index: + type: integer + description: >- + Index position of the output item in the response part: oneOf: - $ref: '#/components/schemas/OpenAIResponseContentPartOutputText' - $ref: '#/components/schemas/OpenAIResponseContentPartRefusal' + - $ref: '#/components/schemas/OpenAIResponseContentPartReasoningText' discriminator: propertyName: type mapping: output_text: '#/components/schemas/OpenAIResponseContentPartOutputText' refusal: '#/components/schemas/OpenAIResponseContentPartRefusal' + reasoning_text: '#/components/schemas/OpenAIResponseContentPartReasoningText' description: The completed content part sequence_number: type: integer @@ -7707,8 +7784,10 @@ components: Event type identifier, always "response.content_part.done" additionalProperties: false required: + - content_index - response_id - item_id + - output_index - part - sequence_number - type @@ -7721,7 +7800,7 @@ components: properties: response: $ref: '#/components/schemas/OpenAIResponseObject' - description: The newly created response object + description: The response object that was created type: type: string const: response.created @@ -7736,6 +7815,30 @@ components: OpenAIResponseObjectStreamResponseCreated description: >- Streaming event indicating a new response has been created. + OpenAIResponseObjectStreamResponseFailed: + type: object + properties: + response: + $ref: '#/components/schemas/OpenAIResponseObject' + description: Response object describing the failure + sequence_number: + type: integer + description: >- + Sequential number for ordering streaming events + type: + type: string + const: response.failed + default: response.failed + description: >- + Event type identifier, always "response.failed" + additionalProperties: false + required: + - response + - sequence_number + - type + title: OpenAIResponseObjectStreamResponseFailed + description: >- + Streaming event emitted when a response fails. "OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta": type: object properties: @@ -7808,6 +7911,57 @@ components: OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone description: >- Streaming event for when function call arguments are completed. + "OpenAIResponseObjectStreamResponseInProgress": + type: object + properties: + response: + $ref: '#/components/schemas/OpenAIResponseObject' + description: Current response state while in progress + sequence_number: + type: integer + description: >- + Sequential number for ordering streaming events + type: + type: string + const: response.in_progress + default: response.in_progress + description: >- + Event type identifier, always "response.in_progress" + additionalProperties: false + required: + - response + - sequence_number + - type + title: >- + OpenAIResponseObjectStreamResponseInProgress + description: >- + Streaming event indicating the response remains in progress. 
+ "OpenAIResponseObjectStreamResponseIncomplete": + type: object + properties: + response: + $ref: '#/components/schemas/OpenAIResponseObject' + description: >- + Response object describing the incomplete state + sequence_number: + type: integer + description: >- + Sequential number for ordering streaming events + type: + type: string + const: response.incomplete + default: response.incomplete + description: >- + Event type identifier, always "response.incomplete" + additionalProperties: false + required: + - response + - sequence_number + - type + title: >- + OpenAIResponseObjectStreamResponseIncomplete + description: >- + Streaming event emitted when a response ends in an incomplete state. "OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta": type: object properties: diff --git a/docs/static/llama-stack-spec.html b/docs/static/llama-stack-spec.html index 8c363e61b..5fb01ffce 100644 --- a/docs/static/llama-stack-spec.html +++ b/docs/static/llama-stack-spec.html @@ -8220,10 +8220,71 @@ "type": { "type": "string", "const": "output_text", - "default": "output_text" + "default": "output_text", + "description": "Content part type identifier, always \"output_text\"" }, "text": { - "type": "string" + "type": "string", + "description": "Text emitted for this content part" + }, + "annotations": { + "type": "array", + "items": { + "$ref": "#/components/schemas/OpenAIResponseAnnotations" + }, + "description": "Structured annotations associated with the text" + }, + "logprobs": { + "type": "array", + "items": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + } + }, + "description": "(Optional) Token log probability details" + } + }, + "additionalProperties": false, + "required": [ + "type", + "text", + "annotations" + ], + "title": "OpenAIResponseContentPartOutputText", + "description": "Text content within a streamed response part." + }, + "OpenAIResponseContentPartReasoningText": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "reasoning_text", + "default": "reasoning_text", + "description": "Content part type identifier, always \"reasoning_text\"" + }, + "text": { + "type": "string", + "description": "Reasoning text supplied by the model" } }, "additionalProperties": false, @@ -8231,7 +8292,8 @@ "type", "text" ], - "title": "OpenAIResponseContentPartOutputText" + "title": "OpenAIResponseContentPartReasoningText", + "description": "Reasoning text emitted as part of a streamed response." }, "OpenAIResponseContentPartRefusal": { "type": "object", @@ -8239,10 +8301,12 @@ "type": { "type": "string", "const": "refusal", - "default": "refusal" + "default": "refusal", + "description": "Content part type identifier, always \"refusal\"" }, "refusal": { - "type": "string" + "type": "string", + "description": "Refusal text supplied by the model" } }, "additionalProperties": false, @@ -8250,13 +8314,17 @@ "type", "refusal" ], - "title": "OpenAIResponseContentPartRefusal" + "title": "OpenAIResponseContentPartRefusal", + "description": "Refusal content within a streamed response part." 
}, "OpenAIResponseObjectStream": { "oneOf": [ { "$ref": "#/components/schemas/OpenAIResponseObjectStreamResponseCreated" }, + { + "$ref": "#/components/schemas/OpenAIResponseObjectStreamResponseInProgress" + }, { "$ref": "#/components/schemas/OpenAIResponseObjectStreamResponseOutputItemAdded" }, @@ -8314,6 +8382,12 @@ { "$ref": "#/components/schemas/OpenAIResponseObjectStreamResponseContentPartDone" }, + { + "$ref": "#/components/schemas/OpenAIResponseObjectStreamResponseIncomplete" + }, + { + "$ref": "#/components/schemas/OpenAIResponseObjectStreamResponseFailed" + }, { "$ref": "#/components/schemas/OpenAIResponseObjectStreamResponseCompleted" } @@ -8322,6 +8396,7 @@ "propertyName": "type", "mapping": { "response.created": "#/components/schemas/OpenAIResponseObjectStreamResponseCreated", + "response.in_progress": "#/components/schemas/OpenAIResponseObjectStreamResponseInProgress", "response.output_item.added": "#/components/schemas/OpenAIResponseObjectStreamResponseOutputItemAdded", "response.output_item.done": "#/components/schemas/OpenAIResponseObjectStreamResponseOutputItemDone", "response.output_text.delta": "#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDelta", @@ -8341,6 +8416,8 @@ "response.mcp_call.completed": "#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallCompleted", "response.content_part.added": "#/components/schemas/OpenAIResponseObjectStreamResponseContentPartAdded", "response.content_part.done": "#/components/schemas/OpenAIResponseObjectStreamResponseContentPartDone", + "response.incomplete": "#/components/schemas/OpenAIResponseObjectStreamResponseIncomplete", + "response.failed": "#/components/schemas/OpenAIResponseObjectStreamResponseFailed", "response.completed": "#/components/schemas/OpenAIResponseObjectStreamResponseCompleted" } } @@ -8350,7 +8427,7 @@ "properties": { "response": { "$ref": "#/components/schemas/OpenAIResponseObject", - "description": "The completed response object" + "description": "Completed response object" }, "type": { "type": "string", @@ -8370,6 +8447,10 @@ "OpenAIResponseObjectStreamResponseContentPartAdded": { "type": "object", "properties": { + "content_index": { + "type": "integer", + "description": "Index position of the part within the content array" + }, "response_id": { "type": "string", "description": "Unique identifier of the response containing this content" @@ -8378,6 +8459,10 @@ "type": "string", "description": "Unique identifier of the output item containing this content part" }, + "output_index": { + "type": "integer", + "description": "Index position of the output item in the response" + }, "part": { "oneOf": [ { @@ -8385,13 +8470,17 @@ }, { "$ref": "#/components/schemas/OpenAIResponseContentPartRefusal" + }, + { + "$ref": "#/components/schemas/OpenAIResponseContentPartReasoningText" } ], "discriminator": { "propertyName": "type", "mapping": { "output_text": "#/components/schemas/OpenAIResponseContentPartOutputText", - "refusal": "#/components/schemas/OpenAIResponseContentPartRefusal" + "refusal": "#/components/schemas/OpenAIResponseContentPartRefusal", + "reasoning_text": "#/components/schemas/OpenAIResponseContentPartReasoningText" } }, "description": "The content part that was added" @@ -8409,8 +8498,10 @@ }, "additionalProperties": false, "required": [ + "content_index", "response_id", "item_id", + "output_index", "part", "sequence_number", "type" @@ -8421,6 +8512,10 @@ "OpenAIResponseObjectStreamResponseContentPartDone": { "type": "object", "properties": { + "content_index": { + "type": 
"integer", + "description": "Index position of the part within the content array" + }, "response_id": { "type": "string", "description": "Unique identifier of the response containing this content" @@ -8429,6 +8524,10 @@ "type": "string", "description": "Unique identifier of the output item containing this content part" }, + "output_index": { + "type": "integer", + "description": "Index position of the output item in the response" + }, "part": { "oneOf": [ { @@ -8436,13 +8535,17 @@ }, { "$ref": "#/components/schemas/OpenAIResponseContentPartRefusal" + }, + { + "$ref": "#/components/schemas/OpenAIResponseContentPartReasoningText" } ], "discriminator": { "propertyName": "type", "mapping": { "output_text": "#/components/schemas/OpenAIResponseContentPartOutputText", - "refusal": "#/components/schemas/OpenAIResponseContentPartRefusal" + "refusal": "#/components/schemas/OpenAIResponseContentPartRefusal", + "reasoning_text": "#/components/schemas/OpenAIResponseContentPartReasoningText" } }, "description": "The completed content part" @@ -8460,8 +8563,10 @@ }, "additionalProperties": false, "required": [ + "content_index", "response_id", "item_id", + "output_index", "part", "sequence_number", "type" @@ -8474,7 +8579,7 @@ "properties": { "response": { "$ref": "#/components/schemas/OpenAIResponseObject", - "description": "The newly created response object" + "description": "The response object that was created" }, "type": { "type": "string", @@ -8491,6 +8596,33 @@ "title": "OpenAIResponseObjectStreamResponseCreated", "description": "Streaming event indicating a new response has been created." }, + "OpenAIResponseObjectStreamResponseFailed": { + "type": "object", + "properties": { + "response": { + "$ref": "#/components/schemas/OpenAIResponseObject", + "description": "Response object describing the failure" + }, + "sequence_number": { + "type": "integer", + "description": "Sequential number for ordering streaming events" + }, + "type": { + "type": "string", + "const": "response.failed", + "default": "response.failed", + "description": "Event type identifier, always \"response.failed\"" + } + }, + "additionalProperties": false, + "required": [ + "response", + "sequence_number", + "type" + ], + "title": "OpenAIResponseObjectStreamResponseFailed", + "description": "Streaming event emitted when a response fails." + }, "OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta": { "type": "object", "properties": { @@ -8565,6 +8697,60 @@ "title": "OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone", "description": "Streaming event for when function call arguments are completed." }, + "OpenAIResponseObjectStreamResponseInProgress": { + "type": "object", + "properties": { + "response": { + "$ref": "#/components/schemas/OpenAIResponseObject", + "description": "Current response state while in progress" + }, + "sequence_number": { + "type": "integer", + "description": "Sequential number for ordering streaming events" + }, + "type": { + "type": "string", + "const": "response.in_progress", + "default": "response.in_progress", + "description": "Event type identifier, always \"response.in_progress\"" + } + }, + "additionalProperties": false, + "required": [ + "response", + "sequence_number", + "type" + ], + "title": "OpenAIResponseObjectStreamResponseInProgress", + "description": "Streaming event indicating the response remains in progress." 
+ }, + "OpenAIResponseObjectStreamResponseIncomplete": { + "type": "object", + "properties": { + "response": { + "$ref": "#/components/schemas/OpenAIResponseObject", + "description": "Response object describing the incomplete state" + }, + "sequence_number": { + "type": "integer", + "description": "Sequential number for ordering streaming events" + }, + "type": { + "type": "string", + "const": "response.incomplete", + "default": "response.incomplete", + "description": "Event type identifier, always \"response.incomplete\"" + } + }, + "additionalProperties": false, + "required": [ + "response", + "sequence_number", + "type" + ], + "title": "OpenAIResponseObjectStreamResponseIncomplete", + "description": "Streaming event emitted when a response ends in an incomplete state." + }, "OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta": { "type": "object", "properties": { diff --git a/docs/static/llama-stack-spec.yaml b/docs/static/llama-stack-spec.yaml index bc587f939..b6e2871c6 100644 --- a/docs/static/llama-stack-spec.yaml +++ b/docs/static/llama-stack-spec.yaml @@ -6240,13 +6240,57 @@ components: type: string const: output_text default: output_text + description: >- + Content part type identifier, always "output_text" text: type: string + description: Text emitted for this content part + annotations: + type: array + items: + $ref: '#/components/schemas/OpenAIResponseAnnotations' + description: >- + Structured annotations associated with the text + logprobs: + type: array + items: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: (Optional) Token log probability details additionalProperties: false required: - type - text + - annotations title: OpenAIResponseContentPartOutputText + description: >- + Text content within a streamed response part. + OpenAIResponseContentPartReasoningText: + type: object + properties: + type: + type: string + const: reasoning_text + default: reasoning_text + description: >- + Content part type identifier, always "reasoning_text" + text: + type: string + description: Reasoning text supplied by the model + additionalProperties: false + required: + - type + - text + title: OpenAIResponseContentPartReasoningText + description: >- + Reasoning text emitted as part of a streamed response. OpenAIResponseContentPartRefusal: type: object properties: @@ -6254,16 +6298,22 @@ components: type: string const: refusal default: refusal + description: >- + Content part type identifier, always "refusal" refusal: type: string + description: Refusal text supplied by the model additionalProperties: false required: - type - refusal title: OpenAIResponseContentPartRefusal + description: >- + Refusal content within a streamed response part. 
OpenAIResponseObjectStream: oneOf: - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseCreated' + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseInProgress' - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputItemAdded' - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputItemDone' - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDelta' @@ -6283,11 +6333,14 @@ components: - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallCompleted' - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseContentPartAdded' - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseContentPartDone' + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseIncomplete' + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseFailed' - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseCompleted' discriminator: propertyName: type mapping: response.created: '#/components/schemas/OpenAIResponseObjectStreamResponseCreated' + response.in_progress: '#/components/schemas/OpenAIResponseObjectStreamResponseInProgress' response.output_item.added: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputItemAdded' response.output_item.done: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputItemDone' response.output_text.delta: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDelta' @@ -6307,13 +6360,15 @@ components: response.mcp_call.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallCompleted' response.content_part.added: '#/components/schemas/OpenAIResponseObjectStreamResponseContentPartAdded' response.content_part.done: '#/components/schemas/OpenAIResponseObjectStreamResponseContentPartDone' + response.incomplete: '#/components/schemas/OpenAIResponseObjectStreamResponseIncomplete' + response.failed: '#/components/schemas/OpenAIResponseObjectStreamResponseFailed' response.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseCompleted' "OpenAIResponseObjectStreamResponseCompleted": type: object properties: response: $ref: '#/components/schemas/OpenAIResponseObject' - description: The completed response object + description: Completed response object type: type: string const: response.completed @@ -6331,6 +6386,10 @@ components: "OpenAIResponseObjectStreamResponseContentPartAdded": type: object properties: + content_index: + type: integer + description: >- + Index position of the part within the content array response_id: type: string description: >- @@ -6339,15 +6398,21 @@ components: type: string description: >- Unique identifier of the output item containing this content part + output_index: + type: integer + description: >- + Index position of the output item in the response part: oneOf: - $ref: '#/components/schemas/OpenAIResponseContentPartOutputText' - $ref: '#/components/schemas/OpenAIResponseContentPartRefusal' + - $ref: '#/components/schemas/OpenAIResponseContentPartReasoningText' discriminator: propertyName: type mapping: output_text: '#/components/schemas/OpenAIResponseContentPartOutputText' refusal: '#/components/schemas/OpenAIResponseContentPartRefusal' + reasoning_text: '#/components/schemas/OpenAIResponseContentPartReasoningText' description: The content part that was added sequence_number: type: integer @@ -6361,8 +6426,10 @@ components: Event type identifier, always "response.content_part.added" additionalProperties: false required: + - content_index - response_id - item_id + - 
output_index - part - sequence_number - type @@ -6373,6 +6440,10 @@ components: "OpenAIResponseObjectStreamResponseContentPartDone": type: object properties: + content_index: + type: integer + description: >- + Index position of the part within the content array response_id: type: string description: >- @@ -6381,15 +6452,21 @@ components: type: string description: >- Unique identifier of the output item containing this content part + output_index: + type: integer + description: >- + Index position of the output item in the response part: oneOf: - $ref: '#/components/schemas/OpenAIResponseContentPartOutputText' - $ref: '#/components/schemas/OpenAIResponseContentPartRefusal' + - $ref: '#/components/schemas/OpenAIResponseContentPartReasoningText' discriminator: propertyName: type mapping: output_text: '#/components/schemas/OpenAIResponseContentPartOutputText' refusal: '#/components/schemas/OpenAIResponseContentPartRefusal' + reasoning_text: '#/components/schemas/OpenAIResponseContentPartReasoningText' description: The completed content part sequence_number: type: integer @@ -6403,8 +6480,10 @@ components: Event type identifier, always "response.content_part.done" additionalProperties: false required: + - content_index - response_id - item_id + - output_index - part - sequence_number - type @@ -6417,7 +6496,7 @@ components: properties: response: $ref: '#/components/schemas/OpenAIResponseObject' - description: The newly created response object + description: The response object that was created type: type: string const: response.created @@ -6432,6 +6511,30 @@ components: OpenAIResponseObjectStreamResponseCreated description: >- Streaming event indicating a new response has been created. + OpenAIResponseObjectStreamResponseFailed: + type: object + properties: + response: + $ref: '#/components/schemas/OpenAIResponseObject' + description: Response object describing the failure + sequence_number: + type: integer + description: >- + Sequential number for ordering streaming events + type: + type: string + const: response.failed + default: response.failed + description: >- + Event type identifier, always "response.failed" + additionalProperties: false + required: + - response + - sequence_number + - type + title: OpenAIResponseObjectStreamResponseFailed + description: >- + Streaming event emitted when a response fails. "OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta": type: object properties: @@ -6504,6 +6607,57 @@ components: OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone description: >- Streaming event for when function call arguments are completed. + "OpenAIResponseObjectStreamResponseInProgress": + type: object + properties: + response: + $ref: '#/components/schemas/OpenAIResponseObject' + description: Current response state while in progress + sequence_number: + type: integer + description: >- + Sequential number for ordering streaming events + type: + type: string + const: response.in_progress + default: response.in_progress + description: >- + Event type identifier, always "response.in_progress" + additionalProperties: false + required: + - response + - sequence_number + - type + title: >- + OpenAIResponseObjectStreamResponseInProgress + description: >- + Streaming event indicating the response remains in progress. 
+ "OpenAIResponseObjectStreamResponseIncomplete": + type: object + properties: + response: + $ref: '#/components/schemas/OpenAIResponseObject' + description: >- + Response object describing the incomplete state + sequence_number: + type: integer + description: >- + Sequential number for ordering streaming events + type: + type: string + const: response.incomplete + default: response.incomplete + description: >- + Event type identifier, always "response.incomplete" + additionalProperties: false + required: + - response + - sequence_number + - type + title: >- + OpenAIResponseObjectStreamResponseIncomplete + description: >- + Streaming event emitted when a response ends in an incomplete state. "OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta": type: object properties: diff --git a/docs/static/stainless-llama-stack-spec.html b/docs/static/stainless-llama-stack-spec.html index 405f64038..404eb464e 100644 --- a/docs/static/stainless-llama-stack-spec.html +++ b/docs/static/stainless-llama-stack-spec.html @@ -10229,10 +10229,71 @@ "type": { "type": "string", "const": "output_text", - "default": "output_text" + "default": "output_text", + "description": "Content part type identifier, always \"output_text\"" }, "text": { - "type": "string" + "type": "string", + "description": "Text emitted for this content part" + }, + "annotations": { + "type": "array", + "items": { + "$ref": "#/components/schemas/OpenAIResponseAnnotations" + }, + "description": "Structured annotations associated with the text" + }, + "logprobs": { + "type": "array", + "items": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + } + }, + "description": "(Optional) Token log probability details" + } + }, + "additionalProperties": false, + "required": [ + "type", + "text", + "annotations" + ], + "title": "OpenAIResponseContentPartOutputText", + "description": "Text content within a streamed response part." + }, + "OpenAIResponseContentPartReasoningText": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "reasoning_text", + "default": "reasoning_text", + "description": "Content part type identifier, always \"reasoning_text\"" + }, + "text": { + "type": "string", + "description": "Reasoning text supplied by the model" } }, "additionalProperties": false, @@ -10240,7 +10301,8 @@ "type", "text" ], - "title": "OpenAIResponseContentPartOutputText" + "title": "OpenAIResponseContentPartReasoningText", + "description": "Reasoning text emitted as part of a streamed response." }, "OpenAIResponseContentPartRefusal": { "type": "object", @@ -10248,10 +10310,12 @@ "type": { "type": "string", "const": "refusal", - "default": "refusal" + "default": "refusal", + "description": "Content part type identifier, always \"refusal\"" }, "refusal": { - "type": "string" + "type": "string", + "description": "Refusal text supplied by the model" } }, "additionalProperties": false, @@ -10259,13 +10323,17 @@ "type", "refusal" ], - "title": "OpenAIResponseContentPartRefusal" + "title": "OpenAIResponseContentPartRefusal", + "description": "Refusal content within a streamed response part." 
}, "OpenAIResponseObjectStream": { "oneOf": [ { "$ref": "#/components/schemas/OpenAIResponseObjectStreamResponseCreated" }, + { + "$ref": "#/components/schemas/OpenAIResponseObjectStreamResponseInProgress" + }, { "$ref": "#/components/schemas/OpenAIResponseObjectStreamResponseOutputItemAdded" }, @@ -10323,6 +10391,12 @@ { "$ref": "#/components/schemas/OpenAIResponseObjectStreamResponseContentPartDone" }, + { + "$ref": "#/components/schemas/OpenAIResponseObjectStreamResponseIncomplete" + }, + { + "$ref": "#/components/schemas/OpenAIResponseObjectStreamResponseFailed" + }, { "$ref": "#/components/schemas/OpenAIResponseObjectStreamResponseCompleted" } @@ -10331,6 +10405,7 @@ "propertyName": "type", "mapping": { "response.created": "#/components/schemas/OpenAIResponseObjectStreamResponseCreated", + "response.in_progress": "#/components/schemas/OpenAIResponseObjectStreamResponseInProgress", "response.output_item.added": "#/components/schemas/OpenAIResponseObjectStreamResponseOutputItemAdded", "response.output_item.done": "#/components/schemas/OpenAIResponseObjectStreamResponseOutputItemDone", "response.output_text.delta": "#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDelta", @@ -10350,6 +10425,8 @@ "response.mcp_call.completed": "#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallCompleted", "response.content_part.added": "#/components/schemas/OpenAIResponseObjectStreamResponseContentPartAdded", "response.content_part.done": "#/components/schemas/OpenAIResponseObjectStreamResponseContentPartDone", + "response.incomplete": "#/components/schemas/OpenAIResponseObjectStreamResponseIncomplete", + "response.failed": "#/components/schemas/OpenAIResponseObjectStreamResponseFailed", "response.completed": "#/components/schemas/OpenAIResponseObjectStreamResponseCompleted" } } @@ -10359,7 +10436,7 @@ "properties": { "response": { "$ref": "#/components/schemas/OpenAIResponseObject", - "description": "The completed response object" + "description": "Completed response object" }, "type": { "type": "string", @@ -10379,6 +10456,10 @@ "OpenAIResponseObjectStreamResponseContentPartAdded": { "type": "object", "properties": { + "content_index": { + "type": "integer", + "description": "Index position of the part within the content array" + }, "response_id": { "type": "string", "description": "Unique identifier of the response containing this content" @@ -10387,6 +10468,10 @@ "type": "string", "description": "Unique identifier of the output item containing this content part" }, + "output_index": { + "type": "integer", + "description": "Index position of the output item in the response" + }, "part": { "oneOf": [ { @@ -10394,13 +10479,17 @@ }, { "$ref": "#/components/schemas/OpenAIResponseContentPartRefusal" + }, + { + "$ref": "#/components/schemas/OpenAIResponseContentPartReasoningText" } ], "discriminator": { "propertyName": "type", "mapping": { "output_text": "#/components/schemas/OpenAIResponseContentPartOutputText", - "refusal": "#/components/schemas/OpenAIResponseContentPartRefusal" + "refusal": "#/components/schemas/OpenAIResponseContentPartRefusal", + "reasoning_text": "#/components/schemas/OpenAIResponseContentPartReasoningText" } }, "description": "The content part that was added" @@ -10418,8 +10507,10 @@ }, "additionalProperties": false, "required": [ + "content_index", "response_id", "item_id", + "output_index", "part", "sequence_number", "type" @@ -10430,6 +10521,10 @@ "OpenAIResponseObjectStreamResponseContentPartDone": { "type": "object", "properties": { + "content_index": 
{ + "type": "integer", + "description": "Index position of the part within the content array" + }, "response_id": { "type": "string", "description": "Unique identifier of the response containing this content" @@ -10438,6 +10533,10 @@ "type": "string", "description": "Unique identifier of the output item containing this content part" }, + "output_index": { + "type": "integer", + "description": "Index position of the output item in the response" + }, "part": { "oneOf": [ { @@ -10445,13 +10544,17 @@ }, { "$ref": "#/components/schemas/OpenAIResponseContentPartRefusal" + }, + { + "$ref": "#/components/schemas/OpenAIResponseContentPartReasoningText" } ], "discriminator": { "propertyName": "type", "mapping": { "output_text": "#/components/schemas/OpenAIResponseContentPartOutputText", - "refusal": "#/components/schemas/OpenAIResponseContentPartRefusal" + "refusal": "#/components/schemas/OpenAIResponseContentPartRefusal", + "reasoning_text": "#/components/schemas/OpenAIResponseContentPartReasoningText" } }, "description": "The completed content part" @@ -10469,8 +10572,10 @@ }, "additionalProperties": false, "required": [ + "content_index", "response_id", "item_id", + "output_index", "part", "sequence_number", "type" @@ -10483,7 +10588,7 @@ "properties": { "response": { "$ref": "#/components/schemas/OpenAIResponseObject", - "description": "The newly created response object" + "description": "The response object that was created" }, "type": { "type": "string", @@ -10500,6 +10605,33 @@ "title": "OpenAIResponseObjectStreamResponseCreated", "description": "Streaming event indicating a new response has been created." }, + "OpenAIResponseObjectStreamResponseFailed": { + "type": "object", + "properties": { + "response": { + "$ref": "#/components/schemas/OpenAIResponseObject", + "description": "Response object describing the failure" + }, + "sequence_number": { + "type": "integer", + "description": "Sequential number for ordering streaming events" + }, + "type": { + "type": "string", + "const": "response.failed", + "default": "response.failed", + "description": "Event type identifier, always \"response.failed\"" + } + }, + "additionalProperties": false, + "required": [ + "response", + "sequence_number", + "type" + ], + "title": "OpenAIResponseObjectStreamResponseFailed", + "description": "Streaming event emitted when a response fails." + }, "OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta": { "type": "object", "properties": { @@ -10574,6 +10706,60 @@ "title": "OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone", "description": "Streaming event for when function call arguments are completed." }, + "OpenAIResponseObjectStreamResponseInProgress": { + "type": "object", + "properties": { + "response": { + "$ref": "#/components/schemas/OpenAIResponseObject", + "description": "Current response state while in progress" + }, + "sequence_number": { + "type": "integer", + "description": "Sequential number for ordering streaming events" + }, + "type": { + "type": "string", + "const": "response.in_progress", + "default": "response.in_progress", + "description": "Event type identifier, always \"response.in_progress\"" + } + }, + "additionalProperties": false, + "required": [ + "response", + "sequence_number", + "type" + ], + "title": "OpenAIResponseObjectStreamResponseInProgress", + "description": "Streaming event indicating the response remains in progress." 
+ }, + "OpenAIResponseObjectStreamResponseIncomplete": { + "type": "object", + "properties": { + "response": { + "$ref": "#/components/schemas/OpenAIResponseObject", + "description": "Response object describing the incomplete state" + }, + "sequence_number": { + "type": "integer", + "description": "Sequential number for ordering streaming events" + }, + "type": { + "type": "string", + "const": "response.incomplete", + "default": "response.incomplete", + "description": "Event type identifier, always \"response.incomplete\"" + } + }, + "additionalProperties": false, + "required": [ + "response", + "sequence_number", + "type" + ], + "title": "OpenAIResponseObjectStreamResponseIncomplete", + "description": "Streaming event emitted when a response ends in an incomplete state." + }, "OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta": { "type": "object", "properties": { diff --git a/docs/static/stainless-llama-stack-spec.yaml b/docs/static/stainless-llama-stack-spec.yaml index 182e7363d..460c028fd 100644 --- a/docs/static/stainless-llama-stack-spec.yaml +++ b/docs/static/stainless-llama-stack-spec.yaml @@ -7685,13 +7685,57 @@ components: type: string const: output_text default: output_text + description: >- + Content part type identifier, always "output_text" text: type: string + description: Text emitted for this content part + annotations: + type: array + items: + $ref: '#/components/schemas/OpenAIResponseAnnotations' + description: >- + Structured annotations associated with the text + logprobs: + type: array + items: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: (Optional) Token log probability details additionalProperties: false required: - type - text + - annotations title: OpenAIResponseContentPartOutputText + description: >- + Text content within a streamed response part. + OpenAIResponseContentPartReasoningText: + type: object + properties: + type: + type: string + const: reasoning_text + default: reasoning_text + description: >- + Content part type identifier, always "reasoning_text" + text: + type: string + description: Reasoning text supplied by the model + additionalProperties: false + required: + - type + - text + title: OpenAIResponseContentPartReasoningText + description: >- + Reasoning text emitted as part of a streamed response. OpenAIResponseContentPartRefusal: type: object properties: @@ -7699,16 +7743,22 @@ components: type: string const: refusal default: refusal + description: >- + Content part type identifier, always "refusal" refusal: type: string + description: Refusal text supplied by the model additionalProperties: false required: - type - refusal title: OpenAIResponseContentPartRefusal + description: >- + Refusal content within a streamed response part. 
OpenAIResponseObjectStream: oneOf: - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseCreated' + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseInProgress' - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputItemAdded' - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputItemDone' - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDelta' @@ -7728,11 +7778,14 @@ components: - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallCompleted' - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseContentPartAdded' - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseContentPartDone' + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseIncomplete' + - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseFailed' - $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseCompleted' discriminator: propertyName: type mapping: response.created: '#/components/schemas/OpenAIResponseObjectStreamResponseCreated' + response.in_progress: '#/components/schemas/OpenAIResponseObjectStreamResponseInProgress' response.output_item.added: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputItemAdded' response.output_item.done: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputItemDone' response.output_text.delta: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDelta' @@ -7752,13 +7805,15 @@ components: response.mcp_call.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseMcpCallCompleted' response.content_part.added: '#/components/schemas/OpenAIResponseObjectStreamResponseContentPartAdded' response.content_part.done: '#/components/schemas/OpenAIResponseObjectStreamResponseContentPartDone' + response.incomplete: '#/components/schemas/OpenAIResponseObjectStreamResponseIncomplete' + response.failed: '#/components/schemas/OpenAIResponseObjectStreamResponseFailed' response.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseCompleted' "OpenAIResponseObjectStreamResponseCompleted": type: object properties: response: $ref: '#/components/schemas/OpenAIResponseObject' - description: The completed response object + description: Completed response object type: type: string const: response.completed @@ -7776,6 +7831,10 @@ components: "OpenAIResponseObjectStreamResponseContentPartAdded": type: object properties: + content_index: + type: integer + description: >- + Index position of the part within the content array response_id: type: string description: >- @@ -7784,15 +7843,21 @@ components: type: string description: >- Unique identifier of the output item containing this content part + output_index: + type: integer + description: >- + Index position of the output item in the response part: oneOf: - $ref: '#/components/schemas/OpenAIResponseContentPartOutputText' - $ref: '#/components/schemas/OpenAIResponseContentPartRefusal' + - $ref: '#/components/schemas/OpenAIResponseContentPartReasoningText' discriminator: propertyName: type mapping: output_text: '#/components/schemas/OpenAIResponseContentPartOutputText' refusal: '#/components/schemas/OpenAIResponseContentPartRefusal' + reasoning_text: '#/components/schemas/OpenAIResponseContentPartReasoningText' description: The content part that was added sequence_number: type: integer @@ -7806,8 +7871,10 @@ components: Event type identifier, always "response.content_part.added" additionalProperties: false required: + - content_index - response_id - item_id + - 
@@ -7776,6 +7831,10 @@ components:
     "OpenAIResponseObjectStreamResponseContentPartAdded":
       type: object
       properties:
+        content_index:
+          type: integer
+          description: >-
+            Index position of the part within the content array
         response_id:
           type: string
           description: >-
@@ -7784,15 +7843,21 @@ components:
           type: string
           description: >-
             Unique identifier of the output item containing this content part
+        output_index:
+          type: integer
+          description: >-
+            Index position of the output item in the response
         part:
           oneOf:
             - $ref: '#/components/schemas/OpenAIResponseContentPartOutputText'
             - $ref: '#/components/schemas/OpenAIResponseContentPartRefusal'
+            - $ref: '#/components/schemas/OpenAIResponseContentPartReasoningText'
           discriminator:
             propertyName: type
             mapping:
               output_text: '#/components/schemas/OpenAIResponseContentPartOutputText'
               refusal: '#/components/schemas/OpenAIResponseContentPartRefusal'
+              reasoning_text: '#/components/schemas/OpenAIResponseContentPartReasoningText'
           description: The content part that was added
         sequence_number:
           type: integer
@@ -7806,8 +7871,10 @@ components:
             Event type identifier, always "response.content_part.added"
       additionalProperties: false
       required:
+        - content_index
        - response_id
        - item_id
+        - output_index
        - part
        - sequence_number
        - type
@@ -7818,6 +7885,10 @@ components:
     "OpenAIResponseObjectStreamResponseContentPartDone":
       type: object
       properties:
+        content_index:
+          type: integer
+          description: >-
+            Index position of the part within the content array
         response_id:
           type: string
           description: >-
@@ -7826,15 +7897,21 @@ components:
           type: string
           description: >-
             Unique identifier of the output item containing this content part
+        output_index:
+          type: integer
+          description: >-
+            Index position of the output item in the response
         part:
           oneOf:
             - $ref: '#/components/schemas/OpenAIResponseContentPartOutputText'
             - $ref: '#/components/schemas/OpenAIResponseContentPartRefusal'
+            - $ref: '#/components/schemas/OpenAIResponseContentPartReasoningText'
           discriminator:
             propertyName: type
             mapping:
               output_text: '#/components/schemas/OpenAIResponseContentPartOutputText'
               refusal: '#/components/schemas/OpenAIResponseContentPartRefusal'
+              reasoning_text: '#/components/schemas/OpenAIResponseContentPartReasoningText'
           description: The completed content part
         sequence_number:
           type: integer
@@ -7848,8 +7925,10 @@ components:
             Event type identifier, always "response.content_part.done"
       additionalProperties: false
       required:
+        - content_index
        - response_id
        - item_id
+        - output_index
        - part
        - sequence_number
        - type
@@ -7862,7 +7941,7 @@ components:
       properties:
         response:
           $ref: '#/components/schemas/OpenAIResponseObject'
-          description: The newly created response object
+          description: The response object that was created
         type:
           type: string
           const: response.created
@@ -7877,6 +7956,30 @@ components:
         OpenAIResponseObjectStreamResponseCreated
       description: >-
         Streaming event indicating a new response has been created.
+    OpenAIResponseObjectStreamResponseFailed:
+      type: object
+      properties:
+        response:
+          $ref: '#/components/schemas/OpenAIResponseObject'
+          description: Response object describing the failure
+        sequence_number:
+          type: integer
+          description: >-
+            Sequential number for ordering streaming events
+        type:
+          type: string
+          const: response.failed
+          default: response.failed
+          description: >-
+            Event type identifier, always "response.failed"
+      additionalProperties: false
+      required:
+        - response
+        - sequence_number
+        - type
+      title: OpenAIResponseObjectStreamResponseFailed
+      description: >-
+        Streaming event emitted when a response fails.
     "OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta":
       type: object
       properties:
@@ -7949,6 +8052,57 @@ components:
         OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone
       description: >-
         Streaming event for when function call arguments are completed.
+    "OpenAIResponseObjectStreamResponseInProgress":
+      type: object
+      properties:
+        response:
+          $ref: '#/components/schemas/OpenAIResponseObject'
+          description: Current response state while in progress
+        sequence_number:
+          type: integer
+          description: >-
+            Sequential number for ordering streaming events
+        type:
+          type: string
+          const: response.in_progress
+          default: response.in_progress
+          description: >-
+            Event type identifier, always "response.in_progress"
+      additionalProperties: false
+      required:
+        - response
+        - sequence_number
+        - type
+      title: >-
+        OpenAIResponseObjectStreamResponseInProgress
+      description: >-
+        Streaming event indicating the response remains in progress.
+    "OpenAIResponseObjectStreamResponseIncomplete":
+      type: object
+      properties:
+        response:
+          $ref: '#/components/schemas/OpenAIResponseObject'
+          description: >-
+            Response object describing the incomplete state
+        sequence_number:
+          type: integer
+          description: >-
+            Sequential number for ordering streaming events
+        type:
+          type: string
+          const: response.incomplete
+          default: response.incomplete
+          description: >-
+            Event type identifier, always "response.incomplete"
+      additionalProperties: false
+      required:
+        - response
+        - sequence_number
+        - type
+      title: >-
+        OpenAIResponseObjectStreamResponseIncomplete
+      description: >-
+        Streaming event emitted when a response ends in an incomplete state.
     "OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta":
       type: object
       properties:
diff --git a/llama_stack/apis/agents/openai_responses.py b/llama_stack/apis/agents/openai_responses.py
index 3fd08362c..17f95a6aa 100644
--- a/llama_stack/apis/agents/openai_responses.py
+++ b/llama_stack/apis/agents/openai_responses.py
@@ -438,7 +438,7 @@ class OpenAIDeleteResponseObject(BaseModel):
 class OpenAIResponseObjectStreamResponseCreated(BaseModel):
     """Streaming event indicating a new response has been created.
 
-    :param response: The newly created response object
+    :param response: The response object that was created
     :param type: Event type identifier, always "response.created"
     """
 
@@ -446,11 +446,25 @@ class OpenAIResponseObjectStreamResponseCreated(BaseModel):
     type: Literal["response.created"] = "response.created"
 
 
+@json_schema_type
+class OpenAIResponseObjectStreamResponseInProgress(BaseModel):
+    """Streaming event indicating the response remains in progress.
+
+    :param response: Current response state while in progress
+    :param sequence_number: Sequential number for ordering streaming events
+    :param type: Event type identifier, always "response.in_progress"
+    """
+
+    response: OpenAIResponseObject
+    sequence_number: int
+    type: Literal["response.in_progress"] = "response.in_progress"
+
+
 @json_schema_type
 class OpenAIResponseObjectStreamResponseCompleted(BaseModel):
     """Streaming event indicating a response has been completed.
 
-    :param response: The completed response object
+    :param response: Completed response object
     :param type: Event type identifier, always "response.completed"
     """
 
@@ -458,6 +472,34 @@ class OpenAIResponseObjectStreamResponseCompleted(BaseModel):
     type: Literal["response.completed"] = "response.completed"
 
 
+@json_schema_type
+class OpenAIResponseObjectStreamResponseIncomplete(BaseModel):
+    """Streaming event emitted when a response ends in an incomplete state.
+
+    :param response: Response object describing the incomplete state
+    :param sequence_number: Sequential number for ordering streaming events
+    :param type: Event type identifier, always "response.incomplete"
+    """
+
+    response: OpenAIResponseObject
+    sequence_number: int
+    type: Literal["response.incomplete"] = "response.incomplete"
+
+
+@json_schema_type
+class OpenAIResponseObjectStreamResponseFailed(BaseModel):
+    """Streaming event emitted when a response fails.
+
+    :param response: Response object describing the failure
+    :param sequence_number: Sequential number for ordering streaming events
+    :param type: Event type identifier, always "response.failed"
+    """
+
+    response: OpenAIResponseObject
+    sequence_number: int
+    type: Literal["response.failed"] = "response.failed"
+
+
 @json_schema_type
 class OpenAIResponseObjectStreamResponseOutputItemAdded(BaseModel):
     """Streaming event for when a new output item is added to the response.
@@ -688,19 +730,46 @@ class OpenAIResponseObjectStreamResponseMcpCallCompleted(BaseModel):
 
 @json_schema_type
 class OpenAIResponseContentPartOutputText(BaseModel):
+    """Text content within a streamed response part.
+
+    :param type: Content part type identifier, always "output_text"
+    :param text: Text emitted for this content part
+    :param annotations: Structured annotations associated with the text
+    :param logprobs: (Optional) Token log probability details
+    """
+
     type: Literal["output_text"] = "output_text"
     text: str
-    # TODO: add annotations, logprobs, etc.
+    annotations: list[OpenAIResponseAnnotations] = Field(default_factory=list)
+    logprobs: list[dict[str, Any]] | None = None
 
 
 @json_schema_type
 class OpenAIResponseContentPartRefusal(BaseModel):
+    """Refusal content within a streamed response part.
+
+    :param type: Content part type identifier, always "refusal"
+    :param refusal: Refusal text supplied by the model
+    """
+
     type: Literal["refusal"] = "refusal"
     refusal: str
 
 
+@json_schema_type
+class OpenAIResponseContentPartReasoningText(BaseModel):
+    """Reasoning text emitted as part of a streamed response.
+
+    :param type: Content part type identifier, always "reasoning_text"
+    :param text: Reasoning text supplied by the model
+    """
+
+    type: Literal["reasoning_text"] = "reasoning_text"
+    text: str
+
+
 OpenAIResponseContentPart = Annotated[
-    OpenAIResponseContentPartOutputText | OpenAIResponseContentPartRefusal,
+    OpenAIResponseContentPartOutputText | OpenAIResponseContentPartRefusal | OpenAIResponseContentPartReasoningText,
     Field(discriminator="type"),
 ]
 register_schema(OpenAIResponseContentPart, name="OpenAIResponseContentPart")
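The content-part union above uses pydantic's discriminated-union pattern: the literal `type` field routes validation to the right model. A minimal, self-contained sketch of the same pattern (assumes pydantic v2; the toy models below are not the llama_stack classes):

# Self-contained sketch of the Annotated discriminated-union pattern.
from typing import Annotated, Literal

from pydantic import BaseModel, Field, TypeAdapter


class OutputText(BaseModel):
    type: Literal["output_text"] = "output_text"
    text: str


class ReasoningText(BaseModel):
    type: Literal["reasoning_text"] = "reasoning_text"
    text: str


ContentPart = Annotated[OutputText | ReasoningText, Field(discriminator="type")]

# The "type" value selects the model, as with OpenAIResponseContentPart above.
part = TypeAdapter(ContentPart).validate_python({"type": "reasoning_text", "text": "thinking..."})
assert isinstance(part, ReasoningText)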
@@ -710,15 +779,19 @@ register_schema(OpenAIResponseContentPart, name="OpenAIResponseContentPart")
 class OpenAIResponseObjectStreamResponseContentPartAdded(BaseModel):
     """Streaming event for when a new content part is added to a response item.
 
+    :param content_index: Index position of the part within the content array
     :param response_id: Unique identifier of the response containing this content
     :param item_id: Unique identifier of the output item containing this content part
+    :param output_index: Index position of the output item in the response
     :param part: The content part that was added
     :param sequence_number: Sequential number for ordering streaming events
     :param type: Event type identifier, always "response.content_part.added"
     """
 
+    content_index: int
     response_id: str
     item_id: str
+    output_index: int
     part: OpenAIResponseContentPart
     sequence_number: int
     type: Literal["response.content_part.added"] = "response.content_part.added"
@@ -728,15 +801,19 @@ class OpenAIResponseObjectStreamResponseContentPartDone(BaseModel):
     """Streaming event for when a content part is completed.
 
+    :param content_index: Index position of the part within the content array
     :param response_id: Unique identifier of the response containing this content
     :param item_id: Unique identifier of the output item containing this content part
+    :param output_index: Index position of the output item in the response
     :param part: The completed content part
     :param sequence_number: Sequential number for ordering streaming events
     :param type: Event type identifier, always "response.content_part.done"
     """
 
+    content_index: int
     response_id: str
     item_id: str
+    output_index: int
     part: OpenAIResponseContentPart
     sequence_number: int
     type: Literal["response.content_part.done"] = "response.content_part.done"
 
@@ -744,6 +821,7 @@
 OpenAIResponseObjectStream = Annotated[
     OpenAIResponseObjectStreamResponseCreated
+    | OpenAIResponseObjectStreamResponseInProgress
     | OpenAIResponseObjectStreamResponseOutputItemAdded
     | OpenAIResponseObjectStreamResponseOutputItemDone
     | OpenAIResponseObjectStreamResponseOutputTextDelta
@@ -763,6 +841,8 @@ OpenAIResponseObjectStream = Annotated[
     | OpenAIResponseObjectStreamResponseMcpCallCompleted
     | OpenAIResponseObjectStreamResponseContentPartAdded
     | OpenAIResponseObjectStreamResponseContentPartDone
+    | OpenAIResponseObjectStreamResponseIncomplete
+    | OpenAIResponseObjectStreamResponseFailed
     | OpenAIResponseObjectStreamResponseCompleted,
     Field(discriminator="type"),
 ]
diff --git a/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py b/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py
index da8b01f40..a55aafecf 100644
--- a/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py
+++ b/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py
@@ -232,17 +232,33 @@ class OpenAIResponsesImpl:
         if stream:
             return stream_gen
         else:
-            response = None
-            async for stream_chunk in stream_gen:
-                if stream_chunk.type == "response.completed":
-                    if response is not None:
-                        raise ValueError("The response stream completed multiple times! Earlier response: {response}")
-                    response = stream_chunk.response
-                    # don't leave the generator half complete!
+            final_response = None
+            final_event_type = None
+            failed_response = None
 
-            if response is None:
-                raise ValueError("The response stream never completed")
-            return response
+            async for stream_chunk in stream_gen:
+                if stream_chunk.type in {"response.completed", "response.incomplete"}:
+                    if final_response is not None:
+                        raise ValueError(
+                            "The response stream produced multiple terminal responses! "
+                            f"Earlier response from {final_event_type}"
+                        )
+                    final_response = stream_chunk.response
+                    final_event_type = stream_chunk.type
+                elif stream_chunk.type == "response.failed":
+                    failed_response = stream_chunk.response
+
+            if failed_response is not None:
+                error_message = (
+                    failed_response.error.message
+                    if failed_response and failed_response.error
+                    else "Response stream failed without error details"
+                )
+                raise RuntimeError(f"OpenAI response failed: {error_message}")
+
+            if final_response is None:
+                raise ValueError("The response stream never reached a terminal state")
+            return final_response
 
     async def _create_streaming_response(
         self,
@@ -288,13 +304,16 @@ class OpenAIResponsesImpl:
 
         # Stream the response
         final_response = None
+        failed_response = None
         async for stream_chunk in orchestrator.create_response():
-            if stream_chunk.type == "response.completed":
+            if stream_chunk.type in {"response.completed", "response.incomplete"}:
                 final_response = stream_chunk.response
+            elif stream_chunk.type == "response.failed":
+                failed_response = stream_chunk.response
             yield stream_chunk
 
         # Store the response if requested
-        if store and final_response:
+        if store and final_response and failed_response is None:
             await self._store_response(
                 response=final_response,
                 input=all_input,
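The non-streaming path above drains the event stream and reduces it to a single terminal response, raising on failure. A standalone sketch of the same reduction, using an invented minimal event shape rather than the llama_stack types:

# Consumer-side sketch: resolve one terminal event from a drained stream,
# mirroring the completed/incomplete/failed handling above. Event is a toy type.
from dataclasses import dataclass

TERMINAL = {"response.completed", "response.incomplete", "response.failed"}


@dataclass
class Event:
    type: str
    response: dict


def resolve(events: list[Event]) -> dict:
    terminal = [e for e in events if e.type in TERMINAL]
    if not terminal:
        raise ValueError("stream never reached a terminal state")
    if len(terminal) > 1:
        raise ValueError("stream produced multiple terminal events")
    final = terminal[0]
    if final.type == "response.failed":
        raise RuntimeError(final.response.get("error", {}).get("message", "failed"))
    return final.response


events = [
    Event("response.created", {"status": "in_progress"}),
    Event("response.in_progress", {"status": "in_progress"}),
    Event("response.completed", {"status": "completed"}),
]
assert resolve(events)["status"] == "completed"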
diff --git a/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py b/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py
index 9487edc61..5580a0531 100644
--- a/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py
+++ b/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py
@@ -13,6 +13,7 @@ from llama_stack.apis.agents.openai_responses import (
     ApprovalFilter,
     MCPListToolsTool,
     OpenAIResponseContentPartOutputText,
+    OpenAIResponseError,
     OpenAIResponseInputTool,
     OpenAIResponseInputToolMCP,
     OpenAIResponseMCPApprovalRequest,
@@ -22,8 +23,11 @@ from llama_stack.apis.agents.openai_responses import (
     OpenAIResponseObjectStreamResponseContentPartAdded,
     OpenAIResponseObjectStreamResponseContentPartDone,
     OpenAIResponseObjectStreamResponseCreated,
+    OpenAIResponseObjectStreamResponseFailed,
     OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta,
     OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone,
+    OpenAIResponseObjectStreamResponseIncomplete,
+    OpenAIResponseObjectStreamResponseInProgress,
     OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta,
     OpenAIResponseObjectStreamResponseMcpCallArgumentsDone,
     OpenAIResponseObjectStreamResponseMcpListToolsCompleted,
@@ -101,21 +105,46 @@ class StreamingResponseOrchestrator:
         # mapping for annotations
         self.citation_files: dict[str, str] = {}
 
-    async def create_response(self) -> AsyncIterator[OpenAIResponseObjectStream]:
-        # Initialize output messages
-        output_messages: list[OpenAIResponseOutput] = []
-        # Create initial response and emit response.created immediately
-        initial_response = OpenAIResponseObject(
+    def _clone_outputs(self, outputs: list[OpenAIResponseOutput]) -> list[OpenAIResponseOutput]:
+        cloned: list[OpenAIResponseOutput] = []
+        for item in outputs:
+            if hasattr(item, "model_copy"):
+                cloned.append(item.model_copy(deep=True))
+            else:
+                cloned.append(item)
+        return cloned
+
+    def _snapshot_response(
+        self,
+        status: str,
+        outputs: list[OpenAIResponseOutput],
+        *,
+        error: OpenAIResponseError | None = None,
+    ) -> OpenAIResponseObject:
+        return OpenAIResponseObject(
             created_at=self.created_at,
             id=self.response_id,
             model=self.ctx.model,
             object="response",
-            status="in_progress",
-            output=output_messages.copy(),
+            status=status,
+            output=self._clone_outputs(outputs),
             text=self.text,
+            error=error,
         )
-        yield OpenAIResponseObjectStreamResponseCreated(response=initial_response)
 
+    async def create_response(self) -> AsyncIterator[OpenAIResponseObjectStream]:
+        output_messages: list[OpenAIResponseOutput] = []
+
+        # Emit response.created followed by response.in_progress to align with OpenAI streaming
+        yield OpenAIResponseObjectStreamResponseCreated(
+            response=self._snapshot_response("in_progress", output_messages)
+        )
+
+        self.sequence_number += 1
+        yield OpenAIResponseObjectStreamResponseInProgress(
+            response=self._snapshot_response("in_progress", output_messages),
+            sequence_number=self.sequence_number,
+        )
 
         # Process all tools (including MCP tools) and emit streaming events
         if self.ctx.response_tools:
@@ -124,87 +153,114 @@ class StreamingResponseOrchestrator:
 
         n_iter = 0
         messages = self.ctx.messages.copy()
+        final_status = "completed"
+        last_completion_result: ChatCompletionResult | None = None
 
-        while True:
-            # Text is the default response format for chat completion so don't need to pass it
-            # (some providers don't support non-empty response_format when tools are present)
-            response_format = None if self.ctx.response_format.type == "text" else self.ctx.response_format
-            logger.debug(f"calling openai_chat_completion with tools: {self.ctx.chat_tools}")
-            completion_result = await self.inference_api.openai_chat_completion(
-                model=self.ctx.model,
-                messages=messages,
-                tools=self.ctx.chat_tools,
-                stream=True,
-                temperature=self.ctx.temperature,
-                response_format=response_format,
-            )
+        try:
+            while True:
+                # Text is the default response format for chat completion so don't need to pass it
+                # (some providers don't support non-empty response_format when tools are present)
+                response_format = None if self.ctx.response_format.type == "text" else self.ctx.response_format
+                logger.debug(f"calling openai_chat_completion with tools: {self.ctx.chat_tools}")
+                completion_result = await self.inference_api.openai_chat_completion(
+                    model=self.ctx.model,
+                    messages=messages,
+                    tools=self.ctx.chat_tools,
+                    stream=True,
+                    temperature=self.ctx.temperature,
+                    response_format=response_format,
+                )
 
-            # Process streaming chunks and build complete response
-            completion_result_data = None
-            async for stream_event_or_result in self._process_streaming_chunks(completion_result, output_messages):
-                if isinstance(stream_event_or_result, ChatCompletionResult):
-                    completion_result_data = stream_event_or_result
-                else:
-                    yield stream_event_or_result
-            if not completion_result_data:
-                raise ValueError("Streaming chunk processor failed to return completion data")
-            current_response = self._build_chat_completion(completion_result_data)
+                # Process streaming chunks and build complete response
+                completion_result_data = None
+                async for stream_event_or_result in self._process_streaming_chunks(completion_result, output_messages):
+                    if isinstance(stream_event_or_result, ChatCompletionResult):
+                        completion_result_data = stream_event_or_result
+                    else:
+                        yield stream_event_or_result
+                if not completion_result_data:
+                    raise ValueError("Streaming chunk processor failed to return completion data")
+                last_completion_result = completion_result_data
+                current_response = self._build_chat_completion(completion_result_data)
 
-            function_tool_calls, non_function_tool_calls, approvals, next_turn_messages = self._separate_tool_calls(
-                current_response, messages
-            )
+                (
+                    function_tool_calls,
+                    non_function_tool_calls,
+                    approvals,
+                    next_turn_messages,
+                ) = self._separate_tool_calls(current_response, messages)
 
-            # add any approval requests required
-            for tool_call in approvals:
-                async for evt in self._add_mcp_approval_request(
-                    tool_call.function.name, tool_call.function.arguments, output_messages
+                # add any approval requests required
+                for tool_call in approvals:
+                    async for evt in self._add_mcp_approval_request(
+                        tool_call.function.name, tool_call.function.arguments, output_messages
+                    ):
+                        yield evt
+
+                # Handle choices with no tool calls
+                for choice in current_response.choices:
+                    if not (choice.message.tool_calls and self.ctx.response_tools):
+                        output_messages.append(
+                            await convert_chat_choice_to_response_message(
+                                choice,
+                                self.citation_files,
+                                message_id=completion_result_data.message_item_id,
+                            )
+                        )
+
+                # Execute tool calls and coordinate results
+                async for stream_event in self._coordinate_tool_execution(
+                    function_tool_calls,
+                    non_function_tool_calls,
+                    completion_result_data,
+                    output_messages,
+                    next_turn_messages,
                 ):
-                    yield evt
-
-            # Handle choices with no tool calls
-            for choice in current_response.choices:
-                if not (choice.message.tool_calls and self.ctx.response_tools):
-                    output_messages.append(await convert_chat_choice_to_response_message(choice, self.citation_files))
+                    yield stream_event
 
-            # Execute tool calls and coordinate results
-            async for stream_event in self._coordinate_tool_execution(
-                function_tool_calls,
-                non_function_tool_calls,
-                completion_result_data,
-                output_messages,
-                next_turn_messages,
-            ):
-                yield stream_event
+                messages = next_turn_messages
 
-            messages = next_turn_messages
+                if not function_tool_calls and not non_function_tool_calls:
+                    break
 
-            if not function_tool_calls and not non_function_tool_calls:
-                break
+                if function_tool_calls:
+                    logger.info("Exiting inference loop since there is a function (client-side) tool call")
+                    break
 
-            if function_tool_calls:
-                logger.info("Exiting inference loop since there is a function (client-side) tool call")
-                break
+                n_iter += 1
+                if n_iter >= self.max_infer_iters:
+                    logger.info(
+                        f"Exiting inference loop since iteration count({n_iter}) exceeds {self.max_infer_iters=}"
+                    )
+                    final_status = "incomplete"
+                    break
 
-            n_iter += 1
-            if n_iter >= self.max_infer_iters:
-                logger.info(f"Exiting inference loop since iteration count({n_iter}) exceeds {self.max_infer_iters=}")
-                break
+            if last_completion_result and last_completion_result.finish_reason == "length":
+                final_status = "incomplete"
+
+        except Exception as exc:  # noqa: BLE001
+            self.final_messages = messages.copy()
+            self.sequence_number += 1
+            error = OpenAIResponseError(code="internal_error", message=str(exc))
+            failure_response = self._snapshot_response("failed", output_messages, error=error)
+            yield OpenAIResponseObjectStreamResponseFailed(
+                response=failure_response,
+                sequence_number=self.sequence_number,
+            )
+            return
 
         self.final_messages = messages.copy()
-        # Create final response
-        final_response = OpenAIResponseObject(
-            created_at=self.created_at,
-            id=self.response_id,
-            model=self.ctx.model,
-            object="response",
-            status="completed",
-            text=self.text,
-            output=output_messages,
-        )
-
-        # Emit response.completed
-        yield OpenAIResponseObjectStreamResponseCompleted(response=final_response)
+        if final_status == "incomplete":
+            self.sequence_number += 1
+            final_response = self._snapshot_response("incomplete", output_messages)
+            yield OpenAIResponseObjectStreamResponseIncomplete(
+                response=final_response,
+                sequence_number=self.sequence_number,
+            )
+        else:
+            final_response = self._snapshot_response("completed", output_messages)
+            yield OpenAIResponseObjectStreamResponseCompleted(response=final_response)
 
     def _separate_tool_calls(self, current_response, messages) -> tuple[list, list, list, list]:
         """Separate tool calls into function and non-function categories."""
@@ -261,6 +317,8 @@ class StreamingResponseOrchestrator:
         tool_call_item_ids: dict[int, str] = {}
         # Track content parts for streaming events
         content_part_emitted = False
+        content_index = 0
+        message_output_index = len(output_messages)
 
         async for chunk in completion_result:
             chat_response_id = chunk.id
@@ -274,8 +332,10 @@
                         content_part_emitted = True
                         self.sequence_number += 1
                         yield OpenAIResponseObjectStreamResponseContentPartAdded(
+                            content_index=content_index,
                             response_id=self.response_id,
                             item_id=message_item_id,
+                            output_index=message_output_index,
                             part=OpenAIResponseContentPartOutputText(
                                 text="",  # Will be filled incrementally via text deltas
                             ),
                             sequence_number=self.sequence_number,
                         )
                     self.sequence_number += 1
                     yield OpenAIResponseObjectStreamResponseOutputTextDelta(
-                        content_index=0,
+                        content_index=content_index,
                         delta=chunk_choice.delta.content,
                         item_id=message_item_id,
-                        output_index=0,
+                        output_index=message_output_index,
                         sequence_number=self.sequence_number,
                     )
 
@@ -386,8 +446,10 @@ class StreamingResponseOrchestrator:
             final_text = "".join(chat_response_content)
             self.sequence_number += 1
             yield OpenAIResponseObjectStreamResponseContentPartDone(
+                content_index=content_index,
                 response_id=self.response_id,
                 item_id=message_item_id,
+                output_index=message_output_index,
                 part=OpenAIResponseContentPartOutputText(
                     text=final_text,
                 ),
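`_snapshot_response` deep-copies the accumulated outputs before embedding them in each event; a shallow reference would let later mutation of `output_messages` retroactively rewrite snapshots that were already emitted. A toy demonstration of the difference (assumes pydantic v2; the model below is not a llama_stack type):

# Why the snapshot helper calls model_copy(deep=True).
from pydantic import BaseModel


class Item(BaseModel):
    text: str


outputs: list[Item] = [Item(text="partial")]

shallow_snapshot = list(outputs)  # new list, but it aliases the same Item
deep_snapshot = [item.model_copy(deep=True) for item in outputs]

outputs[0].text = "final"

assert shallow_snapshot[0].text == "final"  # earlier "snapshot" silently changed
assert deep_snapshot[0].text == "partial"   # true point-in-time copy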
diff --git a/llama_stack/providers/inline/agents/meta_reference/responses/utils.py b/llama_stack/providers/inline/agents/meta_reference/responses/utils.py
index a3316a635..e67e9bdca 100644
--- a/llama_stack/providers/inline/agents/meta_reference/responses/utils.py
+++ b/llama_stack/providers/inline/agents/meta_reference/responses/utils.py
@@ -48,7 +48,10 @@ from llama_stack.apis.inference import (
 
 
 async def convert_chat_choice_to_response_message(
-    choice: OpenAIChoice, citation_files: dict[str, str] | None = None
+    choice: OpenAIChoice,
+    citation_files: dict[str, str] | None = None,
+    *,
+    message_id: str | None = None,
 ) -> OpenAIResponseMessage:
     """Convert an OpenAI Chat Completion choice into an OpenAI Response output message."""
     output_content = ""
@@ -64,7 +67,7 @@ async def convert_chat_choice_to_response_message(
     annotations, clean_text = _extract_citations_from_text(output_content, citation_files or {})
 
     return OpenAIResponseMessage(
-        id=f"msg_{uuid.uuid4()}",
+        id=message_id or f"msg_{uuid.uuid4()}",
         content=[OpenAIResponseOutputMessageContentOutputText(text=clean_text, annotations=annotations)],
         status="completed",
         role="assistant",
diff --git a/tests/integration/responses/streaming_assertions.py b/tests/integration/responses/streaming_assertions.py
index 4279ffbab..78dab2f5d 100644
--- a/tests/integration/responses/streaming_assertions.py
+++ b/tests/integration/responses/streaming_assertions.py
@@ -16,18 +16,19 @@ class StreamingValidator:
 
     def assert_basic_event_sequence(self):
         """Verify basic created -> completed event sequence."""
-        assert len(self.chunks) >= 2, f"Expected at least 2 chunks (created + completed), got {len(self.chunks)}"
+        assert len(self.chunks) >= 2, f"Expected at least 2 chunks (created + terminal), got {len(self.chunks)}"
         assert self.chunks[0].type == "response.created", (
             f"First chunk should be response.created, got {self.chunks[0].type}"
         )
-        assert self.chunks[-1].type == "response.completed", (
-            f"Last chunk should be response.completed, got {self.chunks[-1].type}"
+        assert any(t in self.event_types for t in ["response.completed", "response.incomplete", "response.failed"]), (
+            "Expected a terminal response event (completed, incomplete, or failed)"
         )
 
-        # Verify event order
+        terminal_types = ["response.completed", "response.incomplete", "response.failed"]
+        terminal_indices = [self.event_types.index(t) for t in terminal_types if t in self.event_types]
+        assert terminal_indices, "Expected at least one terminal event index"
         created_index = self.event_types.index("response.created")
-        completed_index = self.event_types.index("response.completed")
-        assert created_index < completed_index, "response.created should come before response.completed"
+        assert created_index < min(terminal_indices), "response.created should precede terminal events"
 
     def assert_response_consistency(self):
         """Verify response ID consistency across events."""
@@ -137,8 +138,23 @@ class StreamingValidator:
         for chunk in self.chunks:
             if chunk.type == "response.created":
                 assert chunk.response.status == "in_progress"
+            elif chunk.type == "response.in_progress":
+                assert chunk.response.status == "in_progress"
+                assert isinstance(chunk.sequence_number, int)
+            elif chunk.type == "response.incomplete":
+                assert chunk.response.status == "incomplete"
+                assert isinstance(chunk.sequence_number, int)
+            elif chunk.type == "response.failed":
+                assert chunk.response.status == "failed"
+                assert isinstance(chunk.sequence_number, int)
+                assert chunk.response.error is not None
             elif chunk.type == "response.completed":
                 assert chunk.response.status == "completed"
+            elif chunk.type in {"response.content_part.added", "response.content_part.done"}:
+                assert chunk.item_id, "Content part events should have non-empty item_id"
+                assert isinstance(chunk.content_index, int)
+                assert isinstance(chunk.output_index, int)
+                assert chunk.response_id, "Content part events should include response_id"
             elif hasattr(chunk, "item_id"):
                 assert chunk.item_id, "Events with item_id should have non-empty item_id"
             elif hasattr(chunk, "sequence_number"):
diff --git a/tests/unit/providers/agents/meta_reference/test_openai_responses.py b/tests/unit/providers/agents/meta_reference/test_openai_responses.py
index f2b29c1f7..033a33310 100644
--- a/tests/unit/providers/agents/meta_reference/test_openai_responses.py
+++ b/tests/unit/providers/agents/meta_reference/test_openai_responses.py
@@ -156,9 +156,10 @@ async def test_create_openai_response_with_string_input(openai_responses_impl, m
     )
 
     # Should have content part events for text streaming
-    # Expected: response.created, content_part.added, output_text.delta, content_part.done, response.completed
-    assert len(chunks) >= 4
+    # Expected: response.created, response.in_progress, content_part.added, output_text.delta, content_part.done, response.completed
+    assert len(chunks) >= 5
     assert chunks[0].type == "response.created"
+    assert any(chunk.type == "response.in_progress" for chunk in chunks)
 
     # Check for content part events
     content_part_added_events = [c for c in chunks if c.type == "response.content_part.added"]
@@ -169,6 +170,14 @@ async def test_create_openai_response_with_string_input(openai_responses_impl, m
     assert len(content_part_done_events) >= 1, "Should have content_part.done event for text"
     assert len(text_delta_events) >= 1, "Should have text delta events"
 
+    added_event = content_part_added_events[0]
+    done_event = content_part_done_events[0]
+    assert added_event.content_index == 0
+    assert done_event.content_index == 0
+    assert added_event.output_index == done_event.output_index == 0
+    assert added_event.item_id == done_event.item_id
+    assert added_event.response_id == done_event.response_id
+
     # Verify final event is completion
     assert chunks[-1].type == "response.completed"
 
@@ -177,6 +186,8 @@ async def test_create_openai_response_with_string_input(openai_responses_impl, m
     assert final_response.model == model
     assert len(final_response.output) == 1
     assert isinstance(final_response.output[0], OpenAIResponseMessage)
+    assert final_response.output[0].id == added_event.item_id
+    assert final_response.id == added_event.response_id
 
     openai_responses_impl.responses_store.store_response_object.assert_called_once()
     assert final_response.output[0].content[0].text == "Dublin"
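The tests above gather a streamed response with an async comprehension before asserting on it. A toy, fixture-free illustration of that collection pattern (the fake stream below is invented for the example):

# Collecting an async generator's chunks before asserting, as the tests do.
import asyncio


async def fake_stream():
    for event_type in ("response.created", "response.in_progress", "response.completed"):
        yield event_type


async def main() -> None:
    chunks = [chunk async for chunk in fake_stream()]
    assert chunks[0] == "response.created"
    assert chunks[-1] == "response.completed"


asyncio.run(main())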
@@ -303,9 +314,20 @@ async def test_create_openai_response_with_tool_call_type_none(openai_responses_
     chunks = [chunk async for chunk in result]
 
     # Verify event types
-    # Should have: response.created, output_item.added, function_call_arguments.delta,
-    # function_call_arguments.done, output_item.done, response.completed
-    assert len(chunks) == 6
+    # Should have: response.created, response.in_progress, output_item.added,
+    # function_call_arguments.delta, function_call_arguments.done, output_item.done, response.completed
+    assert len(chunks) == 7
+
+    event_types = [chunk.type for chunk in chunks]
+    assert event_types == [
+        "response.created",
+        "response.in_progress",
+        "response.output_item.added",
+        "response.function_call_arguments.delta",
+        "response.function_call_arguments.done",
+        "response.output_item.done",
+        "response.completed",
+    ]
 
     # Verify inference API was called correctly (after iterating over result)
     first_call = mock_inference_api.openai_chat_completion.call_args_list[0]
@@ -314,25 +336,19 @@ async def test_create_openai_response_with_tool_call_type_none(openai_responses_
     assert first_call.kwargs["temperature"] == 0.1
 
     # Check response.created event (should have empty output)
-    assert chunks[0].type == "response.created"
     assert len(chunks[0].response.output) == 0
 
-    # Check streaming events
-    assert chunks[1].type == "response.output_item.added"
-    assert chunks[2].type == "response.function_call_arguments.delta"
-    assert chunks[3].type == "response.function_call_arguments.done"
-    assert chunks[4].type == "response.output_item.done"
-
     # Check response.completed event (should have the tool call)
-    assert chunks[5].type == "response.completed"
-    assert len(chunks[5].response.output) == 1
-    assert chunks[5].response.output[0].type == "function_call"
-    assert chunks[5].response.output[0].name == "get_weather"
+    completed_chunk = chunks[-1]
+    assert completed_chunk.type == "response.completed"
+    assert len(completed_chunk.response.output) == 1
+    assert completed_chunk.response.output[0].type == "function_call"
+    assert completed_chunk.response.output[0].name == "get_weather"
 
 
 async def test_create_openai_response_with_tool_call_function_arguments_none(openai_responses_impl, mock_inference_api):
-    """Test creating an OpenAI response with a tool call response that has a function that does not accept arguments, or arguments set to None when they are not mandatory."""
-    # Setup
+    """Test creating an OpenAI response with tool calls that omit arguments."""
+
     input_text = "What is the time right now?"
     model = "meta-llama/Llama-3.1-8B-Instruct"
@@ -359,9 +375,21 @@ async def test_create_openai_response_with_tool_call_function_arguments_none(ope
                 object="chat.completion.chunk",
             )
 
-    mock_inference_api.openai_chat_completion.return_value = fake_stream_toolcall()
+    def assert_common_expectations(chunks) -> None:
+        first_call = mock_inference_api.openai_chat_completion.call_args_list[0]
+        assert first_call.kwargs["messages"][0].content == input_text
+        assert first_call.kwargs["tools"] is not None
+        assert first_call.kwargs["temperature"] == 0.1
+        assert len(chunks[0].response.output) == 0
+        completed_chunk = chunks[-1]
+        assert completed_chunk.type == "response.completed"
+        assert len(completed_chunk.response.output) == 1
+        assert completed_chunk.response.output[0].type == "function_call"
+        assert completed_chunk.response.output[0].name == "get_current_time"
+        assert completed_chunk.response.output[0].arguments == "{}"
 
+    # Function does not accept arguments
+    mock_inference_api.openai_chat_completion.return_value = fake_stream_toolcall()
     result = await openai_responses_impl.create_openai_response(
         input=input_text,
         model=model,
@@ -369,46 +397,23 @@ async def test_create_openai_response_with_tool_call_function_arguments_none(ope
         temperature=0.1,
         tools=[
             OpenAIResponseInputToolFunction(
-                name="get_current_time",
-                description="Get current time for system's timezone",
-                parameters={},
+                name="get_current_time", description="Get current time for system's timezone", parameters={}
             )
         ],
     )
-
-    # Check that we got the content from our mocked tool execution result
     chunks = [chunk async for chunk in result]
-
-    # Verify event types
-    # Should have: response.created, output_item.added, function_call_arguments.delta,
-    # function_call_arguments.done, output_item.done, response.completed
-    assert len(chunks) == 5
-
-    # Verify inference API was called correctly (after iterating over result)
-    first_call = mock_inference_api.openai_chat_completion.call_args_list[0]
-    assert first_call.kwargs["messages"][0].content == input_text
-    assert first_call.kwargs["tools"] is not None
-    assert first_call.kwargs["temperature"] == 0.1
-
-    # Check response.created event (should have empty output)
-    assert chunks[0].type == "response.created"
-    assert len(chunks[0].response.output) == 0
-
-    # Check streaming events
-    assert chunks[1].type == "response.output_item.added"
-    assert chunks[2].type == "response.function_call_arguments.done"
-    assert chunks[3].type == "response.output_item.done"
-
-    # Check response.completed event (should have the tool call with arguments set to "{}")
-    assert chunks[4].type == "response.completed"
-    assert len(chunks[4].response.output) == 1
-    assert chunks[4].response.output[0].type == "function_call"
-    assert chunks[4].response.output[0].name == "get_current_time"
-    assert chunks[4].response.output[0].arguments == "{}"
-
-    mock_inference_api.openai_chat_completion.return_value = fake_stream_toolcall()
+    assert [chunk.type for chunk in chunks] == [
+        "response.created",
+        "response.in_progress",
+        "response.output_item.added",
+        "response.function_call_arguments.done",
+        "response.output_item.done",
+        "response.completed",
+    ]
+    assert_common_expectations(chunks)
 
+    # Function accepts optional arguments
+    mock_inference_api.openai_chat_completion.return_value = fake_stream_toolcall()
     result = await openai_responses_impl.create_openai_response(
         input=input_text,
         model=model,
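An alternative shape for the three near-identical cases in this test (a sketch only, not part of the patch): pytest parametrization over the tool's parameter schema keeps setup and assertions in one place. The fixture names mirror the surrounding test and are assumed, an async test plugin (e.g. pytest-asyncio) is assumed, and the body is elided:

# Hypothetical refactor sketch of the cases below.
import pytest


@pytest.mark.parametrize(
    "parameters",
    [
        {},                                            # no arguments
        {"timezone": "string"},                        # one optional argument
        {"timezone": "string", "location": "string"},  # several optional arguments
    ],
)
async def test_tool_call_omits_arguments(openai_responses_impl, mock_inference_api, parameters):
    ...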
@@ -418,42 +423,47 @@
             OpenAIResponseInputToolFunction(
                 name="get_current_time",
                 description="Get current time for system's timezone",
-                parameters={
-                    "timezone": "string",
-                },
+                parameters={"timezone": "string"},
             )
         ],
     )
-
-    # Check that we got the content from our mocked tool execution result
     chunks = [chunk async for chunk in result]
+    assert [chunk.type for chunk in chunks] == [
+        "response.created",
+        "response.in_progress",
+        "response.output_item.added",
+        "response.function_call_arguments.done",
+        "response.output_item.done",
+        "response.completed",
+    ]
+    assert_common_expectations(chunks)
 
-    # Verify event types
-    # Should have: response.created, output_item.added, function_call_arguments.delta,
-    # function_call_arguments.done, output_item.done, response.completed
-    assert len(chunks) == 5
-
-    # Verify inference API was called correctly (after iterating over result)
-    first_call = mock_inference_api.openai_chat_completion.call_args_list[0]
-    assert first_call.kwargs["messages"][0].content == input_text
-    assert first_call.kwargs["tools"] is not None
-    assert first_call.kwargs["temperature"] == 0.1
-
-    # Check response.created event (should have empty output)
-    assert chunks[0].type == "response.created"
-    assert len(chunks[0].response.output) == 0
-
-    # Check streaming events
-    assert chunks[1].type == "response.output_item.added"
-    assert chunks[2].type == "response.function_call_arguments.done"
-    assert chunks[3].type == "response.output_item.done"
-
-    # Check response.completed event (should have the tool call with arguments set to "{}")
-    assert chunks[4].type == "response.completed"
-    assert len(chunks[4].response.output) == 1
-    assert chunks[4].response.output[0].type == "function_call"
-    assert chunks[4].response.output[0].name == "get_current_time"
-    assert chunks[4].response.output[0].arguments == "{}"
+    # Function accepts optional arguments with additional optional fields
+    mock_inference_api.openai_chat_completion.return_value = fake_stream_toolcall()
+    result = await openai_responses_impl.create_openai_response(
+        input=input_text,
+        model=model,
+        stream=True,
+        temperature=0.1,
+        tools=[
+            OpenAIResponseInputToolFunction(
+                name="get_current_time",
+                description="Get current time for system's timezone",
+                parameters={"timezone": "string", "location": "string"},
+            )
+        ],
+    )
+    chunks = [chunk async for chunk in result]
+    assert [chunk.type for chunk in chunks] == [
+        "response.created",
+        "response.in_progress",
+        "response.output_item.added",
+        "response.function_call_arguments.done",
+        "response.output_item.done",
+        "response.completed",
+    ]
+    assert_common_expectations(chunks)
+    mock_inference_api.openai_chat_completion.return_value = fake_stream_toolcall()
 
 
 async def test_create_openai_response_with_multiple_messages(openai_responses_impl, mock_inference_api):