mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-06-27 18:50:41 +00:00
feat(responses): add output_text delta events to responses (#2265)
This adds initial streaming support to the Responses API. This PR makes sure that the _first_ inference call made to chat completions streams out.

There's more to be done:

- tool call output tokens need to stream out when possible
- we need to loop through multiple rounds of inference, and they all need to stream out.

## Test Plan

Added a test. Executed as:

```
FIREWORKS_API_KEY=... \
  pytest -s -v 'tests/verifications/openai_api/test_responses.py' \
  --provider=stack:fireworks --model meta-llama/Llama-4-Scout-17B-16E-Instruct
```

Then, started a llama stack fireworks distro and tested against it like this:

```
OPENAI_API_KEY=blah \
  pytest -s -v 'tests/verifications/openai_api/test_responses.py' \
  --base-url http://localhost:8321/v1/openai/v1 \
  --model meta-llama/Llama-4-Scout-17B-16E-Instruct
```
This commit is contained in:
parent
6ee319ae08
commit
5cdb29758a
8 changed files with 493 additions and 160 deletions
39
docs/_static/llama-stack-spec.html
vendored
39
docs/_static/llama-stack-spec.html
vendored
|
@ -7540,6 +7540,9 @@
|
|||
{
|
||||
"$ref": "#/components/schemas/OpenAIResponseObjectStreamResponseCreated"
|
||||
},
|
||||
{
|
||||
"$ref": "#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDelta"
|
||||
},
|
||||
{
|
||||
"$ref": "#/components/schemas/OpenAIResponseObjectStreamResponseCompleted"
|
||||
}
|
||||
|
@ -7548,6 +7551,7 @@
|
|||
"propertyName": "type",
|
||||
"mapping": {
|
||||
"response.created": "#/components/schemas/OpenAIResponseObjectStreamResponseCreated",
|
||||
"response.output_text.delta": "#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDelta",
|
||||
"response.completed": "#/components/schemas/OpenAIResponseObjectStreamResponseCompleted"
|
||||
}
|
||||
}
|
||||
|
@ -7590,6 +7594,41 @@
|
|||
],
|
||||
"title": "OpenAIResponseObjectStreamResponseCreated"
|
||||
},
|
||||
"OpenAIResponseObjectStreamResponseOutputTextDelta": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"content_index": {
|
||||
"type": "integer"
|
||||
},
|
||||
"delta": {
|
||||
"type": "string"
|
||||
},
|
||||
"item_id": {
|
||||
"type": "string"
|
||||
},
|
||||
"output_index": {
|
||||
"type": "integer"
|
||||
},
|
||||
"sequence_number": {
|
||||
"type": "integer"
|
||||
},
|
||||
"type": {
|
||||
"type": "string",
|
||||
"const": "response.output_text.delta",
|
||||
"default": "response.output_text.delta"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"content_index",
|
||||
"delta",
|
||||
"item_id",
|
||||
"output_index",
|
||||
"sequence_number",
|
||||
"type"
|
||||
],
|
||||
"title": "OpenAIResponseObjectStreamResponseOutputTextDelta"
|
||||
},
|
||||
"CreateUploadSessionRequest": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
|
|
29
docs/_static/llama-stack-spec.yaml
vendored
29
docs/_static/llama-stack-spec.yaml
vendored
|
@ -5294,11 +5294,13 @@ components:
|
|||
OpenAIResponseObjectStream:
|
||||
oneOf:
|
||||
- $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseCreated'
|
||||
- $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDelta'
|
||||
- $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseCompleted'
|
||||
discriminator:
|
||||
propertyName: type
|
||||
mapping:
|
||||
response.created: '#/components/schemas/OpenAIResponseObjectStreamResponseCreated'
|
||||
response.output_text.delta: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDelta'
|
||||
response.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseCompleted'
|
||||
"OpenAIResponseObjectStreamResponseCompleted":
|
||||
type: object
|
||||
|
@ -5330,6 +5332,33 @@ components:
|
|||
- type
|
||||
title: >-
|
||||
OpenAIResponseObjectStreamResponseCreated
|
||||
"OpenAIResponseObjectStreamResponseOutputTextDelta":
|
||||
type: object
|
||||
properties:
|
||||
content_index:
|
||||
type: integer
|
||||
delta:
|
||||
type: string
|
||||
item_id:
|
||||
type: string
|
||||
output_index:
|
||||
type: integer
|
||||
sequence_number:
|
||||
type: integer
|
||||
type:
|
||||
type: string
|
||||
const: response.output_text.delta
|
||||
default: response.output_text.delta
|
||||
additionalProperties: false
|
||||
required:
|
||||
- content_index
|
||||
- delta
|
||||
- item_id
|
||||
- output_index
|
||||
- sequence_number
|
||||
- type
|
||||
title: >-
|
||||
OpenAIResponseObjectStreamResponseOutputTextDelta
|
||||
CreateUploadSessionRequest:
|
||||
type: object
|
||||
properties:
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue