mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-07-19 03:10:03 +00:00
feat(responses): add output_text delta events to responses (#2265)
This adds initial streaming support to the Responses API. This PR makes sure that the _first_ inference call made to chat completions streams out. There's more to be done: - tool call output tokens need to stream out when possible - we need to loop through multiple rounds of inference and they all need to stream out. ## Test Plan Added a test. Executed as: ``` FIREWORKS_API_KEY=... \ pytest -s -v 'tests/verifications/openai_api/test_responses.py' \ --provider=stack:fireworks --model meta-llama/Llama-4-Scout-17B-16E-Instruct ``` Then, started a llama stack fireworks distro and tested against it like this: ``` OPENAI_API_KEY=blah \ pytest -s -v 'tests/verifications/openai_api/test_responses.py' \ --base-url http://localhost:8321/v1/openai/v1 \ --model meta-llama/Llama-4-Scout-17B-16E-Instruct ```
This commit is contained in:
parent
6ee319ae08
commit
5cdb29758a
8 changed files with 493 additions and 160 deletions
29
docs/_static/llama-stack-spec.yaml
vendored
29
docs/_static/llama-stack-spec.yaml
vendored
|
@ -5294,11 +5294,13 @@ components:
|
|||
OpenAIResponseObjectStream:
|
||||
oneOf:
|
||||
- $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseCreated'
|
||||
- $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDelta'
|
||||
- $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseCompleted'
|
||||
discriminator:
|
||||
propertyName: type
|
||||
mapping:
|
||||
response.created: '#/components/schemas/OpenAIResponseObjectStreamResponseCreated'
|
||||
response.output_text.delta: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDelta'
|
||||
response.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseCompleted'
|
||||
"OpenAIResponseObjectStreamResponseCompleted":
|
||||
type: object
|
||||
|
@ -5330,6 +5332,33 @@ components:
|
|||
- type
|
||||
title: >-
|
||||
OpenAIResponseObjectStreamResponseCreated
|
||||
"OpenAIResponseObjectStreamResponseOutputTextDelta":
|
||||
type: object
|
||||
properties:
|
||||
content_index:
|
||||
type: integer
|
||||
delta:
|
||||
type: string
|
||||
item_id:
|
||||
type: string
|
||||
output_index:
|
||||
type: integer
|
||||
sequence_number:
|
||||
type: integer
|
||||
type:
|
||||
type: string
|
||||
const: response.output_text.delta
|
||||
default: response.output_text.delta
|
||||
additionalProperties: false
|
||||
required:
|
||||
- content_index
|
||||
- delta
|
||||
- item_id
|
||||
- output_index
|
||||
- sequence_number
|
||||
- type
|
||||
title: >-
|
||||
OpenAIResponseObjectStreamResponseOutputTextDelta
|
||||
CreateUploadSessionRequest:
|
||||
type: object
|
||||
properties:
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue