feat(responses): add output_text delta events to responses (#2265)

This adds initial streaming support to the Responses API. 

This PR makes sure that the _first_ inference call made to chat
completions streams out.

There's more to be done:
 - tool call output tokens need to stream out when possible
- we need to loop through multiple rounds of inference and they all need
to stream out.

## Test Plan

Added a test. Executed as:

```
FIREWORKS_API_KEY=... \
  pytest -s -v 'tests/verifications/openai_api/test_responses.py' \
  --provider=stack:fireworks --model meta-llama/Llama-4-Scout-17B-16E-Instruct
```

Then, started a llama stack fireworks distro and tested against it like
this:

```
OPENAI_API_KEY=blah \
   pytest -s -v 'tests/verifications/openai_api/test_responses.py' \
   --base-url http://localhost:8321/v1/openai/v1 \
  --model meta-llama/Llama-4-Scout-17B-16E-Instruct 
```
This commit is contained in:
Ashwin Bharambe 2025-05-27 13:07:14 -07:00 committed by GitHub
parent 6ee319ae08
commit 5cdb29758a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 493 additions and 160 deletions

View file

@ -7540,6 +7540,9 @@
{
"$ref": "#/components/schemas/OpenAIResponseObjectStreamResponseCreated"
},
{
"$ref": "#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDelta"
},
{
"$ref": "#/components/schemas/OpenAIResponseObjectStreamResponseCompleted"
}
@ -7548,6 +7551,7 @@
"propertyName": "type",
"mapping": {
"response.created": "#/components/schemas/OpenAIResponseObjectStreamResponseCreated",
"response.output_text.delta": "#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDelta",
"response.completed": "#/components/schemas/OpenAIResponseObjectStreamResponseCompleted"
}
}
@ -7590,6 +7594,41 @@
],
"title": "OpenAIResponseObjectStreamResponseCreated"
},
"OpenAIResponseObjectStreamResponseOutputTextDelta": {
"type": "object",
"properties": {
"content_index": {
"type": "integer"
},
"delta": {
"type": "string"
},
"item_id": {
"type": "string"
},
"output_index": {
"type": "integer"
},
"sequence_number": {
"type": "integer"
},
"type": {
"type": "string",
"const": "response.output_text.delta",
"default": "response.output_text.delta"
}
},
"additionalProperties": false,
"required": [
"content_index",
"delta",
"item_id",
"output_index",
"sequence_number",
"type"
],
"title": "OpenAIResponseObjectStreamResponseOutputTextDelta"
},
"CreateUploadSessionRequest": {
"type": "object",
"properties": {