fix: OpenAI API - together.ai extra usage chunks

This fixes an issue where, with some models (e.g., the Llama 4 models),
together.ai sends a final usage chunk in streaming responses even when
the user did not ask for usage to be included.
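One way to handle this is to filter the stream before forwarding it. The
sketch below is only illustrative of that approach, under assumed helper
and attribute names; it is not necessarily the exact shape of this change:

    # Hedged sketch: forward together.ai's trailing usage-only chunk only
    # if the caller opted in via stream_options={"include_usage": True}.
    # `filter_spurious_usage_chunks` is an illustrative name, not an
    # actual helper in the provider.
    async def filter_spurious_usage_chunks(stream, stream_options):
        include_usage = bool(stream_options and stream_options.get("include_usage"))
        async for chunk in stream:
            # The extra chunk has an empty choices list and carries only
            # usage stats; drop it unless usage was requested.
            if not include_usage and not chunk.choices and getattr(chunk, "usage", None):
                continue
            yield chunk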

With this change, the OpenAI API verification tests now pass 100% when
using Llama Stack as your API server and together.ai as the backend
provider.

As part of this, I also cleaned up the streaming/non-streaming return
types of the `openai_chat_completion` method to keep type checking happy.
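In simplified form, the union the type checker now sees looks roughly like
this (a sketch only; the parameter list is elided and the return annotation
is the point):

    from typing import AsyncIterator, Union

    # Sketch: non-streaming calls return a full completion object, while
    # streaming calls return an async iterator of chunks.
    async def openai_chat_completion(
        self, **params
    ) -> Union["OpenAIChatCompletion", AsyncIterator["OpenAIChatCompletionChunk"]]:
        ...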

Signed-off-by: Ben Browning <bbrownin@redhat.com>
commit c014571258 (parent a4b573d750)
Ben Browning, 2025-04-12 17:27:43 -04:00
12 changed files with 153 additions and 20 deletions


@@ -3096,11 +3096,18 @@
       "post": {
         "responses": {
           "200": {
-            "description": "OK",
+            "description": "Response from an OpenAI-compatible chat completion request. **OR** Chunk from a streaming response to an OpenAI-compatible chat completion request.",
             "content": {
               "application/json": {
                 "schema": {
-                  "$ref": "#/components/schemas/OpenAIChatCompletion"
+                  "oneOf": [
+                    {
+                      "$ref": "#/components/schemas/OpenAIChatCompletion"
+                    },
+                    {
+                      "$ref": "#/components/schemas/OpenAIChatCompletionChunk"
+                    }
+                  ]
                 }
               }
             }
@@ -9506,6 +9513,46 @@
       "title": "OpenAIChatCompletion",
       "description": "Response from an OpenAI-compatible chat completion request."
     },
+    "OpenAIChatCompletionChunk": {
+      "type": "object",
+      "properties": {
+        "id": {
+          "type": "string",
+          "description": "The ID of the chat completion"
+        },
+        "choices": {
+          "type": "array",
+          "items": {
+            "$ref": "#/components/schemas/OpenAIChoice"
+          },
+          "description": "List of choices"
+        },
+        "object": {
+          "type": "string",
+          "const": "chat.completion.chunk",
+          "default": "chat.completion.chunk",
+          "description": "The object type, which will be \"chat.completion.chunk\""
+        },
+        "created": {
+          "type": "integer",
+          "description": "The Unix timestamp in seconds when the chat completion was created"
+        },
+        "model": {
+          "type": "string",
+          "description": "The model that was used to generate the chat completion"
+        }
+      },
+      "additionalProperties": false,
+      "required": [
+        "id",
+        "choices",
+        "object",
+        "created",
+        "model"
+      ],
+      "title": "OpenAIChatCompletionChunk",
+      "description": "Chunk from a streaming response to an OpenAI-compatible chat completion request."
+    },
     "OpenAIChoice": {
       "type": "object",
       "properties": {


@@ -2135,11 +2135,15 @@ paths:
     post:
       responses:
         '200':
-          description: OK
+          description: >-
+            Response from an OpenAI-compatible chat completion request. **OR** Chunk
+            from a streaming response to an OpenAI-compatible chat completion request.
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/OpenAIChatCompletion'
+                oneOf:
+                  - $ref: '#/components/schemas/OpenAIChatCompletion'
+                  - $ref: '#/components/schemas/OpenAIChatCompletionChunk'
         '400':
           $ref: '#/components/responses/BadRequest400'
         '429':
@@ -6507,6 +6511,41 @@ components:
       title: OpenAIChatCompletion
       description: >-
         Response from an OpenAI-compatible chat completion request.
+    OpenAIChatCompletionChunk:
+      type: object
+      properties:
+        id:
+          type: string
+          description: The ID of the chat completion
+        choices:
+          type: array
+          items:
+            $ref: '#/components/schemas/OpenAIChoice'
+          description: List of choices
+        object:
+          type: string
+          const: chat.completion.chunk
+          default: chat.completion.chunk
+          description: >-
+            The object type, which will be "chat.completion.chunk"
+        created:
+          type: integer
+          description: >-
+            The Unix timestamp in seconds when the chat completion was created
+        model:
+          type: string
+          description: >-
+            The model that was used to generate the chat completion
+      additionalProperties: false
+      required:
+        - id
+        - choices
+        - object
+        - created
+        - model
+      title: OpenAIChatCompletionChunk
+      description: >-
+        Chunk from a streaming response to an OpenAI-compatible chat completion request.
     OpenAIChoice:
       type: object
       properties: