fix: OpenAI API - together.ai extra usage chunks
This fixes an issue where, with some models (i.e., the Llama 4 models), together.ai sends a final usage chunk for streaming responses even if the user didn't ask to include usage. With this change, the OpenAI API verification tests now pass 100% when using Llama Stack as your API server and together.ai as the backend provider.

As part of this, I also cleaned up the streaming and non-streaming return types of the `openai_chat_completion` method to keep type checking happy.

Signed-off-by: Ben Browning <bbrownin@redhat.com>
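A minimal sketch of the kind of guard this fix implies, assuming a hypothetical wrapper around the provider's chunk stream (the name strip_unsolicited_usage and the chunk attributes are illustrative, not the actual Llama Stack code):

    from collections.abc import AsyncIterator
    from typing import Any


    async def strip_unsolicited_usage(
        chunks: AsyncIterator[Any], include_usage: bool
    ) -> AsyncIterator[Any]:
        """Drop a trailing usage-only chunk the caller never asked for.

        together.ai can emit one final chunk carrying only usage stats
        (no choices) even when stream_options.include_usage was not set.
        """
        async for chunk in chunks:
            usage_only = not chunk.choices and getattr(chunk, "usage", None)
            if usage_only and not include_usage:
                continue  # swallow the unsolicited usage chunk
            yield chunk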
parent a4b573d750
commit c014571258
12 changed files with 153 additions and 20 deletions
docs/_static/llama-stack-spec.html (vendored): 51 changes
@@ -3096,11 +3096,18 @@
       "post": {
         "responses": {
           "200": {
-            "description": "OK",
+            "description": "Response from an OpenAI-compatible chat completion request. **OR** Chunk from a streaming response to an OpenAI-compatible chat completion request.",
             "content": {
               "application/json": {
                 "schema": {
-                  "$ref": "#/components/schemas/OpenAIChatCompletion"
+                  "oneOf": [
+                    {
+                      "$ref": "#/components/schemas/OpenAIChatCompletion"
+                    },
+                    {
+                      "$ref": "#/components/schemas/OpenAIChatCompletionChunk"
+                    }
+                  ]
                 }
               }
             }
@@ -9506,6 +9513,46 @@
       "title": "OpenAIChatCompletion",
       "description": "Response from an OpenAI-compatible chat completion request."
     },
+    "OpenAIChatCompletionChunk": {
+      "type": "object",
+      "properties": {
+        "id": {
+          "type": "string",
+          "description": "The ID of the chat completion"
+        },
+        "choices": {
+          "type": "array",
+          "items": {
+            "$ref": "#/components/schemas/OpenAIChoice"
+          },
+          "description": "List of choices"
+        },
+        "object": {
+          "type": "string",
+          "const": "chat.completion.chunk",
+          "default": "chat.completion.chunk",
+          "description": "The object type, which will be \"chat.completion.chunk\""
+        },
+        "created": {
+          "type": "integer",
+          "description": "The Unix timestamp in seconds when the chat completion was created"
+        },
+        "model": {
+          "type": "string",
+          "description": "The model that was used to generate the chat completion"
+        }
+      },
+      "additionalProperties": false,
+      "required": [
+        "id",
+        "choices",
+        "object",
+        "created",
+        "model"
+      ],
+      "title": "OpenAIChatCompletionChunk",
+      "description": "Chunk from a streaming response to an OpenAI-compatible chat completion request."
+    },
     "OpenAIChoice": {
       "type": "object",
       "properties": {
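For orientation, here is an illustrative payload that satisfies the new OpenAIChatCompletionChunk schema; the id, timestamp, and model values are made up, and choices is left empty because the OpenAIChoice schema is not part of this hunk:

    # All five required fields are present, and "object" carries its
    # constant value "chat.completion.chunk".
    example_chunk = {
        "id": "chatcmpl-123",              # illustrative ID
        "choices": [],                     # items would follow OpenAIChoice
        "object": "chat.completion.chunk",
        "created": 1713100000,             # Unix timestamp in seconds
        "model": "example-model",          # illustrative model name
    }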
docs/_static/llama-stack-spec.yaml (vendored): 43 changes
@@ -2135,11 +2135,15 @@ paths:
     post:
       responses:
         '200':
-          description: OK
+          description: >-
+            Response from an OpenAI-compatible chat completion request. **OR** Chunk
+            from a streaming response to an OpenAI-compatible chat completion request.
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/OpenAIChatCompletion'
+                oneOf:
+                  - $ref: '#/components/schemas/OpenAIChatCompletion'
+                  - $ref: '#/components/schemas/OpenAIChatCompletionChunk'
         '400':
           $ref: '#/components/responses/BadRequest400'
         '429':
@@ -6507,6 +6511,41 @@ components:
       title: OpenAIChatCompletion
       description: >-
         Response from an OpenAI-compatible chat completion request.
+    OpenAIChatCompletionChunk:
+      type: object
+      properties:
+        id:
+          type: string
+          description: The ID of the chat completion
+        choices:
+          type: array
+          items:
+            $ref: '#/components/schemas/OpenAIChoice'
+          description: List of choices
+        object:
+          type: string
+          const: chat.completion.chunk
+          default: chat.completion.chunk
+          description: >-
+            The object type, which will be "chat.completion.chunk"
+        created:
+          type: integer
+          description: >-
+            The Unix timestamp in seconds when the chat completion was created
+        model:
+          type: string
+          description: >-
+            The model that was used to generate the chat completion
+      additionalProperties: false
+      required:
+        - id
+        - choices
+        - object
+        - created
+        - model
+      title: OpenAIChatCompletionChunk
+      description: >-
+        Chunk from a streaming response to an OpenAI-compatible chat completion request.
     OpenAIChoice:
       type: object
       properties:
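The oneOf above is the spec-level counterpart of the return-type cleanup mentioned in the commit message. A sketch of what such a union signature can look like, with placeholder types standing in for the real models and the request parameters elided:

    from collections.abc import AsyncIterator


    class OpenAIChatCompletion: ...        # placeholder for the real model
    class OpenAIChatCompletionChunk: ...   # placeholder for the real model


    async def openai_chat_completion(
        stream: bool = False,
    ) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]:
        # Non-streaming calls return one completion object; streaming calls
        # return an async iterator of chunks, matching the oneOf above.
        ...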