Improve groq OpenAI API compatibility

This doesn't get Groq to 100% on the OpenAI API verification tests,
but it does get it to 88.2% when Llama Stack is in the middle,
compared to 61.8% when using an OpenAI client against Groq
directly.

The groq provider doesn't use litellm under the covers in its
openai_chat_completion endpoint; instead, it uses an AsyncOpenAI
client directly, with special handling to improve the conformance
of responses for response_format usage and tool calling.

Signed-off-by: Ben Browning <bbrownin@redhat.com>
Author: Ben Browning
Date:   2025-04-13 13:35:53 -04:00
Commit: 8a1c0a1008
Parent: 657bb12e85
16 changed files with 418 additions and 45 deletions
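
As a point of reference, the pattern described in the message (an AsyncOpenAI client pointed directly at Groq's OpenAI-compatible endpoint, with fixups applied around the call) looks roughly like the sketch below. This is illustrative only and not the code from this commit; the base URL, environment variable, and the placeholder fixup are assumptions.

import os

from openai import AsyncOpenAI

# Assumed Groq OpenAI-compatible endpoint; the provider's real configuration
# is not shown in this commit message.
client = AsyncOpenAI(
    base_url="https://api.groq.com/openai/v1",
    api_key=os.environ.get("GROQ_API_KEY", ""),
)


async def openai_chat_completion(model: str, messages: list[dict], **params):
    # Hypothetical pre-flight fixup: drop unset parameters before forwarding.
    # The commit's real special handling targets response_format and tool
    # calling; those details live in the groq provider, not in this sketch.
    params = {k: v for k, v in params.items() if v is not None}
    return await client.chat.completions.create(
        model=model, messages=messages, **params
    )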

Changed file: OpenAPI spec (JSON)

@@ -8923,6 +8923,9 @@
"OpenAIChatCompletionToolCall": {
"type": "object",
"properties": {
"index": {
"type": "integer"
},
"id": {
"type": "string"
},
@@ -8937,9 +8940,7 @@
},
"additionalProperties": false,
"required": [
"id",
"type",
"function"
"type"
],
"title": "OpenAIChatCompletionToolCall"
},
@@ -8954,10 +8955,6 @@
}
},
"additionalProperties": false,
"required": [
"name",
"arguments"
],
"title": "OpenAIChatCompletionToolCallFunction"
},
"OpenAIDeveloperMessageParam": {
@@ -9563,7 +9560,7 @@
"choices": {
"type": "array",
"items": {
"$ref": "#/components/schemas/OpenAIChoice"
"$ref": "#/components/schemas/OpenAIChunkChoice"
},
"description": "List of choices"
},
@@ -9605,10 +9602,12 @@
"description": "The reason the model stopped generating"
},
"index": {
"type": "integer"
"type": "integer",
"description": "The index of the choice"
},
"logprobs": {
"$ref": "#/components/schemas/OpenAIChoiceLogprobs"
"$ref": "#/components/schemas/OpenAIChoiceLogprobs",
"description": "(Optional) The log probabilities for the tokens in the message"
}
},
"additionalProperties": false,
@@ -9620,6 +9619,33 @@
"title": "OpenAIChoice",
"description": "A choice from an OpenAI-compatible chat completion response."
},
"OpenAIChoiceDelta": {
"type": "object",
"properties": {
"content": {
"type": "string",
"description": "(Optional) The content of the delta"
},
"refusal": {
"type": "string",
"description": "(Optional) The refusal of the delta"
},
"role": {
"type": "string",
"description": "(Optional) The role of the delta"
},
"tool_calls": {
"type": "array",
"items": {
"$ref": "#/components/schemas/OpenAIChatCompletionToolCall"
},
"description": "(Optional) The tool calls of the delta"
}
},
"additionalProperties": false,
"title": "OpenAIChoiceDelta",
"description": "A delta from an OpenAI-compatible chat completion streaming response."
},
"OpenAIChoiceLogprobs": {
"type": "object",
"properties": {
@@ -9627,19 +9653,50 @@
"type": "array",
"items": {
"$ref": "#/components/schemas/OpenAITokenLogProb"
}
},
"description": "(Optional) The log probabilities for the tokens in the message"
},
"refusal": {
"type": "array",
"items": {
"$ref": "#/components/schemas/OpenAITokenLogProb"
}
},
"description": "(Optional) The log probabilities for the tokens in the message"
}
},
"additionalProperties": false,
"title": "OpenAIChoiceLogprobs",
"description": "The log probabilities for the tokens in the message from an OpenAI-compatible chat completion response."
},
"OpenAIChunkChoice": {
"type": "object",
"properties": {
"delta": {
"$ref": "#/components/schemas/OpenAIChoiceDelta",
"description": "The delta from the chunk"
},
"finish_reason": {
"type": "string",
"description": "The reason the model stopped generating"
},
"index": {
"type": "integer",
"description": "The index of the choice"
},
"logprobs": {
"$ref": "#/components/schemas/OpenAIChoiceLogprobs",
"description": "(Optional) The log probabilities for the tokens in the message"
}
},
"additionalProperties": false,
"required": [
"delta",
"finish_reason",
"index"
],
"title": "OpenAIChunkChoice",
"description": "A chunk choice from an OpenAI-compatible chat completion streaming response."
},
"OpenAITokenLogProb": {
"type": "object",
"properties": {

Changed file: OpenAPI spec (YAML)

@@ -6127,6 +6127,8 @@ components:
OpenAIChatCompletionToolCall:
type: object
properties:
index:
type: integer
id:
type: string
type:
@@ -6137,9 +6139,7 @@
$ref: '#/components/schemas/OpenAIChatCompletionToolCallFunction'
additionalProperties: false
required:
- id
- type
- function
title: OpenAIChatCompletionToolCall
OpenAIChatCompletionToolCallFunction:
type: object
@@ -6149,9 +6149,6 @@
arguments:
type: string
additionalProperties: false
required:
- name
- arguments
title: OpenAIChatCompletionToolCallFunction
OpenAIDeveloperMessageParam:
type: object
@@ -6550,7 +6547,7 @@
choices:
type: array
items:
$ref: '#/components/schemas/OpenAIChoice'
$ref: '#/components/schemas/OpenAIChunkChoice'
description: List of choices
object:
type: string
@@ -6587,8 +6584,11 @@
description: The reason the model stopped generating
index:
type: integer
description: The index of the choice
logprobs:
$ref: '#/components/schemas/OpenAIChoiceLogprobs'
description: >-
(Optional) The log probabilities for the tokens in the message
additionalProperties: false
required:
- message
@@ -6597,6 +6597,27 @@
title: OpenAIChoice
description: >-
A choice from an OpenAI-compatible chat completion response.
OpenAIChoiceDelta:
type: object
properties:
content:
type: string
description: (Optional) The content of the delta
refusal:
type: string
description: (Optional) The refusal of the delta
role:
type: string
description: (Optional) The role of the delta
tool_calls:
type: array
items:
$ref: '#/components/schemas/OpenAIChatCompletionToolCall'
description: (Optional) The tool calls of the delta
additionalProperties: false
title: OpenAIChoiceDelta
description: >-
A delta from an OpenAI-compatible chat completion streaming response.
OpenAIChoiceLogprobs:
type: object
properties:
@@ -6604,15 +6625,43 @@
type: array
items:
$ref: '#/components/schemas/OpenAITokenLogProb'
description: >-
(Optional) The log probabilities for the tokens in the message
refusal:
type: array
items:
$ref: '#/components/schemas/OpenAITokenLogProb'
description: >-
(Optional) The log probabilities for the tokens in the message
additionalProperties: false
title: OpenAIChoiceLogprobs
description: >-
The log probabilities for the tokens in the message from an OpenAI-compatible
chat completion response.
OpenAIChunkChoice:
type: object
properties:
delta:
$ref: '#/components/schemas/OpenAIChoiceDelta'
description: The delta from the chunk
finish_reason:
type: string
description: The reason the model stopped generating
index:
type: integer
description: The index of the choice
logprobs:
$ref: '#/components/schemas/OpenAIChoiceLogprobs'
description: >-
(Optional) The log probabilities for the tokens in the message
additionalProperties: false
required:
- delta
- finish_reason
- index
title: OpenAIChunkChoice
description: >-
A chunk choice from an OpenAI-compatible chat completion streaming response.
OpenAITokenLogProb:
type: object
properties:
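
OpenAIChoiceDelta reuses OpenAIChatCompletionToolCall for its tool_calls, which is presumably why index was added and id/function made optional: in a streamed response each chunk's delta may carry only a fragment of a tool call, keyed by index, with the id, name, and argument text filled in across several chunks. The sketch below shows one way a client might stitch those fragments back together; the base URL, model id, and tool definition are placeholders, not values taken from this commit.

import asyncio
import os

from openai import AsyncOpenAI

# Placeholder tool definition in the standard OpenAI tools format.
tools = [{
    "type": "function",
    "function": {
        "name": "get_weather",
        "description": "Get the current weather for a city",
        "parameters": {
            "type": "object",
            "properties": {"city": {"type": "string"}},
            "required": ["city"],
        },
    },
}]


async def main() -> None:
    client = AsyncOpenAI(
        # Assumed Llama Stack OpenAI-compatible base URL on the default port.
        base_url="http://localhost:8321/v1/openai/v1",
        api_key=os.environ.get("LLAMA_STACK_API_KEY", "none"),
    )
    stream = await client.chat.completions.create(
        model="groq/llama-3.3-70b-versatile",  # placeholder model id
        messages=[{"role": "user", "content": "What's the weather in Paris?"}],
        tools=tools,
        stream=True,
    )

    # Accumulate streamed tool-call fragments by their index.
    calls: dict[int, dict] = {}
    async for chunk in stream:
        for choice in chunk.choices:
            for tc in choice.delta.tool_calls or []:
                acc = calls.setdefault(tc.index, {"id": None, "name": None, "arguments": ""})
                if tc.id:
                    acc["id"] = tc.id
                if tc.function and tc.function.name:
                    acc["name"] = tc.function.name
                if tc.function and tc.function.arguments:
                    acc["arguments"] += tc.function.arguments
    print(calls)


asyncio.run(main())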