mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-12-31 08:50:01 +00:00
Improve groq OpenAI API compatibility
This doesn't get Groq to 100% on the OpenAI API verification tests, but it does get it to 88.2% when Llama Stack is in the middle, compared to the 61.8% results for using an OpenAI client against Groq directly. The groq provider doesn't use litellm under the covers in its openai_chat_completion endpoint, and instead directly uses an AsyncOpenAI client with some special handling to improve conformance of responses for response_format usage and tool calling. Signed-off-by: Ben Browning <bbrownin@redhat.com>
This commit is contained in:
parent
657bb12e85
commit
8a1c0a1008
16 changed files with 418 additions and 45 deletions
81
docs/_static/llama-stack-spec.html
vendored
81
docs/_static/llama-stack-spec.html
vendored
|
|
@ -8923,6 +8923,9 @@
|
|||
"OpenAIChatCompletionToolCall": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"index": {
|
||||
"type": "integer"
|
||||
},
|
||||
"id": {
|
||||
"type": "string"
|
||||
},
|
||||
|
|
@ -8937,9 +8940,7 @@
|
|||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"id",
|
||||
"type",
|
||||
"function"
|
||||
"type"
|
||||
],
|
||||
"title": "OpenAIChatCompletionToolCall"
|
||||
},
|
||||
|
|
@ -8954,10 +8955,6 @@
|
|||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"name",
|
||||
"arguments"
|
||||
],
|
||||
"title": "OpenAIChatCompletionToolCallFunction"
|
||||
},
|
||||
"OpenAIDeveloperMessageParam": {
|
||||
|
|
@ -9563,7 +9560,7 @@
|
|||
"choices": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/components/schemas/OpenAIChoice"
|
||||
"$ref": "#/components/schemas/OpenAIChunkChoice"
|
||||
},
|
||||
"description": "List of choices"
|
||||
},
|
||||
|
|
@ -9605,10 +9602,12 @@
|
|||
"description": "The reason the model stopped generating"
|
||||
},
|
||||
"index": {
|
||||
"type": "integer"
|
||||
"type": "integer",
|
||||
"description": "The index of the choice"
|
||||
},
|
||||
"logprobs": {
|
||||
"$ref": "#/components/schemas/OpenAIChoiceLogprobs"
|
||||
"$ref": "#/components/schemas/OpenAIChoiceLogprobs",
|
||||
"description": "(Optional) The log probabilities for the tokens in the message"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
|
|
@ -9620,6 +9619,33 @@
|
|||
"title": "OpenAIChoice",
|
||||
"description": "A choice from an OpenAI-compatible chat completion response."
|
||||
},
|
||||
"OpenAIChoiceDelta": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"content": {
|
||||
"type": "string",
|
||||
"description": "(Optional) The content of the delta"
|
||||
},
|
||||
"refusal": {
|
||||
"type": "string",
|
||||
"description": "(Optional) The refusal of the delta"
|
||||
},
|
||||
"role": {
|
||||
"type": "string",
|
||||
"description": "(Optional) The role of the delta"
|
||||
},
|
||||
"tool_calls": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/components/schemas/OpenAIChatCompletionToolCall"
|
||||
},
|
||||
"description": "(Optional) The tool calls of the delta"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"title": "OpenAIChoiceDelta",
|
||||
"description": "A delta from an OpenAI-compatible chat completion streaming response."
|
||||
},
|
||||
"OpenAIChoiceLogprobs": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
|
|
@ -9627,19 +9653,50 @@
|
|||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/components/schemas/OpenAITokenLogProb"
|
||||
}
|
||||
},
|
||||
"description": "(Optional) The log probabilities for the tokens in the message"
|
||||
},
|
||||
"refusal": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/components/schemas/OpenAITokenLogProb"
|
||||
}
|
||||
},
|
||||
"description": "(Optional) The log probabilities for the tokens in the message"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"title": "OpenAIChoiceLogprobs",
|
||||
"description": "The log probabilities for the tokens in the message from an OpenAI-compatible chat completion response."
|
||||
},
|
||||
"OpenAIChunkChoice": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"delta": {
|
||||
"$ref": "#/components/schemas/OpenAIChoiceDelta",
|
||||
"description": "The delta from the chunk"
|
||||
},
|
||||
"finish_reason": {
|
||||
"type": "string",
|
||||
"description": "The reason the model stopped generating"
|
||||
},
|
||||
"index": {
|
||||
"type": "integer",
|
||||
"description": "The index of the choice"
|
||||
},
|
||||
"logprobs": {
|
||||
"$ref": "#/components/schemas/OpenAIChoiceLogprobs",
|
||||
"description": "(Optional) The log probabilities for the tokens in the message"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"delta",
|
||||
"finish_reason",
|
||||
"index"
|
||||
],
|
||||
"title": "OpenAIChunkChoice",
|
||||
"description": "A chunk choice from an OpenAI-compatible chat completion streaming response."
|
||||
},
|
||||
"OpenAITokenLogProb": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
|
|
|
|||
61
docs/_static/llama-stack-spec.yaml
vendored
61
docs/_static/llama-stack-spec.yaml
vendored
|
|
@ -6127,6 +6127,8 @@ components:
|
|||
OpenAIChatCompletionToolCall:
|
||||
type: object
|
||||
properties:
|
||||
index:
|
||||
type: integer
|
||||
id:
|
||||
type: string
|
||||
type:
|
||||
|
|
@ -6137,9 +6139,7 @@ components:
|
|||
$ref: '#/components/schemas/OpenAIChatCompletionToolCallFunction'
|
||||
additionalProperties: false
|
||||
required:
|
||||
- id
|
||||
- type
|
||||
- function
|
||||
title: OpenAIChatCompletionToolCall
|
||||
OpenAIChatCompletionToolCallFunction:
|
||||
type: object
|
||||
|
|
@ -6149,9 +6149,6 @@ components:
|
|||
arguments:
|
||||
type: string
|
||||
additionalProperties: false
|
||||
required:
|
||||
- name
|
||||
- arguments
|
||||
title: OpenAIChatCompletionToolCallFunction
|
||||
OpenAIDeveloperMessageParam:
|
||||
type: object
|
||||
|
|
@ -6550,7 +6547,7 @@ components:
|
|||
choices:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/OpenAIChoice'
|
||||
$ref: '#/components/schemas/OpenAIChunkChoice'
|
||||
description: List of choices
|
||||
object:
|
||||
type: string
|
||||
|
|
@ -6587,8 +6584,11 @@ components:
|
|||
description: The reason the model stopped generating
|
||||
index:
|
||||
type: integer
|
||||
description: The index of the choice
|
||||
logprobs:
|
||||
$ref: '#/components/schemas/OpenAIChoiceLogprobs'
|
||||
description: >-
|
||||
(Optional) The log probabilities for the tokens in the message
|
||||
additionalProperties: false
|
||||
required:
|
||||
- message
|
||||
|
|
@ -6597,6 +6597,27 @@ components:
|
|||
title: OpenAIChoice
|
||||
description: >-
|
||||
A choice from an OpenAI-compatible chat completion response.
|
||||
OpenAIChoiceDelta:
|
||||
type: object
|
||||
properties:
|
||||
content:
|
||||
type: string
|
||||
description: (Optional) The content of the delta
|
||||
refusal:
|
||||
type: string
|
||||
description: (Optional) The refusal of the delta
|
||||
role:
|
||||
type: string
|
||||
description: (Optional) The role of the delta
|
||||
tool_calls:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/OpenAIChatCompletionToolCall'
|
||||
description: (Optional) The tool calls of the delta
|
||||
additionalProperties: false
|
||||
title: OpenAIChoiceDelta
|
||||
description: >-
|
||||
A delta from an OpenAI-compatible chat completion streaming response.
|
||||
OpenAIChoiceLogprobs:
|
||||
type: object
|
||||
properties:
|
||||
|
|
@ -6604,15 +6625,43 @@ components:
|
|||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/OpenAITokenLogProb'
|
||||
description: >-
|
||||
(Optional) The log probabilities for the tokens in the message
|
||||
refusal:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/OpenAITokenLogProb'
|
||||
description: >-
|
||||
(Optional) The log probabilities for the tokens in the message
|
||||
additionalProperties: false
|
||||
title: OpenAIChoiceLogprobs
|
||||
description: >-
|
||||
The log probabilities for the tokens in the message from an OpenAI-compatible
|
||||
chat completion response.
|
||||
OpenAIChunkChoice:
|
||||
type: object
|
||||
properties:
|
||||
delta:
|
||||
$ref: '#/components/schemas/OpenAIChoiceDelta'
|
||||
description: The delta from the chunk
|
||||
finish_reason:
|
||||
type: string
|
||||
description: The reason the model stopped generating
|
||||
index:
|
||||
type: integer
|
||||
description: The index of the choice
|
||||
logprobs:
|
||||
$ref: '#/components/schemas/OpenAIChoiceLogprobs'
|
||||
description: >-
|
||||
(Optional) The log probabilities for the tokens in the message
|
||||
additionalProperties: false
|
||||
required:
|
||||
- delta
|
||||
- finish_reason
|
||||
- index
|
||||
title: OpenAIChunkChoice
|
||||
description: >-
|
||||
A chunk choice from an OpenAI-compatible chat completion streaming response.
|
||||
OpenAITokenLogProb:
|
||||
type: object
|
||||
properties:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue