mirror of
https://github.com/meta-llama/llama-stack.git
synced 2026-01-01 14:40:01 +00:00
Improve groq OpenAI API compatibility
This doesn't get Groq to 100% on the OpenAI API verification tests, but it does get it to 88.2% when Llama Stack is in the middle, compared to the 61.8% results for using an OpenAI client against Groq directly. The groq provider doesn't use litellm under the covers in its openai_chat_completion endpoint, and instead directly uses an AsyncOpenAI client with some special handling to improve conformance of responses for response_format usage and tool calling. Signed-off-by: Ben Browning <bbrownin@redhat.com>
This commit is contained in:
parent
657bb12e85
commit
8a1c0a1008
16 changed files with 418 additions and 45 deletions
81
docs/_static/llama-stack-spec.html
vendored
81
docs/_static/llama-stack-spec.html
vendored
|
|
@ -8923,6 +8923,9 @@
|
|||
"OpenAIChatCompletionToolCall": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"index": {
|
||||
"type": "integer"
|
||||
},
|
||||
"id": {
|
||||
"type": "string"
|
||||
},
|
||||
|
|
@ -8937,9 +8940,7 @@
|
|||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"id",
|
||||
"type",
|
||||
"function"
|
||||
"type"
|
||||
],
|
||||
"title": "OpenAIChatCompletionToolCall"
|
||||
},
|
||||
|
|
@ -8954,10 +8955,6 @@
|
|||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"name",
|
||||
"arguments"
|
||||
],
|
||||
"title": "OpenAIChatCompletionToolCallFunction"
|
||||
},
|
||||
"OpenAIDeveloperMessageParam": {
|
||||
|
|
@ -9563,7 +9560,7 @@
|
|||
"choices": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/components/schemas/OpenAIChoice"
|
||||
"$ref": "#/components/schemas/OpenAIChunkChoice"
|
||||
},
|
||||
"description": "List of choices"
|
||||
},
|
||||
|
|
@ -9605,10 +9602,12 @@
|
|||
"description": "The reason the model stopped generating"
|
||||
},
|
||||
"index": {
|
||||
"type": "integer"
|
||||
"type": "integer",
|
||||
"description": "The index of the choice"
|
||||
},
|
||||
"logprobs": {
|
||||
"$ref": "#/components/schemas/OpenAIChoiceLogprobs"
|
||||
"$ref": "#/components/schemas/OpenAIChoiceLogprobs",
|
||||
"description": "(Optional) The log probabilities for the tokens in the message"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
|
|
@ -9620,6 +9619,33 @@
|
|||
"title": "OpenAIChoice",
|
||||
"description": "A choice from an OpenAI-compatible chat completion response."
|
||||
},
|
||||
"OpenAIChoiceDelta": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"content": {
|
||||
"type": "string",
|
||||
"description": "(Optional) The content of the delta"
|
||||
},
|
||||
"refusal": {
|
||||
"type": "string",
|
||||
"description": "(Optional) The refusal of the delta"
|
||||
},
|
||||
"role": {
|
||||
"type": "string",
|
||||
"description": "(Optional) The role of the delta"
|
||||
},
|
||||
"tool_calls": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/components/schemas/OpenAIChatCompletionToolCall"
|
||||
},
|
||||
"description": "(Optional) The tool calls of the delta"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"title": "OpenAIChoiceDelta",
|
||||
"description": "A delta from an OpenAI-compatible chat completion streaming response."
|
||||
},
|
||||
"OpenAIChoiceLogprobs": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
|
|
@ -9627,19 +9653,50 @@
|
|||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/components/schemas/OpenAITokenLogProb"
|
||||
}
|
||||
},
|
||||
"description": "(Optional) The log probabilities for the tokens in the message"
|
||||
},
|
||||
"refusal": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/components/schemas/OpenAITokenLogProb"
|
||||
}
|
||||
},
|
||||
"description": "(Optional) The log probabilities for the tokens in the message"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"title": "OpenAIChoiceLogprobs",
|
||||
"description": "The log probabilities for the tokens in the message from an OpenAI-compatible chat completion response."
|
||||
},
|
||||
"OpenAIChunkChoice": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"delta": {
|
||||
"$ref": "#/components/schemas/OpenAIChoiceDelta",
|
||||
"description": "The delta from the chunk"
|
||||
},
|
||||
"finish_reason": {
|
||||
"type": "string",
|
||||
"description": "The reason the model stopped generating"
|
||||
},
|
||||
"index": {
|
||||
"type": "integer",
|
||||
"description": "The index of the choice"
|
||||
},
|
||||
"logprobs": {
|
||||
"$ref": "#/components/schemas/OpenAIChoiceLogprobs",
|
||||
"description": "(Optional) The log probabilities for the tokens in the message"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"delta",
|
||||
"finish_reason",
|
||||
"index"
|
||||
],
|
||||
"title": "OpenAIChunkChoice",
|
||||
"description": "A chunk choice from an OpenAI-compatible chat completion streaming response."
|
||||
},
|
||||
"OpenAITokenLogProb": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue