Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-08-05 10:13:05 +00:00)
Improve groq OpenAI API compatibility
This doesn't get Groq to 100% on the OpenAI API verification tests, but it does get it to 88.2% with Llama Stack in the middle, compared to 61.8% when using an OpenAI client against Groq directly.

The groq provider does not use litellm under the covers in its openai_chat_completion endpoint; instead, it uses an AsyncOpenAI client directly, with special handling to improve the conformance of responses for response_format usage and tool calling.

Signed-off-by: Ben Browning <bbrownin@redhat.com>
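For context, the verification tests exercise Groq through Llama Stack's OpenAI-compatible endpoint. Below is a minimal sketch of that flow, assuming a Llama Stack server running locally on port 8321 with the groq provider configured; the base URL, API key variable, and model id come from the verification config added in this change, while the prompt and JSON schema are purely illustrative.

import os

from openai import OpenAI

# Point a plain OpenAI client at Llama Stack's OpenAI-compatible endpoint.
client = OpenAI(
    base_url="http://localhost:8321/v1/openai/v1",
    api_key=os.environ["GROQ_API_KEY"],  # forwarded to the groq provider, per the verification config
)

# A json_schema response_format: the groq provider downgrades this to json_object
# and appends the schema to the system prompt, since Groq does not accept
# json_schema directly.
completion = client.chat.completions.create(
    model="groq/llama-4-scout-17b-16e-instruct",
    messages=[{"role": "user", "content": "Give me a city and its country."}],
    response_format={
        "type": "json_schema",
        "json_schema": {
            "name": "city",  # illustrative schema, not part of this change
            "schema": {
                "type": "object",
                "properties": {
                    "city": {"type": "string"},
                    "country": {"type": "string"},
                },
            },
        },
    },
)
print(completion.choices[0].message.content)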
commit 8a1c0a1008 (parent 657bb12e85)
16 changed files with 418 additions and 45 deletions
docs/_static/llama-stack-spec.html (vendored): 81 lines changed

@@ -8923,6 +8923,9 @@
     "OpenAIChatCompletionToolCall": {
         "type": "object",
         "properties": {
+            "index": {
+                "type": "integer"
+            },
             "id": {
                 "type": "string"
             },
@@ -8937,9 +8940,7 @@
         },
         "additionalProperties": false,
         "required": [
-            "id",
-            "type",
-            "function"
+            "type"
         ],
         "title": "OpenAIChatCompletionToolCall"
     },
@@ -8954,10 +8955,6 @@
             }
         },
         "additionalProperties": false,
-        "required": [
-            "name",
-            "arguments"
-        ],
         "title": "OpenAIChatCompletionToolCallFunction"
     },
     "OpenAIDeveloperMessageParam": {
@@ -9563,7 +9560,7 @@
             "choices": {
                 "type": "array",
                 "items": {
-                    "$ref": "#/components/schemas/OpenAIChoice"
+                    "$ref": "#/components/schemas/OpenAIChunkChoice"
                 },
                 "description": "List of choices"
             },
@@ -9605,10 +9602,12 @@
                 "description": "The reason the model stopped generating"
             },
             "index": {
-                "type": "integer"
+                "type": "integer",
+                "description": "The index of the choice"
             },
             "logprobs": {
-                "$ref": "#/components/schemas/OpenAIChoiceLogprobs"
+                "$ref": "#/components/schemas/OpenAIChoiceLogprobs",
+                "description": "(Optional) The log probabilities for the tokens in the message"
             }
         },
         "additionalProperties": false,
@@ -9620,6 +9619,33 @@
         "title": "OpenAIChoice",
         "description": "A choice from an OpenAI-compatible chat completion response."
     },
+    "OpenAIChoiceDelta": {
+        "type": "object",
+        "properties": {
+            "content": {
+                "type": "string",
+                "description": "(Optional) The content of the delta"
+            },
+            "refusal": {
+                "type": "string",
+                "description": "(Optional) The refusal of the delta"
+            },
+            "role": {
+                "type": "string",
+                "description": "(Optional) The role of the delta"
+            },
+            "tool_calls": {
+                "type": "array",
+                "items": {
+                    "$ref": "#/components/schemas/OpenAIChatCompletionToolCall"
+                },
+                "description": "(Optional) The tool calls of the delta"
+            }
+        },
+        "additionalProperties": false,
+        "title": "OpenAIChoiceDelta",
+        "description": "A delta from an OpenAI-compatible chat completion streaming response."
+    },
     "OpenAIChoiceLogprobs": {
         "type": "object",
         "properties": {
@@ -9627,19 +9653,50 @@
                 "type": "array",
                 "items": {
                     "$ref": "#/components/schemas/OpenAITokenLogProb"
-                }
+                },
+                "description": "(Optional) The log probabilities for the tokens in the message"
             },
             "refusal": {
                 "type": "array",
                 "items": {
                     "$ref": "#/components/schemas/OpenAITokenLogProb"
-                }
+                },
+                "description": "(Optional) The log probabilities for the tokens in the message"
             }
         },
         "additionalProperties": false,
         "title": "OpenAIChoiceLogprobs",
         "description": "The log probabilities for the tokens in the message from an OpenAI-compatible chat completion response."
     },
+    "OpenAIChunkChoice": {
+        "type": "object",
+        "properties": {
+            "delta": {
+                "$ref": "#/components/schemas/OpenAIChoiceDelta",
+                "description": "The delta from the chunk"
+            },
+            "finish_reason": {
+                "type": "string",
+                "description": "The reason the model stopped generating"
+            },
+            "index": {
+                "type": "integer",
+                "description": "The index of the choice"
+            },
+            "logprobs": {
+                "$ref": "#/components/schemas/OpenAIChoiceLogprobs",
+                "description": "(Optional) The log probabilities for the tokens in the message"
+            }
+        },
+        "additionalProperties": false,
+        "required": [
+            "delta",
+            "finish_reason",
+            "index"
+        ],
+        "title": "OpenAIChunkChoice",
+        "description": "A chunk choice from an OpenAI-compatible chat completion streaming response."
+    },
     "OpenAITokenLogProb": {
         "type": "object",
         "properties": {
docs/_static/llama-stack-spec.yaml (vendored): 61 lines changed

@@ -6127,6 +6127,8 @@ components:
     OpenAIChatCompletionToolCall:
       type: object
       properties:
+        index:
+          type: integer
         id:
           type: string
         type:
@@ -6137,9 +6139,7 @@ components:
           $ref: '#/components/schemas/OpenAIChatCompletionToolCallFunction'
       additionalProperties: false
       required:
-      - id
       - type
-      - function
       title: OpenAIChatCompletionToolCall
     OpenAIChatCompletionToolCallFunction:
       type: object
@@ -6149,9 +6149,6 @@ components:
         arguments:
           type: string
       additionalProperties: false
-      required:
-      - name
-      - arguments
       title: OpenAIChatCompletionToolCallFunction
     OpenAIDeveloperMessageParam:
       type: object
@@ -6550,7 +6547,7 @@ components:
       choices:
         type: array
         items:
-          $ref: '#/components/schemas/OpenAIChoice'
+          $ref: '#/components/schemas/OpenAIChunkChoice'
         description: List of choices
       object:
         type: string
@@ -6587,8 +6584,11 @@ components:
          description: The reason the model stopped generating
        index:
          type: integer
+         description: The index of the choice
        logprobs:
          $ref: '#/components/schemas/OpenAIChoiceLogprobs'
+         description: >-
+           (Optional) The log probabilities for the tokens in the message
      additionalProperties: false
      required:
      - message
@@ -6597,6 +6597,27 @@ components:
       title: OpenAIChoice
       description: >-
         A choice from an OpenAI-compatible chat completion response.
+    OpenAIChoiceDelta:
+      type: object
+      properties:
+        content:
+          type: string
+          description: (Optional) The content of the delta
+        refusal:
+          type: string
+          description: (Optional) The refusal of the delta
+        role:
+          type: string
+          description: (Optional) The role of the delta
+        tool_calls:
+          type: array
+          items:
+            $ref: '#/components/schemas/OpenAIChatCompletionToolCall'
+          description: (Optional) The tool calls of the delta
+      additionalProperties: false
+      title: OpenAIChoiceDelta
+      description: >-
+        A delta from an OpenAI-compatible chat completion streaming response.
     OpenAIChoiceLogprobs:
       type: object
       properties:
@@ -6604,15 +6625,43 @@ components:
           type: array
           items:
             $ref: '#/components/schemas/OpenAITokenLogProb'
+          description: >-
+            (Optional) The log probabilities for the tokens in the message
         refusal:
           type: array
           items:
             $ref: '#/components/schemas/OpenAITokenLogProb'
+          description: >-
+            (Optional) The log probabilities for the tokens in the message
       additionalProperties: false
       title: OpenAIChoiceLogprobs
       description: >-
         The log probabilities for the tokens in the message from an OpenAI-compatible
         chat completion response.
+    OpenAIChunkChoice:
+      type: object
+      properties:
+        delta:
+          $ref: '#/components/schemas/OpenAIChoiceDelta'
+          description: The delta from the chunk
+        finish_reason:
+          type: string
+          description: The reason the model stopped generating
+        index:
+          type: integer
+          description: The index of the choice
+        logprobs:
+          $ref: '#/components/schemas/OpenAIChoiceLogprobs'
+          description: >-
+            (Optional) The log probabilities for the tokens in the message
+      additionalProperties: false
+      required:
+      - delta
+      - finish_reason
+      - index
+      title: OpenAIChunkChoice
+      description: >-
+        A chunk choice from an OpenAI-compatible chat completion streaming response.
     OpenAITokenLogProb:
       type: object
       properties:
@@ -43,7 +43,9 @@ The following models are available by default:
 - `groq/llama-3.3-70b-versatile (aliases: meta-llama/Llama-3.3-70B-Instruct)`
 - `groq/llama-3.2-3b-preview (aliases: meta-llama/Llama-3.2-3B-Instruct)`
 - `groq/llama-4-scout-17b-16e-instruct (aliases: meta-llama/Llama-4-Scout-17B-16E-Instruct)`
+- `groq/meta-llama/llama-4-scout-17b-16e-instruct (aliases: meta-llama/Llama-4-Scout-17B-16E-Instruct)`
 - `groq/llama-4-maverick-17b-128e-instruct (aliases: meta-llama/Llama-4-Maverick-17B-128E-Instruct)`
+- `groq/meta-llama/llama-4-maverick-17b-128e-instruct (aliases: meta-llama/Llama-4-Maverick-17B-128E-Instruct)`
 
 
 ### Prerequisite: API Keys
@@ -503,15 +503,16 @@ class OpenAISystemMessageParam(BaseModel):
 
 @json_schema_type
 class OpenAIChatCompletionToolCallFunction(BaseModel):
-    name: str
-    arguments: str
+    name: Optional[str] = None
+    arguments: Optional[str] = None
 
 
 @json_schema_type
 class OpenAIChatCompletionToolCall(BaseModel):
-    id: str
+    index: Optional[int] = None
+    id: Optional[str] = None
     type: Literal["function"] = "function"
-    function: OpenAIChatCompletionToolCallFunction
+    function: Optional[OpenAIChatCompletionToolCallFunction] = None
 
 
 @json_schema_type
@@ -645,22 +646,54 @@ class OpenAITokenLogProb(BaseModel):
 class OpenAIChoiceLogprobs(BaseModel):
     """The log probabilities for the tokens in the message from an OpenAI-compatible chat completion response.
 
-    :content: (Optional) The log probabilities for the tokens in the message
-    :refusal: (Optional) The log probabilities for the tokens in the message
+    :param content: (Optional) The log probabilities for the tokens in the message
+    :param refusal: (Optional) The log probabilities for the tokens in the message
     """
 
     content: Optional[List[OpenAITokenLogProb]] = None
     refusal: Optional[List[OpenAITokenLogProb]] = None
 
 
+@json_schema_type
+class OpenAIChoiceDelta(BaseModel):
+    """A delta from an OpenAI-compatible chat completion streaming response.
+
+    :param content: (Optional) The content of the delta
+    :param refusal: (Optional) The refusal of the delta
+    :param role: (Optional) The role of the delta
+    :param tool_calls: (Optional) The tool calls of the delta
+    """
+
+    content: Optional[str] = None
+    refusal: Optional[str] = None
+    role: Optional[str] = None
+    tool_calls: Optional[List[OpenAIChatCompletionToolCall]] = None
+
+
+@json_schema_type
+class OpenAIChunkChoice(BaseModel):
+    """A chunk choice from an OpenAI-compatible chat completion streaming response.
+
+    :param delta: The delta from the chunk
+    :param finish_reason: The reason the model stopped generating
+    :param index: The index of the choice
+    :param logprobs: (Optional) The log probabilities for the tokens in the message
+    """
+
+    delta: OpenAIChoiceDelta
+    finish_reason: str
+    index: int
+    logprobs: Optional[OpenAIChoiceLogprobs] = None
+
+
 @json_schema_type
 class OpenAIChoice(BaseModel):
     """A choice from an OpenAI-compatible chat completion response.
 
     :param message: The message from the model
     :param finish_reason: The reason the model stopped generating
-    :index: The index of the choice
-    :logprobs: (Optional) The log probabilities for the tokens in the message
+    :param index: The index of the choice
+    :param logprobs: (Optional) The log probabilities for the tokens in the message
     """
 
     message: OpenAIMessageParam
@@ -699,7 +732,7 @@ class OpenAIChatCompletionChunk(BaseModel):
     """
 
     id: str
-    choices: List[OpenAIChoice]
+    choices: List[OpenAIChunkChoice]
     object: Literal["chat.completion.chunk"] = "chat.completion.chunk"
     created: int
     model: str
@@ -4,8 +4,24 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
+from typing import Any, AsyncIterator, Dict, List, Optional, Union
+
+from openai import AsyncOpenAI
+
+from llama_stack.apis.inference.inference import (
+    OpenAIChatCompletion,
+    OpenAIChatCompletionChunk,
+    OpenAIChoiceDelta,
+    OpenAIChunkChoice,
+    OpenAIMessageParam,
+    OpenAIResponseFormatParam,
+    OpenAISystemMessageParam,
+)
 from llama_stack.providers.remote.inference.groq.config import GroqConfig
 from llama_stack.providers.utils.inference.litellm_openai_mixin import LiteLLMOpenAIMixin
+from llama_stack.providers.utils.inference.openai_compat import (
+    prepare_openai_completion_params,
+)
 
 from .models import MODEL_ENTRIES
 
@@ -21,9 +37,129 @@ class GroqInferenceAdapter(LiteLLMOpenAIMixin):
             provider_data_api_key_field="groq_api_key",
         )
         self.config = config
+        self._openai_client = None
 
     async def initialize(self):
         await super().initialize()
 
     async def shutdown(self):
         await super().shutdown()
+        if self._openai_client:
+            await self._openai_client.close()
+            self._openai_client = None
+
+    def _get_openai_client(self) -> AsyncOpenAI:
+        if not self._openai_client:
+            self._openai_client = AsyncOpenAI(
+                base_url=f"{self.config.url}/openai/v1",
+                api_key=self.config.api_key,
+            )
+        return self._openai_client
+
+    async def openai_chat_completion(
+        self,
+        model: str,
+        messages: List[OpenAIMessageParam],
+        frequency_penalty: Optional[float] = None,
+        function_call: Optional[Union[str, Dict[str, Any]]] = None,
+        functions: Optional[List[Dict[str, Any]]] = None,
+        logit_bias: Optional[Dict[str, float]] = None,
+        logprobs: Optional[bool] = None,
+        max_completion_tokens: Optional[int] = None,
+        max_tokens: Optional[int] = None,
+        n: Optional[int] = None,
+        parallel_tool_calls: Optional[bool] = None,
+        presence_penalty: Optional[float] = None,
+        response_format: Optional[OpenAIResponseFormatParam] = None,
+        seed: Optional[int] = None,
+        stop: Optional[Union[str, List[str]]] = None,
+        stream: Optional[bool] = None,
+        stream_options: Optional[Dict[str, Any]] = None,
+        temperature: Optional[float] = None,
+        tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
+        tools: Optional[List[Dict[str, Any]]] = None,
+        top_logprobs: Optional[int] = None,
+        top_p: Optional[float] = None,
+        user: Optional[str] = None,
+    ) -> Union[OpenAIChatCompletion, AsyncIterator[OpenAIChatCompletionChunk]]:
+        model_obj = await self.model_store.get_model(model)
+
+        # Groq does not support json_schema response format, so we need to convert it to json_object
+        if response_format and response_format.type == "json_schema":
+            response_format.type = "json_object"
+            schema = response_format.json_schema.get("schema", {})
+            response_format.json_schema = None
+            json_instructions = f"\nYour response should be a JSON object that matches the following schema: {schema}"
+            if messages and messages[0].role == "system":
+                messages[0].content = messages[0].content + json_instructions
+            else:
+                messages.insert(0, OpenAISystemMessageParam(content=json_instructions))
+
+        # Groq returns a 400 error if tools are provided but none are called
+        # So, set tool_choice to "required" to attempt to force a call
+        if tools and (not tool_choice or tool_choice == "auto"):
+            tool_choice = "required"
+
+        params = await prepare_openai_completion_params(
+            model=model_obj.provider_resource_id.replace("groq/", ""),
+            messages=messages,
+            frequency_penalty=frequency_penalty,
+            function_call=function_call,
+            functions=functions,
+            logit_bias=logit_bias,
+            logprobs=logprobs,
+            max_completion_tokens=max_completion_tokens,
+            max_tokens=max_tokens,
+            n=n,
+            parallel_tool_calls=parallel_tool_calls,
+            presence_penalty=presence_penalty,
+            response_format=response_format,
+            seed=seed,
+            stop=stop,
+            stream=stream,
+            stream_options=stream_options,
+            temperature=temperature,
+            tool_choice=tool_choice,
+            tools=tools,
+            top_logprobs=top_logprobs,
+            top_p=top_p,
+            user=user,
+        )
+
+        # Groq does not support streaming requests that set response_format
+        fake_stream = False
+        if stream and response_format:
+            params["stream"] = False
+            fake_stream = True
+
+        response = await self._get_openai_client().chat.completions.create(**params)
+
+        if fake_stream:
+            chunk_choices = []
+            for choice in response.choices:
+                delta = OpenAIChoiceDelta(
+                    content=choice.message.content,
+                    role=choice.message.role,
+                    tool_calls=choice.message.tool_calls,
+                )
+                chunk_choice = OpenAIChunkChoice(
+                    delta=delta,
+                    finish_reason=choice.finish_reason,
+                    index=choice.index,
+                    logprobs=None,
+                )
+                chunk_choices.append(chunk_choice)
+            chunk = OpenAIChatCompletionChunk(
+                id=response.id,
+                choices=chunk_choices,
+                object="chat.completion.chunk",
+                created=response.created,
+                model=response.model,
+            )
+
+            async def _fake_stream_generator():
+                yield chunk
+
+            return _fake_stream_generator()
+        else:
+            return response
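One behavioral note on the adapter above: when a caller asks for streaming together with response_format, the provider issues a non-streaming request and wraps the full result in a single synthesized chunk, so an OpenAI client still consumes it as a stream. A rough client-side sketch of that, assuming the same local Llama Stack setup as in the earlier example (the prompt is illustrative):

import os

from openai import OpenAI

client = OpenAI(
    base_url="http://localhost:8321/v1/openai/v1",
    api_key=os.environ["GROQ_API_KEY"],
)

# Groq itself rejects stream + response_format, so the adapter "fake streams":
# the whole completion arrives in one chunk's delta.
stream = client.chat.completions.create(
    model="groq/llama-4-scout-17b-16e-instruct",
    messages=[{"role": "user", "content": "Reply with a JSON object containing a 'joke' key."}],
    response_format={"type": "json_object"},
    stream=True,
)

for chunk in stream:
    # With the fake-stream path this loop runs once, carrying the complete content.
    if chunk.choices and chunk.choices[0].delta.content:
        print(chunk.choices[0].delta.content, end="")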
@@ -39,8 +39,16 @@ MODEL_ENTRIES = [
         "groq/llama-4-scout-17b-16e-instruct",
         CoreModelId.llama4_scout_17b_16e_instruct.value,
     ),
+    build_hf_repo_model_entry(
+        "groq/meta-llama/llama-4-scout-17b-16e-instruct",
+        CoreModelId.llama4_scout_17b_16e_instruct.value,
+    ),
     build_hf_repo_model_entry(
         "groq/llama-4-maverick-17b-128e-instruct",
         CoreModelId.llama4_maverick_17b_128e_instruct.value,
     ),
+    build_hf_repo_model_entry(
+        "groq/meta-llama/llama-4-maverick-17b-128e-instruct",
+        CoreModelId.llama4_maverick_17b_128e_instruct.value,
+    ),
 ]
@@ -298,7 +298,7 @@ class LiteLLMOpenAIMixin(
             guided_choice=guided_choice,
             prompt_logprobs=prompt_logprobs,
         )
-        return litellm.text_completion(**params)
+        return await litellm.atext_completion(**params)
 
     async def openai_chat_completion(
         self,
@@ -352,7 +352,7 @@ class LiteLLMOpenAIMixin(
             top_p=top_p,
             user=user,
         )
-        return litellm.completion(**params)
+        return await litellm.acompletion(**params)
 
     async def batch_completion(
         self,
@@ -1354,14 +1354,7 @@ class OpenAIChatCompletionToLlamaStackMixin:
         i = 0
         async for chunk in response:
             event = chunk.event
-            if event.stop_reason == StopReason.end_of_turn:
-                finish_reason = "stop"
-            elif event.stop_reason == StopReason.end_of_message:
-                finish_reason = "eos"
-            elif event.stop_reason == StopReason.out_of_tokens:
-                finish_reason = "length"
-            else:
-                finish_reason = None
+            finish_reason = _convert_stop_reason_to_openai_finish_reason(event.stop_reason)
 
             if isinstance(event.delta, TextDelta):
                 text_delta = event.delta.text
@@ -386,6 +386,16 @@ models:
   provider_id: groq
   provider_model_id: groq/llama-4-scout-17b-16e-instruct
   model_type: llm
+- metadata: {}
+  model_id: groq/meta-llama/llama-4-scout-17b-16e-instruct
+  provider_id: groq
+  provider_model_id: groq/meta-llama/llama-4-scout-17b-16e-instruct
+  model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct
+  provider_id: groq
+  provider_model_id: groq/meta-llama/llama-4-scout-17b-16e-instruct
+  model_type: llm
 - metadata: {}
   model_id: groq/llama-4-maverick-17b-128e-instruct
   provider_id: groq
@@ -396,6 +406,16 @@ models:
   provider_id: groq
   provider_model_id: groq/llama-4-maverick-17b-128e-instruct
   model_type: llm
+- metadata: {}
+  model_id: groq/meta-llama/llama-4-maverick-17b-128e-instruct
+  provider_id: groq
+  provider_model_id: groq/meta-llama/llama-4-maverick-17b-128e-instruct
+  model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct
+  provider_id: groq
+  provider_model_id: groq/meta-llama/llama-4-maverick-17b-128e-instruct
+  model_type: llm
 - metadata:
     embedding_dimension: 384
   model_id: all-MiniLM-L6-v2
@@ -158,6 +158,16 @@ models:
   provider_id: groq
   provider_model_id: groq/llama-4-scout-17b-16e-instruct
   model_type: llm
+- metadata: {}
+  model_id: groq/meta-llama/llama-4-scout-17b-16e-instruct
+  provider_id: groq
+  provider_model_id: groq/meta-llama/llama-4-scout-17b-16e-instruct
+  model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct
+  provider_id: groq
+  provider_model_id: groq/meta-llama/llama-4-scout-17b-16e-instruct
+  model_type: llm
 - metadata: {}
   model_id: groq/llama-4-maverick-17b-128e-instruct
   provider_id: groq
@@ -168,6 +178,16 @@ models:
   provider_id: groq
   provider_model_id: groq/llama-4-maverick-17b-128e-instruct
   model_type: llm
+- metadata: {}
+  model_id: groq/meta-llama/llama-4-maverick-17b-128e-instruct
+  provider_id: groq
+  provider_model_id: groq/meta-llama/llama-4-maverick-17b-128e-instruct
+  model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct
+  provider_id: groq
+  provider_model_id: groq/meta-llama/llama-4-maverick-17b-128e-instruct
+  model_type: llm
 - metadata:
     embedding_dimension: 384
   model_id: all-MiniLM-L6-v2
@@ -474,6 +474,16 @@ models:
   provider_id: groq-openai-compat
   provider_model_id: groq/llama-4-scout-17b-16e-instruct
   model_type: llm
+- metadata: {}
+  model_id: groq/meta-llama/llama-4-scout-17b-16e-instruct
+  provider_id: groq-openai-compat
+  provider_model_id: groq/meta-llama/llama-4-scout-17b-16e-instruct
+  model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct
+  provider_id: groq-openai-compat
+  provider_model_id: groq/meta-llama/llama-4-scout-17b-16e-instruct
+  model_type: llm
 - metadata: {}
   model_id: groq/llama-4-maverick-17b-128e-instruct
   provider_id: groq-openai-compat
@@ -484,6 +494,16 @@ models:
   provider_id: groq-openai-compat
   provider_model_id: groq/llama-4-maverick-17b-128e-instruct
   model_type: llm
+- metadata: {}
+  model_id: groq/meta-llama/llama-4-maverick-17b-128e-instruct
+  provider_id: groq-openai-compat
+  provider_model_id: groq/meta-llama/llama-4-maverick-17b-128e-instruct
+  model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct
+  provider_id: groq-openai-compat
+  provider_model_id: groq/meta-llama/llama-4-maverick-17b-128e-instruct
+  model_type: llm
 - metadata: {}
   model_id: Meta-Llama-3.1-8B-Instruct
   provider_id: sambanova-openai-compat
tests/verifications/conf/groq-llama-stack.yaml (new file, 14 lines)

@@ -0,0 +1,14 @@
+base_url: http://localhost:8321/v1/openai/v1
+api_key_var: GROQ_API_KEY
+models:
+- groq/llama-3.3-70b-versatile
+- groq/llama-4-scout-17b-16e-instruct
+- groq/llama-4-maverick-17b-128e-instruct
+model_display_names:
+  groq/llama-3.3-70b-versatile: Llama-3.3-70B-Instruct
+  groq/llama-4-scout-17b-16e-instruct: Llama-4-Scout-Instruct
+  groq/llama-4-maverick-17b-128e-instruct: Llama-4-Maverick-Instruct
+test_exclusions:
+  groq/llama-3.3-70b-versatile:
+  - test_chat_non_streaming_image
+  - test_chat_streaming_image
@@ -2,12 +2,12 @@ base_url: https://api.groq.com/openai/v1
 api_key_var: GROQ_API_KEY
 models:
 - llama-3.3-70b-versatile
-- llama-4-scout-17b-16e-instruct
-- llama-4-maverick-17b-128e-instruct
+- meta-llama/llama-4-scout-17b-16e-instruct
+- meta-llama/llama-4-maverick-17b-128e-instruct
 model_display_names:
   llama-3.3-70b-versatile: Llama-3.3-70B-Instruct
-  llama-4-scout-17b-16e-instruct: Llama-4-Scout-Instruct
-  llama-4-maverick-17b-128e-instruct: Llama-4-Maverick-Instruct
+  meta-llama/llama-4-scout-17b-16e-instruct: Llama-4-Scout-Instruct
+  meta-llama/llama-4-maverick-17b-128e-instruct: Llama-4-Maverick-Instruct
 test_exclusions:
   llama-3.3-70b-versatile:
   - test_chat_non_streaming_image
@@ -1,9 +1,9 @@
 base_url: http://localhost:8321/v1/openai/v1
 api_key_var: OPENAI_API_KEY
 models:
-- gpt-4o
-- gpt-4o-mini
+- openai/gpt-4o
+- openai/gpt-4o-mini
 model_display_names:
-  gpt-4o: gpt-4o
-  gpt-4o-mini: gpt-4o-mini
+  openai/gpt-4o: gpt-4o
+  openai/gpt-4o-mini: gpt-4o-mini
 test_exclusions: {}
@@ -75,6 +75,7 @@ PROVIDER_ORDER = [
     "openai",
     "together-llama-stack",
     "fireworks-llama-stack",
+    "groq-llama-stack",
     "openai-llama-stack",
 ]
 
@@ -17,6 +17,11 @@ providers:
     config:
       url: https://api.fireworks.ai/inference/v1
       api_key: ${env.FIREWORKS_API_KEY}
+  - provider_id: groq
+    provider_type: remote::groq
+    config:
+      url: https://api.groq.com
+      api_key: ${env.GROQ_API_KEY}
   - provider_id: openai
     provider_type: remote::openai
     config:
@@ -98,6 +103,21 @@ models:
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama4-maverick-instruct-basic
   model_type: llm
+- metadata: {}
+  model_id: groq/llama-3.3-70b-versatile
+  provider_id: groq
+  provider_model_id: groq/llama-3.3-70b-versatile
+  model_type: llm
+- metadata: {}
+  model_id: groq/llama-4-scout-17b-16e-instruct
+  provider_id: groq
+  provider_model_id: groq/meta-llama/llama-4-scout-17b-16e-instruct
+  model_type: llm
+- metadata: {}
+  model_id: groq/llama-4-maverick-17b-128e-instruct
+  provider_id: groq
+  provider_model_id: groq/meta-llama/llama-4-maverick-17b-128e-instruct
+  model_type: llm
 - metadata: {}
   model_id: openai/gpt-4o
   provider_id: openai