Improve groq OpenAI API compatibility

This doesn't get Groq to 100% on the OpenAI API verification tests,
but it does reach 88.2% with Llama Stack in the middle, compared to
61.8% when an OpenAI client is pointed at Groq directly.

The groq provider doesn't use litellm under the covers in its
openai_chat_completion endpoint. Instead, it uses an AsyncOpenAI
client directly, with some special handling to improve the
conformance of responses for response_format usage and tool calling.
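For context, a rough sketch of that approach (the wrapper and names
here are illustrative, not the provider's actual code; Groq's
documented OpenAI-compatible base URL is assumed):

    import os

    from openai import AsyncOpenAI

    # Point a stock AsyncOpenAI client at Groq's OpenAI-compatible
    # endpoint instead of routing through litellm.
    client = AsyncOpenAI(
        base_url="https://api.groq.com/openai/v1",
        api_key=os.environ["GROQ_API_KEY"],
    )

    async def openai_chat_completion(**params):
        # The real provider wraps this call with extra handling that
        # fixes up response_format and tool-call payloads.
        return await client.chat.completions.create(**params)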

Signed-off-by: Ben Browning <bbrownin@redhat.com>
Ben Browning 2025-04-13 13:35:53 -04:00
parent 657bb12e85
commit 8a1c0a1008
16 changed files with 418 additions and 45 deletions

@@ -503,15 +503,16 @@ class OpenAISystemMessageParam(BaseModel):
 @json_schema_type
 class OpenAIChatCompletionToolCallFunction(BaseModel):
-    name: str
-    arguments: str
+    name: Optional[str] = None
+    arguments: Optional[str] = None

 @json_schema_type
 class OpenAIChatCompletionToolCall(BaseModel):
-    id: str
+    index: Optional[int] = None
+    id: Optional[str] = None
     type: Literal["function"] = "function"
-    function: OpenAIChatCompletionToolCallFunction
+    function: Optional[OpenAIChatCompletionToolCallFunction] = None

 @json_schema_type
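These fields become Optional because streaming responses deliver tool
calls as partial deltas: typically only the first chunk for a given
index carries the id and function name, and later chunks carry only
argument fragments. A minimal sketch of reassembling them (the helper
is hypothetical):

    # Merge streamed tool-call deltas into complete calls, keyed by
    # the index each delta carries.
    def accumulate_tool_calls(deltas):
        calls = {}
        for tc in deltas:
            call = calls.setdefault(
                tc.index, {"id": None, "name": None, "arguments": ""}
            )
            if tc.id is not None:
                call["id"] = tc.id
            if tc.function is not None:
                if tc.function.name is not None:
                    call["name"] = tc.function.name
                if tc.function.arguments is not None:
                    call["arguments"] += tc.function.arguments
        return calls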
@@ -645,22 +646,54 @@ class OpenAITokenLogProb(BaseModel):
 class OpenAIChoiceLogprobs(BaseModel):
     """The log probabilities for the tokens in the message from an OpenAI-compatible chat completion response.

-    :content: (Optional) The log probabilities for the tokens in the message
-    :refusal: (Optional) The log probabilities for the tokens in the message
+    :param content: (Optional) The log probabilities for the tokens in the message
+    :param refusal: (Optional) The log probabilities for the tokens in the message
     """

     content: Optional[List[OpenAITokenLogProb]] = None
     refusal: Optional[List[OpenAITokenLogProb]] = None

+@json_schema_type
+class OpenAIChoiceDelta(BaseModel):
+    """A delta from an OpenAI-compatible chat completion streaming response.
+
+    :param content: (Optional) The content of the delta
+    :param refusal: (Optional) The refusal of the delta
+    :param role: (Optional) The role of the delta
+    :param tool_calls: (Optional) The tool calls of the delta
+    """
+
+    content: Optional[str] = None
+    refusal: Optional[str] = None
+    role: Optional[str] = None
+    tool_calls: Optional[List[OpenAIChatCompletionToolCall]] = None
+
+@json_schema_type
+class OpenAIChunkChoice(BaseModel):
+    """A chunk choice from an OpenAI-compatible chat completion streaming response.
+
+    :param delta: The delta from the chunk
+    :param finish_reason: The reason the model stopped generating
+    :param index: The index of the choice
+    :param logprobs: (Optional) The log probabilities for the tokens in the message
+    """
+
+    delta: OpenAIChoiceDelta
+    finish_reason: str
+    index: int
+    logprobs: Optional[OpenAIChoiceLogprobs] = None
+
 @json_schema_type
 class OpenAIChoice(BaseModel):
     """A choice from an OpenAI-compatible chat completion response.

     :param message: The message from the model
     :param finish_reason: The reason the model stopped generating
-    :index: The index of the choice
-    :logprobs: (Optional) The log probabilities for the tokens in the message
+    :param index: The index of the choice
+    :param logprobs: (Optional) The log probabilities for the tokens in the message
     """

     message: OpenAIMessageParam
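A consumer of these new types reads the delta on each chunk choice
rather than a full message. A minimal sketch (the function name is
illustrative):

    async def collect_text(stream):
        # Concatenate streamed content deltas into the final message
        # text.
        parts = []
        async for chunk in stream:
            for choice in chunk.choices:  # OpenAIChunkChoice, not OpenAIChoice
                if choice.delta.content is not None:
                    parts.append(choice.delta.content)
        return "".join(parts)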
@@ -699,7 +732,7 @@ class OpenAIChatCompletionChunk(BaseModel):
     """

     id: str
-    choices: List[OpenAIChoice]
+    choices: List[OpenAIChunkChoice]
     object: Literal["chat.completion.chunk"] = "chat.completion.chunk"
     created: int
     model: str
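With this fix, streaming chunks are typed with delta-bearing
OpenAIChunkChoice entries instead of message-bearing OpenAIChoice
ones. An illustrative construction (all field values are made up):

    chunk = OpenAIChatCompletionChunk(
        id="chatcmpl-123",
        choices=[
            OpenAIChunkChoice(
                delta=OpenAIChoiceDelta(content="Hello"),
                finish_reason="stop",
                index=0,
            )
        ],
        created=1713000000,
        model="llama-3.3-70b-versatile",
    )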