mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-12-31 03:29:59 +00:00
Improve groq OpenAI API compatibility
This doesn't get Groq to 100% on the OpenAI API verification tests, but it does get it to 88.2% when Llama Stack is in the middle, compared to the 61.8% results for using an OpenAI client against Groq directly.

The groq provider doesn't use litellm under the covers in its openai_chat_completion endpoint, and instead directly uses an AsyncOpenAI client with some special handling to improve conformance of responses for response_format usage and tool calling.

Signed-off-by: Ben Browning <bbrownin@redhat.com>
This commit is contained in:
parent
657bb12e85
commit
8a1c0a1008
16 changed files with 418 additions and 45 deletions
|
|
@@ -503,15 +503,16 @@ class OpenAISystemMessageParam(BaseModel):
|
|||
|
||||
@json_schema_type
|
||||
class OpenAIChatCompletionToolCallFunction(BaseModel):
|
||||
name: str
|
||||
arguments: str
|
||||
name: Optional[str] = None
|
||||
arguments: Optional[str] = None
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class OpenAIChatCompletionToolCall(BaseModel):
|
||||
id: str
|
||||
index: Optional[int] = None
|
||||
id: Optional[str] = None
|
||||
type: Literal["function"] = "function"
|
||||
function: OpenAIChatCompletionToolCallFunction
|
||||
function: Optional[OpenAIChatCompletionToolCallFunction] = None
|
||||
|
||||
|
||||
@json_schema_type
|
||||
|
|
@@ -645,22 +646,54 @@ class OpenAITokenLogProb(BaseModel):
|
|||
class OpenAIChoiceLogprobs(BaseModel):
|
||||
"""The log probabilities for the tokens in the message from an OpenAI-compatible chat completion response.
|
||||
|
||||
:content: (Optional) The log probabilities for the tokens in the message
|
||||
:refusal: (Optional) The log probabilities for the tokens in the message
|
||||
:param content: (Optional) The log probabilities for the tokens in the message
|
||||
:param refusal: (Optional) The log probabilities for the tokens in the message
|
||||
"""
|
||||
|
||||
content: Optional[List[OpenAITokenLogProb]] = None
|
||||
refusal: Optional[List[OpenAITokenLogProb]] = None
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class OpenAIChoiceDelta(BaseModel):
|
||||
"""A delta from an OpenAI-compatible chat completion streaming response.
|
||||
|
||||
:param content: (Optional) The content of the delta
|
||||
:param refusal: (Optional) The refusal of the delta
|
||||
:param role: (Optional) The role of the delta
|
||||
:param tool_calls: (Optional) The tool calls of the delta
|
||||
"""
|
||||
|
||||
content: Optional[str] = None
|
||||
refusal: Optional[str] = None
|
||||
role: Optional[str] = None
|
||||
tool_calls: Optional[List[OpenAIChatCompletionToolCall]] = None
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class OpenAIChunkChoice(BaseModel):
|
||||
"""A chunk choice from an OpenAI-compatible chat completion streaming response.
|
||||
|
||||
:param delta: The delta from the chunk
|
||||
:param finish_reason: The reason the model stopped generating
|
||||
:param index: The index of the choice
|
||||
:param logprobs: (Optional) The log probabilities for the tokens in the message
|
||||
"""
|
||||
|
||||
delta: OpenAIChoiceDelta
|
||||
finish_reason: str
|
||||
index: int
|
||||
logprobs: Optional[OpenAIChoiceLogprobs] = None
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class OpenAIChoice(BaseModel):
|
||||
"""A choice from an OpenAI-compatible chat completion response.
|
||||
|
||||
:param message: The message from the model
|
||||
:param finish_reason: The reason the model stopped generating
|
||||
:index: The index of the choice
|
||||
:logprobs: (Optional) The log probabilities for the tokens in the message
|
||||
:param index: The index of the choice
|
||||
:param logprobs: (Optional) The log probabilities for the tokens in the message
|
||||
"""
|
||||
|
||||
message: OpenAIMessageParam
|
||||
|
|
@@ -699,7 +732,7 @@ class OpenAIChatCompletionChunk(BaseModel):
|
|||
"""
|
||||
|
||||
id: str
|
||||
choices: List[OpenAIChoice]
|
||||
choices: List[OpenAIChunkChoice]
|
||||
object: Literal["chat.completion.chunk"] = "chat.completion.chunk"
|
||||
created: int
|
||||
model: str
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue