mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-12-31 05:13:53 +00:00
Improve Groq OpenAI API compatibility
This doesn't get Groq to 100% on the OpenAI API verification tests, but it does get it to 88.2% when Llama Stack is in the middle, compared to 61.8% when using an OpenAI client against Groq directly. The Groq provider doesn't use litellm under the covers in its openai_chat_completion endpoint; instead, it directly uses an AsyncOpenAI client with some special handling to improve the conformance of responses for response_format usage and tool calling. Signed-off-by: Ben Browning <bbrownin@redhat.com>
This commit is contained in:
parent
657bb12e85
commit
8a1c0a1008
16 changed files with 418 additions and 45 deletions
14
tests/verifications/conf/groq-llama-stack.yaml
Normal file
14
tests/verifications/conf/groq-llama-stack.yaml
Normal file
|
|
@ -0,0 +1,14 @@
|
|||
base_url: http://localhost:8321/v1/openai/v1
|
||||
api_key_var: GROQ_API_KEY
|
||||
models:
|
||||
- groq/llama-3.3-70b-versatile
|
||||
- groq/llama-4-scout-17b-16e-instruct
|
||||
- groq/llama-4-maverick-17b-128e-instruct
|
||||
model_display_names:
|
||||
groq/llama-3.3-70b-versatile: Llama-3.3-70B-Instruct
|
||||
groq/llama-4-scout-17b-16e-instruct: Llama-4-Scout-Instruct
|
||||
groq/llama-4-maverick-17b-128e-instruct: Llama-4-Maverick-Instruct
|
||||
test_exclusions:
|
||||
groq/llama-3.3-70b-versatile:
|
||||
- test_chat_non_streaming_image
|
||||
- test_chat_streaming_image
|
||||
|
|
@ -2,12 +2,12 @@ base_url: https://api.groq.com/openai/v1
|
|||
api_key_var: GROQ_API_KEY
|
||||
models:
|
||||
- llama-3.3-70b-versatile
|
||||
- llama-4-scout-17b-16e-instruct
|
||||
- llama-4-maverick-17b-128e-instruct
|
||||
- meta-llama/llama-4-scout-17b-16e-instruct
|
||||
- meta-llama/llama-4-maverick-17b-128e-instruct
|
||||
model_display_names:
|
||||
llama-3.3-70b-versatile: Llama-3.3-70B-Instruct
|
||||
llama-4-scout-17b-16e-instruct: Llama-4-Scout-Instruct
|
||||
llama-4-maverick-17b-128e-instruct: Llama-4-Maverick-Instruct
|
||||
meta-llama/llama-4-scout-17b-16e-instruct: Llama-4-Scout-Instruct
|
||||
meta-llama/llama-4-maverick-17b-128e-instruct: Llama-4-Maverick-Instruct
|
||||
test_exclusions:
|
||||
llama-3.3-70b-versatile:
|
||||
- test_chat_non_streaming_image
|
||||
|
|
|
|||
|
|
@ -1,9 +1,9 @@
|
|||
base_url: http://localhost:8321/v1/openai/v1
|
||||
api_key_var: OPENAI_API_KEY
|
||||
models:
|
||||
- gpt-4o
|
||||
- gpt-4o-mini
|
||||
- openai/gpt-4o
|
||||
- openai/gpt-4o-mini
|
||||
model_display_names:
|
||||
gpt-4o: gpt-4o
|
||||
gpt-4o-mini: gpt-4o-mini
|
||||
openai/gpt-4o: gpt-4o
|
||||
openai/gpt-4o-mini: gpt-4o-mini
|
||||
test_exclusions: {}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue