Improve groq OpenAI API compatibility

This doesn't get Groq to 100% on the OpenAI API verification tests,
but it does get it to 88.2% when Llama Stack is in the middle,
compared to the 61.8% results for using an OpenAI client against Groq
directly.

The groq provider doesn't use litellm under the covers in its
openai_chat_completion endpoint, and instead directly uses an
AsyncOpenAI client with some special handling to improve conformance
of responses for response_format usage and tool calling.

Signed-off-by: Ben Browning <bbrownin@redhat.com>
This commit is contained in:
Ben Browning 2025-04-13 13:35:53 -04:00
parent 657bb12e85
commit 8a1c0a1008
16 changed files with 418 additions and 45 deletions

View file

@ -0,0 +1,14 @@
base_url: http://localhost:8321/v1/openai/v1
api_key_var: GROQ_API_KEY
models:
- groq/llama-3.3-70b-versatile
- groq/llama-4-scout-17b-16e-instruct
- groq/llama-4-maverick-17b-128e-instruct
model_display_names:
groq/llama-3.3-70b-versatile: Llama-3.3-70B-Instruct
groq/llama-4-scout-17b-16e-instruct: Llama-4-Scout-Instruct
groq/llama-4-maverick-17b-128e-instruct: Llama-4-Maverick-Instruct
test_exclusions:
groq/llama-3.3-70b-versatile:
- test_chat_non_streaming_image
- test_chat_streaming_image

View file

@ -2,12 +2,12 @@ base_url: https://api.groq.com/openai/v1
api_key_var: GROQ_API_KEY
models:
- llama-3.3-70b-versatile
- llama-4-scout-17b-16e-instruct
- llama-4-maverick-17b-128e-instruct
- meta-llama/llama-4-scout-17b-16e-instruct
- meta-llama/llama-4-maverick-17b-128e-instruct
model_display_names:
llama-3.3-70b-versatile: Llama-3.3-70B-Instruct
llama-4-scout-17b-16e-instruct: Llama-4-Scout-Instruct
llama-4-maverick-17b-128e-instruct: Llama-4-Maverick-Instruct
meta-llama/llama-4-scout-17b-16e-instruct: Llama-4-Scout-Instruct
meta-llama/llama-4-maverick-17b-128e-instruct: Llama-4-Maverick-Instruct
test_exclusions:
llama-3.3-70b-versatile:
- test_chat_non_streaming_image

View file

@ -1,9 +1,9 @@
base_url: http://localhost:8321/v1/openai/v1
api_key_var: OPENAI_API_KEY
models:
- gpt-4o
- gpt-4o-mini
- openai/gpt-4o
- openai/gpt-4o-mini
model_display_names:
gpt-4o: gpt-4o
gpt-4o-mini: gpt-4o-mini
openai/gpt-4o: gpt-4o
openai/gpt-4o-mini: gpt-4o-mini
test_exclusions: {}

View file

@ -75,6 +75,7 @@ PROVIDER_ORDER = [
"openai",
"together-llama-stack",
"fireworks-llama-stack",
"groq-llama-stack",
"openai-llama-stack",
]

View file

@ -17,6 +17,11 @@ providers:
config:
url: https://api.fireworks.ai/inference/v1
api_key: ${env.FIREWORKS_API_KEY}
- provider_id: groq
provider_type: remote::groq
config:
url: https://api.groq.com
api_key: ${env.GROQ_API_KEY}
- provider_id: openai
provider_type: remote::openai
config:
@ -98,6 +103,21 @@ models:
provider_id: fireworks
provider_model_id: accounts/fireworks/models/llama4-maverick-instruct-basic
model_type: llm
- metadata: {}
model_id: groq/llama-3.3-70b-versatile
provider_id: groq
provider_model_id: groq/llama-3.3-70b-versatile
model_type: llm
- metadata: {}
model_id: groq/llama-4-scout-17b-16e-instruct
provider_id: groq
provider_model_id: groq/meta-llama/llama-4-scout-17b-16e-instruct
model_type: llm
- metadata: {}
model_id: groq/llama-4-maverick-17b-128e-instruct
provider_id: groq
provider_model_id: groq/meta-llama/llama-4-maverick-17b-128e-instruct
model_type: llm
- metadata: {}
model_id: openai/gpt-4o
provider_id: openai