Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-08-05 10:13:05 +00:00)
Improve groq OpenAI API compatibility
This doesn't get Groq to 100% on the OpenAI API verification tests, but it does get it to 88.2% with Llama Stack in the middle, compared to 61.8% when using an OpenAI client against Groq directly.

The groq provider does not use litellm under the covers in its openai_chat_completion endpoint; instead, it uses an AsyncOpenAI client directly, with special handling to improve the conformance of responses for response_format usage and tool calling.

Signed-off-by: Ben Browning <bbrownin@redhat.com>
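For context, the verification tests exercise Groq through Llama Stack's OpenAI-compatible endpoint. Below is a minimal sketch of that flow, assuming a Llama Stack server running locally on port 8321 with the groq provider configured; the base URL, API key variable, and model id come from the verification config added in this change, while the prompt and JSON schema are purely illustrative.

import os

from openai import OpenAI

# Point a plain OpenAI client at Llama Stack's OpenAI-compatible endpoint.
client = OpenAI(
    base_url="http://localhost:8321/v1/openai/v1",
    api_key=os.environ["GROQ_API_KEY"],  # forwarded to the groq provider, per the verification config
)

# A json_schema response_format: the groq provider downgrades this to json_object
# and appends the schema to the system prompt, since Groq does not accept
# json_schema directly.
completion = client.chat.completions.create(
    model="groq/llama-4-scout-17b-16e-instruct",
    messages=[{"role": "user", "content": "Give me a city and its country."}],
    response_format={
        "type": "json_schema",
        "json_schema": {
            "name": "city",  # illustrative schema, not part of this change
            "schema": {
                "type": "object",
                "properties": {
                    "city": {"type": "string"},
                    "country": {"type": "string"},
                },
            },
        },
    },
)
print(completion.choices[0].message.content)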
commit 8a1c0a1008 (parent 657bb12e85)
16 changed files with 418 additions and 45 deletions
docs/_static/llama-stack-spec.html (vendored): 81 lines changed

@@ -8923,6 +8923,9 @@
     "OpenAIChatCompletionToolCall": {
         "type": "object",
         "properties": {
+            "index": {
+                "type": "integer"
+            },
             "id": {
                 "type": "string"
             },
@@ -8937,9 +8940,7 @@
         },
         "additionalProperties": false,
         "required": [
-            "id",
-            "type",
-            "function"
+            "type"
         ],
         "title": "OpenAIChatCompletionToolCall"
     },
@@ -8954,10 +8955,6 @@
             }
         },
         "additionalProperties": false,
-        "required": [
-            "name",
-            "arguments"
-        ],
         "title": "OpenAIChatCompletionToolCallFunction"
     },
     "OpenAIDeveloperMessageParam": {
@@ -9563,7 +9560,7 @@
             "choices": {
                 "type": "array",
                 "items": {
-                    "$ref": "#/components/schemas/OpenAIChoice"
+                    "$ref": "#/components/schemas/OpenAIChunkChoice"
                 },
                 "description": "List of choices"
             },
@@ -9605,10 +9602,12 @@
                 "description": "The reason the model stopped generating"
             },
             "index": {
-                "type": "integer"
+                "type": "integer",
+                "description": "The index of the choice"
             },
             "logprobs": {
-                "$ref": "#/components/schemas/OpenAIChoiceLogprobs"
+                "$ref": "#/components/schemas/OpenAIChoiceLogprobs",
+                "description": "(Optional) The log probabilities for the tokens in the message"
             }
         },
         "additionalProperties": false,
@@ -9620,6 +9619,33 @@
         "title": "OpenAIChoice",
         "description": "A choice from an OpenAI-compatible chat completion response."
     },
+    "OpenAIChoiceDelta": {
+        "type": "object",
+        "properties": {
+            "content": {
+                "type": "string",
+                "description": "(Optional) The content of the delta"
+            },
+            "refusal": {
+                "type": "string",
+                "description": "(Optional) The refusal of the delta"
+            },
+            "role": {
+                "type": "string",
+                "description": "(Optional) The role of the delta"
+            },
+            "tool_calls": {
+                "type": "array",
+                "items": {
+                    "$ref": "#/components/schemas/OpenAIChatCompletionToolCall"
+                },
+                "description": "(Optional) The tool calls of the delta"
+            }
+        },
+        "additionalProperties": false,
+        "title": "OpenAIChoiceDelta",
+        "description": "A delta from an OpenAI-compatible chat completion streaming response."
+    },
     "OpenAIChoiceLogprobs": {
         "type": "object",
         "properties": {
@@ -9627,19 +9653,50 @@
                 "type": "array",
                 "items": {
                     "$ref": "#/components/schemas/OpenAITokenLogProb"
-                }
+                },
+                "description": "(Optional) The log probabilities for the tokens in the message"
             },
             "refusal": {
                 "type": "array",
                 "items": {
                     "$ref": "#/components/schemas/OpenAITokenLogProb"
-                }
+                },
+                "description": "(Optional) The log probabilities for the tokens in the message"
             }
         },
         "additionalProperties": false,
         "title": "OpenAIChoiceLogprobs",
         "description": "The log probabilities for the tokens in the message from an OpenAI-compatible chat completion response."
     },
+    "OpenAIChunkChoice": {
+        "type": "object",
+        "properties": {
+            "delta": {
+                "$ref": "#/components/schemas/OpenAIChoiceDelta",
+                "description": "The delta from the chunk"
+            },
+            "finish_reason": {
+                "type": "string",
+                "description": "The reason the model stopped generating"
+            },
+            "index": {
+                "type": "integer",
+                "description": "The index of the choice"
+            },
+            "logprobs": {
+                "$ref": "#/components/schemas/OpenAIChoiceLogprobs",
+                "description": "(Optional) The log probabilities for the tokens in the message"
+            }
+        },
+        "additionalProperties": false,
+        "required": [
+            "delta",
+            "finish_reason",
+            "index"
+        ],
+        "title": "OpenAIChunkChoice",
+        "description": "A chunk choice from an OpenAI-compatible chat completion streaming response."
+    },
     "OpenAITokenLogProb": {
         "type": "object",
         "properties": {
docs/_static/llama-stack-spec.yaml (vendored): 61 lines changed

@@ -6127,6 +6127,8 @@ components:
     OpenAIChatCompletionToolCall:
       type: object
       properties:
+        index:
+          type: integer
         id:
           type: string
         type:
@@ -6137,9 +6139,7 @@ components:
           $ref: '#/components/schemas/OpenAIChatCompletionToolCallFunction'
       additionalProperties: false
       required:
-      - id
       - type
-      - function
       title: OpenAIChatCompletionToolCall
     OpenAIChatCompletionToolCallFunction:
       type: object
@@ -6149,9 +6149,6 @@ components:
         arguments:
           type: string
       additionalProperties: false
-      required:
-      - name
-      - arguments
       title: OpenAIChatCompletionToolCallFunction
     OpenAIDeveloperMessageParam:
       type: object
@@ -6550,7 +6547,7 @@ components:
       choices:
         type: array
         items:
-          $ref: '#/components/schemas/OpenAIChoice'
+          $ref: '#/components/schemas/OpenAIChunkChoice'
         description: List of choices
       object:
         type: string
@@ -6587,8 +6584,11 @@ components:
          description: The reason the model stopped generating
        index:
          type: integer
+         description: The index of the choice
        logprobs:
          $ref: '#/components/schemas/OpenAIChoiceLogprobs'
+         description: >-
+           (Optional) The log probabilities for the tokens in the message
      additionalProperties: false
      required:
      - message
@@ -6597,6 +6597,27 @@ components:
       title: OpenAIChoice
       description: >-
         A choice from an OpenAI-compatible chat completion response.
+    OpenAIChoiceDelta:
+      type: object
+      properties:
+        content:
+          type: string
+          description: (Optional) The content of the delta
+        refusal:
+          type: string
+          description: (Optional) The refusal of the delta
+        role:
+          type: string
+          description: (Optional) The role of the delta
+        tool_calls:
+          type: array
+          items:
+            $ref: '#/components/schemas/OpenAIChatCompletionToolCall'
+          description: (Optional) The tool calls of the delta
+      additionalProperties: false
+      title: OpenAIChoiceDelta
+      description: >-
+        A delta from an OpenAI-compatible chat completion streaming response.
     OpenAIChoiceLogprobs:
       type: object
       properties:
@@ -6604,15 +6625,43 @@ components:
           type: array
           items:
             $ref: '#/components/schemas/OpenAITokenLogProb'
+          description: >-
+            (Optional) The log probabilities for the tokens in the message
         refusal:
           type: array
           items:
             $ref: '#/components/schemas/OpenAITokenLogProb'
+          description: >-
+            (Optional) The log probabilities for the tokens in the message
       additionalProperties: false
       title: OpenAIChoiceLogprobs
       description: >-
         The log probabilities for the tokens in the message from an OpenAI-compatible
         chat completion response.
+    OpenAIChunkChoice:
+      type: object
+      properties:
+        delta:
+          $ref: '#/components/schemas/OpenAIChoiceDelta'
+          description: The delta from the chunk
+        finish_reason:
+          type: string
+          description: The reason the model stopped generating
+        index:
+          type: integer
+          description: The index of the choice
+        logprobs:
+          $ref: '#/components/schemas/OpenAIChoiceLogprobs'
+          description: >-
+            (Optional) The log probabilities for the tokens in the message
+      additionalProperties: false
+      required:
+      - delta
+      - finish_reason
+      - index
+      title: OpenAIChunkChoice
+      description: >-
+        A chunk choice from an OpenAI-compatible chat completion streaming response.
     OpenAITokenLogProb:
       type: object
       properties:
@@ -43,7 +43,9 @@ The following models are available by default:
 - `groq/llama-3.3-70b-versatile (aliases: meta-llama/Llama-3.3-70B-Instruct)`
 - `groq/llama-3.2-3b-preview (aliases: meta-llama/Llama-3.2-3B-Instruct)`
 - `groq/llama-4-scout-17b-16e-instruct (aliases: meta-llama/Llama-4-Scout-17B-16E-Instruct)`
+- `groq/meta-llama/llama-4-scout-17b-16e-instruct (aliases: meta-llama/Llama-4-Scout-17B-16E-Instruct)`
 - `groq/llama-4-maverick-17b-128e-instruct (aliases: meta-llama/Llama-4-Maverick-17B-128E-Instruct)`
+- `groq/meta-llama/llama-4-maverick-17b-128e-instruct (aliases: meta-llama/Llama-4-Maverick-17B-128E-Instruct)`
 
 
 ### Prerequisite: API Keys
@@ -503,15 +503,16 @@ class OpenAISystemMessageParam(BaseModel):
 
 @json_schema_type
 class OpenAIChatCompletionToolCallFunction(BaseModel):
-    name: str
-    arguments: str
+    name: Optional[str] = None
+    arguments: Optional[str] = None
 
 
 @json_schema_type
 class OpenAIChatCompletionToolCall(BaseModel):
-    id: str
+    index: Optional[int] = None
+    id: Optional[str] = None
     type: Literal["function"] = "function"
-    function: OpenAIChatCompletionToolCallFunction
+    function: Optional[OpenAIChatCompletionToolCallFunction] = None
 
 
 @json_schema_type
@@ -645,22 +646,54 @@ class OpenAITokenLogProb(BaseModel):
 class OpenAIChoiceLogprobs(BaseModel):
     """The log probabilities for the tokens in the message from an OpenAI-compatible chat completion response.
 
-    :content: (Optional) The log probabilities for the tokens in the message
-    :refusal: (Optional) The log probabilities for the tokens in the message
+    :param content: (Optional) The log probabilities for the tokens in the message
+    :param refusal: (Optional) The log probabilities for the tokens in the message
     """
 
     content: Optional[List[OpenAITokenLogProb]] = None
     refusal: Optional[List[OpenAITokenLogProb]] = None
 
 
+@json_schema_type
+class OpenAIChoiceDelta(BaseModel):
+    """A delta from an OpenAI-compatible chat completion streaming response.
+
+    :param content: (Optional) The content of the delta
+    :param refusal: (Optional) The refusal of the delta
+    :param role: (Optional) The role of the delta
+    :param tool_calls: (Optional) The tool calls of the delta
+    """
+
+    content: Optional[str] = None
+    refusal: Optional[str] = None
+    role: Optional[str] = None
+    tool_calls: Optional[List[OpenAIChatCompletionToolCall]] = None
+
+
+@json_schema_type
+class OpenAIChunkChoice(BaseModel):
+    """A chunk choice from an OpenAI-compatible chat completion streaming response.
+
+    :param delta: The delta from the chunk
+    :param finish_reason: The reason the model stopped generating
+    :param index: The index of the choice
+    :param logprobs: (Optional) The log probabilities for the tokens in the message
+    """
+
+    delta: OpenAIChoiceDelta
+    finish_reason: str
+    index: int
+    logprobs: Optional[OpenAIChoiceLogprobs] = None
+
+
 @json_schema_type
 class OpenAIChoice(BaseModel):
     """A choice from an OpenAI-compatible chat completion response.
 
     :param message: The message from the model
     :param finish_reason: The reason the model stopped generating
-    :index: The index of the choice
-    :logprobs: (Optional) The log probabilities for the tokens in the message
+    :param index: The index of the choice
+    :param logprobs: (Optional) The log probabilities for the tokens in the message
     """
 
     message: OpenAIMessageParam
@@ -699,7 +732,7 @@ class OpenAIChatCompletionChunk(BaseModel):
     """
 
     id: str
-    choices: List[OpenAIChoice]
+    choices: List[OpenAIChunkChoice]
     object: Literal["chat.completion.chunk"] = "chat.completion.chunk"
     created: int
     model: str
@@ -4,8 +4,24 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
+from typing import Any, AsyncIterator, Dict, List, Optional, Union
+
+from openai import AsyncOpenAI
+
+from llama_stack.apis.inference.inference import (
+    OpenAIChatCompletion,
+    OpenAIChatCompletionChunk,
+    OpenAIChoiceDelta,
+    OpenAIChunkChoice,
+    OpenAIMessageParam,
+    OpenAIResponseFormatParam,
+    OpenAISystemMessageParam,
+)
 from llama_stack.providers.remote.inference.groq.config import GroqConfig
 from llama_stack.providers.utils.inference.litellm_openai_mixin import LiteLLMOpenAIMixin
+from llama_stack.providers.utils.inference.openai_compat import (
+    prepare_openai_completion_params,
+)
 
 from .models import MODEL_ENTRIES
 
@@ -21,9 +37,129 @@ class GroqInferenceAdapter(LiteLLMOpenAIMixin):
             provider_data_api_key_field="groq_api_key",
         )
         self.config = config
+        self._openai_client = None
 
     async def initialize(self):
         await super().initialize()
 
     async def shutdown(self):
         await super().shutdown()
+        if self._openai_client:
+            await self._openai_client.close()
+            self._openai_client = None
+
+    def _get_openai_client(self) -> AsyncOpenAI:
+        if not self._openai_client:
+            self._openai_client = AsyncOpenAI(
+                base_url=f"{self.config.url}/openai/v1",
+                api_key=self.config.api_key,
+            )
+        return self._openai_client
+
+    async def openai_chat_completion(
+        self,
+        model: str,
+        messages: List[OpenAIMessageParam],
+        frequency_penalty: Optional[float] = None,
+        function_call: Optional[Union[str, Dict[str, Any]]] = None,
+        functions: Optional[List[Dict[str, Any]]] = None,
+        logit_bias: Optional[Dict[str, float]] = None,
+        logprobs: Optional[bool] = None,
+        max_completion_tokens: Optional[int] = None,
+        max_tokens: Optional[int] = None,
+        n: Optional[int] = None,
+        parallel_tool_calls: Optional[bool] = None,
+        presence_penalty: Optional[float] = None,
+        response_format: Optional[OpenAIResponseFormatParam] = None,
+        seed: Optional[int] = None,
+        stop: Optional[Union[str, List[str]]] = None,
+        stream: Optional[bool] = None,
+        stream_options: Optional[Dict[str, Any]] = None,
+        temperature: Optional[float] = None,
+        tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
+        tools: Optional[List[Dict[str, Any]]] = None,
+        top_logprobs: Optional[int] = None,
+        top_p: Optional[float] = None,
+        user: Optional[str] = None,
+    ) -> Union[OpenAIChatCompletion, AsyncIterator[OpenAIChatCompletionChunk]]:
+        model_obj = await self.model_store.get_model(model)
+
+        # Groq does not support json_schema response format, so we need to convert it to json_object
+        if response_format and response_format.type == "json_schema":
+            response_format.type = "json_object"
+            schema = response_format.json_schema.get("schema", {})
+            response_format.json_schema = None
+            json_instructions = f"\nYour response should be a JSON object that matches the following schema: {schema}"
+            if messages and messages[0].role == "system":
+                messages[0].content = messages[0].content + json_instructions
+            else:
+                messages.insert(0, OpenAISystemMessageParam(content=json_instructions))
+
+        # Groq returns a 400 error if tools are provided but none are called
+        # So, set tool_choice to "required" to attempt to force a call
+        if tools and (not tool_choice or tool_choice == "auto"):
+            tool_choice = "required"
+
+        params = await prepare_openai_completion_params(
+            model=model_obj.provider_resource_id.replace("groq/", ""),
+            messages=messages,
+            frequency_penalty=frequency_penalty,
+            function_call=function_call,
+            functions=functions,
+            logit_bias=logit_bias,
+            logprobs=logprobs,
+            max_completion_tokens=max_completion_tokens,
+            max_tokens=max_tokens,
+            n=n,
+            parallel_tool_calls=parallel_tool_calls,
+            presence_penalty=presence_penalty,
+            response_format=response_format,
+            seed=seed,
+            stop=stop,
+            stream=stream,
+            stream_options=stream_options,
+            temperature=temperature,
+            tool_choice=tool_choice,
+            tools=tools,
+            top_logprobs=top_logprobs,
+            top_p=top_p,
+            user=user,
+        )
+
+        # Groq does not support streaming requests that set response_format
+        fake_stream = False
+        if stream and response_format:
+            params["stream"] = False
+            fake_stream = True
+
+        response = await self._get_openai_client().chat.completions.create(**params)
+
+        if fake_stream:
+            chunk_choices = []
+            for choice in response.choices:
+                delta = OpenAIChoiceDelta(
+                    content=choice.message.content,
+                    role=choice.message.role,
+                    tool_calls=choice.message.tool_calls,
+                )
+                chunk_choice = OpenAIChunkChoice(
+                    delta=delta,
+                    finish_reason=choice.finish_reason,
+                    index=choice.index,
+                    logprobs=None,
+                )
+                chunk_choices.append(chunk_choice)
+            chunk = OpenAIChatCompletionChunk(
+                id=response.id,
+                choices=chunk_choices,
+                object="chat.completion.chunk",
+                created=response.created,
+                model=response.model,
+            )
+
+            async def _fake_stream_generator():
+                yield chunk
+
+            return _fake_stream_generator()
+        else:
+            return response
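One behavioral note on the adapter above: when a caller asks for streaming together with response_format, the provider issues a non-streaming request and wraps the full result in a single synthesized chunk, so an OpenAI client still consumes it as a stream. A rough client-side sketch of that, assuming the same local Llama Stack setup as in the earlier example (the prompt is illustrative):

import os

from openai import OpenAI

client = OpenAI(
    base_url="http://localhost:8321/v1/openai/v1",
    api_key=os.environ["GROQ_API_KEY"],
)

# Groq itself rejects stream + response_format, so the adapter "fake streams":
# the whole completion arrives in one chunk's delta.
stream = client.chat.completions.create(
    model="groq/llama-4-scout-17b-16e-instruct",
    messages=[{"role": "user", "content": "Reply with a JSON object containing a 'joke' key."}],
    response_format={"type": "json_object"},
    stream=True,
)

for chunk in stream:
    # With the fake-stream path this loop runs once, carrying the complete content.
    if chunk.choices and chunk.choices[0].delta.content:
        print(chunk.choices[0].delta.content, end="")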
@@ -39,8 +39,16 @@ MODEL_ENTRIES = [
         "groq/llama-4-scout-17b-16e-instruct",
         CoreModelId.llama4_scout_17b_16e_instruct.value,
     ),
+    build_hf_repo_model_entry(
+        "groq/meta-llama/llama-4-scout-17b-16e-instruct",
+        CoreModelId.llama4_scout_17b_16e_instruct.value,
+    ),
     build_hf_repo_model_entry(
         "groq/llama-4-maverick-17b-128e-instruct",
         CoreModelId.llama4_maverick_17b_128e_instruct.value,
     ),
+    build_hf_repo_model_entry(
+        "groq/meta-llama/llama-4-maverick-17b-128e-instruct",
+        CoreModelId.llama4_maverick_17b_128e_instruct.value,
+    ),
 ]
@@ -298,7 +298,7 @@ class LiteLLMOpenAIMixin(
             guided_choice=guided_choice,
             prompt_logprobs=prompt_logprobs,
         )
-        return litellm.text_completion(**params)
+        return await litellm.atext_completion(**params)
 
     async def openai_chat_completion(
         self,
@@ -352,7 +352,7 @@ class LiteLLMOpenAIMixin(
             top_p=top_p,
             user=user,
         )
-        return litellm.completion(**params)
+        return await litellm.acompletion(**params)
 
     async def batch_completion(
         self,
@@ -1354,14 +1354,7 @@ class OpenAIChatCompletionToLlamaStackMixin:
         i = 0
         async for chunk in response:
             event = chunk.event
-            if event.stop_reason == StopReason.end_of_turn:
-                finish_reason = "stop"
-            elif event.stop_reason == StopReason.end_of_message:
-                finish_reason = "eos"
-            elif event.stop_reason == StopReason.out_of_tokens:
-                finish_reason = "length"
-            else:
-                finish_reason = None
+            finish_reason = _convert_stop_reason_to_openai_finish_reason(event.stop_reason)
 
             if isinstance(event.delta, TextDelta):
                 text_delta = event.delta.text
@@ -386,6 +386,16 @@ models:
   provider_id: groq
   provider_model_id: groq/llama-4-scout-17b-16e-instruct
   model_type: llm
+- metadata: {}
+  model_id: groq/meta-llama/llama-4-scout-17b-16e-instruct
+  provider_id: groq
+  provider_model_id: groq/meta-llama/llama-4-scout-17b-16e-instruct
+  model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct
+  provider_id: groq
+  provider_model_id: groq/meta-llama/llama-4-scout-17b-16e-instruct
+  model_type: llm
 - metadata: {}
   model_id: groq/llama-4-maverick-17b-128e-instruct
   provider_id: groq
@@ -396,6 +406,16 @@ models:
   provider_id: groq
   provider_model_id: groq/llama-4-maverick-17b-128e-instruct
   model_type: llm
+- metadata: {}
+  model_id: groq/meta-llama/llama-4-maverick-17b-128e-instruct
+  provider_id: groq
+  provider_model_id: groq/meta-llama/llama-4-maverick-17b-128e-instruct
+  model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct
+  provider_id: groq
+  provider_model_id: groq/meta-llama/llama-4-maverick-17b-128e-instruct
+  model_type: llm
 - metadata:
     embedding_dimension: 384
   model_id: all-MiniLM-L6-v2
@@ -158,6 +158,16 @@ models:
   provider_id: groq
   provider_model_id: groq/llama-4-scout-17b-16e-instruct
   model_type: llm
+- metadata: {}
+  model_id: groq/meta-llama/llama-4-scout-17b-16e-instruct
+  provider_id: groq
+  provider_model_id: groq/meta-llama/llama-4-scout-17b-16e-instruct
+  model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct
+  provider_id: groq
+  provider_model_id: groq/meta-llama/llama-4-scout-17b-16e-instruct
+  model_type: llm
 - metadata: {}
   model_id: groq/llama-4-maverick-17b-128e-instruct
   provider_id: groq
@@ -168,6 +178,16 @@ models:
   provider_id: groq
   provider_model_id: groq/llama-4-maverick-17b-128e-instruct
   model_type: llm
+- metadata: {}
+  model_id: groq/meta-llama/llama-4-maverick-17b-128e-instruct
+  provider_id: groq
+  provider_model_id: groq/meta-llama/llama-4-maverick-17b-128e-instruct
+  model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct
+  provider_id: groq
+  provider_model_id: groq/meta-llama/llama-4-maverick-17b-128e-instruct
+  model_type: llm
 - metadata:
     embedding_dimension: 384
   model_id: all-MiniLM-L6-v2
@@ -474,6 +474,16 @@ models:
   provider_id: groq-openai-compat
   provider_model_id: groq/llama-4-scout-17b-16e-instruct
   model_type: llm
+- metadata: {}
+  model_id: groq/meta-llama/llama-4-scout-17b-16e-instruct
+  provider_id: groq-openai-compat
+  provider_model_id: groq/meta-llama/llama-4-scout-17b-16e-instruct
+  model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct
+  provider_id: groq-openai-compat
+  provider_model_id: groq/meta-llama/llama-4-scout-17b-16e-instruct
+  model_type: llm
 - metadata: {}
   model_id: groq/llama-4-maverick-17b-128e-instruct
   provider_id: groq-openai-compat
@@ -484,6 +494,16 @@ models:
   provider_id: groq-openai-compat
   provider_model_id: groq/llama-4-maverick-17b-128e-instruct
   model_type: llm
+- metadata: {}
+  model_id: groq/meta-llama/llama-4-maverick-17b-128e-instruct
+  provider_id: groq-openai-compat
+  provider_model_id: groq/meta-llama/llama-4-maverick-17b-128e-instruct
+  model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct
+  provider_id: groq-openai-compat
+  provider_model_id: groq/meta-llama/llama-4-maverick-17b-128e-instruct
+  model_type: llm
 - metadata: {}
   model_id: Meta-Llama-3.1-8B-Instruct
   provider_id: sambanova-openai-compat
tests/verifications/conf/groq-llama-stack.yaml (new file, 14 lines)

@@ -0,0 +1,14 @@
+base_url: http://localhost:8321/v1/openai/v1
+api_key_var: GROQ_API_KEY
+models:
+- groq/llama-3.3-70b-versatile
+- groq/llama-4-scout-17b-16e-instruct
+- groq/llama-4-maverick-17b-128e-instruct
+model_display_names:
+  groq/llama-3.3-70b-versatile: Llama-3.3-70B-Instruct
+  groq/llama-4-scout-17b-16e-instruct: Llama-4-Scout-Instruct
+  groq/llama-4-maverick-17b-128e-instruct: Llama-4-Maverick-Instruct
+test_exclusions:
+  groq/llama-3.3-70b-versatile:
+  - test_chat_non_streaming_image
+  - test_chat_streaming_image
@@ -2,12 +2,12 @@ base_url: https://api.groq.com/openai/v1
 api_key_var: GROQ_API_KEY
 models:
 - llama-3.3-70b-versatile
-- llama-4-scout-17b-16e-instruct
-- llama-4-maverick-17b-128e-instruct
+- meta-llama/llama-4-scout-17b-16e-instruct
+- meta-llama/llama-4-maverick-17b-128e-instruct
 model_display_names:
   llama-3.3-70b-versatile: Llama-3.3-70B-Instruct
-  llama-4-scout-17b-16e-instruct: Llama-4-Scout-Instruct
-  llama-4-maverick-17b-128e-instruct: Llama-4-Maverick-Instruct
+  meta-llama/llama-4-scout-17b-16e-instruct: Llama-4-Scout-Instruct
+  meta-llama/llama-4-maverick-17b-128e-instruct: Llama-4-Maverick-Instruct
 test_exclusions:
   llama-3.3-70b-versatile:
   - test_chat_non_streaming_image
@@ -1,9 +1,9 @@
 base_url: http://localhost:8321/v1/openai/v1
 api_key_var: OPENAI_API_KEY
 models:
-- gpt-4o
-- gpt-4o-mini
+- openai/gpt-4o
+- openai/gpt-4o-mini
 model_display_names:
-  gpt-4o: gpt-4o
-  gpt-4o-mini: gpt-4o-mini
+  openai/gpt-4o: gpt-4o
+  openai/gpt-4o-mini: gpt-4o-mini
 test_exclusions: {}
@@ -75,6 +75,7 @@ PROVIDER_ORDER = [
     "openai",
     "together-llama-stack",
     "fireworks-llama-stack",
+    "groq-llama-stack",
     "openai-llama-stack",
 ]
 
@@ -17,6 +17,11 @@ providers:
     config:
       url: https://api.fireworks.ai/inference/v1
       api_key: ${env.FIREWORKS_API_KEY}
+  - provider_id: groq
+    provider_type: remote::groq
+    config:
+      url: https://api.groq.com
+      api_key: ${env.GROQ_API_KEY}
   - provider_id: openai
     provider_type: remote::openai
     config:
@@ -98,6 +103,21 @@ models:
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama4-maverick-instruct-basic
   model_type: llm
+- metadata: {}
+  model_id: groq/llama-3.3-70b-versatile
+  provider_id: groq
+  provider_model_id: groq/llama-3.3-70b-versatile
+  model_type: llm
+- metadata: {}
+  model_id: groq/llama-4-scout-17b-16e-instruct
+  provider_id: groq
+  provider_model_id: groq/meta-llama/llama-4-scout-17b-16e-instruct
+  model_type: llm
+- metadata: {}
+  model_id: groq/llama-4-maverick-17b-128e-instruct
+  provider_id: groq
+  provider_model_id: groq/meta-llama/llama-4-maverick-17b-128e-instruct
+  model_type: llm
 - metadata: {}
   model_id: openai/gpt-4o
   provider_id: openai