diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html
index 124e6b0fa..296b32e18 100644
--- a/docs/_static/llama-stack-spec.html
+++ b/docs/_static/llama-stack-spec.html
@@ -8923,6 +8923,9 @@
"OpenAIChatCompletionToolCall": {
"type": "object",
"properties": {
+ "index": {
+ "type": "integer"
+ },
"id": {
"type": "string"
},
@@ -8937,9 +8940,7 @@
},
"additionalProperties": false,
"required": [
- "id",
- "type",
- "function"
+ "type"
],
"title": "OpenAIChatCompletionToolCall"
},
@@ -8954,10 +8955,6 @@
}
},
"additionalProperties": false,
- "required": [
- "name",
- "arguments"
- ],
"title": "OpenAIChatCompletionToolCallFunction"
},
"OpenAIDeveloperMessageParam": {
@@ -9563,7 +9560,7 @@
"choices": {
"type": "array",
"items": {
- "$ref": "#/components/schemas/OpenAIChoice"
+ "$ref": "#/components/schemas/OpenAIChunkChoice"
},
"description": "List of choices"
},
@@ -9605,10 +9602,12 @@
"description": "The reason the model stopped generating"
},
"index": {
- "type": "integer"
+ "type": "integer",
+ "description": "The index of the choice"
},
"logprobs": {
- "$ref": "#/components/schemas/OpenAIChoiceLogprobs"
+ "$ref": "#/components/schemas/OpenAIChoiceLogprobs",
+ "description": "(Optional) The log probabilities for the tokens in the message"
}
},
"additionalProperties": false,
@@ -9620,6 +9619,33 @@
"title": "OpenAIChoice",
"description": "A choice from an OpenAI-compatible chat completion response."
},
+ "OpenAIChoiceDelta": {
+ "type": "object",
+ "properties": {
+ "content": {
+ "type": "string",
+ "description": "(Optional) The content of the delta"
+ },
+ "refusal": {
+ "type": "string",
+ "description": "(Optional) The refusal of the delta"
+ },
+ "role": {
+ "type": "string",
+ "description": "(Optional) The role of the delta"
+ },
+ "tool_calls": {
+ "type": "array",
+ "items": {
+ "$ref": "#/components/schemas/OpenAIChatCompletionToolCall"
+ },
+ "description": "(Optional) The tool calls of the delta"
+ }
+ },
+ "additionalProperties": false,
+ "title": "OpenAIChoiceDelta",
+ "description": "A delta from an OpenAI-compatible chat completion streaming response."
+ },
"OpenAIChoiceLogprobs": {
"type": "object",
"properties": {
@@ -9627,19 +9653,50 @@
"type": "array",
"items": {
"$ref": "#/components/schemas/OpenAITokenLogProb"
- }
+ },
+ "description": "(Optional) The log probabilities for the tokens in the message"
},
"refusal": {
"type": "array",
"items": {
"$ref": "#/components/schemas/OpenAITokenLogProb"
- }
+ },
+ "description": "(Optional) The log probabilities for the tokens in the refusal"
}
},
"additionalProperties": false,
"title": "OpenAIChoiceLogprobs",
"description": "The log probabilities for the tokens in the message from an OpenAI-compatible chat completion response."
},
+ "OpenAIChunkChoice": {
+ "type": "object",
+ "properties": {
+ "delta": {
+ "$ref": "#/components/schemas/OpenAIChoiceDelta",
+ "description": "The delta from the chunk"
+ },
+ "finish_reason": {
+ "type": "string",
+ "description": "The reason the model stopped generating"
+ },
+ "index": {
+ "type": "integer",
+ "description": "The index of the choice"
+ },
+ "logprobs": {
+ "$ref": "#/components/schemas/OpenAIChoiceLogprobs",
+ "description": "(Optional) The log probabilities for the tokens in the message"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "delta",
+ "finish_reason",
+ "index"
+ ],
+ "title": "OpenAIChunkChoice",
+ "description": "A chunk choice from an OpenAI-compatible chat completion streaming response."
+ },
"OpenAITokenLogProb": {
"type": "object",
"properties": {
diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml
index 781fbc618..7a983ccc0 100644
--- a/docs/_static/llama-stack-spec.yaml
+++ b/docs/_static/llama-stack-spec.yaml
@@ -6127,6 +6127,8 @@ components:
OpenAIChatCompletionToolCall:
type: object
properties:
+ index:
+ type: integer
id:
type: string
type:
@@ -6137,9 +6139,7 @@ components:
$ref: '#/components/schemas/OpenAIChatCompletionToolCallFunction'
additionalProperties: false
required:
- - id
- type
- - function
title: OpenAIChatCompletionToolCall
OpenAIChatCompletionToolCallFunction:
type: object
@@ -6149,9 +6149,6 @@ components:
arguments:
type: string
additionalProperties: false
- required:
- - name
- - arguments
title: OpenAIChatCompletionToolCallFunction
OpenAIDeveloperMessageParam:
type: object
@@ -6550,7 +6547,7 @@ components:
choices:
type: array
items:
- $ref: '#/components/schemas/OpenAIChoice'
+ $ref: '#/components/schemas/OpenAIChunkChoice'
description: List of choices
object:
type: string
@@ -6587,8 +6584,11 @@ components:
description: The reason the model stopped generating
index:
type: integer
+ description: The index of the choice
logprobs:
$ref: '#/components/schemas/OpenAIChoiceLogprobs'
+ description: >-
+ (Optional) The log probabilities for the tokens in the message
additionalProperties: false
required:
- message
@@ -6597,6 +6597,27 @@ components:
title: OpenAIChoice
description: >-
A choice from an OpenAI-compatible chat completion response.
+ OpenAIChoiceDelta:
+ type: object
+ properties:
+ content:
+ type: string
+ description: (Optional) The content of the delta
+ refusal:
+ type: string
+ description: (Optional) The refusal of the delta
+ role:
+ type: string
+ description: (Optional) The role of the delta
+ tool_calls:
+ type: array
+ items:
+ $ref: '#/components/schemas/OpenAIChatCompletionToolCall'
+ description: (Optional) The tool calls of the delta
+ additionalProperties: false
+ title: OpenAIChoiceDelta
+ description: >-
+ A delta from an OpenAI-compatible chat completion streaming response.
OpenAIChoiceLogprobs:
type: object
properties:
@@ -6604,15 +6625,43 @@ components:
type: array
items:
$ref: '#/components/schemas/OpenAITokenLogProb'
+ description: >-
+ (Optional) The log probabilities for the tokens in the message
refusal:
type: array
items:
$ref: '#/components/schemas/OpenAITokenLogProb'
+ description: >-
+ (Optional) The log probabilities for the tokens in the refusal
additionalProperties: false
title: OpenAIChoiceLogprobs
description: >-
The log probabilities for the tokens in the message from an OpenAI-compatible
chat completion response.
+ OpenAIChunkChoice:
+ type: object
+ properties:
+ delta:
+ $ref: '#/components/schemas/OpenAIChoiceDelta'
+ description: The delta from the chunk
+ finish_reason:
+ type: string
+ description: The reason the model stopped generating
+ index:
+ type: integer
+ description: The index of the choice
+ logprobs:
+ $ref: '#/components/schemas/OpenAIChoiceLogprobs'
+ description: >-
+ (Optional) The log probabilities for the tokens in the message
+ additionalProperties: false
+ required:
+ - delta
+ - finish_reason
+ - index
+ title: OpenAIChunkChoice
+ description: >-
+ A chunk choice from an OpenAI-compatible chat completion streaming response.
OpenAITokenLogProb:
type: object
properties:
diff --git a/docs/source/distributions/self_hosted_distro/groq.md b/docs/source/distributions/self_hosted_distro/groq.md
index 4f5a8a859..b18be1b2f 100644
--- a/docs/source/distributions/self_hosted_distro/groq.md
+++ b/docs/source/distributions/self_hosted_distro/groq.md
@@ -43,7 +43,9 @@ The following models are available by default:
- `groq/llama-3.3-70b-versatile (aliases: meta-llama/Llama-3.3-70B-Instruct)`
- `groq/llama-3.2-3b-preview (aliases: meta-llama/Llama-3.2-3B-Instruct)`
- `groq/llama-4-scout-17b-16e-instruct (aliases: meta-llama/Llama-4-Scout-17B-16E-Instruct)`
+- `groq/meta-llama/llama-4-scout-17b-16e-instruct (aliases: meta-llama/Llama-4-Scout-17B-16E-Instruct)`
- `groq/llama-4-maverick-17b-128e-instruct (aliases: meta-llama/Llama-4-Maverick-17B-128E-Instruct)`
+- `groq/meta-llama/llama-4-maverick-17b-128e-instruct (aliases: meta-llama/Llama-4-Maverick-17B-128E-Instruct)`
### Prerequisite: API Keys
diff --git a/llama_stack/apis/inference/inference.py b/llama_stack/apis/inference/inference.py
index 1f3e64dd6..596efb136 100644
--- a/llama_stack/apis/inference/inference.py
+++ b/llama_stack/apis/inference/inference.py
@@ -503,15 +503,16 @@ class OpenAISystemMessageParam(BaseModel):
@json_schema_type
class OpenAIChatCompletionToolCallFunction(BaseModel):
- name: str
- arguments: str
+ name: Optional[str] = None
+ arguments: Optional[str] = None
@json_schema_type
class OpenAIChatCompletionToolCall(BaseModel):
- id: str
+ index: Optional[int] = None
+ id: Optional[str] = None
type: Literal["function"] = "function"
- function: OpenAIChatCompletionToolCallFunction
+ function: Optional[OpenAIChatCompletionToolCallFunction] = None
@json_schema_type
@@ -645,22 +646,54 @@ class OpenAITokenLogProb(BaseModel):
class OpenAIChoiceLogprobs(BaseModel):
"""The log probabilities for the tokens in the message from an OpenAI-compatible chat completion response.
- :content: (Optional) The log probabilities for the tokens in the message
- :refusal: (Optional) The log probabilities for the tokens in the message
+ :param content: (Optional) The log probabilities for the tokens in the message
+ :param refusal: (Optional) The log probabilities for the tokens in the refusal
"""
content: Optional[List[OpenAITokenLogProb]] = None
refusal: Optional[List[OpenAITokenLogProb]] = None
+@json_schema_type
+class OpenAIChoiceDelta(BaseModel):
+ """A delta from an OpenAI-compatible chat completion streaming response.
+
+ :param content: (Optional) The content of the delta
+ :param refusal: (Optional) The refusal of the delta
+ :param role: (Optional) The role of the delta
+ :param tool_calls: (Optional) The tool calls of the delta
+ """
+
+ content: Optional[str] = None
+ refusal: Optional[str] = None
+ role: Optional[str] = None
+ tool_calls: Optional[List[OpenAIChatCompletionToolCall]] = None
+
+
+@json_schema_type
+class OpenAIChunkChoice(BaseModel):
+ """A chunk choice from an OpenAI-compatible chat completion streaming response.
+
+ :param delta: The delta from the chunk
+ :param finish_reason: The reason the model stopped generating
+ :param index: The index of the choice
+ :param logprobs: (Optional) The log probabilities for the tokens in the message
+ """
+
+ delta: OpenAIChoiceDelta
+ finish_reason: str
+ index: int
+ logprobs: Optional[OpenAIChoiceLogprobs] = None
+
+
@json_schema_type
class OpenAIChoice(BaseModel):
"""A choice from an OpenAI-compatible chat completion response.
:param message: The message from the model
:param finish_reason: The reason the model stopped generating
- :index: The index of the choice
- :logprobs: (Optional) The log probabilities for the tokens in the message
+ :param index: The index of the choice
+ :param logprobs: (Optional) The log probabilities for the tokens in the message
"""
message: OpenAIMessageParam
@@ -699,7 +732,7 @@ class OpenAIChatCompletionChunk(BaseModel):
"""
id: str
- choices: List[OpenAIChoice]
+ choices: List[OpenAIChunkChoice]
object: Literal["chat.completion.chunk"] = "chat.completion.chunk"
created: int
model: str
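
For reference, the new chunk models above compose as follows. This is a minimal, illustrative sketch: the class and field names are exactly those defined in the `inference.py` hunks above, while the literal values (ids, timestamps, tool arguments) are made up.

```python
from llama_stack.apis.inference.inference import (
    OpenAIChatCompletionChunk,
    OpenAIChatCompletionToolCall,
    OpenAIChatCompletionToolCallFunction,
    OpenAIChoiceDelta,
    OpenAIChunkChoice,
)

# With index/id/function now optional, a partial tool-call delta is representable.
partial_tool_call = OpenAIChatCompletionToolCall(
    index=0,
    function=OpenAIChatCompletionToolCallFunction(name="get_weather", arguments='{"city": "'),
)

chunk = OpenAIChatCompletionChunk(
    id="chatcmpl-123",
    choices=[
        OpenAIChunkChoice(
            delta=OpenAIChoiceDelta(role="assistant", tool_calls=[partial_tool_call]),
            finish_reason="tool_calls",
            index=0,
        )
    ],
    created=1700000000,
    model="groq/llama-3.3-70b-versatile",
)
print(chunk.model_dump_json(indent=2))
```
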
diff --git a/llama_stack/providers/remote/inference/groq/groq.py b/llama_stack/providers/remote/inference/groq/groq.py
index c8789434f..f3f14e9af 100644
--- a/llama_stack/providers/remote/inference/groq/groq.py
+++ b/llama_stack/providers/remote/inference/groq/groq.py
@@ -4,8 +4,24 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
+from typing import Any, AsyncIterator, Dict, List, Optional, Union
+
+from openai import AsyncOpenAI
+
+from llama_stack.apis.inference.inference import (
+ OpenAIChatCompletion,
+ OpenAIChatCompletionChunk,
+ OpenAIChoiceDelta,
+ OpenAIChunkChoice,
+ OpenAIMessageParam,
+ OpenAIResponseFormatParam,
+ OpenAISystemMessageParam,
+)
from llama_stack.providers.remote.inference.groq.config import GroqConfig
from llama_stack.providers.utils.inference.litellm_openai_mixin import LiteLLMOpenAIMixin
+from llama_stack.providers.utils.inference.openai_compat import (
+ prepare_openai_completion_params,
+)
from .models import MODEL_ENTRIES
@@ -21,9 +37,129 @@ class GroqInferenceAdapter(LiteLLMOpenAIMixin):
provider_data_api_key_field="groq_api_key",
)
self.config = config
+ self._openai_client = None
async def initialize(self):
await super().initialize()
async def shutdown(self):
await super().shutdown()
+ if self._openai_client:
+ await self._openai_client.close()
+ self._openai_client = None
+
+ def _get_openai_client(self) -> AsyncOpenAI:
+ if not self._openai_client:
+ self._openai_client = AsyncOpenAI(
+ base_url=f"{self.config.url}/openai/v1",
+ api_key=self.config.api_key,
+ )
+ return self._openai_client
+
+ async def openai_chat_completion(
+ self,
+ model: str,
+ messages: List[OpenAIMessageParam],
+ frequency_penalty: Optional[float] = None,
+ function_call: Optional[Union[str, Dict[str, Any]]] = None,
+ functions: Optional[List[Dict[str, Any]]] = None,
+ logit_bias: Optional[Dict[str, float]] = None,
+ logprobs: Optional[bool] = None,
+ max_completion_tokens: Optional[int] = None,
+ max_tokens: Optional[int] = None,
+ n: Optional[int] = None,
+ parallel_tool_calls: Optional[bool] = None,
+ presence_penalty: Optional[float] = None,
+ response_format: Optional[OpenAIResponseFormatParam] = None,
+ seed: Optional[int] = None,
+ stop: Optional[Union[str, List[str]]] = None,
+ stream: Optional[bool] = None,
+ stream_options: Optional[Dict[str, Any]] = None,
+ temperature: Optional[float] = None,
+ tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
+ tools: Optional[List[Dict[str, Any]]] = None,
+ top_logprobs: Optional[int] = None,
+ top_p: Optional[float] = None,
+ user: Optional[str] = None,
+ ) -> Union[OpenAIChatCompletion, AsyncIterator[OpenAIChatCompletionChunk]]:
+ model_obj = await self.model_store.get_model(model)
+
+ # Groq does not support json_schema response format, so we need to convert it to json_object
+ if response_format and response_format.type == "json_schema":
+ response_format.type = "json_object"
+ schema = response_format.json_schema.get("schema", {})
+ response_format.json_schema = None
+ json_instructions = f"\nYour response should be a JSON object that matches the following schema: {schema}"
+ if messages and messages[0].role == "system":
+ messages[0].content = messages[0].content + json_instructions
+ else:
+ messages.insert(0, OpenAISystemMessageParam(content=json_instructions))
+
+ # Groq returns a 400 error if tools are provided but none are called
+ # So, set tool_choice to "required" to attempt to force a call
+ if tools and (not tool_choice or tool_choice == "auto"):
+ tool_choice = "required"
+
+ params = await prepare_openai_completion_params(
+ model=model_obj.provider_resource_id.replace("groq/", ""),
+ messages=messages,
+ frequency_penalty=frequency_penalty,
+ function_call=function_call,
+ functions=functions,
+ logit_bias=logit_bias,
+ logprobs=logprobs,
+ max_completion_tokens=max_completion_tokens,
+ max_tokens=max_tokens,
+ n=n,
+ parallel_tool_calls=parallel_tool_calls,
+ presence_penalty=presence_penalty,
+ response_format=response_format,
+ seed=seed,
+ stop=stop,
+ stream=stream,
+ stream_options=stream_options,
+ temperature=temperature,
+ tool_choice=tool_choice,
+ tools=tools,
+ top_logprobs=top_logprobs,
+ top_p=top_p,
+ user=user,
+ )
+
+ # Groq does not support streaming requests that set response_format
+ fake_stream = False
+ if stream and response_format:
+ params["stream"] = False
+ fake_stream = True
+
+ response = await self._get_openai_client().chat.completions.create(**params)
+
+ if fake_stream:
+ chunk_choices = []
+ for choice in response.choices:
+ delta = OpenAIChoiceDelta(
+ content=choice.message.content,
+ role=choice.message.role,
+ tool_calls=choice.message.tool_calls,
+ )
+ chunk_choice = OpenAIChunkChoice(
+ delta=delta,
+ finish_reason=choice.finish_reason,
+ index=choice.index,
+ logprobs=None,
+ )
+ chunk_choices.append(chunk_choice)
+ chunk = OpenAIChatCompletionChunk(
+ id=response.id,
+ choices=chunk_choices,
+ object="chat.completion.chunk",
+ created=response.created,
+ model=response.model,
+ )
+
+ async def _fake_stream_generator():
+ yield chunk
+
+ return _fake_stream_generator()
+ else:
+ return response
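
Taken together, the adapter above (a) rewrites `json_schema` response formats to `json_object` plus a system-prompt instruction, and (b) serves `stream=True` requests that also set `response_format` by making a non-streaming call and replaying it as a single chunk. A hedged, client-side sketch of exercising that path through the Llama Stack OpenAI-compatible endpoint follows; the base URL matches `tests/verifications/conf/groq-llama-stack.yaml` added in this change, while the running local server, registered model, and placeholder API key are assumptions.

```python
import asyncio

from openai import AsyncOpenAI


async def main() -> None:
    # Assumes a Llama Stack server on localhost:8321 with the groq provider configured.
    client = AsyncOpenAI(base_url="http://localhost:8321/v1/openai/v1", api_key="none")

    # stream=True plus a json_schema response_format exercises the "fake stream" branch:
    # the adapter downgrades the format to json_object, appends the schema to the system
    # prompt, performs a non-streaming Groq call, and yields the result as one chunk.
    stream = await client.chat.completions.create(
        model="groq/llama-3.3-70b-versatile",
        messages=[{"role": "user", "content": "Name a city and its country."}],
        response_format={
            "type": "json_schema",
            "json_schema": {
                "name": "city",
                "schema": {"type": "object", "properties": {"city": {"type": "string"}}},
            },
        },
        stream=True,
    )
    async for chunk in stream:
        print(chunk.choices[0].delta.content or "", end="")


asyncio.run(main())
```
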
diff --git a/llama_stack/providers/remote/inference/groq/models.py b/llama_stack/providers/remote/inference/groq/models.py
index d0c10ca62..0b4b81cfe 100644
--- a/llama_stack/providers/remote/inference/groq/models.py
+++ b/llama_stack/providers/remote/inference/groq/models.py
@@ -39,8 +39,16 @@ MODEL_ENTRIES = [
"groq/llama-4-scout-17b-16e-instruct",
CoreModelId.llama4_scout_17b_16e_instruct.value,
),
+ build_hf_repo_model_entry(
+ "groq/meta-llama/llama-4-scout-17b-16e-instruct",
+ CoreModelId.llama4_scout_17b_16e_instruct.value,
+ ),
build_hf_repo_model_entry(
"groq/llama-4-maverick-17b-128e-instruct",
CoreModelId.llama4_maverick_17b_128e_instruct.value,
),
+ build_hf_repo_model_entry(
+ "groq/meta-llama/llama-4-maverick-17b-128e-instruct",
+ CoreModelId.llama4_maverick_17b_128e_instruct.value,
+ ),
]
diff --git a/llama_stack/providers/utils/inference/litellm_openai_mixin.py b/llama_stack/providers/utils/inference/litellm_openai_mixin.py
index 95e8b767b..efe7031f5 100644
--- a/llama_stack/providers/utils/inference/litellm_openai_mixin.py
+++ b/llama_stack/providers/utils/inference/litellm_openai_mixin.py
@@ -298,7 +298,7 @@ class LiteLLMOpenAIMixin(
guided_choice=guided_choice,
prompt_logprobs=prompt_logprobs,
)
- return litellm.text_completion(**params)
+ return await litellm.atext_completion(**params)
async def openai_chat_completion(
self,
@@ -352,7 +352,7 @@ class LiteLLMOpenAIMixin(
top_p=top_p,
user=user,
)
- return litellm.completion(**params)
+ return await litellm.acompletion(**params)
async def batch_completion(
self,
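
The two changes above swap litellm's blocking entry points for their async counterparts, so the surrounding `async def` methods no longer block the event loop while waiting on the provider. A minimal sketch of the awaited call pattern (model id and prompt are placeholders):

```python
import asyncio

import litellm


async def ask(model: str, prompt: str) -> str:
    # litellm.acompletion is the async counterpart of litellm.completion.
    resp = await litellm.acompletion(
        model=model,
        messages=[{"role": "user", "content": prompt}],
    )
    return resp.choices[0].message.content


# Example (placeholder model id):
# print(asyncio.run(ask("groq/llama-3.3-70b-versatile", "Hello")))
```
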
diff --git a/llama_stack/providers/utils/inference/openai_compat.py b/llama_stack/providers/utils/inference/openai_compat.py
index 2fcfa341e..d98261abb 100644
--- a/llama_stack/providers/utils/inference/openai_compat.py
+++ b/llama_stack/providers/utils/inference/openai_compat.py
@@ -1354,14 +1354,7 @@ class OpenAIChatCompletionToLlamaStackMixin:
i = 0
async for chunk in response:
event = chunk.event
- if event.stop_reason == StopReason.end_of_turn:
- finish_reason = "stop"
- elif event.stop_reason == StopReason.end_of_message:
- finish_reason = "eos"
- elif event.stop_reason == StopReason.out_of_tokens:
- finish_reason = "length"
- else:
- finish_reason = None
+ finish_reason = _convert_stop_reason_to_openai_finish_reason(event.stop_reason)
if isinstance(event.delta, TextDelta):
text_delta = event.delta.text
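
The replaced if/elif chain is folded into `_convert_stop_reason_to_openai_finish_reason`, a helper that already exists elsewhere in `openai_compat.py`. Below is a self-contained sketch of the mapping the removed branches encoded; the `StopReason` enum here is a stand-in for the project's own type, and the real helper may handle additional cases differently.

```python
from enum import Enum
from typing import Optional


class StopReason(Enum):
    # Stand-in for llama_stack's StopReason; illustration only.
    end_of_turn = "end_of_turn"
    end_of_message = "end_of_message"
    out_of_tokens = "out_of_tokens"


def convert_stop_reason_to_openai_finish_reason(stop_reason: Optional[StopReason]) -> Optional[str]:
    # Mirrors the branches removed in the hunk above.
    return {
        StopReason.end_of_turn: "stop",
        StopReason.end_of_message: "eos",
        StopReason.out_of_tokens: "length",
    }.get(stop_reason)


assert convert_stop_reason_to_openai_finish_reason(StopReason.out_of_tokens) == "length"
assert convert_stop_reason_to_openai_finish_reason(None) is None
```
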
diff --git a/llama_stack/templates/dev/run.yaml b/llama_stack/templates/dev/run.yaml
index ea3b7252a..0dd056405 100644
--- a/llama_stack/templates/dev/run.yaml
+++ b/llama_stack/templates/dev/run.yaml
@@ -386,6 +386,16 @@ models:
provider_id: groq
provider_model_id: groq/llama-4-scout-17b-16e-instruct
model_type: llm
+- metadata: {}
+ model_id: groq/meta-llama/llama-4-scout-17b-16e-instruct
+ provider_id: groq
+ provider_model_id: groq/meta-llama/llama-4-scout-17b-16e-instruct
+ model_type: llm
+- metadata: {}
+ model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct
+ provider_id: groq
+ provider_model_id: groq/meta-llama/llama-4-scout-17b-16e-instruct
+ model_type: llm
- metadata: {}
model_id: groq/llama-4-maverick-17b-128e-instruct
provider_id: groq
@@ -396,6 +406,16 @@ models:
provider_id: groq
provider_model_id: groq/llama-4-maverick-17b-128e-instruct
model_type: llm
+- metadata: {}
+ model_id: groq/meta-llama/llama-4-maverick-17b-128e-instruct
+ provider_id: groq
+ provider_model_id: groq/meta-llama/llama-4-maverick-17b-128e-instruct
+ model_type: llm
+- metadata: {}
+ model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct
+ provider_id: groq
+ provider_model_id: groq/meta-llama/llama-4-maverick-17b-128e-instruct
+ model_type: llm
- metadata:
embedding_dimension: 384
model_id: all-MiniLM-L6-v2
diff --git a/llama_stack/templates/groq/run.yaml b/llama_stack/templates/groq/run.yaml
index f557e64fd..444452dcb 100644
--- a/llama_stack/templates/groq/run.yaml
+++ b/llama_stack/templates/groq/run.yaml
@@ -158,6 +158,16 @@ models:
provider_id: groq
provider_model_id: groq/llama-4-scout-17b-16e-instruct
model_type: llm
+- metadata: {}
+ model_id: groq/meta-llama/llama-4-scout-17b-16e-instruct
+ provider_id: groq
+ provider_model_id: groq/meta-llama/llama-4-scout-17b-16e-instruct
+ model_type: llm
+- metadata: {}
+ model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct
+ provider_id: groq
+ provider_model_id: groq/meta-llama/llama-4-scout-17b-16e-instruct
+ model_type: llm
- metadata: {}
model_id: groq/llama-4-maverick-17b-128e-instruct
provider_id: groq
@@ -168,6 +178,16 @@ models:
provider_id: groq
provider_model_id: groq/llama-4-maverick-17b-128e-instruct
model_type: llm
+- metadata: {}
+ model_id: groq/meta-llama/llama-4-maverick-17b-128e-instruct
+ provider_id: groq
+ provider_model_id: groq/meta-llama/llama-4-maverick-17b-128e-instruct
+ model_type: llm
+- metadata: {}
+ model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct
+ provider_id: groq
+ provider_model_id: groq/meta-llama/llama-4-maverick-17b-128e-instruct
+ model_type: llm
- metadata:
embedding_dimension: 384
model_id: all-MiniLM-L6-v2
diff --git a/llama_stack/templates/verification/run.yaml b/llama_stack/templates/verification/run.yaml
index b6c2ca98d..454ecba5b 100644
--- a/llama_stack/templates/verification/run.yaml
+++ b/llama_stack/templates/verification/run.yaml
@@ -474,6 +474,16 @@ models:
provider_id: groq-openai-compat
provider_model_id: groq/llama-4-scout-17b-16e-instruct
model_type: llm
+- metadata: {}
+ model_id: groq/meta-llama/llama-4-scout-17b-16e-instruct
+ provider_id: groq-openai-compat
+ provider_model_id: groq/meta-llama/llama-4-scout-17b-16e-instruct
+ model_type: llm
+- metadata: {}
+ model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct
+ provider_id: groq-openai-compat
+ provider_model_id: groq/meta-llama/llama-4-scout-17b-16e-instruct
+ model_type: llm
- metadata: {}
model_id: groq/llama-4-maverick-17b-128e-instruct
provider_id: groq-openai-compat
@@ -484,6 +494,16 @@ models:
provider_id: groq-openai-compat
provider_model_id: groq/llama-4-maverick-17b-128e-instruct
model_type: llm
+- metadata: {}
+ model_id: groq/meta-llama/llama-4-maverick-17b-128e-instruct
+ provider_id: groq-openai-compat
+ provider_model_id: groq/meta-llama/llama-4-maverick-17b-128e-instruct
+ model_type: llm
+- metadata: {}
+ model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct
+ provider_id: groq-openai-compat
+ provider_model_id: groq/meta-llama/llama-4-maverick-17b-128e-instruct
+ model_type: llm
- metadata: {}
model_id: Meta-Llama-3.1-8B-Instruct
provider_id: sambanova-openai-compat
diff --git a/tests/verifications/conf/groq-llama-stack.yaml b/tests/verifications/conf/groq-llama-stack.yaml
new file mode 100644
index 000000000..fd5e9abec
--- /dev/null
+++ b/tests/verifications/conf/groq-llama-stack.yaml
@@ -0,0 +1,14 @@
+base_url: http://localhost:8321/v1/openai/v1
+api_key_var: GROQ_API_KEY
+models:
+- groq/llama-3.3-70b-versatile
+- groq/llama-4-scout-17b-16e-instruct
+- groq/llama-4-maverick-17b-128e-instruct
+model_display_names:
+ groq/llama-3.3-70b-versatile: Llama-3.3-70B-Instruct
+ groq/llama-4-scout-17b-16e-instruct: Llama-4-Scout-Instruct
+ groq/llama-4-maverick-17b-128e-instruct: Llama-4-Maverick-Instruct
+test_exclusions:
+ groq/llama-3.3-70b-versatile:
+ - test_chat_non_streaming_image
+ - test_chat_streaming_image
diff --git a/tests/verifications/conf/groq.yaml b/tests/verifications/conf/groq.yaml
index 7871036dc..76b1244ae 100644
--- a/tests/verifications/conf/groq.yaml
+++ b/tests/verifications/conf/groq.yaml
@@ -2,12 +2,12 @@ base_url: https://api.groq.com/openai/v1
api_key_var: GROQ_API_KEY
models:
- llama-3.3-70b-versatile
-- llama-4-scout-17b-16e-instruct
-- llama-4-maverick-17b-128e-instruct
+- meta-llama/llama-4-scout-17b-16e-instruct
+- meta-llama/llama-4-maverick-17b-128e-instruct
model_display_names:
llama-3.3-70b-versatile: Llama-3.3-70B-Instruct
- llama-4-scout-17b-16e-instruct: Llama-4-Scout-Instruct
- llama-4-maverick-17b-128e-instruct: Llama-4-Maverick-Instruct
+ meta-llama/llama-4-scout-17b-16e-instruct: Llama-4-Scout-Instruct
+ meta-llama/llama-4-maverick-17b-128e-instruct: Llama-4-Maverick-Instruct
test_exclusions:
llama-3.3-70b-versatile:
- test_chat_non_streaming_image
diff --git a/tests/verifications/conf/openai-llama-stack.yaml b/tests/verifications/conf/openai-llama-stack.yaml
index ee116dcf0..de35439ae 100644
--- a/tests/verifications/conf/openai-llama-stack.yaml
+++ b/tests/verifications/conf/openai-llama-stack.yaml
@@ -1,9 +1,9 @@
base_url: http://localhost:8321/v1/openai/v1
api_key_var: OPENAI_API_KEY
models:
-- gpt-4o
-- gpt-4o-mini
+- openai/gpt-4o
+- openai/gpt-4o-mini
model_display_names:
- gpt-4o: gpt-4o
- gpt-4o-mini: gpt-4o-mini
+ openai/gpt-4o: gpt-4o
+ openai/gpt-4o-mini: gpt-4o-mini
test_exclusions: {}
diff --git a/tests/verifications/generate_report.py b/tests/verifications/generate_report.py
index c1eac8a33..b39c3fd19 100755
--- a/tests/verifications/generate_report.py
+++ b/tests/verifications/generate_report.py
@@ -75,6 +75,7 @@ PROVIDER_ORDER = [
"openai",
"together-llama-stack",
"fireworks-llama-stack",
+ "groq-llama-stack",
"openai-llama-stack",
]
diff --git a/tests/verifications/openai-api-verification-run.yaml b/tests/verifications/openai-api-verification-run.yaml
index 0e8b99e4f..71885d058 100644
--- a/tests/verifications/openai-api-verification-run.yaml
+++ b/tests/verifications/openai-api-verification-run.yaml
@@ -17,6 +17,11 @@ providers:
config:
url: https://api.fireworks.ai/inference/v1
api_key: ${env.FIREWORKS_API_KEY}
+ - provider_id: groq
+ provider_type: remote::groq
+ config:
+ url: https://api.groq.com
+ api_key: ${env.GROQ_API_KEY}
- provider_id: openai
provider_type: remote::openai
config:
@@ -98,6 +103,21 @@ models:
provider_id: fireworks
provider_model_id: accounts/fireworks/models/llama4-maverick-instruct-basic
model_type: llm
+- metadata: {}
+ model_id: groq/llama-3.3-70b-versatile
+ provider_id: groq
+ provider_model_id: groq/llama-3.3-70b-versatile
+ model_type: llm
+- metadata: {}
+ model_id: groq/llama-4-scout-17b-16e-instruct
+ provider_id: groq
+ provider_model_id: groq/meta-llama/llama-4-scout-17b-16e-instruct
+ model_type: llm
+- metadata: {}
+ model_id: groq/llama-4-maverick-17b-128e-instruct
+ provider_id: groq
+ provider_model_id: groq/meta-llama/llama-4-maverick-17b-128e-instruct
+ model_type: llm
- metadata: {}
model_id: openai/gpt-4o
provider_id: openai