diff --git a/.coveragerc b/.coveragerc new file mode 100644 index 000000000..e16c2e461 --- /dev/null +++ b/.coveragerc @@ -0,0 +1,6 @@ +[run] +omit = + */tests/* + */llama_stack/providers/* + */llama_stack/templates/* + .venv/* diff --git a/.github/workflows/install-script-ci.yml b/.github/workflows/install-script-ci.yml new file mode 100644 index 000000000..2eb234c77 --- /dev/null +++ b/.github/workflows/install-script-ci.yml @@ -0,0 +1,26 @@ +name: Installer CI + +on: + pull_request: + paths: + - 'install.sh' + push: + paths: + - 'install.sh' + schedule: + - cron: '0 2 * * *' # every day at 02:00 UTC + +jobs: + lint: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # 4.2.2 + - name: Run ShellCheck on install.sh + run: shellcheck install.sh + smoke-test: + needs: lint + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # 4.2.2 + - name: Run installer end-to-end + run: ./install.sh diff --git a/.github/workflows/integration-tests.yml b/.github/workflows/integration-tests.yml index 0eb252695..f54bed839 100644 --- a/.github/workflows/integration-tests.yml +++ b/.github/workflows/integration-tests.yml @@ -6,7 +6,6 @@ on: pull_request: branches: [ main ] paths: - - 'distributions/**' - 'llama_stack/**' - 'tests/integration/**' - 'uv.lock' diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml index 17a42dd26..173d64dca 100644 --- a/.github/workflows/pre-commit.yml +++ b/.github/workflows/pre-commit.yml @@ -18,7 +18,7 @@ jobs: uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - name: Set up Python - uses: actions/setup-python@8d9ed9ac5c53483de85588cdf95a591a75ab9f55 # v5.5.0 + uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0 with: python-version: '3.11' cache: pip diff --git a/.github/workflows/providers-build.yml b/.github/workflows/providers-build.yml index ee532a94a..60e9d1bcd 100644 --- a/.github/workflows/providers-build.yml +++ b/.github/workflows/providers-build.yml @@ -51,7 +51,7 @@ jobs: uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - name: Set up Python - uses: actions/setup-python@8d9ed9ac5c53483de85588cdf95a591a75ab9f55 # v5.5.0 + uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0 with: python-version: '3.10' @@ -86,15 +86,15 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout repository - uses: actions/checkout@v4 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - name: Set up Python - uses: actions/setup-python@v5 + uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0 with: python-version: '3.10' - name: Install uv - uses: astral-sh/setup-uv@v5 + uses: astral-sh/setup-uv@0c5e2b8115b80b4c7c5ddf6ffdd634974642d182 # v5.4.1 with: python-version: "3.10" @@ -107,3 +107,41 @@ jobs: - name: Build a single provider run: | USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. 
uv run llama stack build --image-type venv --image-name test --providers inference=remote::ollama + + build-custom-container-distribution: + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + + - name: Set up Python + uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0 + with: + python-version: '3.10' + + - name: Install uv + uses: astral-sh/setup-uv@0c5e2b8115b80b4c7c5ddf6ffdd634974642d182 # v5.4.1 + with: + python-version: "3.10" + + - name: Install LlamaStack + run: | + uv venv + source .venv/bin/activate + uv pip install -e . + + - name: Build a single provider + run: | + yq -i '.image_type = "container"' llama_stack/templates/dev/build.yaml + yq -i '.image_name = "test"' llama_stack/templates/dev/build.yaml + USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. uv run llama stack build --config llama_stack/templates/dev/build.yaml + + - name: Inspect the container image entrypoint + run: | + IMAGE_ID=$(docker images --format "{{.Repository}}:{{.Tag}}" | head -n 1) + entrypoint=$(docker inspect --format '{{ .Config.Entrypoint }}' $IMAGE_ID) + echo "Entrypoint: $entrypoint" + if [ "$entrypoint" != "[python -m llama_stack.distribution.server.server --config /app/run.yaml]" ]; then + echo "Entrypoint is not correct" + exit 1 + fi diff --git a/.github/workflows/test-external-providers.yml b/.github/workflows/test-external-providers.yml index 2ead8f845..37f5c45ab 100644 --- a/.github/workflows/test-external-providers.yml +++ b/.github/workflows/test-external-providers.yml @@ -5,10 +5,22 @@ on: branches: [ main ] pull_request: branches: [ main ] + paths: + - 'llama_stack/**' + - 'tests/integration/**' + - 'uv.lock' + - 'pyproject.toml' + - 'requirements.txt' + - '.github/workflows/test-external-providers.yml' # This workflow jobs: test-external-providers: runs-on: ubuntu-latest + strategy: + matrix: + image-type: [venv] + # We don't do container yet, it's tricky to install a package from the host into the + # container and point 'uv pip install' to the correct path... steps: - name: Checkout repository uses: actions/checkout@v4 @@ -35,17 +47,25 @@ jobs: uv sync --extra dev --extra test uv pip install -e . - - name: Install Ollama custom provider + - name: Apply image type to config file + run: | + yq -i '.image_type = "${{ matrix.image-type }}"' tests/external-provider/llama-stack-provider-ollama/custom-distro.yaml + cat tests/external-provider/llama-stack-provider-ollama/custom-distro.yaml + + - name: Setup directory for Ollama custom provider run: | mkdir -p tests/external-provider/llama-stack-provider-ollama/src/ cp -a llama_stack/providers/remote/inference/ollama/ tests/external-provider/llama-stack-provider-ollama/src/llama_stack_provider_ollama - uv pip install tests/external-provider/llama-stack-provider-ollama - name: Create provider configuration run: | mkdir -p /tmp/providers.d/remote/inference cp tests/external-provider/llama-stack-provider-ollama/custom_ollama.yaml /tmp/providers.d/remote/inference/custom_ollama.yaml + - name: Build distro from config file + run: | + USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. uv run llama stack build --config tests/external-provider/llama-stack-provider-ollama/custom-distro.yaml + - name: Wait for Ollama to start run: | echo "Waiting for Ollama..." 
@@ -62,11 +82,13 @@ jobs: exit 1 - name: Start Llama Stack server in background + if: ${{ matrix.image-type == 'venv' }} env: INFERENCE_MODEL: "meta-llama/Llama-3.2-3B-Instruct" run: | - source .venv/bin/activate - nohup uv run llama stack run tests/external-provider/llama-stack-provider-ollama/run.yaml --image-type venv > server.log 2>&1 & + source ci-test/bin/activate + uv run pip list + nohup uv run --active llama stack run tests/external-provider/llama-stack-provider-ollama/run.yaml --image-type ${{ matrix.image-type }} > server.log 2>&1 & - name: Wait for Llama Stack server to be ready run: | diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml index 4b0c58b99..1f6356281 100644 --- a/.github/workflows/unit-tests.yml +++ b/.github/workflows/unit-tests.yml @@ -6,7 +6,6 @@ on: pull_request: branches: [ main ] paths: - - 'distributions/**' - 'llama_stack/**' - 'tests/unit/**' - 'uv.lock' @@ -34,7 +33,7 @@ jobs: - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - name: Set up Python ${{ matrix.python }} - uses: actions/setup-python@8d9ed9ac5c53483de85588cdf95a591a75ab9f55 # v5.5.0 + uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0 with: python-version: ${{ matrix.python }} diff --git a/.github/workflows/update-readthedocs.yml b/.github/workflows/update-readthedocs.yml index 794a727be..f8dce781e 100644 --- a/.github/workflows/update-readthedocs.yml +++ b/.github/workflows/update-readthedocs.yml @@ -36,7 +36,7 @@ jobs: uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - name: Set up Python - uses: actions/setup-python@8d9ed9ac5c53483de85588cdf95a591a75ab9f55 # v5.5.0 + uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0 with: python-version: '3.11' diff --git a/CHANGELOG.md b/CHANGELOG.md index 5086094ad..373e6b4fb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,33 @@ # Changelog +# v0.2.3 +Published on: 2025-04-25T22:46:21Z + +## Highlights + +* OpenAI compatible inference endpoints and client-SDK support. `client.chat.completions.create()` now works. +* significant improvements and functionality added to the NVIDIA distribution +* many improvements to the test verification suite.
+* new inference providers: Ramalama, IBM WatsonX +* many improvements to the Playground UI + + +--- + +# v0.2.2 +Published on: 2025-04-13T01:19:49Z + +## Main changes + +- Bring Your Own Provider (@leseb) - use out-of-tree provider code to execute the distribution server +- OpenAI compatible inference API in progress (@bbrowning) +- Provider verifications (@ehhuang) +- Many updates and fixes to playground +- Several llama4 related fixes + + +--- + # v0.2.1 Published on: 2025-04-05T23:13:00Z diff --git a/README.md b/README.md index 8c201e43d..b2b2d12d9 100644 --- a/README.md +++ b/README.md @@ -70,6 +70,13 @@ As more providers start supporting Llama 4, you can use them in Llama Stack as w +### πŸš€ One-Line Installer πŸš€ + +To try Llama Stack locally, run: + +```bash +curl -LsSf https://github.com/meta-llama/llama-stack/raw/main/install.sh | sh +``` ### Overview @@ -119,6 +126,7 @@ Here is a list of the various API providers and available distributions that can | OpenAI | Hosted | | βœ… | | | | | Anthropic | Hosted | | βœ… | | | | | Gemini | Hosted | | βœ… | | | | +| watsonx | Hosted | | βœ… | | | | ### Distributions @@ -128,7 +136,6 @@ A Llama Stack Distribution (or "distro") is a pre-configured bundle of provider | **Distribution** | **Llama Stack Docker** | Start This Distribution | |:---------------------------------------------:|:-------------------------------------------------------------------------------------------------------------------------------------------------------------:|:------------------------------------------------------------------------------------------------------------------------:| | Meta Reference | [llamastack/distribution-meta-reference-gpu](https://hub.docker.com/repository/docker/llamastack/distribution-meta-reference-gpu/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/meta-reference-gpu.html) | -| Meta Reference Quantized | [llamastack/distribution-meta-reference-quantized-gpu](https://hub.docker.com/repository/docker/llamastack/distribution-meta-reference-quantized-gpu/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/meta-reference-quantized-gpu.html) | | SambaNova | [llamastack/distribution-sambanova](https://hub.docker.com/repository/docker/llamastack/distribution-sambanova/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/sambanova.html) | | Cerebras | [llamastack/distribution-cerebras](https://hub.docker.com/repository/docker/llamastack/distribution-cerebras/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/cerebras.html) | | Ollama | [llamastack/distribution-ollama](https://hub.docker.com/repository/docker/llamastack/distribution-ollama/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/ollama.html) | diff --git a/docs/_static/js/detect_theme.js b/docs/_static/js/detect_theme.js index 484b2bb8b..712565ef7 100644 --- a/docs/_static/js/detect_theme.js +++ b/docs/_static/js/detect_theme.js @@ -1,9 +1,32 @@ document.addEventListener("DOMContentLoaded", function () { const prefersDark = window.matchMedia("(prefers-color-scheme: dark)").matches; const htmlElement = document.documentElement; - if (prefersDark) { - htmlElement.setAttribute("data-theme", "dark"); + + // Check if theme is saved in localStorage + const savedTheme = localStorage.getItem("sphinx-rtd-theme"); + + if (savedTheme) { + // Use the saved theme 
preference + htmlElement.setAttribute("data-theme", savedTheme); + document.body.classList.toggle("dark", savedTheme === "dark"); } else { - htmlElement.setAttribute("data-theme", "light"); + // Fall back to system preference + const theme = prefersDark ? "dark" : "light"; + htmlElement.setAttribute("data-theme", theme); + document.body.classList.toggle("dark", theme === "dark"); + // Save initial preference + localStorage.setItem("sphinx-rtd-theme", theme); } + + // Listen for theme changes from the existing toggle + const observer = new MutationObserver(function(mutations) { + mutations.forEach(function(mutation) { + if (mutation.attributeName === "data-theme") { + const currentTheme = htmlElement.getAttribute("data-theme"); + localStorage.setItem("sphinx-rtd-theme", currentTheme); + } + }); + }); + + observer.observe(htmlElement, { attributes: true }); }); diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html index 54d888441..4c5393947 100644 --- a/docs/_static/llama-stack-spec.html +++ b/docs/_static/llama-stack-spec.html @@ -5221,17 +5221,25 @@ "default": 10 }, "model": { - "type": "string" + "type": "string", + "description": "The model identifier to use for the agent" }, "instructions": { - "type": "string" + "type": "string", + "description": "The system instructions for the agent" + }, + "name": { + "type": "string", + "description": "Optional name for the agent, used in telemetry and identification" }, "enable_session_persistence": { "type": "boolean", - "default": false + "default": false, + "description": "Optional flag indicating whether session data has to be persisted" }, "response_format": { - "$ref": "#/components/schemas/ResponseFormat" + "$ref": "#/components/schemas/ResponseFormat", + "description": "Optional response format configuration" } }, "additionalProperties": false, @@ -5239,7 +5247,8 @@ "model", "instructions" ], - "title": "AgentConfig" + "title": "AgentConfig", + "description": "Configuration for an agent." }, "AgentTool": { "oneOf": [ @@ -8891,8 +8900,7 @@ }, "additionalProperties": false, "required": [ - "role", - "content" + "role" ], "title": "OpenAIAssistantMessageParam", "description": "A message containing the model's (assistant) response in an OpenAI-compatible chat completion request." diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml index cf657bff9..a24f1a9db 100644 --- a/docs/_static/llama-stack-spec.yaml +++ b/docs/_static/llama-stack-spec.yaml @@ -3686,18 +3686,29 @@ components: default: 10 model: type: string + description: >- + The model identifier to use for the agent instructions: type: string + description: The system instructions for the agent + name: + type: string + description: >- + Optional name for the agent, used in telemetry and identification enable_session_persistence: type: boolean default: false + description: >- + Optional flag indicating whether session data has to be persisted response_format: $ref: '#/components/schemas/ResponseFormat' + description: Optional response format configuration additionalProperties: false required: - model - instructions title: AgentConfig + description: Configuration for an agent. 
AgentTool: oneOf: - type: string @@ -6097,7 +6108,6 @@ components: additionalProperties: false required: - role - - content title: OpenAIAssistantMessageParam description: >- A message containing the model's (assistant) response in an OpenAI-compatible diff --git a/docs/source/building_applications/rag.md b/docs/source/building_applications/rag.md index 39d1ba333..db6303209 100644 --- a/docs/source/building_applications/rag.md +++ b/docs/source/building_applications/rag.md @@ -68,7 +68,8 @@ chunks_response = client.vector_io.query( ### Using the RAG Tool A better way to ingest documents is to use the RAG Tool. This tool allows you to ingest documents from URLs, files, etc. -and automatically chunks them into smaller pieces. +and automatically chunks them into smaller pieces. More examples of how to format a RAGDocument can be found in the +[appendix](#more-ragdocument-examples). ```python from llama_stack_client import RAGDocument @@ -178,3 +179,38 @@ for vector_db_id in client.vector_dbs.list(): print(f"Unregistering vector database: {vector_db_id.identifier}") client.vector_dbs.unregister(vector_db_id=vector_db_id.identifier) ``` + +### Appendix + +#### More RAGDocument Examples +```python +from llama_stack_client import RAGDocument +import base64 +import requests + +RAGDocument(document_id="num-0", content={"uri": "file://path/to/file"}) +RAGDocument(document_id="num-1", content="plain text") +RAGDocument( + document_id="num-2", + content={ + "type": "text", + "text": "plain text input", + }, # for inputs that should be treated as text explicitly +) +RAGDocument( + document_id="num-3", + content={ + "type": "image", + "image": {"url": {"uri": "https://mywebsite.com/image.jpg"}}, + }, +) +B64_ENCODED_IMAGE = base64.b64encode( + requests.get( + "https://raw.githubusercontent.com/meta-llama/llama-stack/refs/heads/main/docs/_static/llama-stack.png" + ).content +) +RAGDocument( + document_id="num-4", + content={"type": "image", "image": {"data": B64_ENCODED_IMAGE}}, +) +``` +For more strongly typed interactions, use the typed dicts found [here](https://github.com/meta-llama/llama-stack-client-python/blob/38cd91c9e396f2be0bec1ee96a19771582ba6f17/src/llama_stack_client/types/shared_params/document.py). diff --git a/docs/source/building_applications/tools.md b/docs/source/building_applications/tools.md index 94841a773..6da1c5a6a 100644 --- a/docs/source/building_applications/tools.md +++ b/docs/source/building_applications/tools.md @@ -41,7 +41,7 @@ client.toolgroups.register( The tool requires an API key which can be provided either in the configuration or through the request header `X-LlamaStack-Provider-Data`. The format of the header is `{"_api_key": }`. - +> **NOTE:** When using Tavily Search and Bing Search, the inference output will still display "Brave Search." This is because Llama models have been trained with Brave Search as a built-in tool. Tavily and Bing are just being used in lieu of Brave Search. #### Code Interpreter @@ -214,3 +214,69 @@ response = agent.create_turn( session_id=session_id, ) ``` +## Simple Example 2: Using an Agent with the Web Search Tool +1. Start by registering a Tavily API key at [Tavily](https://tavily.com/). +2. [Optional] Provide the API key directly to the Llama Stack server +```bash +export TAVILY_SEARCH_API_KEY="your key" +``` +and pass it when starting the server: +```bash +--env TAVILY_SEARCH_API_KEY=${TAVILY_SEARCH_API_KEY} +``` +3. Run the following script.
+```python +from llama_stack_client.lib.agents.agent import Agent +from llama_stack_client.types.agent_create_params import AgentConfig +from llama_stack_client.lib.agents.event_logger import EventLogger +from llama_stack_client import LlamaStackClient + +client = LlamaStackClient( + base_url="http://localhost:8321", + provider_data={ + "tavily_search_api_key": "your_TAVILY_SEARCH_API_KEY" + }, # Set this from the client side. No need to provide it if it has already been configured on the Llama Stack server. +) + +agent = Agent( + client, + model="meta-llama/Llama-3.2-3B-Instruct", + instructions=( + "You are a web search assistant. You must use the websearch tool to look up the most current and precise information available. " + ), + tools=["builtin::websearch"], +) + +session_id = agent.create_session("websearch-session") + +response = agent.create_turn( + messages=[ + {"role": "user", "content": "How did the USA perform in the last Olympics?"} + ], + session_id=session_id, +) +for log in EventLogger().log(response): + log.print() +``` + +## Simple Example 3: Using an Agent with the WolframAlpha Tool +1. Start by registering for a WolframAlpha API key at [WolframAlpha Developer Portal](https://developer.wolframalpha.com/access). +2. Provide the API key either when starting the Llama Stack server: + ```bash + --env WOLFRAM_ALPHA_API_KEY=${WOLFRAM_ALPHA_API_KEY} + ``` + or from the client side: + ```python + client = LlamaStackClient( + base_url="http://localhost:8321", + provider_data={"wolfram_alpha_api_key": wolfram_api_key}, + ) + ``` +3. Configure the tools in the Agent by setting `tools=["builtin::wolfram_alpha"]`. +4. Example user query: + ```python + response = agent.create_turn( + messages=[{"role": "user", "content": "Solve x^2 + 2x + 1 = 0 using WolframAlpha"}], + session_id=session_id, + ) + ``` diff --git a/docs/source/distributions/building_distro.md b/docs/source/distributions/building_distro.md index ad5d3bff4..56b8d30a8 100644 --- a/docs/source/distributions/building_distro.md +++ b/docs/source/distributions/building_distro.md @@ -109,8 +109,6 @@ llama stack build --list-templates +------------------------------+-----------------------------------------------------------------------------+ | nvidia | Use NVIDIA NIM for running LLM inference | +------------------------------+-----------------------------------------------------------------------------+ -| meta-reference-quantized-gpu | Use Meta Reference with fp8, int4 quantization for running LLM inference | -+------------------------------+-----------------------------------------------------------------------------+ | cerebras | Use Cerebras for running LLM inference | +------------------------------+-----------------------------------------------------------------------------+ | ollama | Use (an external) Ollama server for running LLM inference | @@ -176,7 +174,11 @@ distribution_spec: safety: inline::llama-guard agents: inline::meta-reference telemetry: inline::meta-reference +image_name: ollama image_type: conda + +# If some providers are external, you can specify the path to the implementation +external_providers_dir: /etc/llama-stack/providers.d ``` ``` llama stack build --config llama_stack/templates/ollama/build.yaml ``` ::: +:::{tab-item} Building with External Providers + +Llama Stack supports external providers that live outside of the main codebase. This allows you to create and maintain your own providers independently or use community-provided providers.
+ +To build a distribution with external providers, you need to: + +1. Configure the `external_providers_dir` in your build configuration file: + +```yaml +# Example my-external-stack.yaml with external providers +version: '2' +distribution_spec: + description: Custom distro for CI tests + providers: + inference: + - remote::custom_ollama +# Add more providers as needed +image_type: container +image_name: ci-test +# Path to external provider implementations +external_providers_dir: /etc/llama-stack/providers.d +``` + +Here's an example for a custom Ollama provider: + +```yaml +adapter: + adapter_type: custom_ollama + pip_packages: + - ollama + - aiohttp + - llama-stack-provider-ollama # This is the provider package + config_class: llama_stack_ollama_provider.config.OllamaImplConfig + module: llama_stack_ollama_provider +api_dependencies: [] +optional_api_dependencies: [] +``` + +The `pip_packages` section lists the Python packages required by the provider, as well as the +provider package itself. The package must be available on PyPI or can be provided from a local +directory or a git repository (git must be installed on the build environment). + +2. Build your distribution using the config file: + +``` +llama stack build --config my-external-stack.yaml +``` + +For more information on external providers, including directory structure, provider types, and implementation requirements, see the [External Providers documentation](../providers/external.md). +::: + :::{tab-item} Building Container ```{admonition} Podman Alternative diff --git a/docs/source/distributions/ondevice_distro/android_sdk.md b/docs/source/distributions/ondevice_distro/android_sdk.md index 4fa6eaf70..a097a2adf 100644 --- a/docs/source/distributions/ondevice_distro/android_sdk.md +++ b/docs/source/distributions/ondevice_distro/android_sdk.md @@ -24,7 +24,7 @@ The key files in the app are `ExampleLlamaStackLocalInference.kt`, `ExampleLlama Add the following dependency in your `build.gradle.kts` file: ``` dependencies { - implementation("com.llama.llamastack:llama-stack-client-kotlin:0.1.4.2") + implementation("com.llama.llamastack:llama-stack-client-kotlin:0.2.2") } ``` This will download jar files in your gradle cache in a directory like `~/.gradle/caches/modules-2/files-2.1/com.llama.llamastack/` @@ -37,11 +37,7 @@ For local inferencing, it is required to include the ExecuTorch library into you Include the ExecuTorch library by: 1. Download the `download-prebuilt-et-lib.sh` script file from the [llama-stack-client-kotlin-client-local](https://github.com/meta-llama/llama-stack-client-kotlin/tree/latest-release/llama-stack-client-kotlin-client-local/download-prebuilt-et-lib.sh) directory to your local machine. -2. Move the script to the top level of your Android app where the app directory resides: -

- -

- +2. Move the script to the top level of your Android app where the `app` directory resides. 3. Run `sh download-prebuilt-et-lib.sh` to create an `app/libs` directory and download the `executorch.aar` in that path. This generates an ExecuTorch library for the XNNPACK delegate. 4. Add the `executorch.aar` dependency in your `build.gradle.kts` file: ``` @@ -52,6 +48,8 @@ dependencies { } ``` +See other dependencies for the local RAG in Android app [README](https://github.com/meta-llama/llama-stack-client-kotlin/tree/latest-release/examples/android_app#quick-start). + ## Llama Stack APIs in Your Android App Breaking down the demo app, this section will show the core pieces that are used to initialize and run inference with Llama Stack using the Kotlin library. @@ -60,7 +58,7 @@ Start a Llama Stack server on localhost. Here is an example of how you can do th ``` conda create -n stack-fireworks python=3.10 conda activate stack-fireworks -pip install --no-cache llama-stack==0.1.4 +pip install --no-cache llama-stack==0.2.2 llama stack build --template fireworks --image-type conda export FIREWORKS_API_KEY= llama stack run fireworks --port 5050 diff --git a/docs/source/distributions/remote_hosted_distro/nvidia.md b/docs/source/distributions/remote_hosted_distro/nvidia.md deleted file mode 100644 index e7c926a36..000000000 --- a/docs/source/distributions/remote_hosted_distro/nvidia.md +++ /dev/null @@ -1,89 +0,0 @@ - -# NVIDIA Distribution - -The `llamastack/distribution-nvidia` distribution consists of the following provider configurations. - -| API | Provider(s) | -|-----|-------------| -| agents | `inline::meta-reference` | -| datasetio | `inline::localfs`, `remote::nvidia` | -| eval | `remote::nvidia` | -| inference | `remote::nvidia` | -| post_training | `remote::nvidia` | -| safety | `remote::nvidia` | -| scoring | `inline::basic` | -| telemetry | `inline::meta-reference` | -| tool_runtime | `inline::rag-runtime` | -| vector_io | `inline::faiss` | - - -### Environment Variables - -The following environment variables can be configured: - -- `NVIDIA_API_KEY`: NVIDIA API Key (default: ``) -- `NVIDIA_USER_ID`: NVIDIA User ID (default: `llama-stack-user`) -- `NVIDIA_DATASET_NAMESPACE`: NVIDIA Dataset Namespace (default: `default`) -- `NVIDIA_ACCESS_POLICIES`: NVIDIA Access Policies (default: `{}`) -- `NVIDIA_PROJECT_ID`: NVIDIA Project ID (default: `test-project`) -- `NVIDIA_CUSTOMIZER_URL`: NVIDIA Customizer URL (default: `https://customizer.api.nvidia.com`) -- `NVIDIA_OUTPUT_MODEL_DIR`: NVIDIA Output Model Directory (default: `test-example-model@v1`) -- `GUARDRAILS_SERVICE_URL`: URL for the NeMo Guardrails Service (default: `http://0.0.0.0:7331`) -- `NVIDIA_EVALUATOR_URL`: URL for the NeMo Evaluator Service (default: `http://0.0.0.0:7331`) -- `INFERENCE_MODEL`: Inference model (default: `Llama3.1-8B-Instruct`) -- `SAFETY_MODEL`: Name of the model to use for safety (default: `meta/llama-3.1-8b-instruct`) - -### Models - -The following models are available by default: - -- `meta/llama3-8b-instruct (aliases: meta-llama/Llama-3-8B-Instruct)` -- `meta/llama3-70b-instruct (aliases: meta-llama/Llama-3-70B-Instruct)` -- `meta/llama-3.1-8b-instruct (aliases: meta-llama/Llama-3.1-8B-Instruct)` -- `meta/llama-3.1-70b-instruct (aliases: meta-llama/Llama-3.1-70B-Instruct)` -- `meta/llama-3.1-405b-instruct (aliases: meta-llama/Llama-3.1-405B-Instruct-FP8)` -- `meta/llama-3.2-1b-instruct (aliases: meta-llama/Llama-3.2-1B-Instruct)` -- `meta/llama-3.2-3b-instruct (aliases: meta-llama/Llama-3.2-3B-Instruct)` -- 
`meta/llama-3.2-11b-vision-instruct (aliases: meta-llama/Llama-3.2-11B-Vision-Instruct)` -- `meta/llama-3.2-90b-vision-instruct (aliases: meta-llama/Llama-3.2-90B-Vision-Instruct)` -- `nvidia/llama-3.2-nv-embedqa-1b-v2 ` -- `nvidia/nv-embedqa-e5-v5 ` -- `nvidia/nv-embedqa-mistral-7b-v2 ` -- `snowflake/arctic-embed-l ` - - -### Prerequisite: API Keys - -Make sure you have access to a NVIDIA API Key. You can get one by visiting [https://build.nvidia.com/](https://build.nvidia.com/). - - -## Running Llama Stack with NVIDIA - -You can do this via Conda (build code) or Docker which has a pre-built image. - -### Via Docker - -This method allows you to get started quickly without having to build the distribution code. - -```bash -LLAMA_STACK_PORT=8321 -docker run \ - -it \ - --pull always \ - -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ - -v ./run.yaml:/root/my-run.yaml \ - llamastack/distribution-nvidia \ - --yaml-config /root/my-run.yaml \ - --port $LLAMA_STACK_PORT \ - --env NVIDIA_API_KEY=$NVIDIA_API_KEY -``` - -### Via Conda - -```bash -llama stack build --template nvidia --image-type conda -llama stack run ./run.yaml \ - --port 8321 \ - --env NVIDIA_API_KEY=$NVIDIA_API_KEY - --env INFERENCE_MODEL=$INFERENCE_MODEL -``` diff --git a/docs/source/distributions/remote_hosted_distro/watsonx.md b/docs/source/distributions/remote_hosted_distro/watsonx.md new file mode 100644 index 000000000..018dc2a3c --- /dev/null +++ b/docs/source/distributions/remote_hosted_distro/watsonx.md @@ -0,0 +1,88 @@ +--- +orphan: true +--- + +# watsonx Distribution + +```{toctree} +:maxdepth: 2 +:hidden: + +self +``` + +The `llamastack/distribution-watsonx` distribution consists of the following provider configurations. + +| API | Provider(s) | +|-----|-------------| +| agents | `inline::meta-reference` | +| datasetio | `remote::huggingface`, `inline::localfs` | +| eval | `inline::meta-reference` | +| inference | `remote::watsonx` | +| safety | `inline::llama-guard` | +| scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` | +| telemetry | `inline::meta-reference` | +| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::rag-runtime`, `remote::model-context-protocol` | +| vector_io | `inline::faiss` | + + + +### Environment Variables + +The following environment variables can be configured: + +- `LLAMASTACK_PORT`: Port for the Llama Stack distribution server (default: `5001`) +- `WATSONX_API_KEY`: watsonx API Key (default: ``) +- `WATSONX_PROJECT_ID`: watsonx Project ID (default: ``) + +### Models + +The following models are available by default: + +- `meta-llama/llama-3-3-70b-instruct (aliases: meta-llama/Llama-3.3-70B-Instruct)` +- `meta-llama/llama-2-13b-chat (aliases: meta-llama/Llama-2-13b)` +- `meta-llama/llama-3-1-70b-instruct (aliases: meta-llama/Llama-3.1-70B-Instruct)` +- `meta-llama/llama-3-1-8b-instruct (aliases: meta-llama/Llama-3.1-8B-Instruct)` +- `meta-llama/llama-3-2-11b-vision-instruct (aliases: meta-llama/Llama-3.2-11B-Vision-Instruct)` +- `meta-llama/llama-3-2-1b-instruct (aliases: meta-llama/Llama-3.2-1B-Instruct)` +- `meta-llama/llama-3-2-3b-instruct (aliases: meta-llama/Llama-3.2-3B-Instruct)` +- `meta-llama/llama-3-2-90b-vision-instruct (aliases: meta-llama/Llama-3.2-90B-Vision-Instruct)` +- `meta-llama/llama-guard-3-11b-vision (aliases: meta-llama/Llama-Guard-3-11B-Vision)` + + +### Prerequisite: API Keys + +Make sure you have access to a watsonx API Key. 
You can get one by referring to [watsonx.ai](https://www.ibm.com/docs/en/masv-and-l/maximo-manage/continuous-delivery?topic=setup-create-watsonx-api-key). + + +## Running Llama Stack with watsonx + +You can do this via Conda or venv (build code), or Docker which has a pre-built image. + +### Via Docker + +This method allows you to get started quickly without having to build the distribution code. + +```bash +LLAMA_STACK_PORT=5001 +docker run \ + -it \ + -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ + -v ./run.yaml:/root/my-run.yaml \ + llamastack/distribution-watsonx \ + --yaml-config /root/my-run.yaml \ + --port $LLAMA_STACK_PORT \ + --env WATSONX_API_KEY=$WATSONX_API_KEY \ + --env WATSONX_PROJECT_ID=$WATSONX_PROJECT_ID \ + --env WATSONX_BASE_URL=$WATSONX_BASE_URL +``` + +### Via Conda + +```bash +llama stack build --template watsonx --image-type conda +llama stack run ./run.yaml \ + --port $LLAMA_STACK_PORT \ + --env WATSONX_API_KEY=$WATSONX_API_KEY \ + --env WATSONX_PROJECT_ID=$WATSONX_PROJECT_ID +``` diff --git a/docs/source/distributions/self_hosted_distro/meta-reference-gpu.md b/docs/source/distributions/self_hosted_distro/meta-reference-gpu.md index b90f75347..f58d7bbee 100644 --- a/docs/source/distributions/self_hosted_distro/meta-reference-gpu.md +++ b/docs/source/distributions/self_hosted_distro/meta-reference-gpu.md @@ -81,6 +81,7 @@ LLAMA_STACK_PORT=8321 docker run \ -it \ --pull always \ + --gpus all \ -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ -v ~/.llama:/root/.llama \ llamastack/distribution-meta-reference-gpu \ @@ -94,6 +95,7 @@ If you are using Llama Stack Safety / Shield APIs, use: docker run \ -it \ --pull always \ + --gpus all \ -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ -v ~/.llama:/root/.llama \ llamastack/distribution-meta-reference-gpu \ diff --git a/docs/source/distributions/self_hosted_distro/meta-reference-quantized-gpu.md b/docs/source/distributions/self_hosted_distro/meta-reference-quantized-gpu.md deleted file mode 100644 index c3e2b4f2c..000000000 --- a/docs/source/distributions/self_hosted_distro/meta-reference-quantized-gpu.md +++ /dev/null @@ -1,123 +0,0 @@ ---- -orphan: true ---- - -# Meta Reference Quantized Distribution - -```{toctree} -:maxdepth: 2 -:hidden: - -self -``` - -The `llamastack/distribution-meta-reference-quantized-gpu` distribution consists of the following provider configurations: - -| API | Provider(s) | -|-----|-------------| -| agents | `inline::meta-reference` | -| datasetio | `remote::huggingface`, `inline::localfs` | -| eval | `inline::meta-reference` | -| inference | `inline::meta-reference-quantized` | -| safety | `inline::llama-guard` | -| scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` | -| telemetry | `inline::meta-reference` | -| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::rag-runtime`, `remote::model-context-protocol` | -| vector_io | `inline::faiss`, `remote::chromadb`, `remote::pgvector` | - - -The only difference vs. the `meta-reference-gpu` distribution is that it has support for more efficient inference -- with fp8, int4 quantization, etc. - -Note that you need access to nvidia GPUs to run this distribution. This distribution is not compatible with CPU-only machines or machines with AMD GPUs.
- -### Environment Variables - -The following environment variables can be configured: - -- `LLAMA_STACK_PORT`: Port for the Llama Stack distribution server (default: `8321`) -- `INFERENCE_MODEL`: Inference model loaded into the Meta Reference server (default: `meta-llama/Llama-3.2-3B-Instruct`) -- `INFERENCE_CHECKPOINT_DIR`: Directory containing the Meta Reference model checkpoint (default: `null`) - - -## Prerequisite: Downloading Models - -Please use `llama model list --downloaded` to check that you have llama model checkpoints downloaded in `~/.llama` before proceeding. See [installation guide](https://llama-stack.readthedocs.io/en/latest/references/llama_cli_reference/download_models.html) here to download the models. Run `llama model list` to see the available models to download, and `llama model download` to download the checkpoints. - -``` -$ llama model list --downloaded -┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━┓ -┃ Model ┃ Size ┃ Modified Time ┃ -┑━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━┩ -β”‚ Llama3.2-1B-Instruct:int4-qlora-eo8 β”‚ 1.53 GB β”‚ 2025-02-26 11:22:28 β”‚ -β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€ -β”‚ Llama3.2-1B β”‚ 2.31 GB β”‚ 2025-02-18 21:48:52 β”‚ -β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€ -β”‚ Prompt-Guard-86M β”‚ 0.02 GB β”‚ 2025-02-26 11:29:28 β”‚ -β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€ -β”‚ Llama3.2-3B-Instruct:int4-spinquant-eo8 β”‚ 3.69 GB β”‚ 2025-02-26 11:37:41 β”‚ -β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€ -β”‚ Llama3.2-3B β”‚ 5.99 GB β”‚ 2025-02-18 21:51:26 β”‚ -β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€ -β”‚ Llama3.1-8B β”‚ 14.97 GB β”‚ 2025-02-16 10:36:37 β”‚ -β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€ -β”‚ Llama3.2-1B-Instruct:int4-spinquant-eo8 β”‚ 1.51 GB β”‚ 2025-02-26 11:35:02 β”‚ -β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€ -β”‚ Llama-Guard-3-1B β”‚ 2.80 GB β”‚ 2025-02-26 11:20:46 β”‚ -β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€ -β”‚ Llama-Guard-3-1B:int4 β”‚ 0.43 GB β”‚ 2025-02-26 11:33:33 β”‚ 
-β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ -``` - -## Running the Distribution - -You can do this via Conda (build code) or Docker which has a pre-built image. - -### Via Docker - -This method allows you to get started quickly without having to build the distribution code. - -```bash -LLAMA_STACK_PORT=8321 -docker run \ - -it \ - --pull always \ - -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ - -v ~/.llama:/root/.llama \ - llamastack/distribution-meta-reference-quantized-gpu \ - --port $LLAMA_STACK_PORT \ - --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct -``` - -If you are using Llama Stack Safety / Shield APIs, use: - -```bash -docker run \ - -it \ - --pull always \ - -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ - -v ~/.llama:/root/.llama \ - llamastack/distribution-meta-reference-quantized-gpu \ - --port $LLAMA_STACK_PORT \ - --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \ - --env SAFETY_MODEL=meta-llama/Llama-Guard-3-1B -``` - -### Via Conda - -Make sure you have done `uv pip install llama-stack` and have the Llama Stack CLI available. - -```bash -llama stack build --template meta-reference-quantized-gpu --image-type conda -llama stack run distributions/meta-reference-quantized-gpu/run.yaml \ - --port $LLAMA_STACK_PORT \ - --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct -``` - -If you are using Llama Stack Safety / Shield APIs, use: - -```bash -llama stack run distributions/meta-reference-quantized-gpu/run-with-safety.yaml \ - --port $LLAMA_STACK_PORT \ - --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \ - --env SAFETY_MODEL=meta-llama/Llama-Guard-3-1B -``` diff --git a/docs/source/distributions/self_hosted_distro/nvidia.md b/docs/source/distributions/self_hosted_distro/nvidia.md index 365d34762..8679fd9be 100644 --- a/docs/source/distributions/self_hosted_distro/nvidia.md +++ b/docs/source/distributions/self_hosted_distro/nvidia.md @@ -22,10 +22,8 @@ The `llamastack/distribution-nvidia` distribution consists of the following prov The following environment variables can be configured: - `NVIDIA_API_KEY`: NVIDIA API Key (default: ``) -- `NVIDIA_USER_ID`: NVIDIA User ID (default: `llama-stack-user`) - `NVIDIA_APPEND_API_VERSION`: Whether to append the API version to the base_url (default: `True`) - `NVIDIA_DATASET_NAMESPACE`: NVIDIA Dataset Namespace (default: `default`) -- `NVIDIA_ACCESS_POLICIES`: NVIDIA Access Policies (default: `{}`) - `NVIDIA_PROJECT_ID`: NVIDIA Project ID (default: `test-project`) - `NVIDIA_CUSTOMIZER_URL`: NVIDIA Customizer URL (default: `https://customizer.api.nvidia.com`) - `NVIDIA_OUTPUT_MODEL_DIR`: NVIDIA Output Model Directory (default: `test-example-model@v1`) @@ -48,20 +46,91 @@ The following models are available by default: - `meta/llama-3.2-3b-instruct (aliases: meta-llama/Llama-3.2-3B-Instruct)` - `meta/llama-3.2-11b-vision-instruct (aliases: meta-llama/Llama-3.2-11B-Vision-Instruct)` - `meta/llama-3.2-90b-vision-instruct (aliases: meta-llama/Llama-3.2-90B-Vision-Instruct)` +- `meta/llama-3.3-70b-instruct (aliases: meta-llama/Llama-3.3-70B-Instruct)` - `nvidia/llama-3.2-nv-embedqa-1b-v2 ` - `nvidia/nv-embedqa-e5-v5 ` - `nvidia/nv-embedqa-mistral-7b-v2 ` - `snowflake/arctic-embed-l ` -### Prerequisite: API Keys +## Prerequisites +### NVIDIA API Keys -Make sure you have access to a NVIDIA API Key. 
You can get one by visiting [https://build.nvidia.com/](https://build.nvidia.com/). +Make sure you have access to a NVIDIA API Key. You can get one by visiting [https://build.nvidia.com/](https://build.nvidia.com/). Use this key for the `NVIDIA_API_KEY` environment variable. +### Deploy NeMo Microservices Platform +The NVIDIA NeMo microservices platform supports end-to-end microservice deployment of a complete AI flywheel on your Kubernetes cluster through the NeMo Microservices Helm Chart. Please reference the [NVIDIA NeMo Microservices documentation](https://docs.nvidia.com/nemo/microservices/latest/about/index.html) for platform prerequisites and instructions to install and deploy the platform. + +## Supported Services +Each Llama Stack API corresponds to a specific NeMo microservice. The core microservices (Customizer, Evaluator, Guardrails) are exposed by the same endpoint. The platform components (Data Store) are each exposed by separate endpoints. + +### Inference: NVIDIA NIM +NVIDIA NIM is used for running inference with registered models. There are two ways to access NVIDIA NIMs: + 1. Hosted (default): Preview APIs hosted at https://integrate.api.nvidia.com (Requires an API key) + 2. Self-hosted: NVIDIA NIMs that run on your own infrastructure. + +The deployed platform includes the NIM Proxy microservice, which is the service that provides access to your NIMs (for example, to run inference on a model). Set the `NVIDIA_BASE_URL` environment variable to use your NVIDIA NIM Proxy deployment. + +### Datasetio API: NeMo Data Store +The NeMo Data Store microservice serves as the default file storage solution for the NeMo microservices platform. It exposes APIs compatible with the Hugging Face Hub client (`HfApi`), so you can use the client to interact with the Data Store. The `NVIDIA_DATASETS_URL` environment variable should point to your NeMo Data Store endpoint. + +See the [NVIDIA Datasetio docs](/llama_stack/providers/remote/datasetio/nvidia/README.md) for supported features and example usage. + +### Eval API: NeMo Evaluator +The NeMo Evaluator microservice supports evaluation of LLMs. Launching an Evaluation job with NeMo Evaluator requires an Evaluation Config (an object that contains metadata needed by the job). A Llama Stack Benchmark maps to an Evaluation Config, so registering a Benchmark creates an Evaluation Config in NeMo Evaluator. The `NVIDIA_EVALUATOR_URL` environment variable should point to your NeMo Microservices endpoint. + +See the [NVIDIA Eval docs](/llama_stack/providers/remote/eval/nvidia/README.md) for supported features and example usage. + +### Post-Training API: NeMo Customizer +The NeMo Customizer microservice supports fine-tuning models. You can reference [this list of supported models](/llama_stack/providers/remote/post_training/nvidia/models.py) that can be fine-tuned using Llama Stack. The `NVIDIA_CUSTOMIZER_URL` environment variable should point to your NeMo Microservices endpoint. + +See the [NVIDIA Post-Training docs](/llama_stack/providers/remote/post_training/nvidia/README.md) for supported features and example usage. + +### Safety API: NeMo Guardrails +The NeMo Guardrails microservice sits between your application and the LLM, and adds checks and content moderation to a model. The `GUARDRAILS_SERVICE_URL` environment variable should point to your NeMo Microservices endpoint. + +See the NVIDIA Safety docs for supported features and example usage.
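To make the wiring above concrete, here is a minimal configuration sketch that points each service-specific environment variable at a single self-hosted NeMo Microservices deployment. The hostnames are placeholders (in the same style as the `nemo.test` host used in the deployment example below); substitute the ingress endpoints of your own cluster, and keep the hosted defaults for any service you are not self-hosting.

```sh
# Placeholder ingress hostnames for a self-hosted deployment -- replace with your own.
export NEMO_URL="http://nemo.test"              # Customizer, Evaluator, Guardrails
export NIM_URL="http://nim.test"                # NIM Proxy (inference)
export DATA_STORE_URL="http://data-store.test"  # NeMo Data Store

export NVIDIA_BASE_URL="$NIM_URL"               # Inference API -> NVIDIA NIM
export NVIDIA_DATASETS_URL="$DATA_STORE_URL"    # Datasetio API -> NeMo Data Store
export NVIDIA_EVALUATOR_URL="$NEMO_URL"         # Eval API -> NeMo Evaluator
export NVIDIA_CUSTOMIZER_URL="$NEMO_URL"        # Post-Training API -> NeMo Customizer
export GUARDRAILS_SERVICE_URL="$NEMO_URL"       # Safety API -> NeMo Guardrails
```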
+ +## Deploying models +In order to use a registered model with the Llama Stack APIs, ensure the corresponding NIM is deployed to your environment. For example, you can use the NIM Proxy microservice to deploy `meta/llama-3.2-1b-instruct`. + +Note: For improved inference speeds, we need to use NIM with `fast_outlines` guided decoding system (specified in the request body). This is the default if you deployed the platform with the NeMo Microservices Helm Chart. +```sh +# URL to NeMo NIM Proxy service +export NEMO_URL="http://nemo.test" + +curl --location "$NEMO_URL/v1/deployment/model-deployments" \ + -H 'accept: application/json' \ + -H 'Content-Type: application/json' \ + -d '{ + "name": "llama-3.2-1b-instruct", + "namespace": "meta", + "config": { + "model": "meta/llama-3.2-1b-instruct", + "nim_deployment": { + "image_name": "nvcr.io/nim/meta/llama-3.2-1b-instruct", + "image_tag": "1.8.3", + "pvc_size": "25Gi", + "gpu": 1, + "additional_envs": { + "NIM_GUIDED_DECODING_BACKEND": "fast_outlines" + } + } + } + }' +``` +This NIM deployment should take approximately 10 minutes to go live. [See the docs](https://docs.nvidia.com/nemo/microservices/latest/get-started/tutorials/deploy-nims.html) for more information on how to deploy a NIM and verify it's available for inference. + +You can also remove a deployed NIM to free up GPU resources, if needed. +```sh +export NEMO_URL="http://nemo.test" + +curl -X DELETE "$NEMO_URL/v1/deployment/model-deployments/meta/llama-3.1-8b-instruct" +``` ## Running Llama Stack with NVIDIA -You can do this via Conda (build code) or Docker which has a pre-built image. +You can do this via Conda or venv (build code), or Docker which has a pre-built image. ### Via Docker @@ -83,9 +152,23 @@ docker run \ ### Via Conda ```bash +INFERENCE_MODEL=meta-llama/Llama-3.1-8b-Instruct llama stack build --template nvidia --image-type conda llama stack run ./run.yaml \ --port 8321 \ - --env NVIDIA_API_KEY=$NVIDIA_API_KEY + --env NVIDIA_API_KEY=$NVIDIA_API_KEY \ + --env INFERENCE_MODEL=$INFERENCE_MODEL +``` + +### Via venv + +If you've set up your local development environment, you can also build the image using your local virtual environment. + +```bash +INFERENCE_MODEL=meta-llama/Llama-3.1-8b-Instruct +llama stack build --template nvidia --image-type venv +llama stack run ./run.yaml \ + --port 8321 \ + --env NVIDIA_API_KEY=$NVIDIA_API_KEY \ --env INFERENCE_MODEL=$INFERENCE_MODEL ``` diff --git a/docs/source/distributions/self_hosted_distro/remote-vllm.md b/docs/source/distributions/self_hosted_distro/remote-vllm.md index e18b5bf40..46df56008 100644 --- a/docs/source/distributions/self_hosted_distro/remote-vllm.md +++ b/docs/source/distributions/self_hosted_distro/remote-vllm.md @@ -41,10 +41,10 @@ The following environment variables can be configured: ## Setting up vLLM server -In the following sections, we'll use either AMD and NVIDIA GPUs to serve as hardware accelerators for the vLLM +In the following sections, we'll use AMD, NVIDIA or Intel GPUs to serve as hardware accelerators for the vLLM server, which acts as both the LLM inference provider and the safety provider. Note that vLLM also [supports many other hardware accelerators](https://docs.vllm.ai/en/latest/getting_started/installation.html) and -that we only use GPUs here for demonstration purposes. +that we only use GPUs here for demonstration purposes. 
Note that if you run into issues, you can add `--env VLLM_DEBUG_LOG_API_SERVER_RESPONSE=true` (available in vLLM v0.8.3 and above) to the `docker run` command to enable logging of API server responses for debugging. ### Setting up vLLM server on AMD GPU @@ -162,6 +162,55 @@ docker run \ --port $SAFETY_PORT ``` +### Setting up vLLM server on Intel GPU + +Refer to the [vLLM Documentation for XPU](https://docs.vllm.ai/en/v0.8.2/getting_started/installation/gpu.html?device=xpu) to get a vLLM endpoint. In addition to the vLLM-side setup, which covers installing vLLM from source or building the vLLM Docker container yourself, Intel provides a prebuilt vLLM container for systems with Intel GPUs supported by the PyTorch XPU backend: +- [intel/vllm](https://hub.docker.com/r/intel/vllm) + +Here is a sample script to start a vLLM server locally via Docker using the Intel-provided container: + +```bash +export INFERENCE_PORT=8000 +export INFERENCE_MODEL=meta-llama/Llama-3.2-1B-Instruct +export ZE_AFFINITY_MASK=0 + +docker run \ + --pull always \ + --device /dev/dri \ + -v /dev/dri/by-path:/dev/dri/by-path \ + -v ~/.cache/huggingface:/root/.cache/huggingface \ + --env "HUGGING_FACE_HUB_TOKEN=$HF_TOKEN" \ + --env ZE_AFFINITY_MASK=$ZE_AFFINITY_MASK \ + -p $INFERENCE_PORT:$INFERENCE_PORT \ + --ipc=host \ + intel/vllm:xpu \ + --gpu-memory-utilization 0.7 \ + --model $INFERENCE_MODEL \ + --port $INFERENCE_PORT +``` + +If you are using Llama Stack Safety / Shield APIs, you will also need to run another vLLM instance with a corresponding safety model like `meta-llama/Llama-Guard-3-1B`, using a script like: + +```bash +export SAFETY_PORT=8081 +export SAFETY_MODEL=meta-llama/Llama-Guard-3-1B +export ZE_AFFINITY_MASK=1 + +docker run \ + --pull always \ + --device /dev/dri \ + -v /dev/dri/by-path:/dev/dri/by-path \ + -v ~/.cache/huggingface:/root/.cache/huggingface \ + --env "HUGGING_FACE_HUB_TOKEN=$HF_TOKEN" \ + --env ZE_AFFINITY_MASK=$ZE_AFFINITY_MASK \ + -p $SAFETY_PORT:$SAFETY_PORT \ + --ipc=host \ + intel/vllm:xpu \ + --gpu-memory-utilization 0.7 \ + --model $SAFETY_MODEL \ + --port $SAFETY_PORT +``` + ## Running Llama Stack Now you are ready to run Llama Stack with vLLM as the inference provider. You can do this via Conda (build code) or Docker which has a pre-built image.
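Before pointing Llama Stack at these endpoints, it is worth confirming that the vLLM server(s) are actually serving. A minimal sanity-check sketch, assuming the `INFERENCE_PORT`/`SAFETY_PORT` values exported above and that the containers run on the local host, is to query each server's OpenAI-compatible model listing:

```bash
# The inference server should list $INFERENCE_MODEL
curl -s http://localhost:${INFERENCE_PORT}/v1/models

# If you started a separate safety server, it should list $SAFETY_MODEL
curl -s http://localhost:${SAFETY_PORT}/v1/models
```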
diff --git a/docs/source/providers/external.md b/docs/source/providers/external.md index 90fc77979..5aab5ee0f 100644 --- a/docs/source/providers/external.md +++ b/docs/source/providers/external.md @@ -50,9 +50,10 @@ Llama Stack supports two types of external providers: Here's a list of known external providers that you can use with Llama Stack: -| Type | Name | Description | Repository | -|------|------|-------------|------------| -| Remote | KubeFlow Training | Train models with KubeFlow | [llama-stack-provider-kft](https://github.com/opendatahub-io/llama-stack-provider-kft) | +| Name | Description | API | Type | Repository | +|------|-------------|-----|------|------------| +| KubeFlow Training | Train models with KubeFlow | Post Training | Remote | [llama-stack-provider-kft](https://github.com/opendatahub-io/llama-stack-provider-kft) | +| RamaLama | Inference models with RamaLama | Inference | Remote | [ramalama-stack](https://github.com/containers/ramalama-stack) | ### Remote Provider Specification diff --git a/docs/zero_to_hero_guide/00_Inference101.ipynb b/docs/zero_to_hero_guide/00_Inference101.ipynb index b3b781375..4f71f9f89 100644 --- a/docs/zero_to_hero_guide/00_Inference101.ipynb +++ b/docs/zero_to_hero_guide/00_Inference101.ipynb @@ -389,5 +389,7 @@ "pygments_lexer": "ipython3", "version": "3.10.15" } - } + }, + "nbformat": 4, + "nbformat_minor": 5 } diff --git a/docs/zero_to_hero_guide/01_Local_Cloud_Inference101.ipynb b/docs/zero_to_hero_guide/01_Local_Cloud_Inference101.ipynb index d66e1b4f5..19a7fe3be 100644 --- a/docs/zero_to_hero_guide/01_Local_Cloud_Inference101.ipynb +++ b/docs/zero_to_hero_guide/01_Local_Cloud_Inference101.ipynb @@ -256,5 +256,7 @@ "pygments_lexer": "ipython3", "version": "3.10.15" } - } + }, + "nbformat": 4, + "nbformat_minor": 5 } diff --git a/docs/zero_to_hero_guide/02_Prompt_Engineering101.ipynb b/docs/zero_to_hero_guide/02_Prompt_Engineering101.ipynb index 7fccf8c51..f3566eeb3 100644 --- a/docs/zero_to_hero_guide/02_Prompt_Engineering101.ipynb +++ b/docs/zero_to_hero_guide/02_Prompt_Engineering101.ipynb @@ -301,5 +301,7 @@ "pygments_lexer": "ipython3", "version": "3.12.2" } - } + }, + "nbformat": 4, + "nbformat_minor": 5 } diff --git a/docs/zero_to_hero_guide/03_Image_Chat101.ipynb b/docs/zero_to_hero_guide/03_Image_Chat101.ipynb index 58353e813..ae10d8808 100644 --- a/docs/zero_to_hero_guide/03_Image_Chat101.ipynb +++ b/docs/zero_to_hero_guide/03_Image_Chat101.ipynb @@ -200,5 +200,7 @@ "pygments_lexer": "ipython3", "version": "3.12.2" } - } + }, + "nbformat": 4, + "nbformat_minor": 5 } diff --git a/docs/zero_to_hero_guide/04_Tool_Calling101.ipynb b/docs/zero_to_hero_guide/04_Tool_Calling101.ipynb index c3a383e8c..de3754b21 100644 --- a/docs/zero_to_hero_guide/04_Tool_Calling101.ipynb +++ b/docs/zero_to_hero_guide/04_Tool_Calling101.ipynb @@ -355,5 +355,7 @@ "pygments_lexer": "ipython3", "version": "3.10.15" } - } + }, + "nbformat": 4, + "nbformat_minor": 5 } diff --git a/docs/zero_to_hero_guide/05_Memory101.ipynb b/docs/zero_to_hero_guide/05_Memory101.ipynb index bfeb40adc..66956259f 100644 --- a/docs/zero_to_hero_guide/05_Memory101.ipynb +++ b/docs/zero_to_hero_guide/05_Memory101.ipynb @@ -398,5 +398,7 @@ "pygments_lexer": "ipython3", "version": "3.10.15" } - } + }, + "nbformat": 4, + "nbformat_minor": 5 } diff --git a/docs/zero_to_hero_guide/06_Safety101.ipynb b/docs/zero_to_hero_guide/06_Safety101.ipynb index c8c1fe9c7..5d7763924 100644 --- a/docs/zero_to_hero_guide/06_Safety101.ipynb +++ b/docs/zero_to_hero_guide/06_Safety101.ipynb @@ -132,5 
+132,7 @@ "pygments_lexer": "ipython3", "version": "3.11.10" } - } + }, + "nbformat": 4, + "nbformat_minor": 5 } diff --git a/docs/zero_to_hero_guide/07_Agents101.ipynb b/docs/zero_to_hero_guide/07_Agents101.ipynb index 8c988e1e3..b6df2a4c8 100644 --- a/docs/zero_to_hero_guide/07_Agents101.ipynb +++ b/docs/zero_to_hero_guide/07_Agents101.ipynb @@ -188,5 +188,7 @@ "pygments_lexer": "ipython3", "version": "3.10.15" } - } + }, + "nbformat": 4, + "nbformat_minor": 5 } diff --git a/install.sh b/install.sh new file mode 100755 index 000000000..cf0437126 --- /dev/null +++ b/install.sh @@ -0,0 +1,86 @@ +#!/usr/bin/env bash +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +set -Eeuo pipefail + +PORT=8321 +OLLAMA_PORT=11434 +MODEL_ALIAS="llama3.2:3b" +SERVER_IMAGE="llamastack/distribution-ollama:0.2.2" +WAIT_TIMEOUT=300 + +log(){ printf "\e[1;32m%s\e[0m\n" "$*"; } +die(){ printf "\e[1;31m❌ %s\e[0m\n" "$*" >&2; exit 1; } + +if command -v docker &> /dev/null; then + ENGINE="docker" + HOST_DNS="host.docker.internal" +elif command -v podman &> /dev/null; then + ENGINE="podman" + HOST_DNS="host.containers.internal" +else + die "Docker or Podman is required. Install Docker: https://docs.docker.com/get-docker/ or Podman: https://podman.io/getting-started/installation" +fi + +# Clean up any leftovers from earlier runs +for name in ollama-server llama-stack; do + ids=$($ENGINE ps -aq --filter "name=^${name}$") + if [ -n "$ids" ]; then + log "⚠️ Found existing container(s) for '${name}', removing..." + $ENGINE rm -f "$ids" + fi +done + +############################################################################### +# 1. Ollama +############################################################################### +log "πŸ¦™ Starting Ollama…" +$ENGINE run -d --name ollama-server \ + -p "${OLLAMA_PORT}:11434" \ + ollama/ollama > /dev/null 2>&1 + +log "⏳ Waiting for Ollama daemon…" +if ! timeout "$WAIT_TIMEOUT" bash -c \ + "until curl -fsS http://localhost:${OLLAMA_PORT}/ 2>/dev/null | grep -q 'Ollama'; do sleep 1; done"; then + log "❌ Ollama daemon did not become ready in ${WAIT_TIMEOUT}s; dumping container logs:" + $ENGINE logs ollama-server --tail=200 + die "Ollama startup failed" +fi + +log "πŸ“¦ Ensuring model is pulled: ${MODEL_ALIAS}..." +$ENGINE exec ollama-server ollama pull "${MODEL_ALIAS}" > /dev/null 2>&1 + +############################################################################### +# 2. Llama‑Stack +############################################################################### +log "πŸ¦™πŸ“¦ Starting Llama‑Stack…" +$ENGINE run -d --name llama-stack \ + -p "${PORT}:${PORT}" \ + --add-host="${HOST_DNS}:host-gateway" \ + "${SERVER_IMAGE}" \ + --port "${PORT}" \ + --env INFERENCE_MODEL="${MODEL_ALIAS}" \ + --env OLLAMA_URL="http://${HOST_DNS}:${OLLAMA_PORT}" > /dev/null 2>&1 + +log "⏳ Waiting for Llama-Stack API…" +if ! timeout "$WAIT_TIMEOUT" bash -c \ + "until curl -fsS http://localhost:${PORT}/v1/health 2>/dev/null | grep -q 'OK'; do sleep 1; done"; then + log "❌ Llama-Stack did not become ready in ${WAIT_TIMEOUT}s; dumping container logs:" + $ENGINE logs llama-stack --tail=200 + die "Llama-Stack startup failed" +fi + +############################################################################### +# Done +############################################################################### +log "" +log "πŸŽ‰ Llama‑Stack is ready!" 
+log "πŸ‘‰ API endpoint: http://localhost:${PORT}" +log "πŸ“– Documentation: https://llama-stack.readthedocs.io/en/latest/references/index.html" +log "πŸ’» To access the llama‑stack CLI, exec into the container:" +log " $ENGINE exec -ti llama-stack bash" +log "" diff --git a/llama_stack/apis/agents/agents.py b/llama_stack/apis/agents/agents.py index e13c4960b..dec43280b 100644 --- a/llama_stack/apis/agents/agents.py +++ b/llama_stack/apis/agents/agents.py @@ -225,8 +225,18 @@ class AgentConfigCommon(BaseModel): @json_schema_type class AgentConfig(AgentConfigCommon): + """Configuration for an agent. + + :param model: The model identifier to use for the agent + :param instructions: The system instructions for the agent + :param name: Optional name for the agent, used in telemetry and identification + :param enable_session_persistence: Optional flag indicating whether session data has to be persisted + :param response_format: Optional response format configuration + """ + model: str instructions: str + name: Optional[str] = None enable_session_persistence: Optional[bool] = False response_format: Optional[ResponseFormat] = None diff --git a/llama_stack/apis/inference/inference.py b/llama_stack/apis/inference/inference.py index 596efb136..309171f20 100644 --- a/llama_stack/apis/inference/inference.py +++ b/llama_stack/apis/inference/inference.py @@ -526,9 +526,9 @@ class OpenAIAssistantMessageParam(BaseModel): """ role: Literal["assistant"] = "assistant" - content: OpenAIChatCompletionMessageContent + content: Optional[OpenAIChatCompletionMessageContent] = None name: Optional[str] = None - tool_calls: Optional[List[OpenAIChatCompletionToolCall]] = Field(default_factory=list) + tool_calls: Optional[List[OpenAIChatCompletionToolCall]] = None @json_schema_type diff --git a/llama_stack/cli/stack/_build.py b/llama_stack/cli/stack/_build.py index 3251bc632..2787a93d5 100644 --- a/llama_stack/cli/stack/_build.py +++ b/llama_stack/cli/stack/_build.py @@ -136,12 +136,13 @@ def run_stack_build_command(args: argparse.Namespace) -> None: ) image_type = prompt( - f"> Enter the image type you want your Llama Stack to be built as ({' or '.join(e.value for e in ImageType)}): ", + "> Enter the image type you want your Llama Stack to be built as (use to see options): ", + completer=WordCompleter([e.value for e in ImageType]), + complete_while_typing=True, validator=Validator.from_callable( lambda x: x in [e.value for e in ImageType], - error_message=f"Invalid image type, please enter {' or '.join(e.value for e in ImageType)}", + error_message="Invalid image type. 
Use to see options", ), - default=ImageType.CONDA.value, ) if image_type == ImageType.CONDA.value: @@ -210,16 +211,9 @@ def run_stack_build_command(args: argparse.Namespace) -> None: ) sys.exit(1) - if build_config.image_type == LlamaStackImageType.CONTAINER.value and not args.image_name: - cprint( - "Please specify --image-name when building a container from a config file", - color="red", - ) - sys.exit(1) - if args.print_deps_only: print(f"# Dependencies for {args.template or args.config or image_name}") - normal_deps, special_deps = get_provider_dependencies(build_config.distribution_spec.providers) + normal_deps, special_deps = get_provider_dependencies(build_config) normal_deps += SERVER_DEPENDENCIES print(f"uv pip install {' '.join(normal_deps)}") for special_dep in special_deps: @@ -235,10 +229,14 @@ def run_stack_build_command(args: argparse.Namespace) -> None: ) except (Exception, RuntimeError) as exc: + import traceback + cprint( f"Error building stack: {exc}", color="red", ) + cprint("Stack trace:", color="red") + traceback.print_exc() sys.exit(1) if run_config is None: cprint( @@ -270,9 +268,10 @@ def _generate_run_config( image_name=image_name, apis=apis, providers={}, + external_providers_dir=build_config.external_providers_dir if build_config.external_providers_dir else None, ) # build providers dict - provider_registry = get_provider_registry() + provider_registry = get_provider_registry(build_config) for api in apis: run_config.providers[api] = [] provider_types = build_config.distribution_spec.providers[api] @@ -286,8 +285,22 @@ def _generate_run_config( if p.deprecation_error: raise InvalidProviderError(p.deprecation_error) - config_type = instantiate_class_type(provider_registry[Api(api)][provider_type].config_class) - if hasattr(config_type, "sample_run_config"): + try: + config_type = instantiate_class_type(provider_registry[Api(api)][provider_type].config_class) + except ModuleNotFoundError: + # HACK ALERT: + # This code executes after building is done, the import cannot work since the + # package is either available in the venv or container - not available on the host. 
+ # TODO: use a "is_external" flag in ProviderSpec to check if the provider is + # external + cprint( + f"Failed to import provider {provider_type} for API {api} - assuming it's external, skipping", + color="yellow", + ) + # Set config_type to None to avoid UnboundLocalError + config_type = None + + if config_type is not None and hasattr(config_type, "sample_run_config"): config = config_type.sample_run_config(__distro_dir__=f"~/.llama/distributions/{image_name}") else: config = {} @@ -305,11 +318,15 @@ def _generate_run_config( to_write = json.loads(run_config.model_dump_json()) f.write(yaml.dump(to_write, sort_keys=False)) - # this path is only invoked when no template is provided - cprint( - f"You can now run your stack with `llama stack run {run_config_file}`", - color="green", - ) + # Only print this message for non-container builds since it will be displayed before the + # container is built + # For non-container builds, the run.yaml is generated at the very end of the build process so it + # makes sense to display this message + if build_config.image_type != LlamaStackImageType.CONTAINER.value: + cprint( + f"You can now run your stack with `llama stack run {run_config_file}`", + color="green", + ) return run_config_file @@ -319,6 +336,7 @@ def _run_stack_build_command_from_build_config( template_name: Optional[str] = None, config_path: Optional[str] = None, ) -> str: + image_name = image_name or build_config.image_name if build_config.image_type == LlamaStackImageType.CONTAINER.value: if template_name: image_name = f"distribution-{template_name}" @@ -342,6 +360,13 @@ def _run_stack_build_command_from_build_config( build_file_path = build_dir / f"{image_name}-build.yaml" os.makedirs(build_dir, exist_ok=True) + run_config_file = None + # Generate the run.yaml so it can be included in the container image with the proper entrypoint + # Only do this if we're building a container image and we're not using a template + if build_config.image_type == LlamaStackImageType.CONTAINER.value and not template_name and config_path: + cprint("Generating run.yaml file", color="green") + run_config_file = _generate_run_config(build_config, build_dir, image_name) + with open(build_file_path, "w") as f: to_write = json.loads(build_config.model_dump_json()) f.write(yaml.dump(to_write, sort_keys=False)) @@ -350,7 +375,8 @@ def _run_stack_build_command_from_build_config( build_config, build_file_path, image_name, - template_or_config=template_name or config_path, + template_or_config=template_name or config_path or str(build_file_path), + run_config=run_config_file, ) if return_code != 0: raise RuntimeError(f"Failed to build image {image_name}") diff --git a/llama_stack/distribution/build.py b/llama_stack/distribution/build.py index a8ee372da..9664449f3 100644 --- a/llama_stack/distribution/build.py +++ b/llama_stack/distribution/build.py @@ -7,16 +7,16 @@ import importlib.resources import logging from pathlib import Path -from typing import Dict, List from pydantic import BaseModel from termcolor import cprint -from llama_stack.distribution.datatypes import BuildConfig, Provider +from llama_stack.distribution.datatypes import BuildConfig from llama_stack.distribution.distribution import get_provider_registry from llama_stack.distribution.utils.exec import run_command from llama_stack.distribution.utils.image_types import LlamaStackImageType from llama_stack.providers.datatypes import Api +from llama_stack.templates.template import DistributionTemplate log = logging.getLogger(__name__) @@ -37,19 +37,24 @@ 
class ApiInput(BaseModel): def get_provider_dependencies( - config_providers: Dict[str, List[Provider]], + config: BuildConfig | DistributionTemplate, ) -> tuple[list[str], list[str]]: """Get normal and special dependencies from provider configuration.""" - all_providers = get_provider_registry() + # Extract providers based on config type + if isinstance(config, DistributionTemplate): + providers = config.providers + elif isinstance(config, BuildConfig): + providers = config.distribution_spec.providers deps = [] + registry = get_provider_registry(config) - for api_str, provider_or_providers in config_providers.items(): - providers_for_api = all_providers[Api(api_str)] + for api_str, provider_or_providers in providers.items(): + providers_for_api = registry[Api(api_str)] providers = provider_or_providers if isinstance(provider_or_providers, list) else [provider_or_providers] for provider in providers: - # Providers from BuildConfig and RunConfig are subtly different – not great + # Providers from BuildConfig and RunConfig are subtly different – not great provider_type = provider if isinstance(provider, str) else provider.provider_type if provider_type not in providers_for_api: @@ -71,8 +76,8 @@ def get_provider_dependencies( return list(set(normal_deps)), list(set(special_deps)) -def print_pip_install_help(providers: Dict[str, List[Provider]]): - normal_deps, special_deps = get_provider_dependencies(providers) +def print_pip_install_help(config: BuildConfig): + normal_deps, special_deps = get_provider_dependencies(config) cprint( f"Please install needed dependencies using the following commands:\n\nuv pip install {' '.join(normal_deps)}", @@ -88,10 +93,11 @@ def build_image( build_file_path: Path, image_name: str, template_or_config: str, + run_config: str | None = None, ): container_base = build_config.distribution_spec.container_image or "python:3.10-slim" - normal_deps, special_deps = get_provider_dependencies(build_config.distribution_spec.providers) + normal_deps, special_deps = get_provider_dependencies(build_config) normal_deps += SERVER_DEPENDENCIES if build_config.image_type == LlamaStackImageType.CONTAINER.value: @@ -103,6 +109,11 @@ def build_image( container_base, " ".join(normal_deps), ] + + # When building from a config file (not a template), include the run config path in the + # build arguments + if run_config is not None: + args.append(run_config) elif build_config.image_type == LlamaStackImageType.CONDA.value: script = str(importlib.resources.files("llama_stack") / "distribution/build_conda_env.sh") args = [ diff --git a/llama_stack/distribution/build_container.sh b/llama_stack/distribution/build_container.sh index ed83b7bff..ad316d45e 100755 --- a/llama_stack/distribution/build_container.sh +++ b/llama_stack/distribution/build_container.sh @@ -19,12 +19,16 @@ UV_HTTP_TIMEOUT=${UV_HTTP_TIMEOUT:-500} # mounting is not supported by docker buildx, so we use COPY instead USE_COPY_NOT_MOUNT=${USE_COPY_NOT_MOUNT:-} +# Path to the run.yaml file in the container +RUN_CONFIG_PATH=/app/run.yaml + +BUILD_CONTEXT_DIR=$(pwd) + if [ "$#" -lt 4 ]; then # This only works for templates - echo "Usage: $0 []" >&2 + echo "Usage: $0 [] []" >&2 exit 1 fi - set -euo pipefail template_or_config="$1" @@ -35,8 +39,27 @@ container_base="$1" shift pip_dependencies="$1" shift -special_pip_deps="${1:-}" +# Handle optional arguments +run_config="" +special_pip_deps="" + +# Check if there are more arguments +# The logics is becoming cumbersom, we should refactor it if we can do better +if [ $# -gt 0 ]; 
then + # Check if the argument ends with .yaml + if [[ "$1" == *.yaml ]]; then + run_config="$1" + shift + # If there's another argument after .yaml, it must be special_pip_deps + if [ $# -gt 0 ]; then + special_pip_deps="$1" + fi + else + # If it's not .yaml, it must be special_pip_deps + special_pip_deps="$1" + fi +fi # Define color codes RED='\033[0;31m' @@ -72,9 +95,13 @@ if [[ $container_base == *"registry.access.redhat.com/ubi9"* ]]; then FROM $container_base WORKDIR /app -RUN dnf -y update && dnf install -y iputils net-tools wget \ +# We install the Python 3.11 dev headers and build tools so that any +# C‑extension wheels (e.g. polyleven, faiss‑cpu) can compile successfully. + +RUN dnf -y update && dnf install -y iputils git net-tools wget \ vim-minimal python3.11 python3.11-pip python3.11-wheel \ - python3.11-setuptools && ln -s /bin/pip3.11 /bin/pip && ln -s /bin/python3.11 /bin/python && dnf clean all + python3.11-setuptools python3.11-devel gcc make && \ + ln -s /bin/pip3.11 /bin/pip && ln -s /bin/python3.11 /bin/python && dnf clean all ENV UV_SYSTEM_PYTHON=1 RUN pip install uv @@ -86,7 +113,7 @@ WORKDIR /app RUN apt-get update && apt-get install -y \ iputils-ping net-tools iproute2 dnsutils telnet \ - curl wget telnet \ + curl wget telnet git\ procps psmisc lsof \ traceroute \ bubblewrap \ @@ -115,6 +142,45 @@ EOF done fi +# Function to get Python command +get_python_cmd() { + if is_command_available python; then + echo "python" + elif is_command_available python3; then + echo "python3" + else + echo "Error: Neither python nor python3 is installed. Please install Python to continue." >&2 + exit 1 + fi +} + +if [ -n "$run_config" ]; then + # Copy the run config to the build context since it's an absolute path + cp "$run_config" "$BUILD_CONTEXT_DIR/run.yaml" + add_to_container << EOF +COPY run.yaml $RUN_CONFIG_PATH +EOF + + # Parse the run.yaml configuration to identify external provider directories + # If external providers are specified, copy their directory to the container + # and update the configuration to reference the new container path + python_cmd=$(get_python_cmd) + external_providers_dir=$($python_cmd -c "import yaml; config = yaml.safe_load(open('$run_config')); print(config.get('external_providers_dir') or '')") + if [ -n "$external_providers_dir" ]; then + echo "Copying external providers directory: $external_providers_dir" + add_to_container << EOF +COPY $external_providers_dir /app/providers.d +EOF + # Edit the run.yaml file to change the external_providers_dir to /app/providers.d + if [ "$(uname)" = "Darwin" ]; then + sed -i.bak -e 's|external_providers_dir:.*|external_providers_dir: /app/providers.d|' "$BUILD_CONTEXT_DIR/run.yaml" + rm -f "$BUILD_CONTEXT_DIR/run.yaml.bak" + else + sed -i 's|external_providers_dir:.*|external_providers_dir: /app/providers.d|' "$BUILD_CONTEXT_DIR/run.yaml" + fi + fi +fi + stack_mount="/app/llama-stack-source" client_mount="/app/llama-stack-client-source" @@ -174,15 +240,16 @@ fi RUN pip uninstall -y uv EOF -# if template_or_config ends with .yaml, it is not a template and we should not use the --template flag -if [[ "$template_or_config" != *.yaml ]]; then +# If a run config is provided, we use the --config flag +if [[ -n "$run_config" ]]; then + add_to_container << EOF +ENTRYPOINT ["python", "-m", "llama_stack.distribution.server.server", "--config", "$RUN_CONFIG_PATH"] +EOF +# If a template is provided (not a yaml file), we use the --template flag +elif [[ "$template_or_config" != *.yaml ]]; then add_to_container << EOF 
ENTRYPOINT ["python", "-m", "llama_stack.distribution.server.server", "--template", "$template_or_config"] EOF -else - add_to_container << EOF -ENTRYPOINT ["python", "-m", "llama_stack.distribution.server.server"] -EOF fi # Add other require item commands genearic to all containers @@ -254,9 +321,10 @@ $CONTAINER_BINARY build \ "${CLI_ARGS[@]}" \ -t "$image_tag" \ -f "$TEMP_DIR/Containerfile" \ - "." + "$BUILD_CONTEXT_DIR" # clean up tmp/configs +rm -f "$BUILD_CONTEXT_DIR/run.yaml" set +x echo "Success!" diff --git a/llama_stack/distribution/datatypes.py b/llama_stack/distribution/datatypes.py index b24b0ec50..38353c1ff 100644 --- a/llama_stack/distribution/datatypes.py +++ b/llama_stack/distribution/datatypes.py @@ -326,3 +326,12 @@ class BuildConfig(BaseModel): default="conda", description="Type of package to build (conda | container | venv)", ) + image_name: Optional[str] = Field( + default=None, + description="Name of the distribution to build", + ) + external_providers_dir: Optional[str] = Field( + default=None, + description="Path to directory containing external provider implementations. The providers packages will be resolved from this directory. " + "pip_packages MUST contain the provider package name.", + ) diff --git a/llama_stack/distribution/distribution.py b/llama_stack/distribution/distribution.py index d4447139c..f948ddf1c 100644 --- a/llama_stack/distribution/distribution.py +++ b/llama_stack/distribution/distribution.py @@ -12,7 +12,6 @@ from typing import Any, Dict, List import yaml from pydantic import BaseModel -from llama_stack.distribution.datatypes import StackRunConfig from llama_stack.log import get_logger from llama_stack.providers.datatypes import ( AdapterSpec, @@ -97,7 +96,9 @@ def _load_inline_provider_spec(spec_data: Dict[str, Any], api: Api, provider_nam return spec -def get_provider_registry(config: StackRunConfig | None = None) -> Dict[Api, Dict[str, ProviderSpec]]: +def get_provider_registry( + config=None, +) -> Dict[Api, Dict[str, ProviderSpec]]: """Get the provider registry, optionally including external providers. This function loads both built-in providers and external providers from YAML files. 
@@ -122,7 +123,7 @@ def get_provider_registry(config: StackRunConfig | None = None) -> Dict[Api, Dic llama-guard.yaml Args: - config: Optional StackRunConfig containing the external providers directory path + config: Optional object containing the external providers directory path Returns: A dictionary mapping APIs to their available providers @@ -142,7 +143,8 @@ def get_provider_registry(config: StackRunConfig | None = None) -> Dict[Api, Dic except ImportError as e: logger.warning(f"Failed to import module {name}: {e}") - if config and config.external_providers_dir: + # Check if config has the external_providers_dir attribute + if config and hasattr(config, "external_providers_dir") and config.external_providers_dir: external_providers_dir = os.path.abspath(config.external_providers_dir) if not os.path.exists(external_providers_dir): raise FileNotFoundError(f"External providers directory not found: {external_providers_dir}") diff --git a/llama_stack/distribution/routers/routers.py b/llama_stack/distribution/routers/routers.py index 17aecdaf8..d88df00bd 100644 --- a/llama_stack/distribution/routers/routers.py +++ b/llama_stack/distribution/routers/routers.py @@ -8,6 +8,11 @@ import asyncio import time from typing import Any, AsyncGenerator, AsyncIterator, Dict, List, Optional, Union +from openai.types.chat import ChatCompletionToolChoiceOptionParam as OpenAIChatCompletionToolChoiceOptionParam +from openai.types.chat import ChatCompletionToolParam as OpenAIChatCompletionToolParam +from pydantic import Field, TypeAdapter +from typing_extensions import Annotated + from llama_stack.apis.common.content_types import ( URL, InterleavedContent, @@ -526,7 +531,7 @@ class InferenceRouter(Inference): async def openai_chat_completion( self, model: str, - messages: List[OpenAIMessageParam], + messages: Annotated[List[OpenAIMessageParam], Field(..., min_length=1)], frequency_penalty: Optional[float] = None, function_call: Optional[Union[str, Dict[str, Any]]] = None, functions: Optional[List[Dict[str, Any]]] = None, @@ -558,6 +563,16 @@ class InferenceRouter(Inference): if model_obj.model_type == ModelType.embedding: raise ValueError(f"Model '{model}' is an embedding model and does not support chat completions") + # Use the OpenAI client for a bit of extra input validation without + # exposing the OpenAI client itself as part of our API surface + if tool_choice: + TypeAdapter(OpenAIChatCompletionToolChoiceOptionParam).validate_python(tool_choice) + if tools is None: + raise ValueError("'tool_choice' is only allowed when 'tools' is also provided") + if tools: + for tool in tools: + TypeAdapter(OpenAIChatCompletionToolParam).validate_python(tool) + params = dict( model=model_obj.identifier, messages=messages, diff --git a/llama_stack/distribution/server/server.py b/llama_stack/distribution/server/server.py index 9bbb2ce88..6e9941d1c 100644 --- a/llama_stack/distribution/server/server.py +++ b/llama_stack/distribution/server/server.py @@ -22,6 +22,7 @@ from fastapi import Body, FastAPI, HTTPException, Request from fastapi import Path as FastapiPath from fastapi.exceptions import RequestValidationError from fastapi.responses import JSONResponse, StreamingResponse +from openai import BadRequestError from pydantic import BaseModel, ValidationError from typing_extensions import Annotated @@ -92,7 +93,7 @@ async def global_exception_handler(request: Request, exc: Exception): def translate_exception(exc: Exception) -> Union[HTTPException, RequestValidationError]: if isinstance(exc, ValidationError): - exc = 
RequestValidationError(exc.raw_errors) + exc = RequestValidationError(exc.errors()) if isinstance(exc, RequestValidationError): return HTTPException( @@ -110,6 +111,8 @@ def translate_exception(exc: Exception) -> Union[HTTPException, RequestValidatio ) elif isinstance(exc, ValueError): return HTTPException(status_code=400, detail=f"Invalid value: {str(exc)}") + elif isinstance(exc, BadRequestError): + return HTTPException(status_code=400, detail=str(exc)) elif isinstance(exc, PermissionError): return HTTPException(status_code=403, detail=f"Permission denied: {str(exc)}") elif isinstance(exc, TimeoutError): @@ -162,14 +165,17 @@ async def maybe_await(value): return value -async def sse_generator(event_gen): +async def sse_generator(event_gen_coroutine): + event_gen = None try: - async for item in await event_gen: + event_gen = await event_gen_coroutine + async for item in event_gen: yield create_sse_event(item) await asyncio.sleep(0.01) except asyncio.CancelledError: logger.info("Generator cancelled") - await event_gen.aclose() + if event_gen: + await event_gen.aclose() except Exception as e: logger.exception("Error in sse_generator") yield create_sse_event( @@ -455,6 +461,7 @@ def main(args: Optional[argparse.Namespace] = None): raise ValueError(f"Could not find method {endpoint.name} on {impl}!!") impl_method = getattr(impl, endpoint.name) + logger.debug(f"{endpoint.method.upper()} {endpoint.route}") with warnings.catch_warnings(): warnings.filterwarnings("ignore", category=UserWarning, module="pydantic._internal._fields") diff --git a/llama_stack/distribution/ui/page/playground/rag.py b/llama_stack/distribution/ui/page/playground/rag.py index 392c9afe2..696d89bc2 100644 --- a/llama_stack/distribution/ui/page/playground/rag.py +++ b/llama_stack/distribution/ui/page/playground/rag.py @@ -24,6 +24,13 @@ def rag_chat_page(): def should_disable_input(): return "displayed_messages" in st.session_state and len(st.session_state.displayed_messages) > 0 + def log_message(message): + with st.chat_message(message["role"]): + if "tool_output" in message and message["tool_output"]: + with st.expander(label="Tool Output", expanded=False, icon="πŸ› "): + st.write(message["tool_output"]) + st.markdown(message["content"]) + with st.sidebar: # File/Directory Upload Section st.subheader("Upload Documents", divider=True) @@ -146,8 +153,7 @@ def rag_chat_page(): # Display chat history for message in st.session_state.displayed_messages: - with st.chat_message(message["role"]): - st.markdown(message["content"]) + log_message(message) if temperature > 0.0: strategy = { @@ -201,7 +207,7 @@ def rag_chat_page(): # Display assistant response with st.chat_message("assistant"): - retrieval_message_placeholder = st.empty() + retrieval_message_placeholder = st.expander(label="Tool Output", expanded=False, icon="πŸ› ") message_placeholder = st.empty() full_response = "" retrieval_response = "" @@ -209,14 +215,16 @@ def rag_chat_page(): log.print() if log.role == "tool_execution": retrieval_response += log.content.replace("====", "").strip() - retrieval_message_placeholder.info(retrieval_response) + retrieval_message_placeholder.write(retrieval_response) else: full_response += log.content message_placeholder.markdown(full_response + "β–Œ") message_placeholder.markdown(full_response) st.session_state.messages.append({"role": "assistant", "content": full_response}) - st.session_state.displayed_messages.append({"role": "assistant", "content": full_response}) + st.session_state.displayed_messages.append( + {"role": 
"assistant", "content": full_response, "tool_output": retrieval_response} + ) def direct_process_prompt(prompt): # Add the system prompt in the beginning of the conversation @@ -230,15 +238,14 @@ def rag_chat_page(): prompt_context = rag_response.content with st.chat_message("assistant"): + with st.expander(label="Retrieval Output", expanded=False): + st.write(prompt_context) + retrieval_message_placeholder = st.empty() message_placeholder = st.empty() full_response = "" retrieval_response = "" - # Display the retrieved content - retrieval_response += str(prompt_context) - retrieval_message_placeholder.info(retrieval_response) - # Construct the extended prompt extended_prompt = f"Please answer the following query using the context below.\n\nCONTEXT:\n{prompt_context}\n\nQUERY:\n{prompt}" diff --git a/llama_stack/distribution/ui/page/playground/tools.py b/llama_stack/distribution/ui/page/playground/tools.py index e987f617b..6c6a9fcfd 100644 --- a/llama_stack/distribution/ui/page/playground/tools.py +++ b/llama_stack/distribution/ui/page/playground/tools.py @@ -4,14 +4,23 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. +import enum +import json import uuid import streamlit as st from llama_stack_client import Agent +from llama_stack_client.lib.agents.react.agent import ReActAgent +from llama_stack_client.lib.agents.react.tool_parser import ReActOutput from llama_stack.distribution.ui.modules.api import llama_stack_api +class AgentType(enum.Enum): + REGULAR = "Regular" + REACT = "ReAct" + + def tool_chat_page(): st.title("πŸ›  Tools") @@ -23,50 +32,117 @@ def tool_chat_page(): tool_groups_list = [tool_group.identifier for tool_group in tool_groups] mcp_tools_list = [tool for tool in tool_groups_list if tool.startswith("mcp::")] builtin_tools_list = [tool for tool in tool_groups_list if not tool.startswith("mcp::")] + selected_vector_dbs = [] def reset_agent(): st.session_state.clear() st.cache_resource.clear() with st.sidebar: + st.title("Configuration") st.subheader("Model") - model = st.selectbox(label="models", options=model_list, on_change=reset_agent) + model = st.selectbox(label="Model", options=model_list, on_change=reset_agent, label_visibility="collapsed") + + st.subheader("Available ToolGroups") - st.subheader("Builtin Tools") toolgroup_selection = st.pills( - label="Available ToolGroups", options=builtin_tools_list, selection_mode="multi", on_change=reset_agent + label="Built-in tools", + options=builtin_tools_list, + selection_mode="multi", + on_change=reset_agent, + format_func=lambda tool: "".join(tool.split("::")[1:]), + help="List of built-in tools from your llama stack server.", ) - st.subheader("MCP Servers") + if "builtin::rag" in toolgroup_selection: + vector_dbs = llama_stack_api.client.vector_dbs.list() or [] + if not vector_dbs: + st.info("No vector databases available for selection.") + vector_dbs = [vector_db.identifier for vector_db in vector_dbs] + selected_vector_dbs = st.multiselect( + label="Select Document Collections to use in RAG queries", + options=vector_dbs, + on_change=reset_agent, + ) + mcp_selection = st.pills( - label="Available MCP Servers", options=mcp_tools_list, selection_mode="multi", on_change=reset_agent + label="MCP Servers", + options=mcp_tools_list, + selection_mode="multi", + on_change=reset_agent, + format_func=lambda tool: "".join(tool.split("::")[1:]), + help="List of MCP servers registered to your llama stack server.", ) toolgroup_selection.extend(mcp_selection) 
- active_tool_list = [] - for toolgroup_id in toolgroup_selection: - active_tool_list.extend( - [ - f"{''.join(toolgroup_id.split('::')[1:])}:{t.identifier}" - for t in client.tools.list(toolgroup_id=toolgroup_id) - ] - ) + grouped_tools = {} + total_tools = 0 - st.subheader(f"Active Tools: πŸ›  {len(active_tool_list)}") - st.json(active_tool_list) + for toolgroup_id in toolgroup_selection: + tools = client.tools.list(toolgroup_id=toolgroup_id) + grouped_tools[toolgroup_id] = [tool.identifier for tool in tools] + total_tools += len(tools) + + st.markdown(f"Active Tools: πŸ›  {total_tools}") + + for group_id, tools in grouped_tools.items(): + with st.expander(f"πŸ”§ Tools from `{group_id}`"): + for idx, tool in enumerate(tools, start=1): + st.markdown(f"{idx}. `{tool.split(':')[-1]}`") + + st.subheader("Agent Configurations") + st.subheader("Agent Type") + agent_type = st.radio( + "Select Agent Type", + [AgentType.REGULAR, AgentType.REACT], + format_func=lambda x: x.value, + on_change=reset_agent, + ) + + max_tokens = st.slider( + "Max Tokens", + min_value=0, + max_value=4096, + value=512, + step=64, + help="The maximum number of tokens to generate", + on_change=reset_agent, + ) + + for i, tool_name in enumerate(toolgroup_selection): + if tool_name == "builtin::rag": + tool_dict = dict( + name="builtin::rag", + args={ + "vector_db_ids": list(selected_vector_dbs), + }, + ) + toolgroup_selection[i] = tool_dict @st.cache_resource def create_agent(): - return Agent( - client, - model=model, - instructions="You are a helpful assistant. When you use a tool always respond with a summary of the result.", - tools=toolgroup_selection, - sampling_params={ - "strategy": {"type": "greedy"}, - }, - ) + if "agent_type" in st.session_state and st.session_state.agent_type == AgentType.REACT: + return ReActAgent( + client=client, + model=model, + tools=toolgroup_selection, + response_format={ + "type": "json_schema", + "json_schema": ReActOutput.model_json_schema(), + }, + sampling_params={"strategy": {"type": "greedy"}, "max_tokens": max_tokens}, + ) + else: + return Agent( + client, + model=model, + instructions="You are a helpful assistant. 
When you use a tool always respond with a summary of the result.", + tools=toolgroup_selection, + sampling_params={"strategy": {"type": "greedy"}, "max_tokens": max_tokens}, + ) + + st.session_state.agent_type = agent_type agent = create_agent() @@ -95,6 +171,158 @@ def tool_chat_page(): ) def response_generator(turn_response): + if st.session_state.get("agent_type") == AgentType.REACT: + return _handle_react_response(turn_response) + else: + return _handle_regular_response(turn_response) + + def _handle_react_response(turn_response): + current_step_content = "" + final_answer = None + tool_results = [] + + for response in turn_response: + if not hasattr(response.event, "payload"): + yield ( + "\n\n🚨 :red[_Llama Stack server Error:_]\n" + "The response received is missing an expected `payload` attribute.\n" + "This could indicate a malformed response or an internal issue within the server.\n\n" + f"Error details: {response}" + ) + return + + payload = response.event.payload + + if payload.event_type == "step_progress" and hasattr(payload.delta, "text"): + current_step_content += payload.delta.text + continue + + if payload.event_type == "step_complete": + step_details = payload.step_details + + if step_details.step_type == "inference": + yield from _process_inference_step(current_step_content, tool_results, final_answer) + current_step_content = "" + elif step_details.step_type == "tool_execution": + tool_results = _process_tool_execution(step_details, tool_results) + current_step_content = "" + else: + current_step_content = "" + + if not final_answer and tool_results: + yield from _format_tool_results_summary(tool_results) + + def _process_inference_step(current_step_content, tool_results, final_answer): + try: + react_output_data = json.loads(current_step_content) + thought = react_output_data.get("thought") + action = react_output_data.get("action") + answer = react_output_data.get("answer") + + if answer and answer != "null" and answer is not None: + final_answer = answer + + if thought: + with st.expander("πŸ€” Thinking...", expanded=False): + st.markdown(f":grey[__{thought}__]") + + if action and isinstance(action, dict): + tool_name = action.get("tool_name") + tool_params = action.get("tool_params") + with st.expander(f'πŸ›  Action: Using tool "{tool_name}"', expanded=False): + st.json(tool_params) + + if answer and answer != "null" and answer is not None: + yield f"\n\nβœ… **Final Answer:**\n{answer}" + + except json.JSONDecodeError: + yield f"\n\nFailed to parse ReAct step content:\n```json\n{current_step_content}\n```" + except Exception as e: + yield f"\n\nFailed to process ReAct step: {e}\n```json\n{current_step_content}\n```" + + return final_answer + + def _process_tool_execution(step_details, tool_results): + try: + if hasattr(step_details, "tool_responses") and step_details.tool_responses: + for tool_response in step_details.tool_responses: + tool_name = tool_response.tool_name + content = tool_response.content + tool_results.append((tool_name, content)) + with st.expander(f'βš™οΈ Observation (Result from "{tool_name}")', expanded=False): + try: + parsed_content = json.loads(content) + st.json(parsed_content) + except json.JSONDecodeError: + st.code(content, language=None) + else: + with st.expander("βš™οΈ Observation", expanded=False): + st.markdown(":grey[_Tool execution step completed, but no response data found._]") + except Exception as e: + with st.expander("βš™οΈ Error in Tool Execution", expanded=False): + st.markdown(f":red[_Error processing tool execution: 
{str(e)}_]") + + return tool_results + + def _format_tool_results_summary(tool_results): + yield "\n\n**Here's what I found:**\n" + for tool_name, content in tool_results: + try: + parsed_content = json.loads(content) + + if tool_name == "web_search" and "top_k" in parsed_content: + yield from _format_web_search_results(parsed_content) + elif "results" in parsed_content and isinstance(parsed_content["results"], list): + yield from _format_results_list(parsed_content["results"]) + elif isinstance(parsed_content, dict) and len(parsed_content) > 0: + yield from _format_dict_results(parsed_content) + elif isinstance(parsed_content, list) and len(parsed_content) > 0: + yield from _format_list_results(parsed_content) + except json.JSONDecodeError: + yield f"\n**{tool_name}** was used but returned complex data. Check the observation for details.\n" + except (TypeError, AttributeError, KeyError, IndexError) as e: + print(f"Error processing {tool_name} result: {type(e).__name__}: {e}") + + def _format_web_search_results(parsed_content): + for i, result in enumerate(parsed_content["top_k"], 1): + if i <= 3: + title = result.get("title", "Untitled") + url = result.get("url", "") + content_text = result.get("content", "").strip() + yield f"\n- **{title}**\n {content_text}\n [Source]({url})\n" + + def _format_results_list(results): + for i, result in enumerate(results, 1): + if i <= 3: + if isinstance(result, dict): + name = result.get("name", result.get("title", "Result " + str(i))) + description = result.get("description", result.get("content", result.get("summary", ""))) + yield f"\n- **{name}**\n {description}\n" + else: + yield f"\n- {result}\n" + + def _format_dict_results(parsed_content): + yield "\n```\n" + for key, value in list(parsed_content.items())[:5]: + if isinstance(value, str) and len(value) < 100: + yield f"{key}: {value}\n" + else: + yield f"{key}: [Complex data]\n" + yield "```\n" + + def _format_list_results(parsed_content): + yield "\n" + for _, item in enumerate(parsed_content[:3], 1): + if isinstance(item, str): + yield f"- {item}\n" + elif isinstance(item, dict) and "text" in item: + yield f"- {item['text']}\n" + elif isinstance(item, dict) and len(item) > 0: + first_value = next(iter(item.values())) + if isinstance(first_value, str) and len(first_value) < 100: + yield f"- {first_value}\n" + + def _handle_regular_response(turn_response): for response in turn_response: if hasattr(response.event, "payload"): print(response.event.payload) @@ -103,14 +331,18 @@ def tool_chat_page(): yield response.event.payload.delta.text if response.event.payload.event_type == "step_complete": if response.event.payload.step_details.step_type == "tool_execution": - yield " πŸ›  " + if response.event.payload.step_details.tool_calls: + tool_name = str(response.event.payload.step_details.tool_calls[0].tool_name) + yield f'\n\nπŸ›  :grey[_Using "{tool_name}" tool:_]\n\n' + else: + yield "No tool_calls present in step_details" else: yield f"Error occurred in the Llama Stack Cluster: {response}" with st.chat_message("assistant"): - response = st.write_stream(response_generator(turn_response)) + response_content = st.write_stream(response_generator(turn_response)) - st.session_state.messages.append({"role": "assistant", "content": response}) + st.session_state.messages.append({"role": "assistant", "content": response_content}) tool_chat_page() diff --git a/llama_stack/models/llama/llama4/chat_format.py b/llama_stack/models/llama/llama4/chat_format.py index 9d60d00e9..1574eeb5e 100644 --- 
a/llama_stack/models/llama/llama4/chat_format.py +++ b/llama_stack/models/llama/llama4/chat_format.py @@ -5,6 +5,7 @@ # the root directory of this source tree. import io +import json import uuid from dataclasses import dataclass from typing import Dict, List, Optional, Tuple @@ -299,8 +300,10 @@ class ChatFormat: call_id=call_id, tool_name=tool_name, arguments=tool_arguments, + arguments_json=json.dumps(tool_arguments), ) ) + content = "" return RawMessage( role="assistant", diff --git a/llama_stack/models/llama/llama4/prompt_format.md b/llama_stack/models/llama/llama4/prompt_format.md index 698571093..350a5517a 100644 --- a/llama_stack/models/llama/llama4/prompt_format.md +++ b/llama_stack/models/llama/llama4/prompt_format.md @@ -64,7 +64,7 @@ This example passes an image that is smaller than the tile size, to show the til ##### Model Response Format ``` -The image depicts a dog standing on a skateboard, with its front paws positioned on the board and its back paws hanging off the back. The dog has a distinctive coat pattern, featuring a white face, brown and black fur, and white paws, and is standing on a skateboard with red wheels, set against a blurred background of a street or alleyway with a teal door and beige wall.<|eot|> +The image depicts a dog standing on a skateboard, positioned centrally and facing the camera directly. The dog has a distinctive coat pattern featuring white, black, and brown fur, with floppy ears and a black nose, and is standing on a skateboard with red wheels.<|eot|> ``` @@ -91,7 +91,7 @@ Here is an example of how to pass an image to the model ##### Model Response Format ``` -This image shows a dog standing on a skateboard, with its front paws positioned near the front of the board and its back paws near the back. The dog has a white, black, and orange coat, and is standing on a gray skateboard with red wheels, in front of a blurred background that appears to be a street or alleyway.<|eot|> +The image depicts a dog standing on a skateboard, with the dog positioned centrally and facing forward. The dog has a distinctive coat featuring a mix of white, brown, and black fur, and is wearing a collar as it stands on the skateboard, which has red wheels.<|eot|> ``` @@ -117,7 +117,7 @@ Here is an example of how to pass an image to the model ##### Model Response Format ``` -The first image shows a dog standing on a skateboard, while the second image shows a plate of spaghetti with tomato sauce, parmesan cheese, and parsley. The two images are unrelated, with the first image featuring a dog and the second image featuring a food dish, and they do not share any common elements or themes.<|eot|> +The first image features a dog standing on a skateboard, while the second image showcases a plate of spaghetti with tomato sauce and cheese. The two images appear to be unrelated, with one depicting a playful scene of a dog on a skateboard and the other presenting a classic Italian dish.<|eom|> ``` @@ -135,13 +135,44 @@ We are continuing the format for zero shot function calling used in previous ver ``` <|begin_of_text|><|header_start|>system<|header_end|> -You are an expert in composing functions. You are given a question and a set of possible functions. -Based on the question, you will need to make one or more function/tool calls to achieve the purpose. -If none of the function can be used, point it out. If the given question lacks the parameters required by the function, -also point it out. You should only return the function call in tools call sections. 
+You are a helpful assistant and an expert in function composition. You can answer general questions using your internal knowledge OR invoke functions when necessary. Follow these strict guidelines: + +1. FUNCTION CALLS: +- ONLY use functions that are EXPLICITLY listed in the function list below +- If NO functions are listed (empty function list []), respond ONLY with internal knowledge or "I don't have access to [Unavailable service] information" +- If a function is not in the list, respond ONLY with internal knowledge or "I don't have access to [Unavailable service] information" +- If ALL required parameters are present AND the query EXACTLY matches a listed function's purpose: output ONLY the function call(s) +- Use exact format: [func_name1(param1=value1, param2=value2), func_name2(...)] +Examples: +CORRECT: [get_weather(location="Vancouver"), calculate_route(start="Boston", end="New York")] <- Only if get_weather and calculate_route are in function list +INCORRECT: get_weather(location="New York") +INCORRECT: Let me check the weather: [get_weather(location="New York")] +INCORRECT: [get_events(location="Singapore")] <- If function not in list + +2. RESPONSE RULES: +- For pure function requests matching a listed function: ONLY output the function call(s) +- For knowledge questions: ONLY output text +- For missing parameters: ONLY request the specific missing parameters +- For unavailable services (not in function list): output ONLY with internal knowledge or "I don't have access to [Unavailable service] information". Do NOT execute a function call. +- If the query asks for information beyond what a listed function provides: output ONLY with internal knowledge about your limitations +- NEVER combine text and function calls in the same response +- NEVER suggest alternative functions when the requested service is unavailable +- NEVER create or invent new functions not listed below + +3. STRICT BOUNDARIES: +- ONLY use functions from the list below - no exceptions +- NEVER use a function as an alternative to unavailable information +- NEVER call functions not present in the function list +- NEVER add explanatory text to function calls +- NEVER respond with empty brackets +- Use proper Python/JSON syntax for function calls +- Check the function list carefully before responding + +4. TOOL RESPONSE HANDLING: +- When receiving tool responses: provide concise, natural language responses +- Don't repeat tool response verbatim +- Don't add supplementary information -If you decide to invoke any of the function(s), you MUST put it in the format of [func_name1(params_name1=params_value1, params_name2=params_value2...), func_name2(params)] -You SHOULD NOT include any other text in the response. Here is a list of functions in JSON format that you can invoke. @@ -151,9 +182,7 @@ Here is a list of functions in JSON format that you can invoke. "description": "Get weather info for places", "parameters": { "type": "dict", - "required": [ - "city" - ], + "required": ["city"], "properties": { "city": { "type": "string", @@ -167,7 +196,10 @@ Here is a list of functions in JSON format that you can invoke. } } } -<|eot|><|header_start|>user<|header_end|> +] + +You can answer general questions or invoke tools when necessary. 
+In addition to tool calls, you should also augment your responses by using the tool outputs.<|eot|><|header_start|>user<|header_end|> What is the weather in SF and Seattle?<|eot|><|header_start|>assistant<|header_end|> @@ -176,7 +208,7 @@ What is the weather in SF and Seattle?<|eot|><|header_start|>assistant<|header_e ##### Model Response Format ``` -[get_weather(city='SF'), get_weather(city='Seattle')]<|eot|> +[get_weather(city="San Francisco"), get_weather(city="Seattle")]<|eot|> ``` @@ -273,5 +305,5 @@ Use tools to get latest trending songs<|eot|><|header_start|>assistant<|header_e ##### Model Response Format ``` -{"n": "10"}<|eot|> +{"n": 10}<|eot|> ``` diff --git a/llama_stack/models/llama/llama4/prompt_templates/system_prompts.py b/llama_stack/models/llama/llama4/prompt_templates/system_prompts.py new file mode 100644 index 000000000..139e204ad --- /dev/null +++ b/llama_stack/models/llama/llama4/prompt_templates/system_prompts.py @@ -0,0 +1,144 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# top-level folder for each specific model found within the models/ directory at +# the top-level of this source tree. + +import textwrap +from typing import List, Optional + +from llama_stack.apis.inference import ToolDefinition, ToolParamDefinition +from llama_stack.models.llama.llama3.prompt_templates.base import ( + PromptTemplate, + PromptTemplateGeneratorBase, +) + + +class PythonListCustomToolGenerator(PromptTemplateGeneratorBase): # noqa: N801 + DEFAULT_PROMPT = textwrap.dedent( + """ + You are a helpful assistant and an expert in function composition. You can answer general questions using your internal knowledge OR invoke functions when necessary. Follow these strict guidelines: + + 1. FUNCTION CALLS: + - ONLY use functions that are EXPLICITLY listed in the function list below + - If NO functions are listed (empty function list []), respond ONLY with internal knowledge or "I don't have access to [Unavailable service] information" + - If a function is not in the list, respond ONLY with internal knowledge or "I don't have access to [Unavailable service] information" + - If ALL required parameters are present AND the query EXACTLY matches a listed function's purpose: output ONLY the function call(s) + - Use exact format: [func_name1(param1=value1, param2=value2), func_name2(...)] + Examples: + CORRECT: [get_weather(location="Vancouver"), calculate_route(start="Boston", end="New York")] <- Only if get_weather and calculate_route are in function list + INCORRECT: get_weather(location="New York") + INCORRECT: Let me check the weather: [get_weather(location="New York")] + INCORRECT: [get_events(location="Singapore")] <- If function not in list + + 2. RESPONSE RULES: + - For pure function requests matching a listed function: ONLY output the function call(s) + - For knowledge questions: ONLY output text + - For missing parameters: ONLY request the specific missing parameters + - For unavailable services (not in function list): output ONLY with internal knowledge or "I don't have access to [Unavailable service] information". Do NOT execute a function call. 
+ - If the query asks for information beyond what a listed function provides: output ONLY with internal knowledge about your limitations + - NEVER combine text and function calls in the same response + - NEVER suggest alternative functions when the requested service is unavailable + - NEVER create or invent new functions not listed below + + 3. STRICT BOUNDARIES: + - ONLY use functions from the list below - no exceptions + - NEVER use a function as an alternative to unavailable information + - NEVER call functions not present in the function list + - NEVER add explanatory text to function calls + - NEVER respond with empty brackets + - Use proper Python/JSON syntax for function calls + - Check the function list carefully before responding + + 4. TOOL RESPONSE HANDLING: + - When receiving tool responses: provide concise, natural language responses + - Don't repeat tool response verbatim + - Don't add supplementary information + + + {{ function_description }} + """.strip("\n") + ) + + def gen(self, custom_tools: List[ToolDefinition], system_prompt: Optional[str] = None) -> PromptTemplate: + system_prompt = system_prompt or self.DEFAULT_PROMPT + return PromptTemplate( + system_prompt, + {"function_description": self._gen_function_description(custom_tools)}, + ) + + def _gen_function_description(self, custom_tools: List[ToolDefinition]) -> PromptTemplate: + template_str = textwrap.dedent( + """ + Here is a list of functions in JSON format that you can invoke. + + [ + {% for t in tools -%} + {# manually setting up JSON because jinja sorts keys in unexpected ways -#} + {%- set tname = t.tool_name -%} + {%- set tdesc = t.description -%} + {%- set tparams = t.parameters -%} + {%- set required_params = [] -%} + {%- for name, param in tparams.items() if param.required == true -%} + {%- set _ = required_params.append(name) -%} + {%- endfor -%} + { + "name": "{{tname}}", + "description": "{{tdesc}}", + "parameters": { + "type": "dict", + "required": {{ required_params | tojson }}, + "properties": { + {%- for name, param in tparams.items() %} + "{{name}}": { + "type": "{{param.param_type}}", + "description": "{{param.description}}"{% if param.default %}, + "default": "{{param.default}}"{% endif %} + }{% if not loop.last %},{% endif %} + {%- endfor %} + } + } + }{% if not loop.last %}, + {% endif -%} + {%- endfor %} + ] + + You can answer general questions or invoke tools when necessary. + In addition to tool calls, you should also augment your responses by using the tool outputs. + + """ + ) + return PromptTemplate( + template_str.strip("\n"), + {"tools": [t.model_dump() for t in custom_tools]}, + ).render() + + def data_examples(self) -> List[List[ToolDefinition]]: + return [ + [ + ToolDefinition( + tool_name="get_weather", + description="Get weather info for places", + parameters={ + "city": ToolParamDefinition( + param_type="string", + description="The name of the city to get the weather for", + required=True, + ), + "metric": ToolParamDefinition( + param_type="string", + description="The metric for weather. 
Options are: celsius, fahrenheit", + required=False, + default="celsius", + ), + }, + ), + ] + ] diff --git a/llama_stack/models/llama/llama4/prompts.py b/llama_stack/models/llama/llama4/prompts.py index 13b96359a..fe9a59130 100644 --- a/llama_stack/models/llama/llama4/prompts.py +++ b/llama_stack/models/llama/llama4/prompts.py @@ -9,6 +9,10 @@ from io import BytesIO from pathlib import Path from typing import List +from llama_stack.models.llama.llama4.prompt_templates.system_prompts import ( + PythonListCustomToolGenerator, +) + from ..datatypes import RawMediaItem, RawMessage, RawTextItem from ..prompt_format import ( Llama4UseCase, @@ -177,39 +181,9 @@ def usecases(base_model: bool = False) -> List[UseCase | str]: [ RawMessage( role="system", - content="""You are an expert in composing functions. You are given a question and a set of possible functions. -Based on the question, you will need to make one or more function/tool calls to achieve the purpose. -If none of the function can be used, point it out. If the given question lacks the parameters required by the function, -also point it out. You should only return the function call in tools call sections. - -If you decide to invoke any of the function(s), you MUST put it in the format of [func_name1(params_name1=params_value1, params_name2=params_value2...), func_name2(params)] -You SHOULD NOT include any other text in the response. - -Here is a list of functions in JSON format that you can invoke. - -[ - { - "name": "get_weather", - "description": "Get weather info for places", - "parameters": { - "type": "dict", - "required": [ - "city" - ], - "properties": { - "city": { - "type": "string", - "description": "The name of the city to get the weather for" - }, - "metric": { - "type": "string", - "description": "The metric for weather. 
Options are: celsius, fahrenheit", - "default": "celsius" - } - } - } - } -""", + content=PythonListCustomToolGenerator() + .gen(PythonListCustomToolGenerator().data_examples()[0]) + .render(), ), RawMessage( role="user", diff --git a/llama_stack/providers/inline/agents/meta_reference/agent_instance.py b/llama_stack/providers/inline/agents/meta_reference/agent_instance.py index f441d6eb6..b5714b438 100644 --- a/llama_stack/providers/inline/agents/meta_reference/agent_instance.py +++ b/llama_stack/providers/inline/agents/meta_reference/agent_instance.py @@ -178,6 +178,8 @@ class ChatAgent(ShieldRunnerMixin): span.set_attribute("request", request.model_dump_json()) turn_id = str(uuid.uuid4()) span.set_attribute("turn_id", turn_id) + if self.agent_config.name: + span.set_attribute("agent_name", self.agent_config.name) await self._initialize_tools(request.toolgroups) async for chunk in self._run_turn(request, turn_id): @@ -190,6 +192,8 @@ class ChatAgent(ShieldRunnerMixin): span.set_attribute("session_id", request.session_id) span.set_attribute("request", request.model_dump_json()) span.set_attribute("turn_id", request.turn_id) + if self.agent_config.name: + span.set_attribute("agent_name", self.agent_config.name) await self._initialize_tools() async for chunk in self._run_turn(request): @@ -498,6 +502,8 @@ class ChatAgent(ShieldRunnerMixin): stop_reason = None async with tracing.span("inference") as span: + if self.agent_config.name: + span.set_attribute("agent_name", self.agent_config.name) async for chunk in await self.inference_api.chat_completion( self.agent_config.model, input_messages, diff --git a/llama_stack/providers/inline/inference/meta_reference/inference.py b/llama_stack/providers/inline/inference/meta_reference/inference.py index 2b9a27982..1bc098fab 100644 --- a/llama_stack/providers/inline/inference/meta_reference/inference.py +++ b/llama_stack/providers/inline/inference/meta_reference/inference.py @@ -253,7 +253,8 @@ class MetaReferenceInferenceImpl( def impl(): stop_reason = None - for token_result in self.generator.completion(request): + for token_results in self.generator.completion([request]): + token_result = token_results[0] if token_result.token == tokenizer.eot_id: stop_reason = StopReason.end_of_turn text = "" @@ -515,7 +516,8 @@ class MetaReferenceInferenceImpl( stop_reason = None ipython = False - for token_result in self.generator.chat_completion(request): + for token_results in self.generator.chat_completion([request]): + token_result = token_results[0] if os.environ.get("LLAMA_MODELS_DEBUG", "0") == "1": cprint(token_result.text, "cyan", end="") if os.environ.get("LLAMA_MODELS_DEBUG", "0") == "2": diff --git a/llama_stack/providers/inline/inference/meta_reference/parallel_utils.py b/llama_stack/providers/inline/inference/meta_reference/parallel_utils.py index 8752f06f3..8c0ffc632 100644 --- a/llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +++ b/llama_stack/providers/inline/inference/meta_reference/parallel_utils.py @@ -69,7 +69,10 @@ class CancelSentinel(BaseModel): class TaskRequest(BaseModel): type: Literal[ProcessingMessageName.task_request] = ProcessingMessageName.task_request - task: Tuple[str, List[CompletionRequestWithRawContent] | List[ChatCompletionRequestWithRawContent]] + task: Tuple[ + str, + List[CompletionRequestWithRawContent] | List[ChatCompletionRequestWithRawContent], + ] class TaskResponse(BaseModel): @@ -231,10 +234,10 @@ def worker_process_entrypoint( while True: try: task = req_gen.send(result) - if isinstance(task, 
str) and task == EndSentinel(): + if isinstance(task, EndSentinel): break - assert isinstance(task, TaskRequest) + assert isinstance(task, TaskRequest), task result = model(task.task) except StopIteration: break @@ -331,7 +334,10 @@ class ModelParallelProcessGroup: def run_inference( self, - req: Tuple[str, List[CompletionRequestWithRawContent] | List[ChatCompletionRequestWithRawContent]], + req: Tuple[ + str, + List[CompletionRequestWithRawContent] | List[ChatCompletionRequestWithRawContent], + ], ) -> Generator: assert not self.running, "inference already running" diff --git a/llama_stack/providers/inline/tool_runtime/rag/memory.py b/llama_stack/providers/inline/tool_runtime/rag/memory.py index 97c53d454..8d4689e5d 100644 --- a/llama_stack/providers/inline/tool_runtime/rag/memory.py +++ b/llama_stack/providers/inline/tool_runtime/rag/memory.py @@ -33,6 +33,7 @@ from llama_stack.apis.tools import ( ) from llama_stack.apis.vector_io import QueryChunksResponse, VectorIO from llama_stack.providers.datatypes import ToolsProtocolPrivate +from llama_stack.providers.utils.inference.prompt_adapter import interleaved_content_as_str from llama_stack.providers.utils.memory.vector_store import ( content_from_doc, make_overlapped_chunks, @@ -153,6 +154,11 @@ class MemoryToolRuntimeImpl(ToolsProtocolPrivate, ToolRuntime, RAGToolRuntime): ) ) picked.append(TextContentItem(text="END of knowledge_search tool results.\n")) + picked.append( + TextContentItem( + text=f'The above results were retrieved to help answer the user\'s query: "{interleaved_content_as_str(content)}". Use them as supporting information only in answering this query.\n', + ) + ) return RAGQueryResult( content=picked, diff --git a/llama_stack/providers/registry/inference.py b/llama_stack/providers/registry/inference.py index 3c54cabcf..4040f0d80 100644 --- a/llama_stack/providers/registry/inference.py +++ b/llama_stack/providers/registry/inference.py @@ -288,4 +288,14 @@ def available_providers() -> List[ProviderSpec]: provider_data_validator="llama_stack.providers.remote.inference.passthrough.PassthroughProviderDataValidator", ), ), + remote_provider_spec( + api=Api.inference, + adapter=AdapterSpec( + adapter_type="watsonx", + pip_packages=["ibm_watson_machine_learning"], + module="llama_stack.providers.remote.inference.watsonx", + config_class="llama_stack.providers.remote.inference.watsonx.WatsonXConfig", + provider_data_validator="llama_stack.providers.remote.inference.watsonx.WatsonXProviderDataValidator", + ), + ), ] diff --git a/llama_stack/providers/remote/eval/nvidia/README.md b/llama_stack/providers/remote/eval/nvidia/README.md index 08aac78fc..cebc77920 100644 --- a/llama_stack/providers/remote/eval/nvidia/README.md +++ b/llama_stack/providers/remote/eval/nvidia/README.md @@ -77,7 +77,7 @@ POST /eval/benchmarks/{benchmark_id}/jobs "benchmark_config": { "eval_candidate": { "type": "model", - "model": "meta/llama-3.1-8b-instruct", + "model": "meta-llama/Llama3.1-8B-Instruct", "sampling_params": { "max_tokens": 100, "temperature": 0.7 @@ -91,7 +91,7 @@ POST /eval/benchmarks/{benchmark_id}/jobs Response example: ```json { - "job_id": "1234", + "job_id": "eval-1234", "status": "in_progress" } ``` @@ -101,6 +101,14 @@ Response example: GET /eval/benchmarks/{benchmark_id}/jobs/{job_id} ``` +Response example: +```json +{ + "job_id": "eval-1234", + "status": "in_progress" +} +``` + ### Example for cancelling a job ``` POST /eval/benchmarks/{benchmark_id}/jobs/{job_id}/cancel diff --git 
a/llama_stack/providers/remote/eval/nvidia/config.py b/llama_stack/providers/remote/eval/nvidia/config.py index acd6e6e36..b660fcd68 100644 --- a/llama_stack/providers/remote/eval/nvidia/config.py +++ b/llama_stack/providers/remote/eval/nvidia/config.py @@ -14,10 +14,10 @@ class NVIDIAEvalConfig(BaseModel): Configuration for the NVIDIA NeMo Evaluator microservice endpoint. Attributes: - evaluator_service_url (str): A base url for accessing the NVIDIA evaluation endpoint, e.g. http://localhost:8000. + evaluator_url (str): A base url for accessing the NVIDIA evaluation endpoint, e.g. http://localhost:8000. """ - evaluator_service_url: str = Field( + evaluator_url: str = Field( default_factory=lambda: os.getenv("NVIDIA_EVALUATOR_URL", "http://0.0.0.0:7331"), description="The url for accessing the evaluator service", ) @@ -25,5 +25,5 @@ class NVIDIAEvalConfig(BaseModel): @classmethod def sample_run_config(cls, **kwargs) -> Dict[str, Any]: return { - "evaluator_service_url": "${env.NVIDIA_EVALUATOR_URL:http://localhost:7331}", + "evaluator_url": "${env.NVIDIA_EVALUATOR_URL:http://localhost:7331}", } diff --git a/llama_stack/providers/remote/eval/nvidia/eval.py b/llama_stack/providers/remote/eval/nvidia/eval.py index 5d25c66f8..e1a3b5355 100644 --- a/llama_stack/providers/remote/eval/nvidia/eval.py +++ b/llama_stack/providers/remote/eval/nvidia/eval.py @@ -53,13 +53,13 @@ class NVIDIAEvalImpl( async def _evaluator_get(self, path): """Helper for making GET requests to the evaluator service.""" - response = requests.get(url=f"{self.config.evaluator_service_url}{path}") + response = requests.get(url=f"{self.config.evaluator_url}{path}") response.raise_for_status() return response.json() async def _evaluator_post(self, path, data): """Helper for making POST requests to the evaluator service.""" - response = requests.post(url=f"{self.config.evaluator_service_url}{path}", json=data) + response = requests.post(url=f"{self.config.evaluator_url}{path}", json=data) response.raise_for_status() return response.json() diff --git a/llama_stack/providers/remote/inference/fireworks/fireworks.py b/llama_stack/providers/remote/inference/fireworks/fireworks.py index 48c163c87..58678a9cc 100644 --- a/llama_stack/providers/remote/inference/fireworks/fireworks.py +++ b/llama_stack/providers/remote/inference/fireworks/fireworks.py @@ -362,6 +362,39 @@ class FireworksInferenceAdapter(ModelRegistryHelper, Inference, NeedsRequestProv user: Optional[str] = None, ) -> Union[OpenAIChatCompletion, AsyncIterator[OpenAIChatCompletionChunk]]: model_obj = await self.model_store.get_model(model) + + # Divert Llama Models through Llama Stack inference APIs because + # Fireworks chat completions OpenAI-compatible API does not support + # tool calls properly. 
+ llama_model = self.get_llama_model(model_obj.provider_resource_id) + if llama_model: + return await OpenAIChatCompletionToLlamaStackMixin.openai_chat_completion( + self, + model=model, + messages=messages, + frequency_penalty=frequency_penalty, + function_call=function_call, + functions=functions, + logit_bias=logit_bias, + logprobs=logprobs, + max_completion_tokens=max_completion_tokens, + max_tokens=max_tokens, + n=n, + parallel_tool_calls=parallel_tool_calls, + presence_penalty=presence_penalty, + response_format=response_format, + seed=seed, + stop=stop, + stream=stream, + stream_options=stream_options, + temperature=temperature, + tool_choice=tool_choice, + tools=tools, + top_logprobs=top_logprobs, + top_p=top_p, + user=user, + ) + params = await prepare_openai_completion_params( messages=messages, frequency_penalty=frequency_penalty, @@ -387,11 +420,4 @@ class FireworksInferenceAdapter(ModelRegistryHelper, Inference, NeedsRequestProv user=user, ) - # Divert Llama Models through Llama Stack inference APIs because - # Fireworks chat completions OpenAI-compatible API does not support - # tool calls properly. - llama_model = self.get_llama_model(model_obj.provider_resource_id) - if llama_model: - return await OpenAIChatCompletionToLlamaStackMixin.openai_chat_completion(self, model=model, **params) - return await self._get_openai_client().chat.completions.create(model=model_obj.provider_resource_id, **params) diff --git a/llama_stack/providers/remote/inference/nvidia/NVIDIA.md b/llama_stack/providers/remote/inference/nvidia/NVIDIA.md new file mode 100644 index 000000000..a353c67f5 --- /dev/null +++ b/llama_stack/providers/remote/inference/nvidia/NVIDIA.md @@ -0,0 +1,85 @@ +# NVIDIA Inference Provider for LlamaStack + +This provider enables running inference using NVIDIA NIM. + +## Features +- Endpoints for completions, chat completions, and embeddings for registered models + +## Getting Started + +### Prerequisites + +- LlamaStack with NVIDIA configuration +- Access to NVIDIA NIM deployment +- NIM for model to use for inference is deployed + +### Setup + +Build the NVIDIA environment: + +```bash +llama stack build --template nvidia --image-type conda +``` + +### Basic Usage using the LlamaStack Python Client + +#### Initialize the client + +```python +import os + +os.environ["NVIDIA_API_KEY"] = ( + "" # Required if using hosted NIM endpoint. If self-hosted, not required. 
+) +os.environ["NVIDIA_BASE_URL"] = "http://nim.test" # NIM URL + +from llama_stack.distribution.library_client import LlamaStackAsLibraryClient + +client = LlamaStackAsLibraryClient("nvidia") +client.initialize() +``` + +### Create Completion + +```python +response = client.completion( + model_id="meta-llama/Llama-3.1-8b-Instruct", + content="Complete the sentence using one word: Roses are red, violets are :", + stream=False, + sampling_params={ + "max_tokens": 50, + }, +) +print(f"Response: {response.content}") +``` + +### Create Chat Completion + +```python +response = client.chat_completion( + model_id="meta-llama/Llama-3.1-8b-Instruct", + messages=[ + { + "role": "system", + "content": "You must respond to each message with only one word", + }, + { + "role": "user", + "content": "Complete the sentence using one word: Roses are red, violets are:", + }, + ], + stream=False, + sampling_params={ + "max_tokens": 50, + }, +) +print(f"Response: {response.completion_message.content}") +``` + +### Create Embeddings +```python +response = client.embeddings( + model_id="meta-llama/Llama-3.1-8b-Instruct", contents=["foo", "bar", "baz"] +) +print(f"Embeddings: {response.embeddings}") +``` diff --git a/llama_stack/providers/remote/inference/nvidia/models.py b/llama_stack/providers/remote/inference/nvidia/models.py index 964125148..127a6ca59 100644 --- a/llama_stack/providers/remote/inference/nvidia/models.py +++ b/llama_stack/providers/remote/inference/nvidia/models.py @@ -48,6 +48,10 @@ MODEL_ENTRIES = [ "meta/llama-3.2-90b-vision-instruct", CoreModelId.llama3_2_90b_vision_instruct.value, ), + build_hf_repo_model_entry( + "meta/llama-3.3-70b-instruct", + CoreModelId.llama3_3_70b_instruct.value, + ), # NeMo Retriever Text Embedding models - # # https://docs.nvidia.com/nim/nemo-retriever/text-embedding/latest/support-matrix.html diff --git a/llama_stack/providers/remote/inference/nvidia/nvidia.py b/llama_stack/providers/remote/inference/nvidia/nvidia.py index 7cc44d93d..4a62ad6cb 100644 --- a/llama_stack/providers/remote/inference/nvidia/nvidia.py +++ b/llama_stack/providers/remote/inference/nvidia/nvidia.py @@ -129,6 +129,14 @@ class NVIDIAInferenceAdapter(Inference, ModelRegistryHelper): base_url = special_model_urls[provider_model_id] return _get_client_for_base_url(base_url) + async def _get_provider_model_id(self, model_id: str) -> str: + if not self.model_store: + raise RuntimeError("Model store is not set") + model = await self.model_store.get_model(model_id) + if model is None: + raise ValueError(f"Model {model_id} is unknown") + return model.provider_model_id + async def completion( self, model_id: str, @@ -147,7 +155,7 @@ class NVIDIAInferenceAdapter(Inference, ModelRegistryHelper): # removing this health check as NeMo customizer endpoint health check is returning 404 # await check_health(self._config) # this raises errors - provider_model_id = self.get_provider_model_id(model_id) + provider_model_id = await self._get_provider_model_id(model_id) request = convert_completion_request( request=CompletionRequest( model=provider_model_id, @@ -191,7 +199,7 @@ class NVIDIAInferenceAdapter(Inference, ModelRegistryHelper): # flat_contents = [content.text if isinstance(content, TextContentItem) else content for content in contents] input = [content.text if isinstance(content, TextContentItem) else content for content in flat_contents] - model = self.get_provider_model_id(model_id) + provider_model_id = await self._get_provider_model_id(model_id) extra_body = {} @@ -214,8 +222,8 @@ class 
NVIDIAInferenceAdapter(Inference, ModelRegistryHelper): extra_body["input_type"] = task_type_options[task_type] try: - response = await self._get_client(model).embeddings.create( - model=model, + response = await self._get_client(provider_model_id).embeddings.create( + model=provider_model_id, input=input, extra_body=extra_body, ) @@ -249,11 +257,10 @@ class NVIDIAInferenceAdapter(Inference, ModelRegistryHelper): # await check_health(self._config) # this raises errors - provider_model_id = self.get_provider_model_id(model_id) - print(f"provider_model_id: {provider_model_id}") + provider_model_id = await self._get_provider_model_id(model_id) request = await convert_chat_completion_request( request=ChatCompletionRequest( - model=self.get_provider_model_id(model_id), + model=provider_model_id, messages=messages, sampling_params=sampling_params, response_format=response_format, @@ -298,7 +305,7 @@ class NVIDIAInferenceAdapter(Inference, ModelRegistryHelper): guided_choice: Optional[List[str]] = None, prompt_logprobs: Optional[int] = None, ) -> OpenAICompletion: - provider_model_id = self.get_provider_model_id(model) + provider_model_id = await self._get_provider_model_id(model) params = await prepare_openai_completion_params( model=provider_model_id, @@ -351,7 +358,7 @@ class NVIDIAInferenceAdapter(Inference, ModelRegistryHelper): top_p: Optional[float] = None, user: Optional[str] = None, ) -> Union[OpenAIChatCompletion, AsyncIterator[OpenAIChatCompletionChunk]]: - provider_model_id = self.get_provider_model_id(model) + provider_model_id = await self._get_provider_model_id(model) params = await prepare_openai_completion_params( model=provider_model_id, diff --git a/llama_stack/providers/remote/inference/together/together.py b/llama_stack/providers/remote/inference/together/together.py index 001e6aac4..48e41f5b0 100644 --- a/llama_stack/providers/remote/inference/together/together.py +++ b/llama_stack/providers/remote/inference/together/together.py @@ -76,8 +76,11 @@ class TogetherInferenceAdapter(ModelRegistryHelper, Inference, NeedsRequestProvi async def shutdown(self) -> None: if self._client: - await self._client.close() + # Together client has no close method, so just set to None self._client = None + if self._openai_client: + await self._openai_client.close() + self._openai_client = None async def completion( self, @@ -359,7 +362,7 @@ class TogetherInferenceAdapter(ModelRegistryHelper, Inference, NeedsRequestProvi top_p=top_p, user=user, ) - if params.get("stream", True): + if params.get("stream", False): return self._stream_openai_chat_completion(params) return await self._get_openai_client().chat.completions.create(**params) # type: ignore diff --git a/llama_stack/providers/remote/inference/vllm/vllm.py b/llama_stack/providers/remote/inference/vllm/vllm.py index 2b9eae1e9..8cfef2ee0 100644 --- a/llama_stack/providers/remote/inference/vllm/vllm.py +++ b/llama_stack/providers/remote/inference/vllm/vllm.py @@ -231,12 +231,7 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate): self.client = None async def initialize(self) -> None: - log.info(f"Initializing VLLM client with base_url={self.config.url}") - self.client = AsyncOpenAI( - base_url=self.config.url, - api_key=self.config.api_token, - http_client=None if self.config.tls_verify else httpx.AsyncClient(verify=False), - ) + pass async def shutdown(self) -> None: pass @@ -249,6 +244,20 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate): raise ValueError("Model store not set") return await 
self.model_store.get_model(model_id) + def _lazy_initialize_client(self): + if self.client is not None: + return + + log.info(f"Initializing vLLM client with base_url={self.config.url}") + self.client = self._create_client() + + def _create_client(self): + return AsyncOpenAI( + base_url=self.config.url, + api_key=self.config.api_token, + http_client=None if self.config.tls_verify else httpx.AsyncClient(verify=False), + ) + async def completion( self, model_id: str, @@ -258,6 +267,7 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate): stream: Optional[bool] = False, logprobs: Optional[LogProbConfig] = None, ) -> CompletionResponse | AsyncGenerator[CompletionResponseStreamChunk, None]: + self._lazy_initialize_client() if sampling_params is None: sampling_params = SamplingParams() model = await self._get_model(model_id) @@ -287,6 +297,7 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate): logprobs: Optional[LogProbConfig] = None, tool_config: Optional[ToolConfig] = None, ) -> ChatCompletionResponse | AsyncGenerator[ChatCompletionResponseStreamChunk, None]: + self._lazy_initialize_client() if sampling_params is None: sampling_params = SamplingParams() model = await self._get_model(model_id) @@ -357,9 +368,12 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate): yield chunk async def register_model(self, model: Model) -> Model: - assert self.client is not None + # register_model is called during Llama Stack initialization, hence we cannot init self.client if not initialized yet. + # self.client should only be created after the initialization is complete to avoid asyncio cross-context errors. + # Changing this may lead to unpredictable behavior. + client = self._create_client() if self.client is None else self.client model = await self.register_helper.register_model(model) - res = await self.client.models.list() + res = await client.models.list() available_models = [m.id async for m in res] if model.provider_resource_id not in available_models: raise ValueError( @@ -374,7 +388,8 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate): options["max_tokens"] = self.config.max_tokens input_dict: dict[str, Any] = {} - if isinstance(request, ChatCompletionRequest) and request.tools is not None: + # Only include the 'tools' param if there is any. It can break things if an empty list is sent to the vLLM. 
+ if isinstance(request, ChatCompletionRequest) and request.tools: input_dict = {"tools": _convert_to_vllm_tools_in_request(request.tools)} if isinstance(request, ChatCompletionRequest): @@ -409,6 +424,7 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate): output_dimension: Optional[int] = None, task_type: Optional[EmbeddingTaskType] = None, ) -> EmbeddingsResponse: + self._lazy_initialize_client() assert self.client is not None model = await self._get_model(model_id) @@ -448,6 +464,7 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate): guided_choice: Optional[List[str]] = None, prompt_logprobs: Optional[int] = None, ) -> OpenAICompletion: + self._lazy_initialize_client() model_obj = await self._get_model(model) extra_body: Dict[str, Any] = {} @@ -504,6 +521,7 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate): top_p: Optional[float] = None, user: Optional[str] = None, ) -> Union[OpenAIChatCompletion, AsyncIterator[OpenAIChatCompletionChunk]]: + self._lazy_initialize_client() model_obj = await self._get_model(model) params = await prepare_openai_completion_params( model=model_obj.provider_resource_id, diff --git a/llama_stack/providers/remote/inference/watsonx/__init__.py b/llama_stack/providers/remote/inference/watsonx/__init__.py new file mode 100644 index 000000000..e59e873b6 --- /dev/null +++ b/llama_stack/providers/remote/inference/watsonx/__init__.py @@ -0,0 +1,22 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from llama_stack.apis.inference import Inference + +from .config import WatsonXConfig + + +async def get_adapter_impl(config: WatsonXConfig, _deps) -> Inference: + # import dynamically so `llama stack build` does not fail due to missing dependencies + from .watsonx import WatsonXInferenceAdapter + + if not isinstance(config, WatsonXConfig): + raise RuntimeError(f"Unexpected config type: {type(config)}") + adapter = WatsonXInferenceAdapter(config) + return adapter + + +__all__ = ["get_adapter_impl", "WatsonXConfig"] diff --git a/llama_stack/providers/remote/inference/watsonx/config.py b/llama_stack/providers/remote/inference/watsonx/config.py new file mode 100644 index 000000000..7ee99b7e0 --- /dev/null +++ b/llama_stack/providers/remote/inference/watsonx/config.py @@ -0,0 +1,46 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
+ +import os +from typing import Any, Dict, Optional + +from pydantic import BaseModel, Field, SecretStr + +from llama_stack.schema_utils import json_schema_type + + +class WatsonXProviderDataValidator(BaseModel): + url: str + api_key: str + project_id: str + + +@json_schema_type +class WatsonXConfig(BaseModel): + url: str = Field( + default_factory=lambda: os.getenv("WATSONX_BASE_URL", "https://us-south.ml.cloud.ibm.com"), + description="A base url for accessing the watsonx.ai", + ) + api_key: Optional[SecretStr] = Field( + default_factory=lambda: os.getenv("WATSONX_API_KEY"), + description="The watsonx API key, only needed of using the hosted service", + ) + project_id: Optional[str] = Field( + default_factory=lambda: os.getenv("WATSONX_PROJECT_ID"), + description="The Project ID key, only needed of using the hosted service", + ) + timeout: int = Field( + default=60, + description="Timeout for the HTTP requests", + ) + + @classmethod + def sample_run_config(cls, **kwargs) -> Dict[str, Any]: + return { + "url": "${env.WATSONX_BASE_URL:https://us-south.ml.cloud.ibm.com}", + "api_key": "${env.WATSONX_API_KEY:}", + "project_id": "${env.WATSONX_PROJECT_ID:}", + } diff --git a/llama_stack/providers/remote/inference/watsonx/models.py b/llama_stack/providers/remote/inference/watsonx/models.py new file mode 100644 index 000000000..d98f0510a --- /dev/null +++ b/llama_stack/providers/remote/inference/watsonx/models.py @@ -0,0 +1,47 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from llama_stack.models.llama.sku_types import CoreModelId +from llama_stack.providers.utils.inference.model_registry import build_hf_repo_model_entry + +MODEL_ENTRIES = [ + build_hf_repo_model_entry( + "meta-llama/llama-3-3-70b-instruct", + CoreModelId.llama3_3_70b_instruct.value, + ), + build_hf_repo_model_entry( + "meta-llama/llama-2-13b-chat", + CoreModelId.llama2_13b.value, + ), + build_hf_repo_model_entry( + "meta-llama/llama-3-1-70b-instruct", + CoreModelId.llama3_1_70b_instruct.value, + ), + build_hf_repo_model_entry( + "meta-llama/llama-3-1-8b-instruct", + CoreModelId.llama3_1_8b_instruct.value, + ), + build_hf_repo_model_entry( + "meta-llama/llama-3-2-11b-vision-instruct", + CoreModelId.llama3_2_11b_vision_instruct.value, + ), + build_hf_repo_model_entry( + "meta-llama/llama-3-2-1b-instruct", + CoreModelId.llama3_2_1b_instruct.value, + ), + build_hf_repo_model_entry( + "meta-llama/llama-3-2-3b-instruct", + CoreModelId.llama3_2_3b_instruct.value, + ), + build_hf_repo_model_entry( + "meta-llama/llama-3-2-90b-vision-instruct", + CoreModelId.llama3_2_90b_vision_instruct.value, + ), + build_hf_repo_model_entry( + "meta-llama/llama-guard-3-11b-vision", + CoreModelId.llama_guard_3_11b_vision.value, + ), +] diff --git a/llama_stack/providers/remote/inference/watsonx/watsonx.py b/llama_stack/providers/remote/inference/watsonx/watsonx.py new file mode 100644 index 000000000..fa9cc4391 --- /dev/null +++ b/llama_stack/providers/remote/inference/watsonx/watsonx.py @@ -0,0 +1,378 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
+ +from typing import Any, AsyncGenerator, AsyncIterator, Dict, List, Optional, Union + +from ibm_watson_machine_learning.foundation_models import Model +from ibm_watson_machine_learning.metanames import GenTextParamsMetaNames as GenParams +from openai import AsyncOpenAI + +from llama_stack.apis.common.content_types import InterleavedContent, InterleavedContentItem +from llama_stack.apis.inference import ( + ChatCompletionRequest, + ChatCompletionResponse, + CompletionRequest, + EmbeddingsResponse, + EmbeddingTaskType, + Inference, + LogProbConfig, + Message, + ResponseFormat, + SamplingParams, + TextTruncation, + ToolChoice, + ToolConfig, + ToolDefinition, + ToolPromptFormat, +) +from llama_stack.apis.inference.inference import ( + GreedySamplingStrategy, + OpenAIChatCompletion, + OpenAIChatCompletionChunk, + OpenAICompletion, + OpenAIMessageParam, + OpenAIResponseFormatParam, + TopKSamplingStrategy, + TopPSamplingStrategy, +) +from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper +from llama_stack.providers.utils.inference.openai_compat import ( + OpenAICompatCompletionChoice, + OpenAICompatCompletionResponse, + prepare_openai_completion_params, + process_chat_completion_response, + process_chat_completion_stream_response, + process_completion_response, + process_completion_stream_response, +) +from llama_stack.providers.utils.inference.prompt_adapter import ( + chat_completion_request_to_prompt, + completion_request_to_prompt, + request_has_media, +) + +from . import WatsonXConfig +from .models import MODEL_ENTRIES + + +class WatsonXInferenceAdapter(Inference, ModelRegistryHelper): + def __init__(self, config: WatsonXConfig) -> None: + ModelRegistryHelper.__init__(self, MODEL_ENTRIES) + + print(f"Initializing watsonx InferenceAdapter({config.url})...") + + self._config = config + + self._project_id = self._config.project_id + + async def initialize(self) -> None: + pass + + async def shutdown(self) -> None: + pass + + async def completion( + self, + model_id: str, + content: InterleavedContent, + sampling_params: Optional[SamplingParams] = None, + response_format: Optional[ResponseFormat] = None, + stream: Optional[bool] = False, + logprobs: Optional[LogProbConfig] = None, + ) -> AsyncGenerator: + if sampling_params is None: + sampling_params = SamplingParams() + model = await self.model_store.get_model(model_id) + request = CompletionRequest( + model=model.provider_resource_id, + content=content, + sampling_params=sampling_params, + response_format=response_format, + stream=stream, + logprobs=logprobs, + ) + if stream: + return self._stream_completion(request) + else: + return await self._nonstream_completion(request) + + def _get_client(self, model_id) -> Model: + config_api_key = self._config.api_key.get_secret_value() if self._config.api_key else None + config_url = self._config.url + project_id = self._config.project_id + credentials = {"url": config_url, "apikey": config_api_key} + + return Model(model_id=model_id, credentials=credentials, project_id=project_id) + + def _get_openai_client(self) -> AsyncOpenAI: + if not self._openai_client: + self._openai_client = AsyncOpenAI( + base_url=f"{self._config.url}/openai/v1", + api_key=self._config.api_key, + ) + return self._openai_client + + async def _nonstream_completion(self, request: CompletionRequest) -> ChatCompletionResponse: + params = await self._get_params(request) + r = self._get_client(request.model).generate(**params) + choices = [] + if "results" in r: + for result in r["results"]: + choice 
= OpenAICompatCompletionChoice( + finish_reason=result["stop_reason"] if result["stop_reason"] else None, + text=result["generated_text"], + ) + choices.append(choice) + response = OpenAICompatCompletionResponse( + choices=choices, + ) + return process_completion_response(response) + + async def _stream_completion(self, request: CompletionRequest) -> AsyncGenerator: + params = await self._get_params(request) + + async def _generate_and_convert_to_openai_compat(): + s = self._get_client(request.model).generate_text_stream(**params) + for chunk in s: + choice = OpenAICompatCompletionChoice( + finish_reason=None, + text=chunk, + ) + yield OpenAICompatCompletionResponse( + choices=[choice], + ) + + stream = _generate_and_convert_to_openai_compat() + async for chunk in process_completion_stream_response(stream): + yield chunk + + async def chat_completion( + self, + model_id: str, + messages: List[Message], + sampling_params: Optional[SamplingParams] = None, + tools: Optional[List[ToolDefinition]] = None, + tool_choice: Optional[ToolChoice] = ToolChoice.auto, + tool_prompt_format: Optional[ToolPromptFormat] = None, + response_format: Optional[ResponseFormat] = None, + stream: Optional[bool] = False, + logprobs: Optional[LogProbConfig] = None, + tool_config: Optional[ToolConfig] = None, + ) -> AsyncGenerator: + if sampling_params is None: + sampling_params = SamplingParams() + model = await self.model_store.get_model(model_id) + request = ChatCompletionRequest( + model=model.provider_resource_id, + messages=messages, + sampling_params=sampling_params, + tools=tools or [], + response_format=response_format, + stream=stream, + logprobs=logprobs, + tool_config=tool_config, + ) + + if stream: + return self._stream_chat_completion(request) + else: + return await self._nonstream_chat_completion(request) + + async def _nonstream_chat_completion(self, request: ChatCompletionRequest) -> ChatCompletionResponse: + params = await self._get_params(request) + r = self._get_client(request.model).generate(**params) + choices = [] + if "results" in r: + for result in r["results"]: + choice = OpenAICompatCompletionChoice( + finish_reason=result["stop_reason"] if result["stop_reason"] else None, + text=result["generated_text"], + ) + choices.append(choice) + response = OpenAICompatCompletionResponse( + choices=choices, + ) + return process_chat_completion_response(response, request) + + async def _stream_chat_completion(self, request: ChatCompletionRequest) -> AsyncGenerator: + params = await self._get_params(request) + model_id = request.model + + # if we shift to TogetherAsyncClient, we won't need this wrapper + async def _to_async_generator(): + s = self._get_client(model_id).generate_text_stream(**params) + for chunk in s: + choice = OpenAICompatCompletionChoice( + finish_reason=None, + text=chunk, + ) + yield OpenAICompatCompletionResponse( + choices=[choice], + ) + + stream = _to_async_generator() + async for chunk in process_chat_completion_stream_response(stream, request): + yield chunk + + async def _get_params(self, request: Union[ChatCompletionRequest, CompletionRequest]) -> dict: + input_dict = {"params": {}} + media_present = request_has_media(request) + llama_model = self.get_llama_model(request.model) + if isinstance(request, ChatCompletionRequest): + input_dict["prompt"] = await chat_completion_request_to_prompt(request, llama_model) + else: + assert not media_present, "Together does not support media for Completion requests" + input_dict["prompt"] = await completion_request_to_prompt(request) + if 
request.sampling_params: + if request.sampling_params.strategy: + input_dict["params"][GenParams.DECODING_METHOD] = request.sampling_params.strategy.type + if request.sampling_params.max_tokens: + input_dict["params"][GenParams.MAX_NEW_TOKENS] = request.sampling_params.max_tokens + if request.sampling_params.repetition_penalty: + input_dict["params"][GenParams.REPETITION_PENALTY] = request.sampling_params.repetition_penalty + + if isinstance(request.sampling_params.strategy, TopPSamplingStrategy): + input_dict["params"][GenParams.TOP_P] = request.sampling_params.strategy.top_p + input_dict["params"][GenParams.TEMPERATURE] = request.sampling_params.strategy.temperature + if isinstance(request.sampling_params.strategy, TopKSamplingStrategy): + input_dict["params"][GenParams.TOP_K] = request.sampling_params.strategy.top_k + if isinstance(request.sampling_params.strategy, GreedySamplingStrategy): + input_dict["params"][GenParams.TEMPERATURE] = 0.0 + + input_dict["params"][GenParams.STOP_SEQUENCES] = ["<|endoftext|>"] + + params = { + **input_dict, + } + return params + + async def embeddings( + self, + model_id: str, + contents: List[str] | List[InterleavedContentItem], + text_truncation: Optional[TextTruncation] = TextTruncation.none, + output_dimension: Optional[int] = None, + task_type: Optional[EmbeddingTaskType] = None, + ) -> EmbeddingsResponse: + raise NotImplementedError("embedding is not supported for watsonx") + + async def openai_completion( + self, + model: str, + prompt: Union[str, List[str], List[int], List[List[int]]], + best_of: Optional[int] = None, + echo: Optional[bool] = None, + frequency_penalty: Optional[float] = None, + logit_bias: Optional[Dict[str, float]] = None, + logprobs: Optional[bool] = None, + max_tokens: Optional[int] = None, + n: Optional[int] = None, + presence_penalty: Optional[float] = None, + seed: Optional[int] = None, + stop: Optional[Union[str, List[str]]] = None, + stream: Optional[bool] = None, + stream_options: Optional[Dict[str, Any]] = None, + temperature: Optional[float] = None, + top_p: Optional[float] = None, + user: Optional[str] = None, + guided_choice: Optional[List[str]] = None, + prompt_logprobs: Optional[int] = None, + ) -> OpenAICompletion: + model_obj = await self.model_store.get_model(model) + params = await prepare_openai_completion_params( + model=model_obj.provider_resource_id, + prompt=prompt, + best_of=best_of, + echo=echo, + frequency_penalty=frequency_penalty, + logit_bias=logit_bias, + logprobs=logprobs, + max_tokens=max_tokens, + n=n, + presence_penalty=presence_penalty, + seed=seed, + stop=stop, + stream=stream, + stream_options=stream_options, + temperature=temperature, + top_p=top_p, + user=user, + ) + return await self._get_openai_client().completions.create(**params) # type: ignore + + async def openai_chat_completion( + self, + model: str, + messages: List[OpenAIMessageParam], + frequency_penalty: Optional[float] = None, + function_call: Optional[Union[str, Dict[str, Any]]] = None, + functions: Optional[List[Dict[str, Any]]] = None, + logit_bias: Optional[Dict[str, float]] = None, + logprobs: Optional[bool] = None, + max_completion_tokens: Optional[int] = None, + max_tokens: Optional[int] = None, + n: Optional[int] = None, + parallel_tool_calls: Optional[bool] = None, + presence_penalty: Optional[float] = None, + response_format: Optional[OpenAIResponseFormatParam] = None, + seed: Optional[int] = None, + stop: Optional[Union[str, List[str]]] = None, + stream: Optional[bool] = None, + stream_options: Optional[Dict[str, 
Any]] = None, + temperature: Optional[float] = None, + tool_choice: Optional[Union[str, Dict[str, Any]]] = None, + tools: Optional[List[Dict[str, Any]]] = None, + top_logprobs: Optional[int] = None, + top_p: Optional[float] = None, + user: Optional[str] = None, + ) -> Union[OpenAIChatCompletion, AsyncIterator[OpenAIChatCompletionChunk]]: + model_obj = await self.model_store.get_model(model) + params = await prepare_openai_completion_params( + model=model_obj.provider_resource_id, + messages=messages, + frequency_penalty=frequency_penalty, + function_call=function_call, + functions=functions, + logit_bias=logit_bias, + logprobs=logprobs, + max_completion_tokens=max_completion_tokens, + max_tokens=max_tokens, + n=n, + parallel_tool_calls=parallel_tool_calls, + presence_penalty=presence_penalty, + response_format=response_format, + seed=seed, + stop=stop, + stream=stream, + stream_options=stream_options, + temperature=temperature, + tool_choice=tool_choice, + tools=tools, + top_logprobs=top_logprobs, + top_p=top_p, + user=user, + ) + if params.get("stream", False): + return self._stream_openai_chat_completion(params) + return await self._get_openai_client().chat.completions.create(**params) # type: ignore + + async def _stream_openai_chat_completion(self, params: dict) -> AsyncGenerator: + # watsonx.ai sometimes adds usage data to the stream + include_usage = False + if params.get("stream_options", None): + include_usage = params["stream_options"].get("include_usage", False) + stream = await self._get_openai_client().chat.completions.create(**params) + + seen_finish_reason = False + async for chunk in stream: + # Final usage chunk with no choices that the user didn't request, so discard + if not include_usage and seen_finish_reason and len(chunk.choices) == 0: + break + yield chunk + for choice in chunk.choices: + if choice.finish_reason: + seen_finish_reason = True + break diff --git a/llama_stack/providers/remote/post_training/nvidia/README.md b/llama_stack/providers/remote/post_training/nvidia/README.md index 015d6a5f9..3ef538d29 100644 --- a/llama_stack/providers/remote/post_training/nvidia/README.md +++ b/llama_stack/providers/remote/post_training/nvidia/README.md @@ -36,7 +36,6 @@ import os os.environ["NVIDIA_API_KEY"] = "your-api-key" os.environ["NVIDIA_CUSTOMIZER_URL"] = "http://nemo.test" -os.environ["NVIDIA_USER_ID"] = "llama-stack-user" os.environ["NVIDIA_DATASET_NAMESPACE"] = "default" os.environ["NVIDIA_PROJECT_ID"] = "test-project" os.environ["NVIDIA_OUTPUT_MODEL_DIR"] = "test-example-model@v1" @@ -128,13 +127,14 @@ client.post_training.job.cancel(job_uuid="your-job-id") #### 1. Register the model ```python -model = Model( - identifier="test-example-model@v1", +from llama_stack.apis.models import Model, ModelType + +client.models.register( + model_id="test-example-model@v1", provider_id="nvidia", provider_model_id="test-example-model@v1", model_type=ModelType.llm, ) -client.register_model(model) ``` #### 2. 
Inference with the fine-tuned model diff --git a/llama_stack/providers/remote/post_training/nvidia/models.py b/llama_stack/providers/remote/post_training/nvidia/models.py index 7c696ac20..1b31b4dbe 100644 --- a/llama_stack/providers/remote/post_training/nvidia/models.py +++ b/llama_stack/providers/remote/post_training/nvidia/models.py @@ -16,7 +16,11 @@ _MODEL_ENTRIES = [ build_hf_repo_model_entry( "meta/llama-3.1-8b-instruct", CoreModelId.llama3_1_8b_instruct.value, - ) + ), + build_hf_repo_model_entry( + "meta/llama-3.2-1b-instruct", + CoreModelId.llama3_2_1b_instruct.value, + ), ] diff --git a/llama_stack/providers/remote/post_training/nvidia/post_training.py b/llama_stack/providers/remote/post_training/nvidia/post_training.py index e454113f0..c74fb2a24 100644 --- a/llama_stack/providers/remote/post_training/nvidia/post_training.py +++ b/llama_stack/providers/remote/post_training/nvidia/post_training.py @@ -67,13 +67,18 @@ class NvidiaPostTrainingAdapter(ModelRegistryHelper): self.timeout = aiohttp.ClientTimeout(total=config.timeout) # TODO: filter by available models based on /config endpoint ModelRegistryHelper.__init__(self, model_entries=_MODEL_ENTRIES) - self.session = aiohttp.ClientSession(headers=self.headers, timeout=self.timeout) - self.customizer_url = config.customizer_url + self.session = None + self.customizer_url = config.customizer_url if not self.customizer_url: warnings.warn("Customizer URL is not set, using default value: http://nemo.test", stacklevel=2) self.customizer_url = "http://nemo.test" + async def _get_session(self) -> aiohttp.ClientSession: + if self.session is None or self.session.closed: + self.session = aiohttp.ClientSession(headers=self.headers, timeout=self.timeout) + return self.session + async def _make_request( self, method: str, @@ -94,11 +99,9 @@ class NvidiaPostTrainingAdapter(ModelRegistryHelper): if json and "Content-Type" not in request_headers: request_headers["Content-Type"] = "application/json" + session = await self._get_session() for _ in range(self.config.max_retries): - # TODO: Remove `verify_ssl=False`. 
Added for testing purposes to call NMP int environment from `docs/notebooks/nvidia/` - async with self.session.request( - method, url, params=params, json=json, verify_ssl=False, **kwargs - ) as response: + async with session.request(method, url, params=params, json=json, **kwargs) as response: if response.status >= 400: error_data = await response.json() raise Exception(f"API request failed: {error_data}") @@ -125,8 +128,8 @@ class NvidiaPostTrainingAdapter(ModelRegistryHelper): jobs = [] for job in response.get("data", []): job_id = job.pop("id") - job_status = job.pop("status", "unknown").lower() - mapped_status = STATUS_MAPPING.get(job_status, "unknown") + job_status = job.pop("status", "scheduled").lower() + mapped_status = STATUS_MAPPING.get(job_status, "scheduled") # Convert string timestamps to datetime objects created_at = ( @@ -180,7 +183,7 @@ class NvidiaPostTrainingAdapter(ModelRegistryHelper): ) api_status = response.pop("status").lower() - mapped_status = STATUS_MAPPING.get(api_status, "unknown") + mapped_status = STATUS_MAPPING.get(api_status, "scheduled") return NvidiaPostTrainingJobStatusResponse( status=JobStatus(mapped_status), @@ -242,6 +245,7 @@ class NvidiaPostTrainingAdapter(ModelRegistryHelper): Supported models: - meta/llama-3.1-8b-instruct + - meta/llama-3.2-1b-instruct Supported algorithm configs: - LoRA, SFT @@ -287,10 +291,6 @@ class NvidiaPostTrainingAdapter(ModelRegistryHelper): - LoRA config: ## NeMo customizer specific LoRA parameters - - adapter_dim: int - Adapter dimension - Default: 8 (supports powers of 2) - - adapter_dropout: float - Adapter dropout - Default: None (0.0-1.0) - alpha: int - Scaling factor for the LoRA update Default: 16 Note: @@ -300,7 +300,7 @@ class NvidiaPostTrainingAdapter(ModelRegistryHelper): User is informed about unsupported parameters via warnings. 
""" # Map model to nvidia model name - # ToDo: only supports llama-3.1-8b-instruct now, need to update this to support other models + # See `_MODEL_ENTRIES` for supported models nvidia_model = self.get_provider_model_id(model) # Check for unsupported method parameters @@ -333,7 +333,7 @@ class NvidiaPostTrainingAdapter(ModelRegistryHelper): }, "data_config": {"dataset_id", "batch_size"}, "optimizer_config": {"lr", "weight_decay"}, - "lora_config": {"type", "adapter_dim", "adapter_dropout", "alpha"}, + "lora_config": {"type", "alpha"}, } # Validate all parameters at once @@ -392,17 +392,10 @@ class NvidiaPostTrainingAdapter(ModelRegistryHelper): # Handle LoRA-specific configuration if algorithm_config: - algorithm_config_dict = algorithm_config.model_dump() - if algorithm_config_dict.get("type") == "LoRA": - warn_unsupported_params(algorithm_config_dict, supported_params["lora_config"], "LoRA config") + if algorithm_config.type == "LoRA": + warn_unsupported_params(algorithm_config, supported_params["lora_config"], "LoRA config") job_config["hyperparameters"]["lora"] = { - k: v - for k, v in { - "adapter_dim": algorithm_config_dict.get("adapter_dim"), - "alpha": algorithm_config_dict.get("alpha"), - "adapter_dropout": algorithm_config_dict.get("adapter_dropout"), - }.items() - if v is not None + k: v for k, v in {"alpha": algorithm_config.alpha}.items() if v is not None } else: raise NotImplementedError(f"Unsupported algorithm config: {algorithm_config}") diff --git a/llama_stack/providers/remote/safety/nvidia/README.md b/llama_stack/providers/remote/safety/nvidia/README.md new file mode 100644 index 000000000..434db32fb --- /dev/null +++ b/llama_stack/providers/remote/safety/nvidia/README.md @@ -0,0 +1,77 @@ +# NVIDIA Safety Provider for LlamaStack + +This provider enables safety checks and guardrails for LLM interactions using NVIDIA's NeMo Guardrails service. 
+ +## Features + +- Run safety checks for messages + +## Getting Started + +### Prerequisites + +- LlamaStack with NVIDIA configuration +- Access to NVIDIA NeMo Guardrails service +- NIM for model to use for safety check is deployed + +### Setup + +Build the NVIDIA environment: + +```bash +llama stack build --template nvidia --image-type conda +``` + +### Basic Usage using the LlamaStack Python Client + +#### Initialize the client + +```python +import os + +os.environ["NVIDIA_API_KEY"] = "your-api-key" +os.environ["NVIDIA_GUARDRAILS_URL"] = "http://guardrails.test" + +from llama_stack.distribution.library_client import LlamaStackAsLibraryClient + +client = LlamaStackAsLibraryClient("nvidia") +client.initialize() +``` + +#### Create a safety shield + +```python +from llama_stack.apis.safety import Shield +from llama_stack.apis.inference import Message + +# Create a safety shield +shield = Shield( + shield_id="your-shield-id", + provider_resource_id="safety-model-id", # The model to use for safety checks + description="Safety checks for content moderation", +) + +# Register the shield +await client.safety.register_shield(shield) +``` + +#### Run safety checks + +```python +# Messages to check +messages = [Message(role="user", content="Your message to check")] + +# Run safety check +response = await client.safety.run_shield( + shield_id="your-shield-id", + messages=messages, +) + +# Check for violations +if response.violation: + print(f"Safety violation detected: {response.violation.user_message}") + print(f"Violation level: {response.violation.violation_level}") + print(f"Metadata: {response.violation.metadata}") +else: + print("No safety violations detected") +``` diff --git a/llama_stack/providers/utils/inference/openai_compat.py b/llama_stack/providers/utils/inference/openai_compat.py index d98261abb..4d690287b 100644 --- a/llama_stack/providers/utils/inference/openai_compat.py +++ b/llama_stack/providers/utils/inference/openai_compat.py @@ -8,7 +8,17 @@ import logging import time import uuid import warnings -from typing import Any, AsyncGenerator, AsyncIterator, Awaitable, Dict, Iterable, List, Optional, Union +from typing import ( + Any, + AsyncGenerator, + AsyncIterator, + Awaitable, + Dict, + Iterable, + List, + Optional, + Union, +) from openai import AsyncStream from openai.types.chat import ( @@ -78,6 +88,7 @@ from llama_stack.apis.common.content_types import ( TextDelta, ToolCallDelta, ToolCallParseStatus, + _URLOrData, ) from llama_stack.apis.inference import ( ChatCompletionRequest, @@ -93,6 +104,7 @@ from llama_stack.apis.inference import ( SamplingParams, SystemMessage, TokenLogProbs, + ToolChoice, ToolResponseMessage, TopKSamplingStrategy, TopPSamplingStrategy, @@ -103,7 +115,6 @@ from llama_stack.apis.inference.inference import ( OpenAIChatCompletion, OpenAICompletion, OpenAICompletionChoice, - OpenAIMessageParam, OpenAIResponseFormatParam, ToolConfig, ) @@ -513,11 +524,26 @@ async def convert_message_to_openai_dict(message: Message, download: bool = Fals else: content = [await _convert_content(message.content)] - return { + result = { "role": message.role, "content": content, } + if hasattr(message, "tool_calls") and message.tool_calls: + result["tool_calls"] = [] + for tc in message.tool_calls: + result["tool_calls"].append( + { + "id": tc.call_id, + "type": "function", + "function": { + "name": tc.tool_name, + "arguments": tc.arguments_json if hasattr(tc, "arguments_json") else json.dumps(tc.arguments), + }, + } + ) + return result + class UnparseableToolCall(BaseModel): 
""" @@ -612,13 +638,10 @@ async def convert_message_to_openai_dict_new( ) for tool in message.tool_calls ] - params = {} - if tool_calls: - params = {"tool_calls": tool_calls} out = OpenAIChatCompletionAssistantMessage( role="assistant", content=await _convert_message_content(message.content), - **params, + tool_calls=tool_calls or None, ) elif isinstance(message, ToolResponseMessage): out = OpenAIChatCompletionToolMessage( @@ -695,7 +718,10 @@ def to_openai_param_type(param_type: str) -> dict: if param_type.startswith("list[") and param_type.endswith("]"): inner_type = param_type[5:-1] if inner_type in basic_types: - return {"type": "array", "items": {"type": basic_types.get(inner_type, inner_type)}} + return { + "type": "array", + "items": {"type": basic_types.get(inner_type, inner_type)}, + } return {"type": param_type} @@ -815,6 +841,10 @@ def _convert_openai_finish_reason(finish_reason: str) -> StopReason: def _convert_openai_request_tool_config(tool_choice: Optional[Union[str, Dict[str, Any]]] = None) -> ToolConfig: tool_config = ToolConfig() if tool_choice: + try: + tool_choice = ToolChoice(tool_choice) + except ValueError: + pass tool_config.tool_choice = tool_choice return tool_config @@ -849,7 +879,9 @@ def _convert_openai_request_tools(tools: Optional[List[Dict[str, Any]]] = None) return lls_tools -def _convert_openai_request_response_format(response_format: OpenAIResponseFormatParam = None): +def _convert_openai_request_response_format( + response_format: OpenAIResponseFormatParam = None, +): if not response_format: return None # response_format can be a dict or a pydantic model @@ -957,38 +989,50 @@ def _convert_openai_sampling_params( return sampling_params -def _convert_openai_request_messages(messages: List[OpenAIMessageParam]): - # Llama Stack messages and OpenAI messages are similar, but not identical. - lls_messages = [] +def openai_messages_to_messages( + messages: List[OpenAIChatCompletionMessage], +) -> List[Message]: + """ + Convert a list of OpenAIChatCompletionMessage into a list of Message. + """ + converted_messages = [] for message in messages: - lls_message = dict(message) + if message.role == "system": + converted_message = SystemMessage(content=message.content) + elif message.role == "user": + converted_message = UserMessage(content=openai_content_to_content(message.content)) + elif message.role == "assistant": + converted_message = CompletionMessage( + content=message.content, + tool_calls=_convert_openai_tool_calls(message.tool_calls), + stop_reason=StopReason.end_of_turn, + ) + elif message.role == "tool": + converted_message = ToolResponseMessage( + role="tool", + call_id=message.tool_call_id, + content=openai_content_to_content(message.content), + ) + else: + raise ValueError(f"Unknown role {message.role}") + converted_messages.append(converted_message) + return converted_messages - # Llama Stack expects `call_id` but OpenAI uses `tool_call_id` - tool_call_id = lls_message.pop("tool_call_id", None) - if tool_call_id: - lls_message["call_id"] = tool_call_id - content = lls_message.get("content", None) - if isinstance(content, list): - lls_content = [] - for item in content: - # items can either by pydantic models or dicts here... 
- item = dict(item) - if item.get("type", "") == "image_url": - lls_item = ImageContentItem( - type="image", - image=URL(uri=item.get("image_url", {}).get("url", "")), - ) - elif item.get("type", "") == "text": - lls_item = TextContentItem( - type="text", - text=item.get("text", ""), - ) - lls_content.append(lls_item) - lls_message["content"] = lls_content - lls_messages.append(lls_message) - - return lls_messages +def openai_content_to_content(content: Union[str, Iterable[OpenAIChatCompletionContentPartParam]]): + if isinstance(content, str): + return content + elif isinstance(content, list): + return [openai_content_to_content(c) for c in content] + elif hasattr(content, "type"): + if content.type == "text": + return TextContentItem(type="text", text=content.text) + elif content.type == "image_url": + return ImageContentItem(type="image", image=_URLOrData(url=URL(uri=content.image_url.url))) + else: + raise ValueError(f"Unknown content type: {content.type}") + else: + raise ValueError(f"Unknown content type: {content}") def convert_openai_chat_completion_choice( @@ -1313,7 +1357,7 @@ class OpenAIChatCompletionToLlamaStackMixin: top_p: Optional[float] = None, user: Optional[str] = None, ) -> Union[OpenAIChatCompletion, AsyncIterator[OpenAIChatCompletionChunk]]: - messages = _convert_openai_request_messages(messages) + messages = openai_messages_to_messages(messages) response_format = _convert_openai_request_response_format(response_format) sampling_params = _convert_openai_sampling_params( max_tokens=max_tokens, @@ -1321,7 +1365,10 @@ class OpenAIChatCompletionToLlamaStackMixin: top_p=top_p, ) tool_config = _convert_openai_request_tool_config(tool_choice) + tools = _convert_openai_request_tools(tools) + if tool_config.tool_choice == ToolChoice.none: + tools = [] outstanding_responses = [] # "n" is the number of completions to generate per prompt @@ -1346,7 +1393,9 @@ class OpenAIChatCompletionToLlamaStackMixin: ) async def _process_stream_response( - self, model: str, outstanding_responses: List[Awaitable[AsyncIterator[ChatCompletionResponseStreamChunk]]] + self, + model: str, + outstanding_responses: List[Awaitable[AsyncIterator[ChatCompletionResponseStreamChunk]]], ): id = f"chatcmpl-{uuid.uuid4()}" for outstanding_response in outstanding_responses: @@ -1369,11 +1418,31 @@ class OpenAIChatCompletionToLlamaStackMixin: elif isinstance(event.delta, ToolCallDelta): if event.delta.parse_status == ToolCallParseStatus.succeeded: tool_call = event.delta.tool_call + + # First chunk includes full structure openai_tool_call = OpenAIChoiceDeltaToolCall( index=0, id=tool_call.call_id, function=OpenAIChoiceDeltaToolCallFunction( - name=tool_call.tool_name, arguments=tool_call.arguments_json + name=tool_call.tool_name, + arguments="", + ), + ) + delta = OpenAIChoiceDelta(tool_calls=[openai_tool_call]) + yield OpenAIChatCompletionChunk( + id=id, + choices=[ + OpenAIChatCompletionChunkChoice(index=i, finish_reason=finish_reason, delta=delta) + ], + created=int(time.time()), + model=model, + object="chat.completion.chunk", + ) + # arguments + openai_tool_call = OpenAIChoiceDeltaToolCall( + index=0, + function=OpenAIChoiceDeltaToolCallFunction( + arguments=tool_call.arguments_json, ), ) delta = OpenAIChoiceDelta(tool_calls=[openai_tool_call]) diff --git a/llama_stack/providers/utils/inference/prompt_adapter.py b/llama_stack/providers/utils/inference/prompt_adapter.py index 4f9c4927a..657dc4b86 100644 --- a/llama_stack/providers/utils/inference/prompt_adapter.py +++ 
b/llama_stack/providers/utils/inference/prompt_adapter.py @@ -52,6 +52,9 @@ from llama_stack.models.llama.llama3.prompt_templates import ( SystemDefaultGenerator, ) from llama_stack.models.llama.llama3.tokenizer import Tokenizer +from llama_stack.models.llama.llama4.prompt_templates.system_prompts import ( + PythonListCustomToolGenerator as PythonListCustomToolGeneratorLlama4, +) from llama_stack.models.llama.sku_list import resolve_model from llama_stack.models.llama.sku_types import ModelFamily, is_multimodal from llama_stack.providers.utils.inference import supported_inference_models @@ -306,10 +309,11 @@ def chat_completion_request_to_messages( elif model.model_family in ( ModelFamily.llama3_2, ModelFamily.llama3_3, - ModelFamily.llama4, ): - # llama3.2, llama3.3 and llama4 models follow the same tool prompt format - messages = augment_messages_for_tools_llama_3_2(request) + # llama3.2, llama3.3 follow the same tool prompt format + messages = augment_messages_for_tools_llama(request, PythonListCustomToolGenerator) + elif model.model_family == ModelFamily.llama4: + messages = augment_messages_for_tools_llama(request, PythonListCustomToolGeneratorLlama4) else: messages = request.messages @@ -399,8 +403,9 @@ def augment_messages_for_tools_llama_3_1( return messages -def augment_messages_for_tools_llama_3_2( +def augment_messages_for_tools_llama( request: ChatCompletionRequest, + custom_tool_prompt_generator, ) -> List[Message]: existing_messages = request.messages existing_system_message = None @@ -434,7 +439,7 @@ def augment_messages_for_tools_llama_3_2( if existing_system_message and request.tool_config.system_message_behavior == SystemMessageBehavior.replace: system_prompt = existing_system_message.content - tool_template = PythonListCustomToolGenerator().gen(custom_tools, system_prompt) + tool_template = custom_tool_prompt_generator().gen(custom_tools, system_prompt) sys_content += tool_template.render() sys_content += "\n" diff --git a/llama_stack/templates/dependencies.json b/llama_stack/templates/dependencies.json index f95ca81d5..1f25dda14 100644 --- a/llama_stack/templates/dependencies.json +++ b/llama_stack/templates/dependencies.json @@ -756,5 +756,41 @@ "vllm", "sentence-transformers --no-deps", "torch torchvision --index-url https://download.pytorch.org/whl/cpu" + ], + "watsonx": [ + "aiosqlite", + "autoevals", + "blobfile", + "chardet", + "datasets", + "emoji", + "faiss-cpu", + "fastapi", + "fire", + "httpx", + "ibm_watson_machine_learning", + "langdetect", + "matplotlib", + "mcp", + "nltk", + "numpy", + "openai", + "opentelemetry-exporter-otlp-proto-http", + "opentelemetry-sdk", + "pandas", + "pillow", + "psycopg2-binary", + "pymongo", + "pypdf", + "pythainlp", + "redis", + "requests", + "scikit-learn", + "scipy", + "sentencepiece", + "tqdm", + "transformers", + "tree_sitter", + "uvicorn" ] } diff --git a/llama_stack/templates/meta-reference-gpu/doc_template.md b/llama_stack/templates/meta-reference-gpu/doc_template.md index a174331b4..2ca6793d7 100644 --- a/llama_stack/templates/meta-reference-gpu/doc_template.md +++ b/llama_stack/templates/meta-reference-gpu/doc_template.md @@ -69,6 +69,7 @@ LLAMA_STACK_PORT=8321 docker run \ -it \ --pull always \ + --gpu all \ -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ -v ~/.llama:/root/.llama \ llamastack/distribution-{{ name }} \ @@ -82,6 +83,7 @@ If you are using Llama Stack Safety / Shield APIs, use: docker run \ -it \ --pull always \ + --gpu all \ -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ -v ~/.llama:/root/.llama \ 
llamastack/distribution-{{ name }} \ diff --git a/llama_stack/templates/nvidia/doc_template.md b/llama_stack/templates/nvidia/doc_template.md index da95227d8..068dd7ac3 100644 --- a/llama_stack/templates/nvidia/doc_template.md +++ b/llama_stack/templates/nvidia/doc_template.md @@ -25,14 +25,84 @@ The following models are available by default: {% endif %} -### Prerequisite: API Keys +## Prerequisites +### NVIDIA API Keys -Make sure you have access to a NVIDIA API Key. You can get one by visiting [https://build.nvidia.com/](https://build.nvidia.com/). +Make sure you have access to a NVIDIA API Key. You can get one by visiting [https://build.nvidia.com/](https://build.nvidia.com/). Use this key for the `NVIDIA_API_KEY` environment variable. +### Deploy NeMo Microservices Platform +The NVIDIA NeMo microservices platform supports end-to-end microservice deployment of a complete AI flywheel on your Kubernetes cluster through the NeMo Microservices Helm Chart. Please reference the [NVIDIA NeMo Microservices documentation](https://docs.nvidia.com/nemo/microservices/latest/about/index.html) for platform prerequisites and instructions to install and deploy the platform. + +## Supported Services +Each Llama Stack API corresponds to a specific NeMo microservice. The core microservices (Customizer, Evaluator, Guardrails) are exposed by the same endpoint. The platform components (Data Store) are each exposed by separate endpoints. + +### Inference: NVIDIA NIM +NVIDIA NIM is used for running inference with registered models. There are two ways to access NVIDIA NIMs: + 1. Hosted (default): Preview APIs hosted at https://integrate.api.nvidia.com (Requires an API key) + 2. Self-hosted: NVIDIA NIMs that run on your own infrastructure. + +The deployed platform includes the NIM Proxy microservice, which is the service that provides to access your NIMs (for example, to run inference on a model). Set the `NVIDIA_BASE_URL` environment variable to use your NVIDIA NIM Proxy deployment. + +### Datasetio API: NeMo Data Store +The NeMo Data Store microservice serves as the default file storage solution for the NeMo microservices platform. It exposts APIs compatible with the Hugging Face Hub client (`HfApi`), so you can use the client to interact with Data Store. The `NVIDIA_DATASETS_URL` environment variable should point to your NeMo Data Store endpoint. + +See the [NVIDIA Datasetio docs](/llama_stack/providers/remote/datasetio/nvidia/README.md) for supported features and example usage. + +### Eval API: NeMo Evaluator +The NeMo Evaluator microservice supports evaluation of LLMs. Launching an Evaluation job with NeMo Evaluator requires an Evaluation Config (an object that contains metadata needed by the job). A Llama Stack Benchmark maps to an Evaluation Config, so registering a Benchmark creates an Evaluation Config in NeMo Evaluator. The `NVIDIA_EVALUATOR_URL` environment variable should point to your NeMo Microservices endpoint. + +See the [NVIDIA Eval docs](/llama_stack/providers/remote/eval/nvidia/README.md) for supported features and example usage. + +### Post-Training API: NeMo Customizer +The NeMo Customizer microservice supports fine-tuning models. You can reference [this list of supported models](/llama_stack/providers/remote/post_training/nvidia/models.py) that can be fine-tuned using Llama Stack. The `NVIDIA_CUSTOMIZER_URL` environment variable should point to your NeMo Microservices endpoint. 
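+
+As an illustrative sketch (not an exhaustive reference), launching a LoRA fine-tuning job through the post-training API can look roughly like the following; the parameter names and config fields shown here are examples only, and the authoritative schema is documented in the NVIDIA Post-Training docs linked below.
+
+```python
+# Assumes `client` is an initialized Llama Stack client for the "nvidia" distribution
+# and that "sample-basic-test" is a dataset already registered in NeMo Data Store.
+response = client.post_training.supervised_fine_tune(
+    job_uuid="",
+    model="meta/llama-3.2-1b-instruct",
+    checkpoint_dir="",
+    # LoRA hyperparameters accepted by NeMo Customizer (see the docs for the full set)
+    algorithm_config={"type": "LoRA", "alpha": 16},
+    training_config={
+        "n_epochs": 2,
+        "data_config": {"dataset_id": "sample-basic-test", "batch_size": 8},
+        "optimizer_config": {"lr": 0.0001},
+    },
+    logger_config={},
+    hyperparam_search_config={},
+)
+print(f"Created customization job: {response.job_uuid}")
+```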
+ +See the [NVIDIA Post-Training docs](/llama_stack/providers/remote/post_training/nvidia/README.md) for supported features and example usage. + +### Safety API: NeMo Guardrails +The NeMo Guardrails microservice sits between your application and the LLM, and adds checks and content moderation to a model. The `GUARDRAILS_SERVICE_URL` environment variable should point to your NeMo Microservices endpoint. + +See the NVIDIA Safety docs for supported features and example usage. + +## Deploying models +In order to use a registered model with the Llama Stack APIs, ensure the corresponding NIM is deployed to your environment. For example, you can use the NIM Proxy microservice to deploy `meta/llama-3.2-1b-instruct`. + +Note: For improved inference speeds, we need to use NIM with `fast_outlines` guided decoding system (specified in the request body). This is the default if you deployed the platform with the NeMo Microservices Helm Chart. +```sh +# URL to NeMo NIM Proxy service +export NEMO_URL="http://nemo.test" + +curl --location "$NEMO_URL/v1/deployment/model-deployments" \ + -H 'accept: application/json' \ + -H 'Content-Type: application/json' \ + -d '{ + "name": "llama-3.2-1b-instruct", + "namespace": "meta", + "config": { + "model": "meta/llama-3.2-1b-instruct", + "nim_deployment": { + "image_name": "nvcr.io/nim/meta/llama-3.2-1b-instruct", + "image_tag": "1.8.3", + "pvc_size": "25Gi", + "gpu": 1, + "additional_envs": { + "NIM_GUIDED_DECODING_BACKEND": "fast_outlines" + } + } + } + }' +``` +This NIM deployment should take approximately 10 minutes to go live. [See the docs](https://docs.nvidia.com/nemo/microservices/latest/get-started/tutorials/deploy-nims.html) for more information on how to deploy a NIM and verify it's available for inference. + +You can also remove a deployed NIM to free up GPU resources, if needed. +```sh +export NEMO_URL="http://nemo.test" + +curl -X DELETE "$NEMO_URL/v1/deployment/model-deployments/meta/llama-3.1-8b-instruct" +``` ## Running Llama Stack with NVIDIA -You can do this via Conda (build code) or Docker which has a pre-built image. +You can do this via Conda or venv (build code), or Docker which has a pre-built image. ### Via Docker @@ -54,9 +124,23 @@ docker run \ ### Via Conda ```bash +INFERENCE_MODEL=meta-llama/Llama-3.1-8b-Instruct llama stack build --template nvidia --image-type conda llama stack run ./run.yaml \ --port 8321 \ - --env NVIDIA_API_KEY=$NVIDIA_API_KEY + --env NVIDIA_API_KEY=$NVIDIA_API_KEY \ + --env INFERENCE_MODEL=$INFERENCE_MODEL +``` + +### Via venv + +If you've set up your local development environment, you can also build the image using your local virtual environment. 
+ +```bash +INFERENCE_MODEL=meta-llama/Llama-3.1-8b-Instruct +llama stack build --template nvidia --image-type venv +llama stack run ./run.yaml \ + --port 8321 \ + --env NVIDIA_API_KEY=$NVIDIA_API_KEY \ --env INFERENCE_MODEL=$INFERENCE_MODEL ``` diff --git a/llama_stack/templates/nvidia/nvidia.py b/llama_stack/templates/nvidia/nvidia.py index ce27ff568..47a0c04fa 100644 --- a/llama_stack/templates/nvidia/nvidia.py +++ b/llama_stack/templates/nvidia/nvidia.py @@ -98,23 +98,15 @@ def get_distribution_template() -> DistributionTemplate: "", "NVIDIA API Key", ), - ## Nemo Customizer related variables - "NVIDIA_USER_ID": ( - "llama-stack-user", - "NVIDIA User ID", - ), "NVIDIA_APPEND_API_VERSION": ( "True", "Whether to append the API version to the base_url", ), + ## Nemo Customizer related variables "NVIDIA_DATASET_NAMESPACE": ( "default", "NVIDIA Dataset Namespace", ), - "NVIDIA_ACCESS_POLICIES": ( - "{}", - "NVIDIA Access Policies", - ), "NVIDIA_PROJECT_ID": ( "test-project", "NVIDIA Project ID", diff --git a/llama_stack/templates/nvidia/run-with-safety.yaml b/llama_stack/templates/nvidia/run-with-safety.yaml index d45807380..13d5febf7 100644 --- a/llama_stack/templates/nvidia/run-with-safety.yaml +++ b/llama_stack/templates/nvidia/run-with-safety.yaml @@ -57,7 +57,7 @@ providers: - provider_id: nvidia provider_type: remote::nvidia config: - evaluator_service_url: ${env.NVIDIA_EVALUATOR_URL:http://localhost:7331} + evaluator_url: ${env.NVIDIA_EVALUATOR_URL:http://localhost:7331} post_training: - provider_id: nvidia provider_type: remote::nvidia diff --git a/llama_stack/templates/nvidia/run.yaml b/llama_stack/templates/nvidia/run.yaml index bf08e462f..bc527d4cb 100644 --- a/llama_stack/templates/nvidia/run.yaml +++ b/llama_stack/templates/nvidia/run.yaml @@ -52,7 +52,7 @@ providers: - provider_id: nvidia provider_type: remote::nvidia config: - evaluator_service_url: ${env.NVIDIA_EVALUATOR_URL:http://localhost:7331} + evaluator_url: ${env.NVIDIA_EVALUATOR_URL:http://localhost:7331} post_training: - provider_id: nvidia provider_type: remote::nvidia @@ -178,6 +178,16 @@ models: provider_id: nvidia provider_model_id: meta/llama-3.2-90b-vision-instruct model_type: llm +- metadata: {} + model_id: meta/llama-3.3-70b-instruct + provider_id: nvidia + provider_model_id: meta/llama-3.3-70b-instruct + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.3-70B-Instruct + provider_id: nvidia + provider_model_id: meta/llama-3.3-70b-instruct + model_type: llm - metadata: embedding_dimension: 2048 context_length: 8192 diff --git a/llama_stack/templates/remote-vllm/doc_template.md b/llama_stack/templates/remote-vllm/doc_template.md index efcdb62c6..3cede6080 100644 --- a/llama_stack/templates/remote-vllm/doc_template.md +++ b/llama_stack/templates/remote-vllm/doc_template.md @@ -28,10 +28,10 @@ The following environment variables can be configured: ## Setting up vLLM server -In the following sections, we'll use either AMD and NVIDIA GPUs to serve as hardware accelerators for the vLLM +In the following sections, we'll use AMD, NVIDIA or Intel GPUs to serve as hardware accelerators for the vLLM server, which acts as both the LLM inference provider and the safety provider. Note that vLLM also [supports many other hardware accelerators](https://docs.vllm.ai/en/latest/getting_started/installation.html) and -that we only use GPUs here for demonstration purposes. +that we only use GPUs here for demonstration purposes. 
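+
+Whichever accelerator you choose in the sections below, it is worth confirming that the vLLM endpoint is reachable before pointing Llama Stack at it. A minimal check, assuming the server is listening on `$INFERENCE_PORT` locally, is to list the models it serves via the OpenAI-compatible API:
+
+```bash
+# Expects a JSON response listing $INFERENCE_MODEL once the server is up
+curl http://localhost:$INFERENCE_PORT/v1/models
+```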
+Note that if you run into issues, you can include the environment variable `--env VLLM_DEBUG_LOG_API_SERVER_RESPONSE=true` (available in vLLM v0.8.3 and above) in the `docker run` command to enable response logging from the API server for debugging.

 ### Setting up vLLM server on AMD GPU

@@ -149,6 +149,55 @@ docker run \
     --port $SAFETY_PORT
 ```

+### Setting up vLLM server on Intel GPU
+
+Refer to the [vLLM Documentation for XPU](https://docs.vllm.ai/en/v0.8.2/getting_started/installation/gpu.html?device=xpu) to get a vLLM endpoint. In addition to the vLLM-side setup, which guides you through installing vLLM from source or building the vLLM Docker container yourself, Intel provides a prebuilt vLLM container for systems with Intel GPUs supported by the PyTorch XPU backend:
+- [intel/vllm](https://hub.docker.com/r/intel/vllm)
+
+Here is a sample script to start a vLLM server locally via Docker using the Intel-provided container:
+
+```bash
+export INFERENCE_PORT=8000
+export INFERENCE_MODEL=meta-llama/Llama-3.2-1B-Instruct
+export ZE_AFFINITY_MASK=0
+
+docker run \
+    --pull always \
+    --device /dev/dri \
+    -v /dev/dri/by-path:/dev/dri/by-path \
+    -v ~/.cache/huggingface:/root/.cache/huggingface \
+    --env "HUGGING_FACE_HUB_TOKEN=$HF_TOKEN" \
+    --env ZE_AFFINITY_MASK=$ZE_AFFINITY_MASK \
+    -p $INFERENCE_PORT:$INFERENCE_PORT \
+    --ipc=host \
+    intel/vllm:xpu \
+    --gpu-memory-utilization 0.7 \
+    --model $INFERENCE_MODEL \
+    --port $INFERENCE_PORT
+```
+
+If you are using the Llama Stack Safety / Shield APIs, you will also need to run another instance of vLLM with a corresponding safety model such as `meta-llama/Llama-Guard-3-1B`, using a script like:
+
+```bash
+export SAFETY_PORT=8081
+export SAFETY_MODEL=meta-llama/Llama-Guard-3-1B
+export ZE_AFFINITY_MASK=1
+
+docker run \
+    --pull always \
+    --device /dev/dri \
+    -v /dev/dri/by-path:/dev/dri/by-path \
+    -v ~/.cache/huggingface:/root/.cache/huggingface \
+    --env "HUGGING_FACE_HUB_TOKEN=$HF_TOKEN" \
+    --env ZE_AFFINITY_MASK=$ZE_AFFINITY_MASK \
+    -p $SAFETY_PORT:$SAFETY_PORT \
+    --ipc=host \
+    intel/vllm:xpu \
+    --gpu-memory-utilization 0.7 \
+    --model $SAFETY_MODEL \
+    --port $SAFETY_PORT
+```
+
 ## Running Llama Stack

 Now you are ready to run Llama Stack with vLLM as the inference provider. You can do this via Conda (build code) or Docker which has a pre-built image.
diff --git a/llama_stack/templates/watsonx/__init__.py b/llama_stack/templates/watsonx/__init__.py
new file mode 100644
index 000000000..078d86144
--- /dev/null
+++ b/llama_stack/templates/watsonx/__init__.py
@@ -0,0 +1,7 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+ +from .watsonx import get_distribution_template # noqa: F401 diff --git a/llama_stack/templates/watsonx/build.yaml b/llama_stack/templates/watsonx/build.yaml new file mode 100644 index 000000000..badd643ad --- /dev/null +++ b/llama_stack/templates/watsonx/build.yaml @@ -0,0 +1,30 @@ +version: '2' +distribution_spec: + description: Use watsonx for running LLM inference + providers: + inference: + - remote::watsonx + vector_io: + - inline::faiss + safety: + - inline::llama-guard + agents: + - inline::meta-reference + telemetry: + - inline::meta-reference + eval: + - inline::meta-reference + datasetio: + - remote::huggingface + - inline::localfs + scoring: + - inline::basic + - inline::llm-as-judge + - inline::braintrust + tool_runtime: + - remote::brave-search + - remote::tavily-search + - inline::code-interpreter + - inline::rag-runtime + - remote::model-context-protocol +image_type: conda diff --git a/llama_stack/templates/watsonx/doc_template.md b/llama_stack/templates/watsonx/doc_template.md new file mode 100644 index 000000000..af0ae15a8 --- /dev/null +++ b/llama_stack/templates/watsonx/doc_template.md @@ -0,0 +1,74 @@ +--- +orphan: true +--- +# watsonx Distribution + +```{toctree} +:maxdepth: 2 +:hidden: + +self +``` + +The `llamastack/distribution-{{ name }}` distribution consists of the following provider configurations. + +{{ providers_table }} + +{% if run_config_env_vars %} + +### Environment Variables + +The following environment variables can be configured: + +{% for var, (default_value, description) in run_config_env_vars.items() %} +- `{{ var }}`: {{ description }} (default: `{{ default_value }}`) +{% endfor %} +{% endif %} + +{% if default_models %} +### Models + +The following models are available by default: + +{% for model in default_models %} +- `{{ model.model_id }} {{ model.doc_string }}` +{% endfor %} +{% endif %} + + +### Prerequisite: API Keys + +Make sure you have access to a watsonx API Key. You can get one by referring [watsonx.ai](https://www.ibm.com/docs/en/masv-and-l/maximo-manage/continuous-delivery?topic=setup-create-watsonx-api-key). + + +## Running Llama Stack with watsonx + +You can do this via Conda (build code), venv or Docker which has a pre-built image. + +### Via Docker + +This method allows you to get started quickly without having to build the distribution code. 
+ +```bash +LLAMA_STACK_PORT=5001 +docker run \ + -it \ + -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ + -v ./run.yaml:/root/my-run.yaml \ + llamastack/distribution-{{ name }} \ + --yaml-config /root/my-run.yaml \ + --port $LLAMA_STACK_PORT \ + --env WATSONX_API_KEY=$WATSONX_API_KEY \ + --env WATSONX_PROJECT_ID=$WATSONX_PROJECT_ID \ + --env WATSONX_BASE_URL=$WATSONX_BASE_URL +``` + +### Via Conda + +```bash +llama stack build --template watsonx --image-type conda +llama stack run ./run.yaml \ + --port $LLAMA_STACK_PORT \ + --env WATSONX_API_KEY=$WATSONX_API_KEY \ + --env WATSONX_PROJECT_ID=$WATSONX_PROJECT_ID +``` diff --git a/llama_stack/templates/watsonx/run.yaml b/llama_stack/templates/watsonx/run.yaml new file mode 100644 index 000000000..1048f7192 --- /dev/null +++ b/llama_stack/templates/watsonx/run.yaml @@ -0,0 +1,210 @@ +version: '2' +image_name: watsonx +apis: +- agents +- datasetio +- eval +- inference +- safety +- scoring +- telemetry +- tool_runtime +- vector_io +providers: + inference: + - provider_id: watsonx + provider_type: remote::watsonx + config: + url: ${env.WATSONX_BASE_URL:https://us-south.ml.cloud.ibm.com} + api_key: ${env.WATSONX_API_KEY:} + project_id: ${env.WATSONX_PROJECT_ID:} + vector_io: + - provider_id: faiss + provider_type: inline::faiss + config: + kvstore: + type: sqlite + namespace: null + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/watsonx}/faiss_store.db + safety: + - provider_id: llama-guard + provider_type: inline::llama-guard + config: + excluded_categories: [] + agents: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + persistence_store: + type: sqlite + namespace: null + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/watsonx}/agents_store.db + telemetry: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + service_name: "${env.OTEL_SERVICE_NAME:\u200B}" + sinks: ${env.TELEMETRY_SINKS:console,sqlite} + sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/watsonx/trace_store.db} + eval: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + kvstore: + type: sqlite + namespace: null + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/watsonx}/meta_reference_eval.db + datasetio: + - provider_id: huggingface + provider_type: remote::huggingface + config: + kvstore: + type: sqlite + namespace: null + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/watsonx}/huggingface_datasetio.db + - provider_id: localfs + provider_type: inline::localfs + config: + kvstore: + type: sqlite + namespace: null + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/watsonx}/localfs_datasetio.db + scoring: + - provider_id: basic + provider_type: inline::basic + config: {} + - provider_id: llm-as-judge + provider_type: inline::llm-as-judge + config: {} + - provider_id: braintrust + provider_type: inline::braintrust + config: + openai_api_key: ${env.OPENAI_API_KEY:} + tool_runtime: + - provider_id: brave-search + provider_type: remote::brave-search + config: + api_key: ${env.BRAVE_SEARCH_API_KEY:} + max_results: 3 + - provider_id: tavily-search + provider_type: remote::tavily-search + config: + api_key: ${env.TAVILY_SEARCH_API_KEY:} + max_results: 3 + - provider_id: code-interpreter + provider_type: inline::code-interpreter + config: {} + - provider_id: rag-runtime + provider_type: inline::rag-runtime + config: {} + - provider_id: model-context-protocol + provider_type: remote::model-context-protocol + config: {} +metadata_store: + type: sqlite + 
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/watsonx}/registry.db +models: +- metadata: {} + model_id: meta-llama/llama-3-3-70b-instruct + provider_id: watsonx + provider_model_id: meta-llama/llama-3-3-70b-instruct + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.3-70B-Instruct + provider_id: watsonx + provider_model_id: meta-llama/llama-3-3-70b-instruct + model_type: llm +- metadata: {} + model_id: meta-llama/llama-2-13b-chat + provider_id: watsonx + provider_model_id: meta-llama/llama-2-13b-chat + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-2-13b + provider_id: watsonx + provider_model_id: meta-llama/llama-2-13b-chat + model_type: llm +- metadata: {} + model_id: meta-llama/llama-3-1-70b-instruct + provider_id: watsonx + provider_model_id: meta-llama/llama-3-1-70b-instruct + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.1-70B-Instruct + provider_id: watsonx + provider_model_id: meta-llama/llama-3-1-70b-instruct + model_type: llm +- metadata: {} + model_id: meta-llama/llama-3-1-8b-instruct + provider_id: watsonx + provider_model_id: meta-llama/llama-3-1-8b-instruct + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.1-8B-Instruct + provider_id: watsonx + provider_model_id: meta-llama/llama-3-1-8b-instruct + model_type: llm +- metadata: {} + model_id: meta-llama/llama-3-2-11b-vision-instruct + provider_id: watsonx + provider_model_id: meta-llama/llama-3-2-11b-vision-instruct + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.2-11B-Vision-Instruct + provider_id: watsonx + provider_model_id: meta-llama/llama-3-2-11b-vision-instruct + model_type: llm +- metadata: {} + model_id: meta-llama/llama-3-2-1b-instruct + provider_id: watsonx + provider_model_id: meta-llama/llama-3-2-1b-instruct + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.2-1B-Instruct + provider_id: watsonx + provider_model_id: meta-llama/llama-3-2-1b-instruct + model_type: llm +- metadata: {} + model_id: meta-llama/llama-3-2-3b-instruct + provider_id: watsonx + provider_model_id: meta-llama/llama-3-2-3b-instruct + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.2-3B-Instruct + provider_id: watsonx + provider_model_id: meta-llama/llama-3-2-3b-instruct + model_type: llm +- metadata: {} + model_id: meta-llama/llama-3-2-90b-vision-instruct + provider_id: watsonx + provider_model_id: meta-llama/llama-3-2-90b-vision-instruct + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.2-90B-Vision-Instruct + provider_id: watsonx + provider_model_id: meta-llama/llama-3-2-90b-vision-instruct + model_type: llm +- metadata: {} + model_id: meta-llama/llama-guard-3-11b-vision + provider_id: watsonx + provider_model_id: meta-llama/llama-guard-3-11b-vision + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-Guard-3-11B-Vision + provider_id: watsonx + provider_model_id: meta-llama/llama-guard-3-11b-vision + model_type: llm +shields: [] +vector_dbs: [] +datasets: [] +scoring_fns: [] +benchmarks: [] +tool_groups: +- toolgroup_id: builtin::websearch + provider_id: tavily-search +- toolgroup_id: builtin::rag + provider_id: rag-runtime +- toolgroup_id: builtin::code_interpreter + provider_id: code-interpreter +server: + port: 8321 diff --git a/llama_stack/templates/watsonx/watsonx.py b/llama_stack/templates/watsonx/watsonx.py new file mode 100644 index 000000000..d59bb6f20 --- /dev/null +++ b/llama_stack/templates/watsonx/watsonx.py @@ -0,0 +1,90 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. 
+# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from pathlib import Path + +from llama_stack.distribution.datatypes import Provider, ToolGroupInput +from llama_stack.providers.remote.inference.watsonx import WatsonXConfig +from llama_stack.providers.remote.inference.watsonx.models import MODEL_ENTRIES +from llama_stack.templates.template import DistributionTemplate, RunConfigSettings, get_model_registry + + +def get_distribution_template() -> DistributionTemplate: + providers = { + "inference": ["remote::watsonx"], + "vector_io": ["inline::faiss"], + "safety": ["inline::llama-guard"], + "agents": ["inline::meta-reference"], + "telemetry": ["inline::meta-reference"], + "eval": ["inline::meta-reference"], + "datasetio": ["remote::huggingface", "inline::localfs"], + "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"], + "tool_runtime": [ + "remote::brave-search", + "remote::tavily-search", + "inline::code-interpreter", + "inline::rag-runtime", + "remote::model-context-protocol", + ], + } + + inference_provider = Provider( + provider_id="watsonx", + provider_type="remote::watsonx", + config=WatsonXConfig.sample_run_config(), + ) + + available_models = { + "watsonx": MODEL_ENTRIES, + } + default_tool_groups = [ + ToolGroupInput( + toolgroup_id="builtin::websearch", + provider_id="tavily-search", + ), + ToolGroupInput( + toolgroup_id="builtin::rag", + provider_id="rag-runtime", + ), + ToolGroupInput( + toolgroup_id="builtin::code_interpreter", + provider_id="code-interpreter", + ), + ] + + default_models = get_model_registry(available_models) + return DistributionTemplate( + name="watsonx", + distro_type="remote_hosted", + description="Use watsonx for running LLM inference", + container_image=None, + template_path=Path(__file__).parent / "doc_template.md", + providers=providers, + available_models_by_provider=available_models, + run_configs={ + "run.yaml": RunConfigSettings( + provider_overrides={ + "inference": [inference_provider], + }, + default_models=default_models, + default_tool_groups=default_tool_groups, + ), + }, + run_config_env_vars={ + "LLAMASTACK_PORT": ( + "5001", + "Port for the Llama Stack distribution server", + ), + "WATSONX_API_KEY": ( + "", + "watsonx API Key", + ), + "WATSONX_PROJECT_ID": ( + "", + "watsonx Project ID", + ), + }, + ) diff --git a/pyproject.toml b/pyproject.toml index 90de8cf23..0f44ca053 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -38,6 +38,7 @@ dependencies = [ "termcolor", "tiktoken", "pillow", + "h11>=0.16.0", ] [project.optional-dependencies] @@ -46,6 +47,7 @@ dev = [ "pytest-asyncio", "pytest-cov", "pytest-html", + "pytest-json-report", "nbval", # For notebook testing "black", "ruff", @@ -57,7 +59,16 @@ dev = [ "ruamel.yaml", # needed for openapi generator ] # These are the dependencies required for running unit tests. -unit = ["sqlite-vec", "openai", "aiosqlite", "aiohttp", "pypdf", "chardet", "qdrant-client"] +unit = [ + "sqlite-vec", + "openai", + "aiosqlite", + "aiohttp", + "pypdf", + "chardet", + "qdrant-client", + "opentelemetry-exporter-otlp-proto-http" +] # These are the core dependencies required for running integration tests. They are shared across all # providers. If a provider requires additional dependencies, please add them to your environment # separately. 
If you are using "uv" to execute your tests, you can use the "--with" flag to specify extra @@ -265,6 +276,7 @@ exclude = [ "^llama_stack/providers/remote/inference/sample/", "^llama_stack/providers/remote/inference/tgi/", "^llama_stack/providers/remote/inference/together/", + "^llama_stack/providers/remote/inference/watsonx/", "^llama_stack/providers/remote/safety/bedrock/", "^llama_stack/providers/remote/safety/nvidia/", "^llama_stack/providers/remote/safety/sample/", diff --git a/requirements.txt b/requirements.txt index 2961b1533..057a1bb2b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -13,8 +13,8 @@ exceptiongroup==1.2.2 ; python_full_version < '3.11' filelock==3.17.0 fire==0.7.0 fsspec==2024.12.0 -h11==0.14.0 -httpcore==1.0.7 +h11==0.16.0 +httpcore==1.0.9 httpx==0.28.1 huggingface-hub==0.29.0 idna==3.10 diff --git a/scripts/distro_codegen.py b/scripts/distro_codegen.py index 98faa53a3..a65e2c80d 100755 --- a/scripts/distro_codegen.py +++ b/scripts/distro_codegen.py @@ -98,7 +98,7 @@ def collect_template_dependencies(template_dir: Path) -> tuple[str | None, list[ if template_func := getattr(module, "get_distribution_template", None): template = template_func() - normal_deps, special_deps = get_provider_dependencies(template.providers) + normal_deps, special_deps = get_provider_dependencies(template) # Combine all dependencies in order: normal deps, special deps, server deps all_deps = sorted(set(normal_deps + SERVER_DEPENDENCIES)) + sorted(set(special_deps)) diff --git a/tests/external-provider/llama-stack-provider-ollama/custom-distro.yaml b/tests/external-provider/llama-stack-provider-ollama/custom-distro.yaml new file mode 100644 index 000000000..eb3b85e52 --- /dev/null +++ b/tests/external-provider/llama-stack-provider-ollama/custom-distro.yaml @@ -0,0 +1,9 @@ +version: '2' +distribution_spec: + description: Custom distro for CI tests + providers: + inference: + - remote::custom_ollama +image_type: container +image_name: ci-test +external_providers_dir: /tmp/providers.d diff --git a/tests/external-provider/llama-stack-provider-ollama/custom_ollama.yaml b/tests/external-provider/llama-stack-provider-ollama/custom_ollama.yaml index f0960b4d8..2ae1e2cf3 100644 --- a/tests/external-provider/llama-stack-provider-ollama/custom_ollama.yaml +++ b/tests/external-provider/llama-stack-provider-ollama/custom_ollama.yaml @@ -1,6 +1,6 @@ adapter: adapter_type: custom_ollama - pip_packages: ["ollama", "aiohttp"] + pip_packages: ["ollama", "aiohttp", "tests/external-provider/llama-stack-provider-ollama"] config_class: llama_stack_provider_ollama.config.OllamaImplConfig module: llama_stack_provider_ollama api_dependencies: [] diff --git a/tests/external-provider/llama-stack-provider-ollama/run.yaml b/tests/external-provider/llama-stack-provider-ollama/run.yaml index 7a3636c4d..a070a6dbb 100644 --- a/tests/external-provider/llama-stack-provider-ollama/run.yaml +++ b/tests/external-provider/llama-stack-provider-ollama/run.yaml @@ -1,14 +1,10 @@ version: '2' image_name: ollama apis: -- agents -- datasetio -- eval - inference -- safety -- scoring - telemetry - tool_runtime +- datasetio - vector_io providers: inference: @@ -24,19 +20,6 @@ providers: type: sqlite namespace: null db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/faiss_store.db - safety: - - provider_id: llama-guard - provider_type: inline::llama-guard - config: - excluded_categories: [] - agents: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - persistence_store: - type: sqlite - 
namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/agents_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference @@ -44,14 +27,6 @@ providers: service_name: ${env.OTEL_SERVICE_NAME:llama-stack} sinks: ${env.TELEMETRY_SINKS:console,sqlite} sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/ollama/trace_store.db} - eval: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - kvstore: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -67,17 +42,6 @@ providers: type: sqlite namespace: null db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/localfs_datasetio.db - scoring: - - provider_id: basic - provider_type: inline::basic - config: {} - - provider_id: llm-as-judge - provider_type: inline::llm-as-judge - config: {} - - provider_id: braintrust - provider_type: inline::braintrust - config: - openai_api_key: ${env.OPENAI_API_KEY:} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search diff --git a/tests/integration/agents/test_agents.py b/tests/integration/agents/test_agents.py index 7def55291..f884d440d 100644 --- a/tests/integration/agents/test_agents.py +++ b/tests/integration/agents/test_agents.py @@ -115,6 +115,70 @@ def test_agent_simple(llama_stack_client_with_mocked_inference, agent_config): assert "I can't" in logs_str +def test_agent_name(llama_stack_client, text_model_id): + agent_name = f"test-agent-{uuid4()}" + + try: + agent = Agent( + llama_stack_client, + model=text_model_id, + instructions="You are a helpful assistant", + name=agent_name, + ) + except TypeError: + agent = Agent( + llama_stack_client, + model=text_model_id, + instructions="You are a helpful assistant", + ) + return + + session_id = agent.create_session(f"test-session-{uuid4()}") + + agent.create_turn( + messages=[ + { + "role": "user", + "content": "Give me a sentence that contains the word: hello", + } + ], + session_id=session_id, + stream=False, + ) + + all_spans = [] + for span in llama_stack_client.telemetry.query_spans( + attribute_filters=[ + {"key": "session_id", "op": "eq", "value": session_id}, + ], + attributes_to_return=["input", "output", "agent_name", "agent_id", "session_id"], + ): + all_spans.append(span.attributes) + + agent_name_spans = [] + for span in llama_stack_client.telemetry.query_spans( + attribute_filters=[], + attributes_to_return=["agent_name"], + ): + if "agent_name" in span.attributes: + agent_name_spans.append(span.attributes) + + agent_logs = [] + for span in llama_stack_client.telemetry.query_spans( + attribute_filters=[ + {"key": "agent_name", "op": "eq", "value": agent_name}, + ], + attributes_to_return=["input", "output", "agent_name"], + ): + if "output" in span.attributes and span.attributes["output"] != "no shields": + agent_logs.append(span.attributes) + + assert len(agent_logs) == 1 + assert agent_logs[0]["agent_name"] == agent_name + assert "Give me a sentence that contains the word: hello" in agent_logs[0]["input"] + assert "hello" in agent_logs[0]["output"].lower() + + def test_tool_config(llama_stack_client_with_mocked_inference, agent_config): common_params = dict( model="meta-llama/Llama-3.2-3B-Instruct", diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index 22290b519..131219e52 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ 
-10,6 +10,7 @@ import platform import textwrap import time +import pytest from dotenv import load_dotenv from llama_stack.log import get_logger @@ -19,10 +20,29 @@ from .report import Report logger = get_logger(__name__, category="tests") +@pytest.hookimpl(hookwrapper=True) +def pytest_runtest_makereport(item, call): + outcome = yield + report = outcome.get_result() + if report.when == "call": + item.execution_outcome = report.outcome + item.was_xfail = getattr(report, "wasxfail", False) + + def pytest_runtest_teardown(item): - interval_seconds = os.getenv("LLAMA_STACK_TEST_INTERVAL_SECONDS") - if interval_seconds: - time.sleep(float(interval_seconds)) + # Check if the test actually ran and passed or failed, but was not skipped or an expected failure (xfail) + outcome = getattr(item, "execution_outcome", None) + was_xfail = getattr(item, "was_xfail", False) + + name = item.nodeid + if not any(x in name for x in ("inference/", "safety/", "agents/")): + return + + logger.debug(f"Test '{item.nodeid}' outcome was '{outcome}' (xfail={was_xfail})") + if outcome in ("passed", "failed") and not was_xfail: + interval_seconds = os.getenv("LLAMA_STACK_TEST_INTERVAL_SECONDS") + if interval_seconds: + time.sleep(float(interval_seconds)) def pytest_configure(config): diff --git a/tests/integration/datasets/test_datasets.py b/tests/integration/datasets/test_datasets.py index 60db95f30..18b31d39c 100644 --- a/tests/integration/datasets/test_datasets.py +++ b/tests/integration/datasets/test_datasets.py @@ -31,6 +31,7 @@ def data_url_from_file(file_path: str) -> str: return data_url +@pytest.mark.skip(reason="flaky. Couldn't find 'llamastack/simpleqa' on the Hugging Face Hub") @pytest.mark.parametrize( "purpose, source, provider_id, limit", [ diff --git a/tests/integration/inference/test_openai_completion.py b/tests/integration/inference/test_openai_completion.py index 75b53100c..46ec03d2e 100644 --- a/tests/integration/inference/test_openai_completion.py +++ b/tests/integration/inference/test_openai_completion.py @@ -75,19 +75,24 @@ def openai_client(client_with_models): return OpenAI(base_url=base_url, api_key="bar") +@pytest.fixture(params=["openai_client", "llama_stack_client"]) +def compat_client(request): + return request.getfixturevalue(request.param) + + @pytest.mark.parametrize( "test_case", [ "inference:completion:sanity", ], ) -def test_openai_completion_non_streaming(openai_client, client_with_models, text_model_id, test_case): +def test_openai_completion_non_streaming(llama_stack_client, client_with_models, text_model_id, test_case): skip_if_model_doesnt_support_openai_completion(client_with_models, text_model_id) tc = TestCase(test_case) # ollama needs more verbose prompting for some reason here... prompt = "Respond to this question and explain your answer. " + tc["content"] - response = openai_client.completions.create( + response = llama_stack_client.completions.create( model=text_model_id, prompt=prompt, stream=False, @@ -103,13 +108,13 @@ def test_openai_completion_non_streaming(openai_client, client_with_models, text "inference:completion:sanity", ], ) -def test_openai_completion_streaming(openai_client, client_with_models, text_model_id, test_case): +def test_openai_completion_streaming(llama_stack_client, client_with_models, text_model_id, test_case): skip_if_model_doesnt_support_openai_completion(client_with_models, text_model_id) tc = TestCase(test_case) # ollama needs more verbose prompting for some reason here... prompt = "Respond to this question and explain your answer. 
" + tc["content"] - response = openai_client.completions.create( + response = llama_stack_client.completions.create( model=text_model_id, prompt=prompt, stream=True, @@ -127,11 +132,11 @@ def test_openai_completion_streaming(openai_client, client_with_models, text_mod 0, ], ) -def test_openai_completion_prompt_logprobs(openai_client, client_with_models, text_model_id, prompt_logprobs): +def test_openai_completion_prompt_logprobs(llama_stack_client, client_with_models, text_model_id, prompt_logprobs): skip_if_provider_isnt_vllm(client_with_models, text_model_id) prompt = "Hello, world!" - response = openai_client.completions.create( + response = llama_stack_client.completions.create( model=text_model_id, prompt=prompt, stream=False, @@ -144,11 +149,11 @@ def test_openai_completion_prompt_logprobs(openai_client, client_with_models, te assert len(choice.prompt_logprobs) > 0 -def test_openai_completion_guided_choice(openai_client, client_with_models, text_model_id): +def test_openai_completion_guided_choice(llama_stack_client, client_with_models, text_model_id): skip_if_provider_isnt_vllm(client_with_models, text_model_id) prompt = "I am feeling really sad today." - response = openai_client.completions.create( + response = llama_stack_client.completions.create( model=text_model_id, prompt=prompt, stream=False, @@ -161,6 +166,9 @@ def test_openai_completion_guided_choice(openai_client, client_with_models, text assert choice.text in ["joy", "sadness"] +# Run the chat-completion tests with both the OpenAI client and the LlamaStack client + + @pytest.mark.parametrize( "test_case", [ @@ -168,13 +176,13 @@ def test_openai_completion_guided_choice(openai_client, client_with_models, text "inference:chat_completion:non_streaming_02", ], ) -def test_openai_chat_completion_non_streaming(openai_client, client_with_models, text_model_id, test_case): +def test_openai_chat_completion_non_streaming(compat_client, client_with_models, text_model_id, test_case): skip_if_model_doesnt_support_openai_chat_completion(client_with_models, text_model_id) tc = TestCase(test_case) question = tc["question"] expected = tc["expected"] - response = openai_client.chat.completions.create( + response = compat_client.chat.completions.create( model=text_model_id, messages=[ { @@ -196,13 +204,13 @@ def test_openai_chat_completion_non_streaming(openai_client, client_with_models, "inference:chat_completion:streaming_02", ], ) -def test_openai_chat_completion_streaming(openai_client, client_with_models, text_model_id, test_case): +def test_openai_chat_completion_streaming(compat_client, client_with_models, text_model_id, test_case): skip_if_model_doesnt_support_openai_chat_completion(client_with_models, text_model_id) tc = TestCase(test_case) question = tc["question"] expected = tc["expected"] - response = openai_client.chat.completions.create( + response = compat_client.chat.completions.create( model=text_model_id, messages=[{"role": "user", "content": question}], stream=True, diff --git a/tests/integration/tool_runtime/test_registration.py b/tests/integration/tool_runtime/test_registration.py index e4241d813..b36237d05 100644 --- a/tests/integration/tool_runtime/test_registration.py +++ b/tests/integration/tool_runtime/test_registration.py @@ -114,7 +114,7 @@ def test_register_and_unregister_toolgroup(llama_stack_client, mcp_server): llama_stack_client.toolgroups.unregister(toolgroup_id=test_toolgroup_id) # Verify it is unregistered - with pytest.raises(ValueError, match=f"Tool group '{test_toolgroup_id}' not found"): + with 
pytest.raises(Exception, match=f"Tool group '{test_toolgroup_id}' not found"): llama_stack_client.toolgroups.get(toolgroup_id=test_toolgroup_id) # Verify tools are also unregistered diff --git a/tests/unit/distribution/test_build_path.py b/tests/unit/distribution/test_build_path.py new file mode 100644 index 000000000..555cdda4a --- /dev/null +++ b/tests/unit/distribution/test_build_path.py @@ -0,0 +1,40 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from pathlib import Path + +from llama_stack.cli.stack._build import ( + _run_stack_build_command_from_build_config, +) +from llama_stack.distribution.datatypes import BuildConfig, DistributionSpec +from llama_stack.distribution.utils.image_types import LlamaStackImageType + + +def test_container_build_passes_path(monkeypatch, tmp_path): + called_with = {} + + def spy_build_image(cfg, build_file_path, image_name, template_or_config, run_config=None): + called_with["path"] = template_or_config + called_with["run_config"] = run_config + return 0 + + monkeypatch.setattr( + "llama_stack.cli.stack._build.build_image", + spy_build_image, + raising=True, + ) + + cfg = BuildConfig( + image_type=LlamaStackImageType.CONTAINER.value, + distribution_spec=DistributionSpec(providers={}, description=""), + ) + + _run_stack_build_command_from_build_config(cfg, image_name="dummy") + + assert "path" in called_with + assert isinstance(called_with["path"], str) + assert Path(called_with["path"]).exists() + assert called_with["run_config"] is None diff --git a/tests/unit/providers/inference/test_remote_vllm.py b/tests/unit/providers/inference/test_remote_vllm.py index 9c2281d85..b3172cad4 100644 --- a/tests/unit/providers/inference/test_remote_vllm.py +++ b/tests/unit/providers/inference/test_remote_vllm.py @@ -26,9 +26,17 @@ from openai.types.chat.chat_completion_chunk import ( ) from openai.types.model import Model as OpenAIModel -from llama_stack.apis.inference import ToolChoice, ToolConfig +from llama_stack.apis.inference import ( + ChatCompletionRequest, + CompletionMessage, + SystemMessage, + ToolChoice, + ToolConfig, + ToolResponseMessage, + UserMessage, +) from llama_stack.apis.models import Model -from llama_stack.models.llama.datatypes import StopReason +from llama_stack.models.llama.datatypes import StopReason, ToolCall from llama_stack.providers.remote.inference.vllm.config import VLLMInferenceAdapterConfig from llama_stack.providers.remote.inference.vllm.vllm import ( VLLMInferenceAdapter, @@ -130,6 +138,49 @@ async def test_old_vllm_tool_choice(vllm_inference_adapter): assert request.tool_config.tool_choice == ToolChoice.none +@pytest.mark.asyncio +async def test_tool_call_response(vllm_inference_adapter): + """Verify that tool call arguments from a CompletionMessage are correctly converted + into the expected JSON format.""" + + # Patch the call to vllm so we can inspect the arguments sent were correct + with patch.object( + vllm_inference_adapter.client.chat.completions, "create", new_callable=AsyncMock + ) as mock_nonstream_completion: + messages = [ + SystemMessage(content="You are a helpful assistant"), + UserMessage(content="How many?"), + CompletionMessage( + content="", + stop_reason=StopReason.end_of_turn, + tool_calls=[ + ToolCall( + call_id="foo", + tool_name="knowledge_search", + arguments={"query": "How many?"}, + arguments_json='{"query": "How many?"}', + ) + ], + ), + 
ToolResponseMessage(call_id="foo", content="knowledge_search found 5...."), + ] + await vllm_inference_adapter.chat_completion( + "mock-model", + messages, + stream=False, + tools=[], + tool_config=ToolConfig(tool_choice=ToolChoice.auto), + ) + + assert mock_nonstream_completion.call_args.kwargs["messages"][2]["tool_calls"] == [ + { + "id": "foo", + "type": "function", + "function": {"name": "knowledge_search", "arguments": '{"query": "How many?"}'}, + } + ] + + @pytest.mark.asyncio async def test_tool_call_delta_empty_tool_call_buf(): """ @@ -232,3 +283,14 @@ def test_chat_completion_doesnt_block_event_loop(caplog): # above. asyncio_warnings = [record.message for record in caplog.records if record.name == "asyncio"] assert not asyncio_warnings + + +@pytest.mark.asyncio +async def test_get_params_empty_tools(vllm_inference_adapter): + request = ChatCompletionRequest( + tools=[], + model="test_model", + messages=[UserMessage(content="test")], + ) + params = await vllm_inference_adapter._get_params(request) + assert "tools" not in params diff --git a/tests/unit/providers/nvidia/test_eval.py b/tests/unit/providers/nvidia/test_eval.py index 53df82aa4..584ca2101 100644 --- a/tests/unit/providers/nvidia/test_eval.py +++ b/tests/unit/providers/nvidia/test_eval.py @@ -13,6 +13,7 @@ import pytest from llama_stack.apis.benchmarks import Benchmark from llama_stack.apis.common.job_types import Job, JobStatus from llama_stack.apis.eval.eval import BenchmarkConfig, EvaluateResponse, ModelCandidate, SamplingParams +from llama_stack.models.llama.sku_types import CoreModelId from llama_stack.providers.remote.eval.nvidia.config import NVIDIAEvalConfig from llama_stack.providers.remote.eval.nvidia.eval import NVIDIAEvalImpl @@ -32,7 +33,7 @@ class TestNVIDIAEvalImpl(unittest.TestCase): self.agents_api = MagicMock() self.config = NVIDIAEvalConfig( - evaluator_service_url=os.environ["NVIDIA_EVALUATOR_URL"], + evaluator_url=os.environ["NVIDIA_EVALUATOR_URL"], ) self.eval_impl = NVIDIAEvalImpl( @@ -118,7 +119,7 @@ class TestNVIDIAEvalImpl(unittest.TestCase): benchmark_config = BenchmarkConfig( eval_candidate=ModelCandidate( type="model", - model="meta/llama-3.1-8b-instruct", + model=CoreModelId.llama3_1_8b_instruct.value, sampling_params=SamplingParams(max_tokens=100, temperature=0.7), ) ) @@ -137,7 +138,7 @@ class TestNVIDIAEvalImpl(unittest.TestCase): self._assert_request_body( { "config": f"nvidia/{MOCK_BENCHMARK_ID}", - "target": {"type": "model", "model": benchmark_config.eval_candidate.model}, + "target": {"type": "model", "model": "meta/llama-3.1-8b-instruct"}, } ) diff --git a/tests/unit/providers/nvidia/test_parameters.py b/tests/unit/providers/nvidia/test_parameters.py index cb1b92fba..ea12122a0 100644 --- a/tests/unit/providers/nvidia/test_parameters.py +++ b/tests/unit/providers/nvidia/test_parameters.py @@ -10,14 +10,17 @@ import warnings from unittest.mock import patch import pytest -from llama_stack_client.types.algorithm_config_param import LoraFinetuningConfig -from llama_stack_client.types.post_training_supervised_fine_tune_params import ( - TrainingConfig, - TrainingConfigDataConfig, - TrainingConfigEfficiencyConfig, - TrainingConfigOptimizerConfig, -) +from llama_stack.apis.post_training.post_training import ( + DataConfig, + DatasetFormat, + EfficiencyConfig, + LoraFinetuningConfig, + OptimizerConfig, + OptimizerType, + TrainingConfig, +) +from llama_stack.distribution.library_client import convert_pydantic_to_json_value from llama_stack.providers.remote.post_training.nvidia.post_training 
import ( NvidiaPostTrainingAdapter, NvidiaPostTrainingConfig, @@ -66,11 +69,8 @@ class TestNvidiaParameters(unittest.TestCase): def test_customizer_parameters_passed(self): """Test scenario 1: When an optional parameter is passed and value is correctly set.""" - custom_adapter_dim = 32 # Different from default of 8 algorithm_config = LoraFinetuningConfig( type="LoRA", - adapter_dim=custom_adapter_dim, - adapter_dropout=0.2, apply_lora_to_mlp=True, apply_lora_to_output=True, alpha=16, @@ -78,8 +78,15 @@ class TestNvidiaParameters(unittest.TestCase): lora_attn_modules=["q_proj", "k_proj", "v_proj", "o_proj"], ) - data_config = TrainingConfigDataConfig(dataset_id="test-dataset", batch_size=16) - optimizer_config = TrainingConfigOptimizerConfig(lr=0.0002) + data_config = DataConfig( + dataset_id="test-dataset", batch_size=16, shuffle=False, data_format=DatasetFormat.instruct + ) + optimizer_config = OptimizerConfig( + optimizer_type=OptimizerType.adam, + lr=0.0002, + weight_decay=0.01, + num_warmup_steps=100, + ) training_config = TrainingConfig( n_epochs=3, data_config=data_config, @@ -95,7 +102,7 @@ class TestNvidiaParameters(unittest.TestCase): model="meta-llama/Llama-3.1-8B-Instruct", checkpoint_dir="", algorithm_config=algorithm_config, - training_config=training_config, + training_config=convert_pydantic_to_json_value(training_config), logger_config={}, hyperparam_search_config={}, ) @@ -114,7 +121,7 @@ class TestNvidiaParameters(unittest.TestCase): self._assert_request_params( { "hyperparameters": { - "lora": {"adapter_dim": custom_adapter_dim, "adapter_dropout": 0.2, "alpha": 16}, + "lora": {"alpha": 16}, "epochs": 3, "learning_rate": 0.0002, "batch_size": 16, @@ -130,8 +137,6 @@ class TestNvidiaParameters(unittest.TestCase): algorithm_config = LoraFinetuningConfig( type="LoRA", - adapter_dim=16, - adapter_dropout=0.1, apply_lora_to_mlp=True, apply_lora_to_output=True, alpha=16, @@ -139,12 +144,16 @@ class TestNvidiaParameters(unittest.TestCase): lora_attn_modules=["q_proj", "k_proj", "v_proj", "o_proj"], ) - data_config = TrainingConfigDataConfig( - dataset_id=required_dataset_id, # Required parameter - batch_size=8, + data_config = DataConfig( + dataset_id=required_dataset_id, batch_size=8, shuffle=False, data_format=DatasetFormat.instruct ) - optimizer_config = TrainingConfigOptimizerConfig(lr=0.0001) + optimizer_config = OptimizerConfig( + optimizer_type=OptimizerType.adam, + lr=0.0001, + weight_decay=0.01, + num_warmup_steps=100, + ) training_config = TrainingConfig( n_epochs=1, @@ -161,7 +170,7 @@ class TestNvidiaParameters(unittest.TestCase): model=required_model, # Required parameter checkpoint_dir="", algorithm_config=algorithm_config, - training_config=training_config, + training_config=convert_pydantic_to_json_value(training_config), logger_config={}, hyperparam_search_config={}, ) @@ -186,24 +195,24 @@ class TestNvidiaParameters(unittest.TestCase): def test_unsupported_parameters_warning(self): """Test that warnings are raised for unsupported parameters.""" - data_config = TrainingConfigDataConfig( + data_config = DataConfig( dataset_id="test-dataset", batch_size=8, # Unsupported parameters shuffle=True, - data_format="instruct", + data_format=DatasetFormat.instruct, validation_dataset_id="val-dataset", ) - optimizer_config = TrainingConfigOptimizerConfig( + optimizer_config = OptimizerConfig( lr=0.0001, weight_decay=0.01, # Unsupported parameters - optimizer_type="adam", + optimizer_type=OptimizerType.adam, num_warmup_steps=100, ) - efficiency_config = 
TrainingConfigEfficiencyConfig( + efficiency_config = EfficiencyConfig( enable_activation_checkpointing=True # Unsupported parameter ) @@ -230,15 +239,13 @@ class TestNvidiaParameters(unittest.TestCase): checkpoint_dir="test-dir", # Unsupported parameter algorithm_config=LoraFinetuningConfig( type="LoRA", - adapter_dim=16, - adapter_dropout=0.1, apply_lora_to_mlp=True, apply_lora_to_output=True, alpha=16, rank=16, lora_attn_modules=["q_proj", "k_proj", "v_proj", "o_proj"], ), - training_config=training_config, + training_config=convert_pydantic_to_json_value(training_config), logger_config={"test": "value"}, # Unsupported parameter hyperparam_search_config={"test": "value"}, # Unsupported parameter ) diff --git a/tests/unit/providers/nvidia/test_supervised_fine_tuning.py b/tests/unit/providers/nvidia/test_supervised_fine_tuning.py index 94e189e3e..319011be3 100644 --- a/tests/unit/providers/nvidia/test_supervised_fine_tuning.py +++ b/tests/unit/providers/nvidia/test_supervised_fine_tuning.py @@ -10,14 +10,18 @@ import warnings from unittest.mock import patch import pytest -from llama_stack_client.types.algorithm_config_param import LoraFinetuningConfig, QatFinetuningConfig -from llama_stack_client.types.post_training_supervised_fine_tune_params import ( - TrainingConfig, - TrainingConfigDataConfig, - TrainingConfigOptimizerConfig, -) from llama_stack.apis.models import Model, ModelType +from llama_stack.apis.post_training.post_training import ( + DataConfig, + DatasetFormat, + LoraFinetuningConfig, + OptimizerConfig, + OptimizerType, + QATFinetuningConfig, + TrainingConfig, +) +from llama_stack.distribution.library_client import convert_pydantic_to_json_value from llama_stack.providers.remote.inference.nvidia.nvidia import NVIDIAConfig, NVIDIAInferenceAdapter from llama_stack.providers.remote.post_training.nvidia.post_training import ( ListNvidiaPostTrainingJobs, @@ -121,7 +125,7 @@ class TestNvidiaPostTraining(unittest.TestCase): "batch_size": 16, "epochs": 2, "learning_rate": 0.0001, - "lora": {"adapter_dim": 16, "adapter_dropout": 0.1}, + "lora": {"alpha": 16}, }, "output_model": "default/job-1234", "status": "created", @@ -132,8 +136,6 @@ class TestNvidiaPostTraining(unittest.TestCase): algorithm_config = LoraFinetuningConfig( type="LoRA", - adapter_dim=16, - adapter_dropout=0.1, apply_lora_to_mlp=True, apply_lora_to_output=True, alpha=16, @@ -141,10 +143,15 @@ class TestNvidiaPostTraining(unittest.TestCase): lora_attn_modules=["q_proj", "k_proj", "v_proj", "o_proj"], ) - data_config = TrainingConfigDataConfig(dataset_id="sample-basic-test", batch_size=16) + data_config = DataConfig( + dataset_id="sample-basic-test", batch_size=16, shuffle=False, data_format=DatasetFormat.instruct + ) - optimizer_config = TrainingConfigOptimizerConfig( + optimizer_config = OptimizerConfig( + optimizer_type=OptimizerType.adam, lr=0.0001, + weight_decay=0.01, + num_warmup_steps=100, ) training_config = TrainingConfig( @@ -161,7 +168,7 @@ class TestNvidiaPostTraining(unittest.TestCase): model="meta-llama/Llama-3.1-8B-Instruct", checkpoint_dir="", algorithm_config=algorithm_config, - training_config=training_config, + training_config=convert_pydantic_to_json_value(training_config), logger_config={}, hyperparam_search_config={}, ) @@ -185,16 +192,22 @@ class TestNvidiaPostTraining(unittest.TestCase): "epochs": 2, "batch_size": 16, "learning_rate": 0.0001, - "lora": {"alpha": 16, "adapter_dim": 16, "adapter_dropout": 0.1}, + "weight_decay": 0.01, + "lora": {"alpha": 16}, }, }, ) def 
test_supervised_fine_tune_with_qat(self): - algorithm_config = QatFinetuningConfig(type="QAT", quantizer_name="quantizer_name", group_size=1) - data_config = TrainingConfigDataConfig(dataset_id="sample-basic-test", batch_size=16) - optimizer_config = TrainingConfigOptimizerConfig( + algorithm_config = QATFinetuningConfig(type="QAT", quantizer_name="quantizer_name", group_size=1) + data_config = DataConfig( + dataset_id="sample-basic-test", batch_size=16, shuffle=False, data_format=DatasetFormat.instruct + ) + optimizer_config = OptimizerConfig( + optimizer_type=OptimizerType.adam, lr=0.0001, + weight_decay=0.01, + num_warmup_steps=100, ) training_config = TrainingConfig( n_epochs=2, @@ -209,42 +222,55 @@ class TestNvidiaPostTraining(unittest.TestCase): model="meta-llama/Llama-3.1-8B-Instruct", checkpoint_dir="", algorithm_config=algorithm_config, - training_config=training_config, + training_config=convert_pydantic_to_json_value(training_config), logger_config={}, hyperparam_search_config={}, ) ) def test_get_training_job_status(self): - self.mock_make_request.return_value = { - "created_at": "2024-12-09T04:06:28.580220", - "updated_at": "2024-12-09T04:21:19.852832", - "status": "completed", - "steps_completed": 1210, - "epochs_completed": 2, - "percentage_done": 100.0, - "best_epoch": 2, - "train_loss": 1.718016266822815, - "val_loss": 1.8661999702453613, - } + customizer_status_to_job_status = [ + ("running", "in_progress"), + ("completed", "completed"), + ("failed", "failed"), + ("cancelled", "cancelled"), + ("pending", "scheduled"), + ("unknown", "scheduled"), + ] - job_id = "cust-JGTaMbJMdqjJU8WbQdN9Q2" + for customizer_status, expected_status in customizer_status_to_job_status: + with self.subTest(customizer_status=customizer_status, expected_status=expected_status): + self.mock_make_request.return_value = { + "created_at": "2024-12-09T04:06:28.580220", + "updated_at": "2024-12-09T04:21:19.852832", + "status": customizer_status, + "steps_completed": 1210, + "epochs_completed": 2, + "percentage_done": 100.0, + "best_epoch": 2, + "train_loss": 1.718016266822815, + "val_loss": 1.8661999702453613, + } - status = self.run_async(self.adapter.get_training_job_status(job_uuid=job_id)) + job_id = "cust-JGTaMbJMdqjJU8WbQdN9Q2" - assert isinstance(status, NvidiaPostTrainingJobStatusResponse) - assert status.status.value == "completed" - assert status.steps_completed == 1210 - assert status.epochs_completed == 2 - assert status.percentage_done == 100.0 - assert status.best_epoch == 2 - assert status.train_loss == 1.718016266822815 - assert status.val_loss == 1.8661999702453613 + status = self.run_async(self.adapter.get_training_job_status(job_uuid=job_id)) - self.mock_make_request.assert_called_once() - self._assert_request( - self.mock_make_request, "GET", f"/v1/customization/jobs/{job_id}/status", expected_params={"job_id": job_id} - ) + assert isinstance(status, NvidiaPostTrainingJobStatusResponse) + assert status.status.value == expected_status + assert status.steps_completed == 1210 + assert status.epochs_completed == 2 + assert status.percentage_done == 100.0 + assert status.best_epoch == 2 + assert status.train_loss == 1.718016266822815 + assert status.val_loss == 1.8661999702453613 + + self._assert_request( + self.mock_make_request, + "GET", + f"/v1/customization/jobs/{job_id}/status", + expected_params={"job_id": job_id}, + ) def test_get_training_jobs(self): job_id = "cust-JGTaMbJMdqjJU8WbQdN9Q2" diff --git a/tests/unit/providers/utils/inference/test_openai_compat.py 
b/tests/unit/providers/utils/inference/test_openai_compat.py new file mode 100644 index 000000000..eb02f8203 --- /dev/null +++ b/tests/unit/providers/utils/inference/test_openai_compat.py @@ -0,0 +1,43 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +import pytest + +from llama_stack.apis.common.content_types import TextContentItem +from llama_stack.apis.inference.inference import CompletionMessage, UserMessage +from llama_stack.models.llama.datatypes import StopReason, ToolCall +from llama_stack.providers.utils.inference.openai_compat import convert_message_to_openai_dict + + +@pytest.mark.asyncio +async def test_convert_message_to_openai_dict(): + message = UserMessage(content=[TextContentItem(text="Hello, world!")], role="user") + assert await convert_message_to_openai_dict(message) == { + "role": "user", + "content": [{"type": "text", "text": "Hello, world!"}], + } + + +# Test convert_message_to_openai_dict with a tool call +@pytest.mark.asyncio +async def test_convert_message_to_openai_dict_with_tool_call(): + message = CompletionMessage( + content="", + tool_calls=[ + ToolCall(call_id="123", tool_name="test_tool", arguments_json='{"foo": "bar"}', arguments={"foo": "bar"}) + ], + stop_reason=StopReason.end_of_turn, + ) + + openai_dict = await convert_message_to_openai_dict(message) + + assert openai_dict == { + "role": "assistant", + "content": [{"type": "text", "text": ""}], + "tool_calls": [ + {"id": "123", "type": "function", "function": {"name": "test_tool", "arguments": '{"foo": "bar"}'}} + ], + } diff --git a/tests/unit/server/test_sse.py b/tests/unit/server/test_sse.py new file mode 100644 index 000000000..c78122294 --- /dev/null +++ b/tests/unit/server/test_sse.py @@ -0,0 +1,91 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
+ +import asyncio + +import pytest + +from llama_stack.distribution.server.server import create_sse_event, sse_generator + + +@pytest.mark.asyncio +async def test_sse_generator_basic(): + # An AsyncIterator wrapped in an Awaitable, just like our web methods + async def async_event_gen(): + async def event_gen(): + yield "Test event 1" + yield "Test event 2" + + return event_gen() + + sse_gen = sse_generator(async_event_gen()) + assert sse_gen is not None + + # Test that the events are streamed correctly + seen_events = [] + async for event in sse_gen: + seen_events.append(event) + assert len(seen_events) == 2 + assert seen_events[0] == create_sse_event("Test event 1") + assert seen_events[1] == create_sse_event("Test event 2") + + +@pytest.mark.asyncio +async def test_sse_generator_client_disconnected(): + # An AsyncIterator wrapped in an Awaitable, just like our web methods + async def async_event_gen(): + async def event_gen(): + yield "Test event 1" + # Simulate a client disconnect before emitting event 2 + raise asyncio.CancelledError() + + return event_gen() + + sse_gen = sse_generator(async_event_gen()) + assert sse_gen is not None + + seen_events = [] + async for event in sse_gen: + seen_events.append(event) + + # We should see 1 event before the client disconnected + assert len(seen_events) == 1 + assert seen_events[0] == create_sse_event("Test event 1") + + +@pytest.mark.asyncio +async def test_sse_generator_client_disconnected_before_response_starts(): + # Disconnect before the response starts + async def async_event_gen(): + raise asyncio.CancelledError() + + sse_gen = sse_generator(async_event_gen()) + assert sse_gen is not None + + seen_events = [] + async for event in sse_gen: + seen_events.append(event) + + # No events should be seen since the client disconnected immediately + assert len(seen_events) == 0 + + +@pytest.mark.asyncio +async def test_sse_generator_error_before_response_starts(): + # Raise an error before the response starts + async def async_event_gen(): + raise Exception("Test error") + + sse_gen = sse_generator(async_event_gen()) + assert sse_gen is not None + + seen_events = [] + async for event in sse_gen: + seen_events.append(event) + + # We should have 1 error event + assert len(seen_events) == 1 + assert 'data: {"error":' in seen_events[0] diff --git a/tests/verifications/README.md b/tests/verifications/README.md index 986ff1087..88762e0ba 100644 --- a/tests/verifications/README.md +++ b/tests/verifications/README.md @@ -8,29 +8,44 @@ This framework allows you to run the same set of verification tests against diff ## Features -The verification suite currently tests: +The verification suite currently tests the following in both streaming and non-streaming modes: -- Basic chat completions (streaming and non-streaming) +- Basic chat completions - Image input capabilities - Structured JSON output formatting - Tool calling functionality +## Report + +The lastest report can be found at [REPORT.md](REPORT.md). 
+ +To update the report, ensure you have the API keys set, +```bash +export OPENAI_API_KEY= +export FIREWORKS_API_KEY= +export TOGETHER_API_KEY= +``` +then run +```bash +uv run --with-editable ".[dev]" python tests/verifications/generate_report.py --run-tests +``` + ## Running Tests To run the verification tests, use pytest with the following parameters: ```bash cd llama-stack -pytest tests/verifications/openai --provider= +pytest tests/verifications/openai_api --provider= ``` Example: ```bash # Run all tests -pytest tests/verifications/openai --provider=together +pytest tests/verifications/openai_api --provider=together # Only run tests with Llama 4 models -pytest tests/verifications/openai --provider=together -k 'Llama-4' +pytest tests/verifications/openai_api --provider=together -k 'Llama-4' ``` ### Parameters @@ -41,23 +56,22 @@ pytest tests/verifications/openai --provider=together -k 'Llama-4' ## Supported Providers -The verification suite currently supports: -- OpenAI -- Fireworks -- Together -- Groq -- Cerebras +The verification suite supports any provider with an OpenAI compatible endpoint. + +See `tests/verifications/conf/` for the list of supported providers. + +To run on a new provider, simply add a new yaml file to the `conf/` directory with the provider config. See `tests/verifications/conf/together.yaml` for an example. ## Adding New Test Cases -To add new test cases, create appropriate JSON files in the `openai/fixtures/test_cases/` directory following the existing patterns. +To add new test cases, create appropriate JSON files in the `openai_api/fixtures/test_cases/` directory following the existing patterns. ## Structure - `__init__.py` - Marks the directory as a Python package -- `conftest.py` - Global pytest configuration and fixtures -- `openai/` - Tests specific to OpenAI-compatible APIs +- `conf/` - Provider-specific configuration files +- `openai_api/` - Tests specific to OpenAI-compatible APIs - `fixtures/` - Test fixtures and utilities - `fixtures.py` - Provider-specific fixtures - `load.py` - Utilities for loading test cases diff --git a/tests/verifications/REPORT.md b/tests/verifications/REPORT.md index 2dd0af41b..2a700fa9c 100644 --- a/tests/verifications/REPORT.md +++ b/tests/verifications/REPORT.md @@ -1,6 +1,6 @@ # Test Results Report -*Generated on: 2025-04-14 18:11:37* +*Generated on: 2025-04-17 12:42:33* *This report was generated by running `python tests/verifications/generate_report.py`* @@ -15,22 +15,74 @@ | Provider | Pass Rate | Tests Passed | Total Tests | | --- | --- | --- | --- | -| Together | 48.7% | 37 | 76 | -| Fireworks | 47.4% | 36 | 76 | -| Openai | 100.0% | 52 | 52 | +| Meta_reference | 100.0% | 28 | 28 | +| Together | 50.0% | 40 | 80 | +| Fireworks | 50.0% | 40 | 80 | +| Openai | 100.0% | 56 | 56 | +## Meta_reference + +*Tests run on: 2025-04-17 12:37:11* + +```bash +# Run all tests for this provider: +pytest tests/verifications/openai_api/test_chat_completion.py --provider=meta_reference -v + +# Example: Run only the 'stream=False' case of test_chat_multi_turn_multiple_images: +pytest tests/verifications/openai_api/test_chat_completion.py --provider=meta_reference -k "test_chat_multi_turn_multiple_images and stream=False" +``` + + +**Model Key (Meta_reference)** + +| Display Name | Full Model ID | +| --- | --- | +| Llama-4-Scout-Instruct | `meta-llama/Llama-4-Scout-17B-16E-Instruct` | + + +| Test | Llama-4-Scout-Instruct | +| --- | --- | +| test_chat_multi_turn_multiple_images (stream=False) | βœ… | +| test_chat_multi_turn_multiple_images 
(stream=True) | βœ… | +| test_chat_non_streaming_basic (earth) | βœ… | +| test_chat_non_streaming_basic (saturn) | βœ… | +| test_chat_non_streaming_image | βœ… | +| test_chat_non_streaming_multi_turn_tool_calling (add_product_tool) | βœ… | +| test_chat_non_streaming_multi_turn_tool_calling (compare_monthly_expense_tool) | βœ… | +| test_chat_non_streaming_multi_turn_tool_calling (get_then_create_event_tool) | βœ… | +| test_chat_non_streaming_multi_turn_tool_calling (text_then_weather_tool) | βœ… | +| test_chat_non_streaming_multi_turn_tool_calling (weather_tool_then_text) | βœ… | +| test_chat_non_streaming_structured_output (calendar) | βœ… | +| test_chat_non_streaming_structured_output (math) | βœ… | +| test_chat_non_streaming_tool_calling | βœ… | +| test_chat_non_streaming_tool_choice_none | βœ… | +| test_chat_non_streaming_tool_choice_required | βœ… | +| test_chat_streaming_basic (earth) | βœ… | +| test_chat_streaming_basic (saturn) | βœ… | +| test_chat_streaming_image | βœ… | +| test_chat_streaming_multi_turn_tool_calling (add_product_tool) | βœ… | +| test_chat_streaming_multi_turn_tool_calling (compare_monthly_expense_tool) | βœ… | +| test_chat_streaming_multi_turn_tool_calling (get_then_create_event_tool) | βœ… | +| test_chat_streaming_multi_turn_tool_calling (text_then_weather_tool) | βœ… | +| test_chat_streaming_multi_turn_tool_calling (weather_tool_then_text) | βœ… | +| test_chat_streaming_structured_output (calendar) | βœ… | +| test_chat_streaming_structured_output (math) | βœ… | +| test_chat_streaming_tool_calling | βœ… | +| test_chat_streaming_tool_choice_none | βœ… | +| test_chat_streaming_tool_choice_required | βœ… | + ## Together -*Tests run on: 2025-04-14 18:08:14* +*Tests run on: 2025-04-17 12:27:45* ```bash # Run all tests for this provider: pytest tests/verifications/openai_api/test_chat_completion.py --provider=together -v -# Example: Run only the 'earth' case of test_chat_non_streaming_basic: -pytest tests/verifications/openai_api/test_chat_completion.py --provider=together -k "test_chat_non_streaming_basic and earth" +# Example: Run only the 'stream=False' case of test_chat_multi_turn_multiple_images: +pytest tests/verifications/openai_api/test_chat_completion.py --provider=together -k "test_chat_multi_turn_multiple_images and stream=False" ``` @@ -45,11 +97,13 @@ pytest tests/verifications/openai_api/test_chat_completion.py --provider=togethe | Test | Llama-3.3-70B-Instruct | Llama-4-Maverick-Instruct | Llama-4-Scout-Instruct | | --- | --- | --- | --- | +| test_chat_multi_turn_multiple_images (stream=False) | βšͺ | βœ… | βœ… | +| test_chat_multi_turn_multiple_images (stream=True) | βšͺ | ❌ | ❌ | | test_chat_non_streaming_basic (earth) | βœ… | βœ… | βœ… | | test_chat_non_streaming_basic (saturn) | βœ… | βœ… | βœ… | | test_chat_non_streaming_image | βšͺ | βœ… | βœ… | | test_chat_non_streaming_multi_turn_tool_calling (add_product_tool) | βœ… | βœ… | βœ… | -| test_chat_non_streaming_multi_turn_tool_calling (compare_monthly_expense_tool) | ❌ | βœ… | βœ… | +| test_chat_non_streaming_multi_turn_tool_calling (compare_monthly_expense_tool) | βœ… | βœ… | βœ… | | test_chat_non_streaming_multi_turn_tool_calling (get_then_create_event_tool) | βœ… | ❌ | βœ… | | test_chat_non_streaming_multi_turn_tool_calling (text_then_weather_tool) | ❌ | ❌ | ❌ | | test_chat_non_streaming_multi_turn_tool_calling (weather_tool_then_text) | βœ… | βœ… | βœ… | @@ -74,14 +128,14 @@ pytest tests/verifications/openai_api/test_chat_completion.py --provider=togethe ## Fireworks -*Tests run on: 2025-04-14 
18:04:06* +*Tests run on: 2025-04-17 12:29:53* ```bash # Run all tests for this provider: pytest tests/verifications/openai_api/test_chat_completion.py --provider=fireworks -v -# Example: Run only the 'earth' case of test_chat_non_streaming_basic: -pytest tests/verifications/openai_api/test_chat_completion.py --provider=fireworks -k "test_chat_non_streaming_basic and earth" +# Example: Run only the 'stream=False' case of test_chat_multi_turn_multiple_images: +pytest tests/verifications/openai_api/test_chat_completion.py --provider=fireworks -k "test_chat_multi_turn_multiple_images and stream=False" ``` @@ -96,6 +150,8 @@ pytest tests/verifications/openai_api/test_chat_completion.py --provider=firewor | Test | Llama-3.3-70B-Instruct | Llama-4-Maverick-Instruct | Llama-4-Scout-Instruct | | --- | --- | --- | --- | +| test_chat_multi_turn_multiple_images (stream=False) | βšͺ | βœ… | βœ… | +| test_chat_multi_turn_multiple_images (stream=True) | βšͺ | βœ… | βœ… | | test_chat_non_streaming_basic (earth) | βœ… | βœ… | βœ… | | test_chat_non_streaming_basic (saturn) | βœ… | βœ… | βœ… | | test_chat_non_streaming_image | βšͺ | βœ… | βœ… | @@ -125,14 +181,14 @@ pytest tests/verifications/openai_api/test_chat_completion.py --provider=firewor ## Openai -*Tests run on: 2025-04-14 18:09:51* +*Tests run on: 2025-04-17 12:34:08* ```bash # Run all tests for this provider: pytest tests/verifications/openai_api/test_chat_completion.py --provider=openai -v -# Example: Run only the 'earth' case of test_chat_non_streaming_basic: -pytest tests/verifications/openai_api/test_chat_completion.py --provider=openai -k "test_chat_non_streaming_basic and earth" +# Example: Run only the 'stream=False' case of test_chat_multi_turn_multiple_images: +pytest tests/verifications/openai_api/test_chat_completion.py --provider=openai -k "test_chat_multi_turn_multiple_images and stream=False" ``` @@ -146,6 +202,8 @@ pytest tests/verifications/openai_api/test_chat_completion.py --provider=openai | Test | gpt-4o | gpt-4o-mini | | --- | --- | --- | +| test_chat_multi_turn_multiple_images (stream=False) | βœ… | βœ… | +| test_chat_multi_turn_multiple_images (stream=True) | βœ… | βœ… | | test_chat_non_streaming_basic (earth) | βœ… | βœ… | | test_chat_non_streaming_basic (saturn) | βœ… | βœ… | | test_chat_non_streaming_image | βœ… | βœ… | diff --git a/tests/verifications/conf/cerebras.yaml b/tests/verifications/conf/cerebras.yaml index 5b19b4916..37fc713d6 100644 --- a/tests/verifications/conf/cerebras.yaml +++ b/tests/verifications/conf/cerebras.yaml @@ -8,3 +8,4 @@ test_exclusions: llama-3.3-70b: - test_chat_non_streaming_image - test_chat_streaming_image + - test_chat_multi_turn_multiple_images diff --git a/tests/verifications/conf/fireworks-llama-stack.yaml b/tests/verifications/conf/fireworks-llama-stack.yaml index d91443dd9..fc78a1377 100644 --- a/tests/verifications/conf/fireworks-llama-stack.yaml +++ b/tests/verifications/conf/fireworks-llama-stack.yaml @@ -12,3 +12,4 @@ test_exclusions: fireworks/llama-v3p3-70b-instruct: - test_chat_non_streaming_image - test_chat_streaming_image + - test_chat_multi_turn_multiple_images diff --git a/tests/verifications/conf/fireworks.yaml b/tests/verifications/conf/fireworks.yaml index f55b707ba..9bb21f706 100644 --- a/tests/verifications/conf/fireworks.yaml +++ b/tests/verifications/conf/fireworks.yaml @@ -12,3 +12,4 @@ test_exclusions: accounts/fireworks/models/llama-v3p3-70b-instruct: - test_chat_non_streaming_image - test_chat_streaming_image + - test_chat_multi_turn_multiple_images diff --git 
a/tests/verifications/conf/groq-llama-stack.yaml b/tests/verifications/conf/groq-llama-stack.yaml index fd5e9abec..6958bafc5 100644 --- a/tests/verifications/conf/groq-llama-stack.yaml +++ b/tests/verifications/conf/groq-llama-stack.yaml @@ -12,3 +12,4 @@ test_exclusions: groq/llama-3.3-70b-versatile: - test_chat_non_streaming_image - test_chat_streaming_image + - test_chat_multi_turn_multiple_images diff --git a/tests/verifications/conf/groq.yaml b/tests/verifications/conf/groq.yaml index 76b1244ae..bc3de58e9 100644 --- a/tests/verifications/conf/groq.yaml +++ b/tests/verifications/conf/groq.yaml @@ -12,3 +12,4 @@ test_exclusions: llama-3.3-70b-versatile: - test_chat_non_streaming_image - test_chat_streaming_image + - test_chat_multi_turn_multiple_images diff --git a/tests/verifications/conf/meta_reference.yaml b/tests/verifications/conf/meta_reference.yaml new file mode 100644 index 000000000..fb2680fe0 --- /dev/null +++ b/tests/verifications/conf/meta_reference.yaml @@ -0,0 +1,8 @@ +# LLAMA_STACK_PORT=5002 llama stack run meta-reference-gpu --env INFERENCE_MODEL=meta-llama/Llama-4-Scout-17B-16E-Instruct --env INFERENCE_CHECKPOINT_DIR= +base_url: http://localhost:5002/v1/openai/v1 +api_key_var: foo +models: +- meta-llama/Llama-4-Scout-17B-16E-Instruct +model_display_names: + meta-llama/Llama-4-Scout-17B-16E-Instruct: Llama-4-Scout-Instruct +test_exclusions: {} diff --git a/tests/verifications/conf/together-llama-stack.yaml b/tests/verifications/conf/together-llama-stack.yaml index e49d82604..719e2d776 100644 --- a/tests/verifications/conf/together-llama-stack.yaml +++ b/tests/verifications/conf/together-llama-stack.yaml @@ -12,3 +12,4 @@ test_exclusions: together/meta-llama/Llama-3.3-70B-Instruct-Turbo: - test_chat_non_streaming_image - test_chat_streaming_image + - test_chat_multi_turn_multiple_images diff --git a/tests/verifications/conf/together.yaml b/tests/verifications/conf/together.yaml index 258616662..e8fb62ab9 100644 --- a/tests/verifications/conf/together.yaml +++ b/tests/verifications/conf/together.yaml @@ -12,3 +12,4 @@ test_exclusions: meta-llama/Llama-3.3-70B-Instruct-Turbo: - test_chat_non_streaming_image - test_chat_streaming_image + - test_chat_multi_turn_multiple_images diff --git a/tests/verifications/generate_report.py b/tests/verifications/generate_report.py index b39c3fd19..f0894bfce 100755 --- a/tests/verifications/generate_report.py +++ b/tests/verifications/generate_report.py @@ -3,14 +3,6 @@ # # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
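The new `tests/verifications/conf/meta_reference.yaml` above follows the same shape as the other provider configs: an OpenAI-compatible `base_url`, the name of the environment variable holding the API key (`api_key_var`), the `models` to exercise, their `model_display_names`, and per-model `test_exclusions`. As a rough sketch of how such a file could be consumed (the real wiring lives in `tests/verifications/openai_api/fixtures/fixtures.py`; this assumes `pyyaml` and the `openai` client are installed):

```python
import os

import yaml  # pyyaml
from openai import OpenAI


def load_verification_conf(path: str) -> dict:
    """Load a provider config such as tests/verifications/conf/meta_reference.yaml."""
    with open(path) as f:
        conf = yaml.safe_load(f)
    # Keys used throughout the suite; test_exclusions maps model -> skipped test names.
    for key in ("base_url", "api_key_var", "models", "model_display_names", "test_exclusions"):
        assert key in conf, f"missing '{key}' in {path}"
    return conf


def client_for(conf: dict) -> OpenAI:
    # The suite talks to any OpenAI-compatible endpoint; the API key is read from
    # the environment variable named by api_key_var.
    return OpenAI(base_url=conf["base_url"], api_key=os.environ.get(conf["api_key_var"], "dummy"))


if __name__ == "__main__":
    conf = load_verification_conf("tests/verifications/conf/meta_reference.yaml")
    print(conf["models"], "->", client_for(conf).base_url)
```

Dropping another yaml with these keys into `conf/` is all the README asks for when adding a new provider.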
- -# /// script -# requires-python = ">=3.10" -# dependencies = [ -# "pytest-json-report", -# "pyyaml", -# ] -# /// """ Test Report Generator @@ -67,16 +59,11 @@ RESULTS_DIR.mkdir(exist_ok=True) # Maximum number of test result files to keep per provider MAX_RESULTS_PER_PROVIDER = 1 -PROVIDER_ORDER = [ +DEFAULT_PROVIDERS = [ + "meta_reference", "together", "fireworks", - "groq", - "cerebras", "openai", - "together-llama-stack", - "fireworks-llama-stack", - "groq-llama-stack", - "openai-llama-stack", ] VERIFICATION_CONFIG = _load_all_verification_configs() @@ -142,6 +129,14 @@ def run_tests(provider, keyword=None): return None +def run_multiple_tests(providers_to_run: list[str], keyword: str | None): + """Runs tests for a list of providers.""" + print(f"Running tests for providers: {', '.join(providers_to_run)}") + for provider in providers_to_run: + run_tests(provider.strip(), keyword=keyword) + print("Finished running tests.") + + def parse_results( result_file, ) -> Tuple[DefaultDict[str, DefaultDict[str, Dict[str, bool]]], DefaultDict[str, Set[str]], Set[str], str]: @@ -250,20 +245,6 @@ def parse_results( return parsed_results, providers_in_file, tests_in_file, run_timestamp_str -def get_all_result_files_by_provider(): - """Get all test result files, keyed by provider.""" - provider_results = {} - - result_files = list(RESULTS_DIR.glob("*.json")) - - for file in result_files: - provider = file.stem - if provider: - provider_results[provider] = file - - return provider_results - - def generate_report( results_dict: Dict[str, Any], providers: Dict[str, Set[str]], @@ -276,6 +257,7 @@ def generate_report( Args: results_dict: Aggregated results [provider][model][test_name] -> status. providers: Dict of all providers and their models {provider: {models}}. + The order of keys in this dict determines the report order. all_tests: Set of all test names found. provider_timestamps: Dict of provider to timestamp when tests were run output_file: Optional path to save the report. 
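To make the data shapes concrete: `parse_results` returns a nested mapping of provider -> model -> test name -> pass/fail, plus the providers and models it saw, the set of test names, and the run timestamp, and `generate_report` now emits providers in the insertion order of the dict it is handed rather than a hard-coded `PROVIDER_ORDER`. A simplified sketch of how a summary row falls out of that structure (hypothetical sample data, not the real report code):

```python
from collections import defaultdict

# provider -> model -> test_name -> passed (the shape parse_results produces)
results: dict[str, dict[str, dict[str, bool]]] = defaultdict(lambda: defaultdict(dict))
results["meta_reference"]["meta-llama/Llama-4-Scout-17B-16E-Instruct"] = {
    "test_chat_non_streaming_basic (earth)": True,
    "test_chat_streaming_basic (earth)": True,
}

# Insertion order of this dict is the order providers appear in the report.
for provider, models in results.items():
    passed = sum(ok for tests in models.values() for ok in tests.values())
    total = sum(len(tests) for tests in models.values())
    rate = f"{passed / total * 100:.1f}%" if total else "N/A"
    print(f"| {provider.capitalize()} | {rate} | {passed} | {total} |")
```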
@@ -353,22 +335,17 @@ def generate_report( passed_tests += 1 provider_totals[provider] = (provider_passed, provider_total) - # Add summary table (use passed-in providers dict) + # Add summary table (use the order from the providers dict keys) report.append("| Provider | Pass Rate | Tests Passed | Total Tests |") report.append("| --- | --- | --- | --- |") - for provider in [p for p in PROVIDER_ORDER if p in providers]: # Check against keys of passed-in dict - passed, total = provider_totals.get(provider, (0, 0)) - pass_rate = f"{(passed / total * 100):.1f}%" if total > 0 else "N/A" - report.append(f"| {provider.capitalize()} | {pass_rate} | {passed} | {total} |") - for provider in [p for p in providers if p not in PROVIDER_ORDER]: # Check against keys of passed-in dict + # Iterate through providers in the order they appear in the input dict + for provider in providers_sorted.keys(): passed, total = provider_totals.get(provider, (0, 0)) pass_rate = f"{(passed / total * 100):.1f}%" if total > 0 else "N/A" report.append(f"| {provider.capitalize()} | {pass_rate} | {passed} | {total} |") report.append("\n") - for provider in sorted( - providers_sorted.keys(), key=lambda p: (PROVIDER_ORDER.index(p) if p in PROVIDER_ORDER else float("inf"), p) - ): + for provider in providers_sorted.keys(): provider_models = providers_sorted[provider] # Use sorted models if not provider_models: continue @@ -461,60 +438,62 @@ def main(): "--providers", type=str, nargs="+", - help="Specify providers to test (comma-separated or space-separated, default: all)", + help="Specify providers to include/test (comma-separated or space-separated, default: uses DEFAULT_PROVIDERS)", ) parser.add_argument("--output", type=str, help="Output file location (default: tests/verifications/REPORT.md)") parser.add_argument("--k", type=str, help="Keyword expression to filter tests (passed to pytest -k)") args = parser.parse_args() all_results = {} - # Initialize collections to aggregate results in main - aggregated_providers = defaultdict(set) + final_providers_order = {} # Dictionary to store results, preserving processing order aggregated_tests = set() provider_timestamps = {} - if args.run_tests: - # Get list of available providers from command line or use detected providers - if args.providers: - # Handle both comma-separated and space-separated lists - test_providers = [] - for provider_arg in args.providers: - # Split by comma if commas are present - if "," in provider_arg: - test_providers.extend(provider_arg.split(",")) - else: - test_providers.append(provider_arg) - else: - # Default providers to test - test_providers = PROVIDER_ORDER - - for provider in test_providers: - provider = provider.strip() # Remove any whitespace - result_file = run_tests(provider, keyword=args.k) - if result_file: - # Parse and aggregate results - parsed_results, providers_in_file, tests_in_file, run_timestamp = parse_results(result_file) - all_results.update(parsed_results) - for prov, models in providers_in_file.items(): - aggregated_providers[prov].update(models) - if run_timestamp: - provider_timestamps[prov] = run_timestamp - aggregated_tests.update(tests_in_file) + # 1. 
Determine the desired list and order of providers + if args.providers: + desired_providers = [] + for provider_arg in args.providers: + desired_providers.extend([p.strip() for p in provider_arg.split(",")]) else: - # Use existing results - provider_result_files = get_all_result_files_by_provider() + desired_providers = DEFAULT_PROVIDERS # Use default order/list - for result_file in provider_result_files.values(): - # Parse and aggregate results - parsed_results, providers_in_file, tests_in_file, run_timestamp = parse_results(result_file) - all_results.update(parsed_results) - for prov, models in providers_in_file.items(): - aggregated_providers[prov].update(models) - if run_timestamp: - provider_timestamps[prov] = run_timestamp - aggregated_tests.update(tests_in_file) + # 2. Run tests if requested (using the desired provider list) + if args.run_tests: + run_multiple_tests(desired_providers, args.k) - generate_report(all_results, aggregated_providers, aggregated_tests, provider_timestamps, args.output) + for provider in desired_providers: + # Construct the expected result file path directly + result_file = RESULTS_DIR / f"{provider}.json" + + if result_file.exists(): # Check if the specific file exists + print(f"Loading results for {provider} from {result_file}") + try: + parsed_data = parse_results(result_file) + parsed_results, providers_in_file, tests_in_file, run_timestamp = parsed_data + all_results.update(parsed_results) + aggregated_tests.update(tests_in_file) + + # Add models for this provider, ensuring it's added in the correct report order + if provider in providers_in_file: + if provider not in final_providers_order: + final_providers_order[provider] = set() + final_providers_order[provider].update(providers_in_file[provider]) + if run_timestamp != "Unknown": + provider_timestamps[provider] = run_timestamp + else: + print( + f"Warning: Provider '{provider}' found in desired list but not within its result file data ({result_file})." + ) + + except Exception as e: + print(f"Error parsing results for provider {provider} from {result_file}: {e}") + else: + # Only print warning if we expected results (i.e., provider was in the desired list) + print(f"Result file for desired provider '{provider}' not found at {result_file}. Skipping.") + + # 5. 
Generate the report using the filtered & ordered results + print(f"Final Provider Order for Report: {list(final_providers_order.keys())}") + generate_report(all_results, final_providers_order, aggregated_tests, provider_timestamps, args.output) if __name__ == "__main__": diff --git a/tests/verifications/openai_api/fixtures/images/vision_test_1.jpg b/tests/verifications/openai_api/fixtures/images/vision_test_1.jpg new file mode 100644 index 000000000..32fd0c0e3 Binary files /dev/null and b/tests/verifications/openai_api/fixtures/images/vision_test_1.jpg differ diff --git a/tests/verifications/openai_api/fixtures/images/vision_test_2.jpg b/tests/verifications/openai_api/fixtures/images/vision_test_2.jpg new file mode 100644 index 000000000..f9c28e3d5 Binary files /dev/null and b/tests/verifications/openai_api/fixtures/images/vision_test_2.jpg differ diff --git a/tests/verifications/openai_api/fixtures/images/vision_test_3.jpg b/tests/verifications/openai_api/fixtures/images/vision_test_3.jpg new file mode 100644 index 000000000..63165ea86 Binary files /dev/null and b/tests/verifications/openai_api/fixtures/images/vision_test_3.jpg differ diff --git a/tests/verifications/openai_api/fixtures/test_cases/chat_completion.yaml b/tests/verifications/openai_api/fixtures/test_cases/chat_completion.yaml index 1ace76e34..0c9f1fe9e 100644 --- a/tests/verifications/openai_api/fixtures/test_cases/chat_completion.yaml +++ b/tests/verifications/openai_api/fixtures/test_cases/chat_completion.yaml @@ -15,6 +15,52 @@ test_chat_basic: S? role: user output: Saturn +test_chat_input_validation: + test_name: test_chat_input_validation + test_params: + case: + - case_id: "messages_missing" + input: + messages: [] + output: + error: + status_code: 400 + - case_id: "messages_role_invalid" + input: + messages: + - content: Which planet do humans live on? + role: fake_role + output: + error: + status_code: 400 + - case_id: "tool_choice_invalid" + input: + messages: + - content: Which planet do humans live on? + role: user + tool_choice: invalid + output: + error: + status_code: 400 + - case_id: "tool_choice_no_tools" + input: + messages: + - content: Which planet do humans live on? + role: user + tool_choice: required + output: + error: + status_code: 400 + - case_id: "tools_type_invalid" + input: + messages: + - content: Which planet do humans live on? + role: user + tools: + - type: invalid + output: + error: + status_code: 400 test_chat_image: test_name: test_chat_image test_params: diff --git a/tests/verifications/openai_api/test_chat_completion.py b/tests/verifications/openai_api/test_chat_completion.py index 62a223afb..277eaafa3 100644 --- a/tests/verifications/openai_api/test_chat_completion.py +++ b/tests/verifications/openai_api/test_chat_completion.py @@ -4,19 +4,26 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
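The `test_chat_input_validation` cases added above all expect the server to reject the request with a 400. In the tests that follow, the non-streaming variant asserts on `e.value.status_code`, while the streaming variant must consume the stream inside `pytest.raises` because with `stream=True` the failure may only surface once chunks are iterated, so the status code is checked against the error message instead. Condensed into one hypothetical helper (the suite's real fixture is `openai_client`; `client` and `expect_validation_error` here are illustrative names):

```python
import pytest
from openai import APIError


def expect_validation_error(client, model: str, case: dict, stream: bool) -> None:
    # Both modes should surface the 400 described by the case's expected output.
    with pytest.raises(APIError) as excinfo:
        response = client.chat.completions.create(
            model=model,
            messages=case["input"]["messages"],
            stream=stream,
            tool_choice=case["input"].get("tool_choice"),
            tools=case["input"].get("tools"),
        )
        if stream:
            # With stream=True the request may only fail once chunks are consumed.
            for _chunk in response:
                pass
    expected = case["output"]["error"]["status_code"]
    if stream:
        assert str(expected) in excinfo.value.message
    else:
        assert excinfo.value.status_code == expected
```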
+import base64 import copy import json import re +from pathlib import Path from typing import Any import pytest +from openai import APIError from pydantic import BaseModel -from tests.verifications.openai_api.fixtures.fixtures import _load_all_verification_configs +from tests.verifications.openai_api.fixtures.fixtures import ( + _load_all_verification_configs, +) from tests.verifications.openai_api.fixtures.load import load_test_cases chat_completion_test_cases = load_test_cases("chat_completion") +THIS_DIR = Path(__file__).parent + def case_id_generator(case): """Generate a test ID from the case's 'case_id' field, or use a default.""" @@ -69,6 +76,21 @@ def get_base_test_name(request): return request.node.originalname +@pytest.fixture +def multi_image_data(): + files = [ + THIS_DIR / "fixtures/images/vision_test_1.jpg", + THIS_DIR / "fixtures/images/vision_test_2.jpg", + THIS_DIR / "fixtures/images/vision_test_3.jpg", + ] + encoded_files = [] + for file in files: + with open(file, "rb") as image_file: + base64_data = base64.b64encode(image_file.read()).decode("utf-8") + encoded_files.append(f"data:image/jpeg;base64,{base64_data}") + return encoded_files + + # --- Test Functions --- @@ -115,6 +137,50 @@ def test_chat_streaming_basic(request, openai_client, model, provider, verificat assert case["output"].lower() in content.lower() +@pytest.mark.parametrize( + "case", + chat_completion_test_cases["test_chat_input_validation"]["test_params"]["case"], + ids=case_id_generator, +) +def test_chat_non_streaming_error_handling(request, openai_client, model, provider, verification_config, case): + test_name_base = get_base_test_name(request) + if should_skip_test(verification_config, provider, model, test_name_base): + pytest.skip(f"Skipping {test_name_base} for model {model} on provider {provider} based on config.") + + with pytest.raises(APIError) as e: + openai_client.chat.completions.create( + model=model, + messages=case["input"]["messages"], + stream=False, + tool_choice=case["input"]["tool_choice"] if "tool_choice" in case["input"] else None, + tools=case["input"]["tools"] if "tools" in case["input"] else None, + ) + assert case["output"]["error"]["status_code"] == e.value.status_code + + +@pytest.mark.parametrize( + "case", + chat_completion_test_cases["test_chat_input_validation"]["test_params"]["case"], + ids=case_id_generator, +) +def test_chat_streaming_error_handling(request, openai_client, model, provider, verification_config, case): + test_name_base = get_base_test_name(request) + if should_skip_test(verification_config, provider, model, test_name_base): + pytest.skip(f"Skipping {test_name_base} for model {model} on provider {provider} based on config.") + + with pytest.raises(APIError) as e: + response = openai_client.chat.completions.create( + model=model, + messages=case["input"]["messages"], + stream=True, + tool_choice=case["input"]["tool_choice"] if "tool_choice" in case["input"] else None, + tools=case["input"]["tools"] if "tools" in case["input"] else None, + ) + for _chunk in response: + pass + assert str(case["output"]["error"]["status_code"]) in e.value.message + + @pytest.mark.parametrize( "case", chat_completion_test_cases["test_chat_image"]["test_params"]["case"], @@ -272,7 +338,6 @@ def test_chat_non_streaming_tool_choice_required(request, openai_client, model, tool_choice="required", # Force tool call stream=False, ) - print(response) assert response.choices[0].message.role == "assistant" assert len(response.choices[0].message.tool_calls) > 0, "Expected tool call when 
tool_choice='required'" @@ -532,6 +597,86 @@ def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, p ) +@pytest.mark.parametrize("stream", [False, True], ids=["stream=False", "stream=True"]) +def test_chat_multi_turn_multiple_images( + request, openai_client, model, provider, verification_config, multi_image_data, stream +): + test_name_base = get_base_test_name(request) + if should_skip_test(verification_config, provider, model, test_name_base): + pytest.skip(f"Skipping {test_name_base} for model {model} on provider {provider} based on config.") + + messages_turn1 = [ + { + "role": "user", + "content": [ + { + "type": "image_url", + "image_url": { + "url": multi_image_data[0], + }, + }, + { + "type": "image_url", + "image_url": { + "url": multi_image_data[1], + }, + }, + { + "type": "text", + "text": "What furniture is in the first image that is not in the second image?", + }, + ], + }, + ] + + # First API call + response1 = openai_client.chat.completions.create( + model=model, + messages=messages_turn1, + stream=stream, + ) + if stream: + message_content1 = "" + for chunk in response1: + message_content1 += chunk.choices[0].delta.content or "" + else: + message_content1 = response1.choices[0].message.content + assert len(message_content1) > 0 + assert any(expected in message_content1.lower().strip() for expected in {"chair", "table"}), message_content1 + + # Prepare messages for the second turn + messages_turn2 = messages_turn1 + [ + {"role": "assistant", "content": message_content1}, + { + "role": "user", + "content": [ + { + "type": "image_url", + "image_url": { + "url": multi_image_data[2], + }, + }, + {"type": "text", "text": "What is in this image that is also in the first image?"}, + ], + }, + ] + + # Second API call + response2 = openai_client.chat.completions.create( + model=model, + messages=messages_turn2, + stream=stream, + ) + if stream: + message_content2 = "" + for chunk in response2: + message_content2 += chunk.choices[0].delta.content or "" + else: + message_content2 = response2.choices[0].message.content + assert len(message_content2) > 0 + assert any(expected in message_content2.lower().strip() for expected in {"bed"}), message_content2 + + # --- Helper functions (structured output validation) --- diff --git a/tests/verifications/test_results/fireworks.json b/tests/verifications/test_results/fireworks.json index 1fb6cb1b4..ef5cf142e 100644 --- a/tests/verifications/test_results/fireworks.json +++ b/tests/verifications/test_results/fireworks.json @@ -1,15 +1,15 @@ { - "created": 1744679294.344288, - "duration": 243.49469900131226, + "created": 1744918448.686489, + "duration": 254.68238854408264, "exitcode": 1, - "root": "/Users/erichuang/projects/llama-stack", + "root": "/home/erichuang/llama-stack", "environment": {}, "summary": { - "passed": 36, - "skipped": 2, + "passed": 40, + "skipped": 4, "failed": 40, - "total": 78, - "collected": 78 + "total": 84, + "collected": 84 }, "collectors": [ { @@ -29,392 +29,422 @@ { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[accounts/fireworks/models/llama-v3p3-70b-instruct-earth]", "type": "Function", - "lineno": 74 + "lineno": 95 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[accounts/fireworks/models/llama-v3p3-70b-instruct-saturn]", "type": "Function", - "lineno": 74 + "lineno": 95 }, { "nodeid": 
"tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[accounts/fireworks/models/llama4-scout-instruct-basic-earth]", "type": "Function", - "lineno": 74 + "lineno": 95 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[accounts/fireworks/models/llama4-scout-instruct-basic-saturn]", "type": "Function", - "lineno": 74 + "lineno": 95 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[accounts/fireworks/models/llama4-maverick-instruct-basic-earth]", "type": "Function", - "lineno": 74 + "lineno": 95 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[accounts/fireworks/models/llama4-maverick-instruct-basic-saturn]", "type": "Function", - "lineno": 74 + "lineno": 95 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[accounts/fireworks/models/llama-v3p3-70b-instruct-earth]", "type": "Function", - "lineno": 93 + "lineno": 114 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[accounts/fireworks/models/llama-v3p3-70b-instruct-saturn]", "type": "Function", - "lineno": 93 + "lineno": 114 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[accounts/fireworks/models/llama4-scout-instruct-basic-earth]", "type": "Function", - "lineno": 93 + "lineno": 114 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[accounts/fireworks/models/llama4-scout-instruct-basic-saturn]", "type": "Function", - "lineno": 93 + "lineno": 114 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[accounts/fireworks/models/llama4-maverick-instruct-basic-earth]", "type": "Function", - "lineno": 93 + "lineno": 114 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[accounts/fireworks/models/llama4-maverick-instruct-basic-saturn]", "type": "Function", - "lineno": 93 + "lineno": 114 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]", "type": "Function", - "lineno": 117 + "lineno": 138 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[accounts/fireworks/models/llama4-scout-instruct-basic-case0]", "type": "Function", - "lineno": 117 + "lineno": 138 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]", "type": "Function", - "lineno": 117 + "lineno": 138 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]", "type": "Function", - "lineno": 136 + "lineno": 157 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[accounts/fireworks/models/llama4-scout-instruct-basic-case0]", "type": "Function", - "lineno": 136 + "lineno": 157 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]", "type": "Function", - "lineno": 136 + "lineno": 157 }, { "nodeid": 
"tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[accounts/fireworks/models/llama-v3p3-70b-instruct-calendar]", "type": "Function", - "lineno": 160 + "lineno": 181 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[accounts/fireworks/models/llama-v3p3-70b-instruct-math]", "type": "Function", - "lineno": 160 + "lineno": 181 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[accounts/fireworks/models/llama4-scout-instruct-basic-calendar]", "type": "Function", - "lineno": 160 + "lineno": 181 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[accounts/fireworks/models/llama4-scout-instruct-basic-math]", "type": "Function", - "lineno": 160 + "lineno": 181 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[accounts/fireworks/models/llama4-maverick-instruct-basic-calendar]", "type": "Function", - "lineno": 160 + "lineno": 181 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[accounts/fireworks/models/llama4-maverick-instruct-basic-math]", "type": "Function", - "lineno": 160 + "lineno": 181 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[accounts/fireworks/models/llama-v3p3-70b-instruct-calendar]", "type": "Function", - "lineno": 183 + "lineno": 204 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[accounts/fireworks/models/llama-v3p3-70b-instruct-math]", "type": "Function", - "lineno": 183 + "lineno": 204 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[accounts/fireworks/models/llama4-scout-instruct-basic-calendar]", "type": "Function", - "lineno": 183 + "lineno": 204 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[accounts/fireworks/models/llama4-scout-instruct-basic-math]", "type": "Function", - "lineno": 183 + "lineno": 204 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[accounts/fireworks/models/llama4-maverick-instruct-basic-calendar]", "type": "Function", - "lineno": 183 + "lineno": 204 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[accounts/fireworks/models/llama4-maverick-instruct-basic-math]", "type": "Function", - "lineno": 183 + "lineno": 204 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]", "type": "Function", - "lineno": 205 + "lineno": 226 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-case0]", "type": "Function", - "lineno": 205 + "lineno": 226 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]", "type": "Function", - "lineno": 205 + "lineno": 226 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]", 
"type": "Function", - "lineno": 229 + "lineno": 250 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-case0]", "type": "Function", - "lineno": 229 + "lineno": 250 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]", "type": "Function", - "lineno": 229 + "lineno": 250 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_required[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]", "type": "Function", - "lineno": 257 + "lineno": 278 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_required[accounts/fireworks/models/llama4-scout-instruct-basic-case0]", "type": "Function", - "lineno": 257 + "lineno": 278 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_required[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]", "type": "Function", - "lineno": 257 + "lineno": 278 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_required[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]", "type": "Function", - "lineno": 281 + "lineno": 302 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_required[accounts/fireworks/models/llama4-scout-instruct-basic-case0]", "type": "Function", - "lineno": 281 + "lineno": 302 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_required[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]", "type": "Function", - "lineno": 281 + "lineno": 302 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_none[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]", "type": "Function", - "lineno": 308 + "lineno": 329 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_none[accounts/fireworks/models/llama4-scout-instruct-basic-case0]", "type": "Function", - "lineno": 308 + "lineno": 329 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_none[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]", "type": "Function", - "lineno": 308 + "lineno": 329 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_none[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]", "type": "Function", - "lineno": 331 + "lineno": 352 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_none[accounts/fireworks/models/llama4-scout-instruct-basic-case0]", "type": "Function", - "lineno": 331 + "lineno": 352 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_none[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]", "type": "Function", - "lineno": 331 + "lineno": 352 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-text_then_weather_tool]", "type": "Function", - "lineno": 359 + "lineno": 380 }, { "nodeid": 
"tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-weather_tool_then_text]", "type": "Function", - "lineno": 359 + "lineno": 380 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-add_product_tool]", "type": "Function", - "lineno": 359 + "lineno": 380 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-get_then_create_event_tool]", "type": "Function", - "lineno": 359 + "lineno": 380 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-compare_monthly_expense_tool]", "type": "Function", - "lineno": 359 + "lineno": 380 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-text_then_weather_tool]", "type": "Function", - "lineno": 359 + "lineno": 380 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-weather_tool_then_text]", "type": "Function", - "lineno": 359 + "lineno": 380 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-add_product_tool]", "type": "Function", - "lineno": 359 + "lineno": 380 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-get_then_create_event_tool]", "type": "Function", - "lineno": 359 + "lineno": 380 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-compare_monthly_expense_tool]", "type": "Function", - "lineno": 359 + "lineno": 380 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-text_then_weather_tool]", "type": "Function", - "lineno": 359 + "lineno": 380 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-weather_tool_then_text]", "type": "Function", - "lineno": 359 + "lineno": 380 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-add_product_tool]", "type": "Function", - "lineno": 359 + "lineno": 380 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-get_then_create_event_tool]", "type": "Function", - "lineno": 359 + "lineno": 380 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-compare_monthly_expense_tool]", "type": "Function", - "lineno": 359 + "lineno": 380 }, { "nodeid": 
"tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-text_then_weather_tool]", "type": "Function", - "lineno": 450 + "lineno": 471 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-weather_tool_then_text]", "type": "Function", - "lineno": 450 + "lineno": 471 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-add_product_tool]", "type": "Function", - "lineno": 450 + "lineno": 471 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-get_then_create_event_tool]", "type": "Function", - "lineno": 450 + "lineno": 471 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-compare_monthly_expense_tool]", "type": "Function", - "lineno": 450 + "lineno": 471 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-text_then_weather_tool]", "type": "Function", - "lineno": 450 + "lineno": 471 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-weather_tool_then_text]", "type": "Function", - "lineno": 450 + "lineno": 471 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-add_product_tool]", "type": "Function", - "lineno": 450 + "lineno": 471 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-get_then_create_event_tool]", "type": "Function", - "lineno": 450 + "lineno": 471 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-compare_monthly_expense_tool]", "type": "Function", - "lineno": 450 + "lineno": 471 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-text_then_weather_tool]", "type": "Function", - "lineno": 450 + "lineno": 471 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-weather_tool_then_text]", "type": "Function", - "lineno": 450 + "lineno": 471 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-add_product_tool]", "type": "Function", - "lineno": 450 + "lineno": 471 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-get_then_create_event_tool]", "type": "Function", - "lineno": 450 + "lineno": 471 }, { "nodeid": 
"tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-compare_monthly_expense_tool]", "type": "Function", - "lineno": 450 + "lineno": 471 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_multi_turn_multiple_images[accounts/fireworks/models/llama-v3p3-70b-instruct-stream=False]", + "type": "Function", + "lineno": 554 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_multi_turn_multiple_images[accounts/fireworks/models/llama-v3p3-70b-instruct-stream=True]", + "type": "Function", + "lineno": 554 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_multi_turn_multiple_images[accounts/fireworks/models/llama4-scout-instruct-basic-stream=False]", + "type": "Function", + "lineno": 554 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_multi_turn_multiple_images[accounts/fireworks/models/llama4-scout-instruct-basic-stream=True]", + "type": "Function", + "lineno": 554 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_multi_turn_multiple_images[accounts/fireworks/models/llama4-maverick-instruct-basic-stream=False]", + "type": "Function", + "lineno": 554 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_multi_turn_multiple_images[accounts/fireworks/models/llama4-maverick-instruct-basic-stream=True]", + "type": "Function", + "lineno": 554 } ] } @@ -422,7 +452,7 @@ "tests": [ { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[accounts/fireworks/models/llama-v3p3-70b-instruct-earth]", - "lineno": 74, + "lineno": 95, "outcome": "passed", "keywords": [ "test_chat_non_streaming_basic[accounts/fireworks/models/llama-v3p3-70b-instruct-earth]", @@ -441,21 +471,21 @@ "case_id": "earth" }, "setup": { - "duration": 0.2540216660127044, + "duration": 0.13845239393413067, "outcome": "passed" }, "call": { - "duration": 0.6861197501420975, + "duration": 1.3300942620262504, "outcome": "passed" }, "teardown": { - "duration": 0.00015208404511213303, + "duration": 0.00025453977286815643, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[accounts/fireworks/models/llama-v3p3-70b-instruct-saturn]", - "lineno": 74, + "lineno": 95, "outcome": "passed", "keywords": [ "test_chat_non_streaming_basic[accounts/fireworks/models/llama-v3p3-70b-instruct-saturn]", @@ -474,21 +504,21 @@ "case_id": "saturn" }, "setup": { - "duration": 0.006722707999870181, + "duration": 0.0806605163961649, "outcome": "passed" }, "call": { - "duration": 0.5997684169560671, + "duration": 0.6202042903751135, "outcome": "passed" }, "teardown": { - "duration": 0.0002298750914633274, + "duration": 0.00026358477771282196, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[accounts/fireworks/models/llama4-scout-instruct-basic-earth]", - "lineno": 74, + "lineno": 95, "outcome": "passed", "keywords": [ "test_chat_non_streaming_basic[accounts/fireworks/models/llama4-scout-instruct-basic-earth]", @@ -507,21 +537,21 @@ "case_id": "earth" }, "setup": { - "duration": 0.015468083089217544, + "duration": 0.07190297450870275, "outcome": "passed" }, "call": { - "duration": 0.4625723329372704, + "duration": 0.7458920907229185, "outcome": "passed" }, 
"teardown": { - "duration": 0.0003302919212728739, + "duration": 0.00024067144840955734, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[accounts/fireworks/models/llama4-scout-instruct-basic-saturn]", - "lineno": 74, + "lineno": 95, "outcome": "passed", "keywords": [ "test_chat_non_streaming_basic[accounts/fireworks/models/llama4-scout-instruct-basic-saturn]", @@ -540,21 +570,21 @@ "case_id": "saturn" }, "setup": { - "duration": 0.014780875062569976, + "duration": 0.07551384158432484, "outcome": "passed" }, "call": { - "duration": 0.4616922920104116, + "duration": 0.6140249809250236, "outcome": "passed" }, "teardown": { - "duration": 0.0004110001027584076, + "duration": 0.00024476367980241776, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[accounts/fireworks/models/llama4-maverick-instruct-basic-earth]", - "lineno": 74, + "lineno": 95, "outcome": "passed", "keywords": [ "test_chat_non_streaming_basic[accounts/fireworks/models/llama4-maverick-instruct-basic-earth]", @@ -573,21 +603,21 @@ "case_id": "earth" }, "setup": { - "duration": 0.016551292035728693, + "duration": 0.07434738799929619, "outcome": "passed" }, "call": { - "duration": 0.9366653750184923, + "duration": 1.6738943997770548, "outcome": "passed" }, "teardown": { - "duration": 0.00045104208402335644, + "duration": 0.000227426178753376, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[accounts/fireworks/models/llama4-maverick-instruct-basic-saturn]", - "lineno": 74, + "lineno": 95, "outcome": "passed", "keywords": [ "test_chat_non_streaming_basic[accounts/fireworks/models/llama4-maverick-instruct-basic-saturn]", @@ -606,21 +636,21 @@ "case_id": "saturn" }, "setup": { - "duration": 0.043513541808351874, + "duration": 0.07130288146436214, "outcome": "passed" }, "call": { - "duration": 0.5119727500714362, + "duration": 1.337895905598998, "outcome": "passed" }, "teardown": { - "duration": 0.00016754190437495708, + "duration": 0.00028038304299116135, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[accounts/fireworks/models/llama-v3p3-70b-instruct-earth]", - "lineno": 93, + "lineno": 114, "outcome": "passed", "keywords": [ "test_chat_streaming_basic[accounts/fireworks/models/llama-v3p3-70b-instruct-earth]", @@ -639,21 +669,21 @@ "case_id": "earth" }, "setup": { - "duration": 0.008419709047302604, + "duration": 0.0727478675544262, "outcome": "passed" }, "call": { - "duration": 0.7933078748174012, + "duration": 0.7670011632144451, "outcome": "passed" }, "teardown": { - "duration": 0.00016583292745053768, + "duration": 0.00023174844682216644, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[accounts/fireworks/models/llama-v3p3-70b-instruct-saturn]", - "lineno": 93, + "lineno": 114, "outcome": "passed", "keywords": [ "test_chat_streaming_basic[accounts/fireworks/models/llama-v3p3-70b-instruct-saturn]", @@ -672,21 +702,21 @@ "case_id": "saturn" }, "setup": { - "duration": 0.013550583040341735, + "duration": 0.07163545861840248, "outcome": "passed" }, "call": { - "duration": 0.6633435001131147, + "duration": 0.7582714259624481, "outcome": "passed" }, "teardown": { - "duration": 0.00023925001733005047, + "duration": 0.00028524454683065414, "outcome": "passed" } }, { "nodeid": 
"tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[accounts/fireworks/models/llama4-scout-instruct-basic-earth]", - "lineno": 93, + "lineno": 114, "outcome": "passed", "keywords": [ "test_chat_streaming_basic[accounts/fireworks/models/llama4-scout-instruct-basic-earth]", @@ -705,21 +735,21 @@ "case_id": "earth" }, "setup": { - "duration": 0.007293834118172526, + "duration": 0.08122281823307276, "outcome": "passed" }, "call": { - "duration": 0.5193503750488162, + "duration": 0.6061851140111685, "outcome": "passed" }, "teardown": { - "duration": 0.00018516601994633675, + "duration": 0.0002497304230928421, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[accounts/fireworks/models/llama4-scout-instruct-basic-saturn]", - "lineno": 93, + "lineno": 114, "outcome": "passed", "keywords": [ "test_chat_streaming_basic[accounts/fireworks/models/llama4-scout-instruct-basic-saturn]", @@ -738,21 +768,21 @@ "case_id": "saturn" }, "setup": { - "duration": 0.009030540939420462, + "duration": 0.07185561209917068, "outcome": "passed" }, "call": { - "duration": 0.4338789170142263, + "duration": 0.7516075978055596, "outcome": "passed" }, "teardown": { - "duration": 0.0004670829512178898, + "duration": 0.00026526860892772675, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[accounts/fireworks/models/llama4-maverick-instruct-basic-earth]", - "lineno": 93, + "lineno": 114, "outcome": "passed", "keywords": [ "test_chat_streaming_basic[accounts/fireworks/models/llama4-maverick-instruct-basic-earth]", @@ -771,21 +801,21 @@ "case_id": "earth" }, "setup": { - "duration": 0.01854533306322992, + "duration": 0.07012896798551083, "outcome": "passed" }, "call": { - "duration": 1.0042304168455303, + "duration": 1.8946502823382616, "outcome": "passed" }, "teardown": { - "duration": 0.0004844998475164175, + "duration": 0.0002452842891216278, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[accounts/fireworks/models/llama4-maverick-instruct-basic-saturn]", - "lineno": 93, + "lineno": 114, "outcome": "passed", "keywords": [ "test_chat_streaming_basic[accounts/fireworks/models/llama4-maverick-instruct-basic-saturn]", @@ -804,21 +834,21 @@ "case_id": "saturn" }, "setup": { - "duration": 0.018001709133386612, + "duration": 0.06955648958683014, "outcome": "passed" }, "call": { - "duration": 0.5567380839493126, + "duration": 1.0446623722091317, "outcome": "passed" }, "teardown": { - "duration": 0.00015412503853440285, + "duration": 0.00023738667368888855, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]", - "lineno": 117, + "lineno": 138, "outcome": "skipped", "keywords": [ "test_chat_non_streaming_image[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]", @@ -837,22 +867,22 @@ "case_id": "case0" }, "setup": { - "duration": 0.008420375175774097, + "duration": 0.07077906839549541, "outcome": "passed" }, "call": { - "duration": 0.00015591713599860668, + "duration": 0.00021365191787481308, "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py', 126, 'Skipped: Skipping test_chat_non_streaming_image for model accounts/fireworks/models/llama-v3p3-70b-instruct on provider fireworks based 
on config.')" + "longrepr": "('/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py', 147, 'Skipped: Skipping test_chat_non_streaming_image for model accounts/fireworks/models/llama-v3p3-70b-instruct on provider fireworks based on config.')" }, "teardown": { - "duration": 0.0001371251419186592, + "duration": 0.00018982868641614914, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[accounts/fireworks/models/llama4-scout-instruct-basic-case0]", - "lineno": 117, + "lineno": 138, "outcome": "passed", "keywords": [ "test_chat_non_streaming_image[accounts/fireworks/models/llama4-scout-instruct-basic-case0]", @@ -871,21 +901,21 @@ "case_id": "case0" }, "setup": { - "duration": 0.00672045792452991, + "duration": 0.07118859142065048, "outcome": "passed" }, "call": { - "duration": 1.790064417058602, + "duration": 4.20654855389148, "outcome": "passed" }, "teardown": { - "duration": 0.0004657919052988291, + "duration": 0.00023640412837266922, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]", - "lineno": 117, + "lineno": 138, "outcome": "passed", "keywords": [ "test_chat_non_streaming_image[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]", @@ -904,21 +934,21 @@ "case_id": "case0" }, "setup": { - "duration": 0.015534916892647743, + "duration": 0.07351029943674803, "outcome": "passed" }, "call": { - "duration": 3.2250108749140054, + "duration": 4.875292049720883, "outcome": "passed" }, "teardown": { - "duration": 0.00038420804776251316, + "duration": 0.0002571679651737213, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]", - "lineno": 136, + "lineno": 157, "outcome": "skipped", "keywords": [ "test_chat_streaming_image[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]", @@ -937,22 +967,22 @@ "case_id": "case0" }, "setup": { - "duration": 0.03246337501332164, + "duration": 0.07474396284669638, "outcome": "passed" }, "call": { - "duration": 0.0005176670383661985, + "duration": 0.0002510417252779007, "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py', 145, 'Skipped: Skipping test_chat_streaming_image for model accounts/fireworks/models/llama-v3p3-70b-instruct on provider fireworks based on config.')" + "longrepr": "('/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py', 166, 'Skipped: Skipping test_chat_streaming_image for model accounts/fireworks/models/llama-v3p3-70b-instruct on provider fireworks based on config.')" }, "teardown": { - "duration": 0.0002715419977903366, + "duration": 0.00020200759172439575, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[accounts/fireworks/models/llama4-scout-instruct-basic-case0]", - "lineno": 136, + "lineno": 157, "outcome": "passed", "keywords": [ "test_chat_streaming_image[accounts/fireworks/models/llama4-scout-instruct-basic-case0]", @@ -971,21 +1001,21 @@ "case_id": "case0" }, "setup": { - "duration": 0.12475762516260147, + "duration": 0.07380561903119087, "outcome": "passed" }, "call": { - "duration": 4.934706958010793, + "duration": 2.0082657346501946, "outcome": "passed" }, "teardown": { - 
"duration": 0.00027604191564023495, + "duration": 0.0002522030845284462, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]", - "lineno": 136, + "lineno": 157, "outcome": "passed", "keywords": [ "test_chat_streaming_image[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]", @@ -1004,21 +1034,21 @@ "case_id": "case0" }, "setup": { - "duration": 0.01025745808146894, + "duration": 0.07040839456021786, "outcome": "passed" }, "call": { - "duration": 3.5653172079473734, + "duration": 4.871666649356484, "outcome": "passed" }, "teardown": { - "duration": 0.0005323749501258135, + "duration": 0.0002490682527422905, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[accounts/fireworks/models/llama-v3p3-70b-instruct-calendar]", - "lineno": 160, + "lineno": 181, "outcome": "passed", "keywords": [ "test_chat_non_streaming_structured_output[accounts/fireworks/models/llama-v3p3-70b-instruct-calendar]", @@ -1037,21 +1067,21 @@ "case_id": "calendar" }, "setup": { - "duration": 0.0553184999153018, + "duration": 0.07167178671807051, "outcome": "passed" }, "call": { - "duration": 1.366144834086299, + "duration": 0.9903911761939526, "outcome": "passed" }, "teardown": { - "duration": 0.00042316620238125324, + "duration": 0.0002704570069909096, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[accounts/fireworks/models/llama-v3p3-70b-instruct-math]", - "lineno": 160, + "lineno": 181, "outcome": "passed", "keywords": [ "test_chat_non_streaming_structured_output[accounts/fireworks/models/llama-v3p3-70b-instruct-math]", @@ -1070,21 +1100,21 @@ "case_id": "math" }, "setup": { - "duration": 0.06981937494128942, + "duration": 0.07073096185922623, "outcome": "passed" }, "call": { - "duration": 2.829931082902476, + "duration": 3.9858130905777216, "outcome": "passed" }, "teardown": { - "duration": 0.0003029161598533392, + "duration": 0.00024665892124176025, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[accounts/fireworks/models/llama4-scout-instruct-basic-calendar]", - "lineno": 160, + "lineno": 181, "outcome": "passed", "keywords": [ "test_chat_non_streaming_structured_output[accounts/fireworks/models/llama4-scout-instruct-basic-calendar]", @@ -1103,21 +1133,21 @@ "case_id": "calendar" }, "setup": { - "duration": 0.0244335001334548, + "duration": 0.07138721086084843, "outcome": "passed" }, "call": { - "duration": 0.7541109579615295, + "duration": 1.1312237158417702, "outcome": "passed" }, "teardown": { - "duration": 0.0004666249733418226, + "duration": 0.00027671270072460175, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[accounts/fireworks/models/llama4-scout-instruct-basic-math]", - "lineno": 160, + "lineno": 181, "outcome": "passed", "keywords": [ "test_chat_non_streaming_structured_output[accounts/fireworks/models/llama4-scout-instruct-basic-math]", @@ -1136,21 +1166,21 @@ "case_id": "math" }, "setup": { - "duration": 0.016700832871720195, + "duration": 0.08204951789230108, "outcome": "passed" }, "call": { - "duration": 2.208378749899566, + "duration": 2.7500197598710656, "outcome": "passed" }, "teardown": { - "duration": 
0.00016137491911649704, + "duration": 0.00024303700774908066, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[accounts/fireworks/models/llama4-maverick-instruct-basic-calendar]", - "lineno": 160, + "lineno": 181, "outcome": "passed", "keywords": [ "test_chat_non_streaming_structured_output[accounts/fireworks/models/llama4-maverick-instruct-basic-calendar]", @@ -1169,21 +1199,21 @@ "case_id": "calendar" }, "setup": { - "duration": 0.006982124876230955, + "duration": 0.07405088562518358, "outcome": "passed" }, "call": { - "duration": 0.6431179158389568, + "duration": 1.238045932725072, "outcome": "passed" }, "teardown": { - "duration": 0.00033412501215934753, + "duration": 0.00024984683841466904, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[accounts/fireworks/models/llama4-maverick-instruct-basic-math]", - "lineno": 160, + "lineno": 181, "outcome": "passed", "keywords": [ "test_chat_non_streaming_structured_output[accounts/fireworks/models/llama4-maverick-instruct-basic-math]", @@ -1202,21 +1232,21 @@ "case_id": "math" }, "setup": { - "duration": 0.015676999930292368, + "duration": 0.07009329181164503, "outcome": "passed" }, "call": { - "duration": 4.404933541081846, + "duration": 3.55908961314708, "outcome": "passed" }, "teardown": { - "duration": 0.0002617498394101858, + "duration": 0.00026627909392118454, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[accounts/fireworks/models/llama-v3p3-70b-instruct-calendar]", - "lineno": 183, + "lineno": 204, "outcome": "passed", "keywords": [ "test_chat_streaming_structured_output[accounts/fireworks/models/llama-v3p3-70b-instruct-calendar]", @@ -1235,21 +1265,21 @@ "case_id": "calendar" }, "setup": { - "duration": 0.07572970795445144, + "duration": 0.07596437353640795, "outcome": "passed" }, "call": { - "duration": 1.1367775409016758, + "duration": 1.0093460381031036, "outcome": "passed" }, "teardown": { - "duration": 0.0006681671366095543, + "duration": 0.0002171723172068596, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[accounts/fireworks/models/llama-v3p3-70b-instruct-math]", - "lineno": 183, + "lineno": 204, "outcome": "passed", "keywords": [ "test_chat_streaming_structured_output[accounts/fireworks/models/llama-v3p3-70b-instruct-math]", @@ -1268,21 +1298,21 @@ "case_id": "math" }, "setup": { - "duration": 0.028525790898129344, + "duration": 0.06995268166065216, "outcome": "passed" }, "call": { - "duration": 2.1424834579229355, + "duration": 2.617857910692692, "outcome": "passed" }, "teardown": { - "duration": 0.0003642500378191471, + "duration": 0.00024063047021627426, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[accounts/fireworks/models/llama4-scout-instruct-basic-calendar]", - "lineno": 183, + "lineno": 204, "outcome": "passed", "keywords": [ "test_chat_streaming_structured_output[accounts/fireworks/models/llama4-scout-instruct-basic-calendar]", @@ -1301,21 +1331,21 @@ "case_id": "calendar" }, "setup": { - "duration": 0.0146782910451293, + "duration": 0.0729895168915391, "outcome": "passed" }, "call": { - "duration": 15.13383225002326, + "duration": 0.9500969992950559, "outcome": "passed" }, "teardown": { - 
"duration": 0.00045950012281537056, + "duration": 0.000257221981883049, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[accounts/fireworks/models/llama4-scout-instruct-basic-math]", - "lineno": 183, + "lineno": 204, "outcome": "passed", "keywords": [ "test_chat_streaming_structured_output[accounts/fireworks/models/llama4-scout-instruct-basic-math]", @@ -1334,21 +1364,21 @@ "case_id": "math" }, "setup": { - "duration": 0.01714799995534122, + "duration": 0.07070339564234018, "outcome": "passed" }, "call": { - "duration": 10.714752790983766, + "duration": 2.6405998673290014, "outcome": "passed" }, "teardown": { - "duration": 0.00027029216289520264, + "duration": 0.0002397783100605011, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[accounts/fireworks/models/llama4-maverick-instruct-basic-calendar]", - "lineno": 183, + "lineno": 204, "outcome": "passed", "keywords": [ "test_chat_streaming_structured_output[accounts/fireworks/models/llama4-maverick-instruct-basic-calendar]", @@ -1367,21 +1397,21 @@ "case_id": "calendar" }, "setup": { - "duration": 0.010765291983261704, + "duration": 0.07140882592648268, "outcome": "passed" }, "call": { - "duration": 0.6682700838427991, + "duration": 0.7515814090147614, "outcome": "passed" }, "teardown": { - "duration": 0.00015808409079909325, + "duration": 0.0002773841843008995, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[accounts/fireworks/models/llama4-maverick-instruct-basic-math]", - "lineno": 183, + "lineno": 204, "outcome": "passed", "keywords": [ "test_chat_streaming_structured_output[accounts/fireworks/models/llama4-maverick-instruct-basic-math]", @@ -1400,21 +1430,21 @@ "case_id": "math" }, "setup": { - "duration": 0.0071080829948186874, + "duration": 0.07105506956577301, "outcome": "passed" }, "call": { - "duration": 1.9725822920445353, + "duration": 3.091084435582161, "outcome": "passed" }, "teardown": { - "duration": 0.0004201668780297041, + "duration": 0.0002588946372270584, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]", - "lineno": 205, + "lineno": 226, "outcome": "failed", "keywords": [ "test_chat_non_streaming_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]", @@ -1433,34 +1463,34 @@ "case_id": "case0" }, "setup": { - "duration": 0.013940333155915141, + "duration": 0.07215945608913898, "outcome": "passed" }, "call": { - "duration": 0.5732313331682235, + "duration": 1.13668860681355, "outcome": "failed", "crash": { - "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 224, + "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", + "lineno": 245, "message": "TypeError: object of type 'NoneType' has no len()" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 224, + "lineno": 245, "message": "TypeError" } ], - "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama-v3p3-70b-instruct'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 
'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_non_streaming_tool_calling(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n stream=False,\n )\n \n assert response.choices[0].message.role == \"assistant\"\n> assert len(response.choices[0].message.tool_calls) > 0\nE TypeError: object of type 'NoneType' has no len()\n\ntests/verifications/openai_api/test_chat_completion.py:224: TypeError" + "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama-v3p3-70b-instruct'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_non_streaming_tool_calling(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n stream=False,\n )\n \n assert response.choices[0].message.role == \"assistant\"\n> assert len(response.choices[0].message.tool_calls) > 0\nE TypeError: object of type 'NoneType' has no len()\n\ntests/verifications/openai_api/test_chat_completion.py:245: TypeError" }, "teardown": { - "duration": 0.00022962503135204315, + "duration": 0.0003727646544575691, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-case0]", - "lineno": 205, + "lineno": 226, "outcome": "failed", "keywords": [ "test_chat_non_streaming_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-case0]", @@ -1479,34 +1509,34 @@ "case_id": "case0" }, "setup": { - "duration": 0.006374292075634003, + "duration": 0.07085339725017548, "outcome": "passed" }, "call": { - "duration": 7.2776273330673575, + "duration": 6.564900263212621, "outcome": "failed", "crash": { - "path": 
"/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 224, + "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", + "lineno": 245, "message": "TypeError: object of type 'NoneType' has no len()" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 224, + "lineno": 245, "message": "TypeError" } ], - "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-scout-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_non_streaming_tool_calling(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n stream=False,\n )\n \n assert response.choices[0].message.role == \"assistant\"\n> assert len(response.choices[0].message.tool_calls) > 0\nE TypeError: object of type 'NoneType' has no len()\n\ntests/verifications/openai_api/test_chat_completion.py:224: TypeError" + "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-scout-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_non_streaming_tool_calling(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n stream=False,\n )\n \n assert response.choices[0].message.role == \"assistant\"\n> assert len(response.choices[0].message.tool_calls) > 0\nE TypeError: object of type 'NoneType' has no len()\n\ntests/verifications/openai_api/test_chat_completion.py:245: TypeError" }, 
"teardown": { - "duration": 0.0004100420046597719, + "duration": 0.00036074407398700714, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]", - "lineno": 205, + "lineno": 226, "outcome": "failed", "keywords": [ "test_chat_non_streaming_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]", @@ -1525,34 +1555,34 @@ "case_id": "case0" }, "setup": { - "duration": 0.012761292047798634, + "duration": 0.07105840742588043, "outcome": "passed" }, "call": { - "duration": 0.8920639578718692, + "duration": 1.9664474660530686, "outcome": "failed", "crash": { - "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 224, + "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", + "lineno": 245, "message": "TypeError: object of type 'NoneType' has no len()" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 224, + "lineno": 245, "message": "TypeError" } ], - "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-maverick-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_non_streaming_tool_calling(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n stream=False,\n )\n \n assert response.choices[0].message.role == \"assistant\"\n> assert len(response.choices[0].message.tool_calls) > 0\nE TypeError: object of type 'NoneType' has no len()\n\ntests/verifications/openai_api/test_chat_completion.py:224: TypeError" + "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-maverick-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def 
test_chat_non_streaming_tool_calling(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n stream=False,\n )\n \n assert response.choices[0].message.role == \"assistant\"\n> assert len(response.choices[0].message.tool_calls) > 0\nE TypeError: object of type 'NoneType' has no len()\n\ntests/verifications/openai_api/test_chat_completion.py:245: TypeError" }, "teardown": { - "duration": 0.0004124999977648258, + "duration": 0.0003125220537185669, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]", - "lineno": 229, + "lineno": 250, "outcome": "failed", "keywords": [ "test_chat_streaming_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]", @@ -1571,34 +1601,34 @@ "case_id": "case0" }, "setup": { - "duration": 0.013205124996602535, + "duration": 0.07491886802017689, "outcome": "passed" }, "call": { - "duration": 1.930448625003919, + "duration": 1.6239055208861828, "outcome": "failed", "crash": { - "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 248, + "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", + "lineno": 269, "message": "assert 0 == 1\n + where 0 = len([])" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 248, + "lineno": 269, "message": "AssertionError" } ], - "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama-v3p3-70b-instruct'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_tool_calling(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n stream = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n stream=True,\n )\n \n _, tool_calls_buffer = _accumulate_streaming_tool_calls(stream)\n> assert len(tool_calls_buffer) == 1\nE assert 0 == 1\nE + where 0 = len([])\n\ntests/verifications/openai_api/test_chat_completion.py:248: AssertionError" + "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama-v3p3-70b-instruct'\nprovider = 
'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_tool_calling(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n stream = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n stream=True,\n )\n \n _, tool_calls_buffer = _accumulate_streaming_tool_calls(stream)\n> assert len(tool_calls_buffer) == 1\nE assert 0 == 1\nE + where 0 = len([])\n\ntests/verifications/openai_api/test_chat_completion.py:269: AssertionError" }, "teardown": { - "duration": 0.0005771249998360872, + "duration": 0.0003996873274445534, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-case0]", - "lineno": 229, + "lineno": 250, "outcome": "failed", "keywords": [ "test_chat_streaming_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-case0]", @@ -1617,34 +1647,34 @@ "case_id": "case0" }, "setup": { - "duration": 0.01408083294518292, + "duration": 0.07084537390619516, "outcome": "passed" }, "call": { - "duration": 10.029349042102695, + "duration": 7.175910825841129, "outcome": "failed", "crash": { - "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 248, + "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", + "lineno": 269, "message": "assert 0 == 1\n + where 0 = len([])" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 248, + "lineno": 269, "message": "AssertionError" } ], - "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-scout-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_tool_calling(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, 
model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n stream = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n stream=True,\n )\n \n _, tool_calls_buffer = _accumulate_streaming_tool_calls(stream)\n> assert len(tool_calls_buffer) == 1\nE assert 0 == 1\nE + where 0 = len([])\n\ntests/verifications/openai_api/test_chat_completion.py:248: AssertionError" + "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-scout-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_tool_calling(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n stream = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n stream=True,\n )\n \n _, tool_calls_buffer = _accumulate_streaming_tool_calls(stream)\n> assert len(tool_calls_buffer) == 1\nE assert 0 == 1\nE + where 0 = len([])\n\ntests/verifications/openai_api/test_chat_completion.py:269: AssertionError" }, "teardown": { - "duration": 0.0004449589177966118, + "duration": 0.0003013862296938896, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]", - "lineno": 229, + "lineno": 250, "outcome": "failed", "keywords": [ "test_chat_streaming_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]", @@ -1663,34 +1693,34 @@ "case_id": "case0" }, "setup": { - "duration": 0.013213291997089982, + "duration": 0.07152015157043934, "outcome": "passed" }, "call": { - "duration": 8.608150291023776, + "duration": 9.749054622836411, "outcome": "failed", "crash": { - "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 248, + "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", + "lineno": 269, "message": "assert 0 == 1\n + where 0 = len([])" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 248, + "lineno": 269, "message": "AssertionError" } ], - "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-maverick-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 
'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_tool_calling(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n stream = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n stream=True,\n )\n \n _, tool_calls_buffer = _accumulate_streaming_tool_calls(stream)\n> assert len(tool_calls_buffer) == 1\nE assert 0 == 1\nE + where 0 = len([])\n\ntests/verifications/openai_api/test_chat_completion.py:248: AssertionError" + "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-maverick-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_tool_calling(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n stream = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n stream=True,\n )\n \n _, tool_calls_buffer = _accumulate_streaming_tool_calls(stream)\n> assert len(tool_calls_buffer) == 1\nE assert 0 == 1\nE + where 0 = len([])\n\ntests/verifications/openai_api/test_chat_completion.py:269: AssertionError" }, "teardown": { - "duration": 0.0005860829260200262, + "duration": 0.0002990690991282463, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_required[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]", - "lineno": 257, + "lineno": 278, "outcome": "passed", "keywords": [ "test_chat_non_streaming_tool_choice_required[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]", @@ -1709,21 +1739,21 @@ "case_id": "case0" }, "setup": { - "duration": 0.01437820796854794, + "duration": 0.07075500208884478, "outcome": "passed" }, "call": { - "duration": 0.7105170420836657, + "duration": 0.9870151281356812, "outcome": "passed" }, "teardown": { - "duration": 0.00017283298075199127, + "duration": 0.00022785458713769913, "outcome": "passed" } }, { 
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_required[accounts/fireworks/models/llama4-scout-instruct-basic-case0]", - "lineno": 257, + "lineno": 278, "outcome": "failed", "keywords": [ "test_chat_non_streaming_tool_choice_required[accounts/fireworks/models/llama4-scout-instruct-basic-case0]", @@ -1742,34 +1772,34 @@ "case_id": "case0" }, "setup": { - "duration": 0.009220415959134698, + "duration": 0.0698307491838932, "outcome": "passed" }, "call": { - "duration": 5.718667333945632, + "duration": 4.061793921515346, "outcome": "failed", "crash": { - "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 277, + "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", + "lineno": 298, "message": "TypeError: object of type 'NoneType' has no len()" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 277, + "lineno": 298, "message": "TypeError" } ], - "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-scout-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"], # Reusing existing case for now\n ids=case_id_generator,\n )\n def test_chat_non_streaming_tool_choice_required(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n tool_choice=\"required\", # Force tool call\n stream=False,\n )\n \n assert response.choices[0].message.role == \"assistant\"\n> assert len(response.choices[0].message.tool_calls) > 0, \"Expected tool call when tool_choice='required'\"\nE TypeError: object of type 'NoneType' has no len()\n\ntests/verifications/openai_api/test_chat_completion.py:277: TypeError" + "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-scout-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"], # Reusing existing case for now\n 
ids=case_id_generator,\n )\n def test_chat_non_streaming_tool_choice_required(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n tool_choice=\"required\", # Force tool call\n stream=False,\n )\n \n assert response.choices[0].message.role == \"assistant\"\n> assert len(response.choices[0].message.tool_calls) > 0, \"Expected tool call when tool_choice='required'\"\nE TypeError: object of type 'NoneType' has no len()\n\ntests/verifications/openai_api/test_chat_completion.py:298: TypeError" }, "teardown": { - "duration": 0.0003282078541815281, + "duration": 0.00028742197901010513, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_required[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]", - "lineno": 257, + "lineno": 278, "outcome": "failed", "keywords": [ "test_chat_non_streaming_tool_choice_required[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]", @@ -1788,34 +1818,34 @@ "case_id": "case0" }, "setup": { - "duration": 0.014709000010043383, + "duration": 0.07069965451955795, "outcome": "passed" }, "call": { - "duration": 1.7260455000214279, + "duration": 24.973835667595267, "outcome": "failed", "crash": { - "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 277, + "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", + "lineno": 298, "message": "TypeError: object of type 'NoneType' has no len()" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 277, + "lineno": 298, "message": "TypeError" } ], - "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-maverick-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"], # Reusing existing case for now\n ids=case_id_generator,\n )\n def test_chat_non_streaming_tool_choice_required(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n tool_choice=\"required\", # Force tool call\n stream=False,\n )\n \n assert response.choices[0].message.role == \"assistant\"\n> assert 
len(response.choices[0].message.tool_calls) > 0, \"Expected tool call when tool_choice='required'\"\nE TypeError: object of type 'NoneType' has no len()\n\ntests/verifications/openai_api/test_chat_completion.py:277: TypeError" + "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-maverick-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"], # Reusing existing case for now\n ids=case_id_generator,\n )\n def test_chat_non_streaming_tool_choice_required(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n tool_choice=\"required\", # Force tool call\n stream=False,\n )\n \n assert response.choices[0].message.role == \"assistant\"\n> assert len(response.choices[0].message.tool_calls) > 0, \"Expected tool call when tool_choice='required'\"\nE TypeError: object of type 'NoneType' has no len()\n\ntests/verifications/openai_api/test_chat_completion.py:298: TypeError" }, "teardown": { - "duration": 0.00022012507542967796, + "duration": 0.00034868158400058746, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_required[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]", - "lineno": 281, + "lineno": 302, "outcome": "passed", "keywords": [ "test_chat_streaming_tool_choice_required[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]", @@ -1834,21 +1864,21 @@ "case_id": "case0" }, "setup": { - "duration": 0.008183792000636458, + "duration": 0.07031871005892754, "outcome": "passed" }, "call": { - "duration": 1.9683502500411123, + "duration": 0.7874777475371957, "outcome": "passed" }, "teardown": { - "duration": 0.0007690000347793102, + "duration": 0.00027067307382822037, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_required[accounts/fireworks/models/llama4-scout-instruct-basic-case0]", - "lineno": 281, + "lineno": 302, "outcome": "failed", "keywords": [ "test_chat_streaming_tool_choice_required[accounts/fireworks/models/llama4-scout-instruct-basic-case0]", @@ -1867,34 +1897,34 @@ "case_id": "case0" }, "setup": { - "duration": 0.014906208030879498, + "duration": 0.07194838207215071, "outcome": "passed" }, "call": { - "duration": 11.76459054206498, + "duration": 5.034253670834005, "outcome": "failed", "crash": { - "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 302, + "path": 
"/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", + "lineno": 323, "message": "AssertionError: Expected tool call when tool_choice='required'\nassert 0 > 0\n + where 0 = len([])" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 302, + "lineno": 323, "message": "AssertionError" } ], - "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-scout-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"], # Reusing existing case for now\n ids=case_id_generator,\n )\n def test_chat_streaming_tool_choice_required(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n stream = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n tool_choice=\"required\", # Force tool call\n stream=True,\n )\n \n _, tool_calls_buffer = _accumulate_streaming_tool_calls(stream)\n \n> assert len(tool_calls_buffer) > 0, \"Expected tool call when tool_choice='required'\"\nE AssertionError: Expected tool call when tool_choice='required'\nE assert 0 > 0\nE + where 0 = len([])\n\ntests/verifications/openai_api/test_chat_completion.py:302: AssertionError" + "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-scout-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"], # Reusing existing case for now\n ids=case_id_generator,\n )\n def test_chat_streaming_tool_choice_required(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n stream = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n tool_choice=\"required\", # Force tool call\n stream=True,\n )\n \n _, tool_calls_buffer = _accumulate_streaming_tool_calls(stream)\n 
\n> assert len(tool_calls_buffer) > 0, \"Expected tool call when tool_choice='required'\"\nE AssertionError: Expected tool call when tool_choice='required'\nE assert 0 > 0\nE + where 0 = len([])\n\ntests/verifications/openai_api/test_chat_completion.py:323: AssertionError" }, "teardown": { - "duration": 0.0003086249344050884, + "duration": 0.00030618347227573395, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_required[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]", - "lineno": 281, + "lineno": 302, "outcome": "failed", "keywords": [ "test_chat_streaming_tool_choice_required[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]", @@ -1913,34 +1943,34 @@ "case_id": "case0" }, "setup": { - "duration": 0.021144041791558266, + "duration": 0.07107715681195259, "outcome": "passed" }, "call": { - "duration": 2.4300453749019653, + "duration": 6.841737313196063, "outcome": "failed", "crash": { - "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 302, + "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", + "lineno": 323, "message": "AssertionError: Expected tool call when tool_choice='required'\nassert 0 > 0\n + where 0 = len([])" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 302, + "lineno": 323, "message": "AssertionError" } ], - "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-maverick-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"], # Reusing existing case for now\n ids=case_id_generator,\n )\n def test_chat_streaming_tool_choice_required(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n stream = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n tool_choice=\"required\", # Force tool call\n stream=True,\n )\n \n _, tool_calls_buffer = _accumulate_streaming_tool_calls(stream)\n \n> assert len(tool_calls_buffer) > 0, \"Expected tool call when tool_choice='required'\"\nE AssertionError: Expected tool call when tool_choice='required'\nE assert 0 > 0\nE + where 0 = len([])\n\ntests/verifications/openai_api/test_chat_completion.py:302: AssertionError" + "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-maverick-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 
'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"], # Reusing existing case for now\n ids=case_id_generator,\n )\n def test_chat_streaming_tool_choice_required(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n stream = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n tool_choice=\"required\", # Force tool call\n stream=True,\n )\n \n _, tool_calls_buffer = _accumulate_streaming_tool_calls(stream)\n \n> assert len(tool_calls_buffer) > 0, \"Expected tool call when tool_choice='required'\"\nE AssertionError: Expected tool call when tool_choice='required'\nE assert 0 > 0\nE + where 0 = len([])\n\ntests/verifications/openai_api/test_chat_completion.py:323: AssertionError" }, "teardown": { - "duration": 0.00037800008431077003, + "duration": 0.0003354279324412346, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_none[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]", - "lineno": 308, + "lineno": 329, "outcome": "passed", "keywords": [ "test_chat_non_streaming_tool_choice_none[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]", @@ -1959,21 +1989,21 @@ "case_id": "case0" }, "setup": { - "duration": 0.007929167011752725, + "duration": 0.0726231737062335, "outcome": "passed" }, "call": { - "duration": 1.0130669160280377, + "duration": 0.7659661257639527, "outcome": "passed" }, "teardown": { - "duration": 0.0004307499621063471, + "duration": 0.0003337552770972252, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_none[accounts/fireworks/models/llama4-scout-instruct-basic-case0]", - "lineno": 308, + "lineno": 329, "outcome": "passed", "keywords": [ "test_chat_non_streaming_tool_choice_none[accounts/fireworks/models/llama4-scout-instruct-basic-case0]", @@ -1992,21 +2022,21 @@ "case_id": "case0" }, "setup": { - "duration": 0.010822792071849108, + "duration": 0.09297824744135141, "outcome": "passed" }, "call": { - "duration": 4.663267957977951, + "duration": 3.257608976215124, "outcome": "passed" }, "teardown": { - "duration": 0.0006220841314643621, + "duration": 0.00022768322378396988, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_none[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]", - "lineno": 308, + "lineno": 329, "outcome": "passed", "keywords": [ "test_chat_non_streaming_tool_choice_none[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]", @@ -2025,21 +2055,21 @@ "case_id": "case0" }, "setup": { - "duration": 0.010691167088225484, + "duration": 0.0726541867479682, "outcome": "passed" }, "call": { - "duration": 3.383276625070721, + "duration": 4.5413802824914455, 
"outcome": "passed" }, "teardown": { - "duration": 0.00047616707161068916, + "duration": 0.00026340410113334656, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_none[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]", - "lineno": 331, + "lineno": 352, "outcome": "passed", "keywords": [ "test_chat_streaming_tool_choice_none[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]", @@ -2058,21 +2088,21 @@ "case_id": "case0" }, "setup": { - "duration": 0.030178457964211702, + "duration": 0.07666508108377457, "outcome": "passed" }, "call": { - "duration": 0.4668415829073638, + "duration": 0.5535151390358806, "outcome": "passed" }, "teardown": { - "duration": 0.0007963338866829872, + "duration": 0.0003251638263463974, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_none[accounts/fireworks/models/llama4-scout-instruct-basic-case0]", - "lineno": 331, + "lineno": 352, "outcome": "passed", "keywords": [ "test_chat_streaming_tool_choice_none[accounts/fireworks/models/llama4-scout-instruct-basic-case0]", @@ -2091,21 +2121,21 @@ "case_id": "case0" }, "setup": { - "duration": 0.011727249948307872, + "duration": 0.09550460614264011, "outcome": "passed" }, "call": { - "duration": 11.540696125011891, + "duration": 1.171110725030303, "outcome": "passed" }, "teardown": { - "duration": 0.0009242501109838486, + "duration": 0.0002604629844427109, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_none[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]", - "lineno": 331, + "lineno": 352, "outcome": "passed", "keywords": [ "test_chat_streaming_tool_choice_none[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]", @@ -2124,21 +2154,21 @@ "case_id": "case0" }, "setup": { - "duration": 0.008536209119483829, + "duration": 0.07114547491073608, "outcome": "passed" }, "call": { - "duration": 3.6622679999563843, + "duration": 27.369331603869796, "outcome": "passed" }, "teardown": { - "duration": 0.0005495408549904823, + "duration": 0.00023956969380378723, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-text_then_weather_tool]", - "lineno": 359, + "lineno": 380, "outcome": "failed", "keywords": [ "test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-text_then_weather_tool]", @@ -2157,34 +2187,34 @@ "case_id": "text_then_weather_tool" }, "setup": { - "duration": 0.017524708062410355, + "duration": 0.07612851448357105, "outcome": "passed" }, "call": { - "duration": 0.625571500044316, + "duration": 2.10164753254503, "outcome": "failed", "crash": { - "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 446, - "message": "AssertionError: Expected one of ['sol'] in content, but got: 'I am not able to execute this task as it exceeds the limitations of the functions I have been given.'\nassert False\n + where False = any(. 
at 0x1073e5cb0>)" + "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", + "lineno": 467, + "message": "AssertionError: Expected one of ['sol'] in content, but got: 'I cannot perform this task as it requires additional functionality that is not available in the given functions.'\nassert False\n + where False = any(. at 0x7f1acda87ca0>)" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 446, + "lineno": 467, "message": "AssertionError" } ], - "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama-v3p3-70b-instruct'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'text_then_weather_tool', 'expected': [{'answer': ['sol'], 'num_tool_calls': 0}, {'num_tool_calls': 1, 'to...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_non_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\"\n Test cases for multi-turn tool calling.\n Tool calls are asserted.\n Tool responses are provided in the test case.\n Final response is asserted.\n \"\"\"\n \n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n # Create a copy of the messages list to avoid modifying the original\n messages = []\n tools = case[\"input\"][\"tools\"]\n # Use deepcopy to prevent modification across runs/parametrization\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n # keep going until either\n # 1. we have messages to test in multi-turn\n # 2. 
no messages but last message is tool response\n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n # do not take new messages if last message is tool response\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n # Ensure new_messages is a list of message objects\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n # If it's a single message object, add it directly\n messages.append(new_messages)\n \n # --- API Call ---\n response = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=False,\n )\n \n # --- Process Response ---\n assistant_message = response.choices[0].message\n messages.append(assistant_message.model_dump(exclude_unset=True))\n \n assert assistant_message.role == \"assistant\"\n \n # Get the expected result data\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n # --- Assertions based on expected result ---\n assert len(assistant_message.tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(assistant_message.tool_calls or [])}\"\n )\n \n if num_tool_calls > 0:\n tool_call = assistant_message.tool_calls[0]\n assert tool_call.function.name == expected[\"tool_name\"], (\n f\"Expected tool '{expected['tool_name']}', got '{tool_call.function.name}'\"\n )\n # Parse the JSON string arguments before comparing\n actual_arguments = json.loads(tool_call.function.arguments)\n assert actual_arguments == expected[\"tool_arguments\"], (\n f\"Expected arguments '{expected['tool_arguments']}', got '{actual_arguments}'\"\n )\n \n # Prepare and append the tool response for the next turn\n tool_response = tool_responses.pop(0)\n messages.append(\n {\n \"role\": \"tool\",\n \"tool_call_id\": tool_call.id,\n \"content\": tool_response[\"response\"],\n }\n )\n else:\n assert assistant_message.content is not None, \"Expected content, but none received.\"\n expected_answers = expected[\"answer\"] # This is now a list\n content_lower = assistant_message.content.lower()\n> assert any(ans.lower() in content_lower for ans in expected_answers), (\n f\"Expected one of {expected_answers} in content, but got: '{assistant_message.content}'\"\n )\nE AssertionError: Expected one of ['sol'] in content, but got: 'I am not able to execute this task as it exceeds the limitations of the functions I have been given.'\nE assert False\nE + where False = any(. 
at 0x1073e5cb0>)\n\ntests/verifications/openai_api/test_chat_completion.py:446: AssertionError" + "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama-v3p3-70b-instruct'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'text_then_weather_tool', 'expected': [{'answer': ['sol'], 'num_tool_calls': 0}, {'num_tool_calls': 1, 'to...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_non_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\"\n Test cases for multi-turn tool calling.\n Tool calls are asserted.\n Tool responses are provided in the test case.\n Final response is asserted.\n \"\"\"\n \n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n # Create a copy of the messages list to avoid modifying the original\n messages = []\n tools = case[\"input\"][\"tools\"]\n # Use deepcopy to prevent modification across runs/parametrization\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n # keep going until either\n # 1. we have messages to test in multi-turn\n # 2. 
no messages but last message is tool response\n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n # do not take new messages if last message is tool response\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n # Ensure new_messages is a list of message objects\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n # If it's a single message object, add it directly\n messages.append(new_messages)\n \n # --- API Call ---\n response = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=False,\n )\n \n # --- Process Response ---\n assistant_message = response.choices[0].message\n messages.append(assistant_message.model_dump(exclude_unset=True))\n \n assert assistant_message.role == \"assistant\"\n \n # Get the expected result data\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n # --- Assertions based on expected result ---\n assert len(assistant_message.tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(assistant_message.tool_calls or [])}\"\n )\n \n if num_tool_calls > 0:\n tool_call = assistant_message.tool_calls[0]\n assert tool_call.function.name == expected[\"tool_name\"], (\n f\"Expected tool '{expected['tool_name']}', got '{tool_call.function.name}'\"\n )\n # Parse the JSON string arguments before comparing\n actual_arguments = json.loads(tool_call.function.arguments)\n assert actual_arguments == expected[\"tool_arguments\"], (\n f\"Expected arguments '{expected['tool_arguments']}', got '{actual_arguments}'\"\n )\n \n # Prepare and append the tool response for the next turn\n tool_response = tool_responses.pop(0)\n messages.append(\n {\n \"role\": \"tool\",\n \"tool_call_id\": tool_call.id,\n \"content\": tool_response[\"response\"],\n }\n )\n else:\n assert assistant_message.content is not None, \"Expected content, but none received.\"\n expected_answers = expected[\"answer\"] # This is now a list\n content_lower = assistant_message.content.lower()\n> assert any(ans.lower() in content_lower for ans in expected_answers), (\n f\"Expected one of {expected_answers} in content, but got: '{assistant_message.content}'\"\n )\nE AssertionError: Expected one of ['sol'] in content, but got: 'I cannot perform this task as it requires additional functionality that is not available in the given functions.'\nE assert False\nE + where False = any(. 
at 0x7f1acda87ca0>)\n\ntests/verifications/openai_api/test_chat_completion.py:467: AssertionError" }, "teardown": { - "duration": 0.00044062500819563866, + "duration": 0.00030514132231473923, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-weather_tool_then_text]", - "lineno": 359, + "lineno": 380, "outcome": "failed", "keywords": [ "test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-weather_tool_then_text]", @@ -2203,34 +2233,34 @@ "case_id": "weather_tool_then_text" }, "setup": { - "duration": 0.01056775008328259, + "duration": 0.07009781803935766, "outcome": "passed" }, "call": { - "duration": 0.5624969999771565, + "duration": 2.49614445772022, "outcome": "failed", "crash": { - "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 418, + "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", + "lineno": 439, "message": "AssertionError: Expected 1 tool calls, but got 0\nassert 0 == 1\n + where 0 = len((None or []))\n + where None = ChatCompletionMessage(content='{\"type\": \"function\", \"name\": \"get_weather\", \"parameters\": {\"location\": \"San Francisco, CA\"}}', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None).tool_calls" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 418, + "lineno": 439, "message": "AssertionError" } ], - "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama-v3p3-70b-instruct'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'weather_tool_then_text', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'location': 'San Francisco...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_non_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\"\n Test cases for multi-turn tool calling.\n Tool calls are asserted.\n Tool responses are provided in the test case.\n Final response is asserted.\n \"\"\"\n \n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n # Create a copy of the messages list to avoid modifying the original\n messages = []\n tools = case[\"input\"][\"tools\"]\n # Use deepcopy to prevent modification across runs/parametrization\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n # keep going until either\n # 1. we have messages to test in multi-turn\n # 2. 
no messages but last message is tool response\n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n # do not take new messages if last message is tool response\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n # Ensure new_messages is a list of message objects\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n # If it's a single message object, add it directly\n messages.append(new_messages)\n \n # --- API Call ---\n response = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=False,\n )\n \n # --- Process Response ---\n assistant_message = response.choices[0].message\n messages.append(assistant_message.model_dump(exclude_unset=True))\n \n assert assistant_message.role == \"assistant\"\n \n # Get the expected result data\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n # --- Assertions based on expected result ---\n> assert len(assistant_message.tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(assistant_message.tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len((None or []))\nE + where None = ChatCompletionMessage(content='{\"type\": \"function\", \"name\": \"get_weather\", \"parameters\": {\"location\": \"San Francisco, CA\"}}', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None).tool_calls\n\ntests/verifications/openai_api/test_chat_completion.py:418: AssertionError" + "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama-v3p3-70b-instruct'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'weather_tool_then_text', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'location': 'San Francisco...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_non_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\"\n Test cases for multi-turn tool calling.\n Tool calls are asserted.\n Tool responses are provided in the test case.\n Final response is asserted.\n \"\"\"\n \n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n # Create a copy of the messages list to avoid modifying the original\n messages = []\n tools = case[\"input\"][\"tools\"]\n # Use deepcopy to prevent modification across runs/parametrization\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n # keep going until either\n # 1. we have messages to test in multi-turn\n # 2. 
no messages but last message is tool response\n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n # do not take new messages if last message is tool response\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n # Ensure new_messages is a list of message objects\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n # If it's a single message object, add it directly\n messages.append(new_messages)\n \n # --- API Call ---\n response = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=False,\n )\n \n # --- Process Response ---\n assistant_message = response.choices[0].message\n messages.append(assistant_message.model_dump(exclude_unset=True))\n \n assert assistant_message.role == \"assistant\"\n \n # Get the expected result data\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n # --- Assertions based on expected result ---\n> assert len(assistant_message.tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(assistant_message.tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len((None or []))\nE + where None = ChatCompletionMessage(content='{\"type\": \"function\", \"name\": \"get_weather\", \"parameters\": {\"location\": \"San Francisco, CA\"}}', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None).tool_calls\n\ntests/verifications/openai_api/test_chat_completion.py:439: AssertionError" }, "teardown": { - "duration": 0.0004401658661663532, + "duration": 0.00035297591239213943, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-add_product_tool]", - "lineno": 359, + "lineno": 380, "outcome": "failed", "keywords": [ "test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-add_product_tool]", @@ -2249,34 +2279,34 @@ "case_id": "add_product_tool" }, "setup": { - "duration": 0.013444249983876944, + "duration": 0.0719120567664504, "outcome": "passed" }, "call": { - "duration": 0.8705885419622064, + "duration": 1.181352874264121, "outcome": "failed", "crash": { - "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 418, + "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", + "lineno": 439, "message": "AssertionError: Expected 1 tool calls, but got 0\nassert 0 == 1\n + where 0 = len((None or []))\n + where None = ChatCompletionMessage(content='{\"type\": \"function\", \"name\": \"addProduct\", \"parameters\": {\"name\": \"Widget\", \"price\": \"19.99\", \"inStock\": \"true\", \"tags\": \"[\\\\\"new\\\\\", \\\\\"sale\\\\\"]\"}}', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None).tool_calls" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 418, + "lineno": 439, "message": "AssertionError" } ], - "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama-v3p3-70b-instruct'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 
'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'add_product_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'inStock': True, 'name': 'Widget...}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': 'Successfully added product with id: 123'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_non_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\"\n Test cases for multi-turn tool calling.\n Tool calls are asserted.\n Tool responses are provided in the test case.\n Final response is asserted.\n \"\"\"\n \n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n # Create a copy of the messages list to avoid modifying the original\n messages = []\n tools = case[\"input\"][\"tools\"]\n # Use deepcopy to prevent modification across runs/parametrization\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n # keep going until either\n # 1. we have messages to test in multi-turn\n # 2. no messages but last message is tool response\n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n # do not take new messages if last message is tool response\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n # Ensure new_messages is a list of message objects\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n # If it's a single message object, add it directly\n messages.append(new_messages)\n \n # --- API Call ---\n response = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=False,\n )\n \n # --- Process Response ---\n assistant_message = response.choices[0].message\n messages.append(assistant_message.model_dump(exclude_unset=True))\n \n assert assistant_message.role == \"assistant\"\n \n # Get the expected result data\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n # --- Assertions based on expected result ---\n> assert len(assistant_message.tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(assistant_message.tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len((None or []))\nE + where None = ChatCompletionMessage(content='{\"type\": \"function\", \"name\": \"addProduct\", \"parameters\": {\"name\": \"Widget\", \"price\": \"19.99\", \"inStock\": \"true\", \"tags\": \"[\\\\\"new\\\\\", \\\\\"sale\\\\\"]\"}}', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None).tool_calls\n\ntests/verifications/openai_api/test_chat_completion.py:418: AssertionError" + "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama-v3p3-70b-instruct'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 
'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'add_product_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'inStock': True, 'name': 'Widget...}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': 'Successfully added product with id: 123'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_non_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\"\n Test cases for multi-turn tool calling.\n Tool calls are asserted.\n Tool responses are provided in the test case.\n Final response is asserted.\n \"\"\"\n \n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n # Create a copy of the messages list to avoid modifying the original\n messages = []\n tools = case[\"input\"][\"tools\"]\n # Use deepcopy to prevent modification across runs/parametrization\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n # keep going until either\n # 1. we have messages to test in multi-turn\n # 2. no messages but last message is tool response\n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n # do not take new messages if last message is tool response\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n # Ensure new_messages is a list of message objects\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n # If it's a single message object, add it directly\n messages.append(new_messages)\n \n # --- API Call ---\n response = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=False,\n )\n \n # --- Process Response ---\n assistant_message = response.choices[0].message\n messages.append(assistant_message.model_dump(exclude_unset=True))\n \n assert assistant_message.role == \"assistant\"\n \n # Get the expected result data\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n # --- Assertions based on expected result ---\n> assert len(assistant_message.tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(assistant_message.tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len((None or []))\nE + where None = ChatCompletionMessage(content='{\"type\": \"function\", \"name\": \"addProduct\", \"parameters\": {\"name\": \"Widget\", \"price\": \"19.99\", \"inStock\": \"true\", \"tags\": \"[\\\\\"new\\\\\", \\\\\"sale\\\\\"]\"}}', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None).tool_calls\n\ntests/verifications/openai_api/test_chat_completion.py:439: AssertionError" }, "teardown": { - "duration": 0.0004647918976843357, + "duration": 0.000303901731967926, "outcome": "passed" } }, { "nodeid": 
"tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-get_then_create_event_tool]", - "lineno": 359, + "lineno": 380, "outcome": "failed", "keywords": [ "test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-get_then_create_event_tool]", @@ -2295,34 +2325,34 @@ "case_id": "get_then_create_event_tool" }, "setup": { - "duration": 0.013817500090226531, + "duration": 0.07158921286463737, "outcome": "passed" }, "call": { - "duration": 0.6882082498632371, + "duration": 3.7202864307910204, "outcome": "failed", "crash": { - "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 418, + "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", + "lineno": 439, "message": "AssertionError: Expected 1 tool calls, but got 0\nassert 0 == 1\n + where 0 = len((None or []))\n + where None = ChatCompletionMessage(content='{\"type\": \"function\", \"name\": \"get_event\", \"parameters\": {\"date\": \"2025-03-03\", \"time\": \"10:00\"}}', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None).tool_calls" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 418, + "lineno": 439, "message": "AssertionError" } ], - "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama-v3p3-70b-instruct'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'get_then_create_event_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'date': '2025-03-03', ...ents found for 2025-03-03 at 10:00'}\"}, {'response': \"{'response': 'Successfully created new event with id: e_123'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_non_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\"\n Test cases for multi-turn tool calling.\n Tool calls are asserted.\n Tool responses are provided in the test case.\n Final response is asserted.\n \"\"\"\n \n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n # Create a copy of the messages list to avoid modifying the original\n messages = []\n tools = case[\"input\"][\"tools\"]\n # Use deepcopy to prevent modification across runs/parametrization\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n # keep going until either\n # 1. we have messages to test in multi-turn\n # 2. 
no messages but last message is tool response\n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n # do not take new messages if last message is tool response\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n # Ensure new_messages is a list of message objects\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n # If it's a single message object, add it directly\n messages.append(new_messages)\n \n # --- API Call ---\n response = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=False,\n )\n \n # --- Process Response ---\n assistant_message = response.choices[0].message\n messages.append(assistant_message.model_dump(exclude_unset=True))\n \n assert assistant_message.role == \"assistant\"\n \n # Get the expected result data\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n # --- Assertions based on expected result ---\n> assert len(assistant_message.tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(assistant_message.tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len((None or []))\nE + where None = ChatCompletionMessage(content='{\"type\": \"function\", \"name\": \"get_event\", \"parameters\": {\"date\": \"2025-03-03\", \"time\": \"10:00\"}}', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None).tool_calls\n\ntests/verifications/openai_api/test_chat_completion.py:418: AssertionError" + "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama-v3p3-70b-instruct'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'get_then_create_event_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'date': '2025-03-03', ...ents found for 2025-03-03 at 10:00'}\"}, {'response': \"{'response': 'Successfully created new event with id: e_123'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_non_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\"\n Test cases for multi-turn tool calling.\n Tool calls are asserted.\n Tool responses are provided in the test case.\n Final response is asserted.\n \"\"\"\n \n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n # Create a copy of the messages list to avoid modifying the original\n messages = []\n tools = case[\"input\"][\"tools\"]\n # Use deepcopy to prevent modification across runs/parametrization\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n # keep going until either\n # 1. we have messages to test in multi-turn\n # 2. 
no messages but last message is tool response\n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n # do not take new messages if last message is tool response\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n # Ensure new_messages is a list of message objects\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n # If it's a single message object, add it directly\n messages.append(new_messages)\n \n # --- API Call ---\n response = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=False,\n )\n \n # --- Process Response ---\n assistant_message = response.choices[0].message\n messages.append(assistant_message.model_dump(exclude_unset=True))\n \n assert assistant_message.role == \"assistant\"\n \n # Get the expected result data\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n # --- Assertions based on expected result ---\n> assert len(assistant_message.tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(assistant_message.tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len((None or []))\nE + where None = ChatCompletionMessage(content='{\"type\": \"function\", \"name\": \"get_event\", \"parameters\": {\"date\": \"2025-03-03\", \"time\": \"10:00\"}}', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None).tool_calls\n\ntests/verifications/openai_api/test_chat_completion.py:439: AssertionError" }, "teardown": { - "duration": 0.0005112909711897373, + "duration": 0.0003700554370880127, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-compare_monthly_expense_tool]", - "lineno": 359, + "lineno": 380, "outcome": "failed", "keywords": [ "test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-compare_monthly_expense_tool]", @@ -2341,34 +2371,34 @@ "case_id": "compare_monthly_expense_tool" }, "setup": { - "duration": 0.013548000017181039, + "duration": 0.07388217654079199, "outcome": "passed" }, "call": { - "duration": 0.5821714580524713, + "duration": 0.6030126195400953, "outcome": "failed", "crash": { - "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 418, + "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", + "lineno": 439, "message": "AssertionError: Expected 1 tool calls, but got 0\nassert 0 == 1\n + where 0 = len((None or []))\n + where None = ChatCompletionMessage(content='{\"type\": \"function\", \"name\": \"getMonthlyExpenseSummary\", \"parameters\": {\"month\": \"1\", \"year\": \"2025\"}}', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None).tool_calls" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 418, + "lineno": 439, "message": "AssertionError" } ], - "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama-v3p3-70b-instruct'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 
'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'compare_monthly_expense_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'month': 1, 'year': ... 'Total expenses for January 2025: $1000'}\"}, {'response': \"{'response': 'Total expenses for February 2024: $2000'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_non_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\"\n Test cases for multi-turn tool calling.\n Tool calls are asserted.\n Tool responses are provided in the test case.\n Final response is asserted.\n \"\"\"\n \n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n # Create a copy of the messages list to avoid modifying the original\n messages = []\n tools = case[\"input\"][\"tools\"]\n # Use deepcopy to prevent modification across runs/parametrization\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n # keep going until either\n # 1. we have messages to test in multi-turn\n # 2. no messages but last message is tool response\n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n # do not take new messages if last message is tool response\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n # Ensure new_messages is a list of message objects\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n # If it's a single message object, add it directly\n messages.append(new_messages)\n \n # --- API Call ---\n response = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=False,\n )\n \n # --- Process Response ---\n assistant_message = response.choices[0].message\n messages.append(assistant_message.model_dump(exclude_unset=True))\n \n assert assistant_message.role == \"assistant\"\n \n # Get the expected result data\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n # --- Assertions based on expected result ---\n> assert len(assistant_message.tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(assistant_message.tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len((None or []))\nE + where None = ChatCompletionMessage(content='{\"type\": \"function\", \"name\": \"getMonthlyExpenseSummary\", \"parameters\": {\"month\": \"1\", \"year\": \"2025\"}}', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None).tool_calls\n\ntests/verifications/openai_api/test_chat_completion.py:418: AssertionError" + "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama-v3p3-70b-instruct'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 
'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'compare_monthly_expense_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'month': 1, 'year': ... 'Total expenses for January 2025: $1000'}\"}, {'response': \"{'response': 'Total expenses for February 2024: $2000'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_non_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\"\n Test cases for multi-turn tool calling.\n Tool calls are asserted.\n Tool responses are provided in the test case.\n Final response is asserted.\n \"\"\"\n \n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n # Create a copy of the messages list to avoid modifying the original\n messages = []\n tools = case[\"input\"][\"tools\"]\n # Use deepcopy to prevent modification across runs/parametrization\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n # keep going until either\n # 1. we have messages to test in multi-turn\n # 2. no messages but last message is tool response\n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n # do not take new messages if last message is tool response\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n # Ensure new_messages is a list of message objects\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n # If it's a single message object, add it directly\n messages.append(new_messages)\n \n # --- API Call ---\n response = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=False,\n )\n \n # --- Process Response ---\n assistant_message = response.choices[0].message\n messages.append(assistant_message.model_dump(exclude_unset=True))\n \n assert assistant_message.role == \"assistant\"\n \n # Get the expected result data\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n # --- Assertions based on expected result ---\n> assert len(assistant_message.tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(assistant_message.tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len((None or []))\nE + where None = ChatCompletionMessage(content='{\"type\": \"function\", \"name\": \"getMonthlyExpenseSummary\", \"parameters\": {\"month\": \"1\", \"year\": \"2025\"}}', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None).tool_calls\n\ntests/verifications/openai_api/test_chat_completion.py:439: AssertionError" }, "teardown": { - "duration": 0.00021225004456937313, + "duration": 0.0003188345581293106, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-text_then_weather_tool]", - "lineno": 
359, + "lineno": 380, "outcome": "failed", "keywords": [ "test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-text_then_weather_tool]", @@ -2387,34 +2417,34 @@ "case_id": "text_then_weather_tool" }, "setup": { - "duration": 0.0070156671572476625, + "duration": 0.07314795535057783, "outcome": "passed" }, "call": { - "duration": 8.95718324999325, + "duration": 1.0849075820297003, "outcome": "failed", "crash": { - "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 418, - "message": "AssertionError: Expected 1 tool calls, but got 0\nassert 0 == 1\n + where 0 = len((None or []))\n + where None = ChatCompletionMessage(content='```\\n{\\n \"name\": \"get_weather\",\\n \"parameters\": {\\n \"description\": \"Get the current weather\",\\n \"parameters\": {\\n \"location\": {\\n \"description\": \"The city and state (both required)\",\\n \"type\": \"object\",\\n \"properties\": {\\n \"location\": {\\n \"description\": \"The city and state, e.g. San Francisco, CA.\",\\n \"type\": \"string\"\\n }\\n }\\n }\\n },\\n \"type\": \"object\",\\n \"properties\": {\\n \"location\": \"San Francisco, CA.\"\\n }\\n }\\n}\\n```', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None).tool_calls" + "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", + "lineno": 467, + "message": "AssertionError: Expected one of ['sol'] in content, but got: '{\"name\": \"get_weather\", \"parameters\": {\"description\": \"Get the current weather\", \"parameters\": {\"type\": \"object\", \"properties\": {\"location\": {\"description\": \"The city and state (both required). e.g. San Francisco, CA.\", \"type\": \"string\"}}}}'\nassert False\n + where False = any(. 
at 0x7f1acdad8970>)" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 418, + "lineno": 467, "message": "AssertionError" } ], - "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-scout-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'text_then_weather_tool', 'expected': [{'answer': ['sol'], 'num_tool_calls': 0}, {'num_tool_calls': 1, 'to...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_non_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\"\n Test cases for multi-turn tool calling.\n Tool calls are asserted.\n Tool responses are provided in the test case.\n Final response is asserted.\n \"\"\"\n \n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n # Create a copy of the messages list to avoid modifying the original\n messages = []\n tools = case[\"input\"][\"tools\"]\n # Use deepcopy to prevent modification across runs/parametrization\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n # keep going until either\n # 1. we have messages to test in multi-turn\n # 2. 
no messages but last message is tool response\n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n # do not take new messages if last message is tool response\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n # Ensure new_messages is a list of message objects\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n # If it's a single message object, add it directly\n messages.append(new_messages)\n \n # --- API Call ---\n response = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=False,\n )\n \n # --- Process Response ---\n assistant_message = response.choices[0].message\n messages.append(assistant_message.model_dump(exclude_unset=True))\n \n assert assistant_message.role == \"assistant\"\n \n # Get the expected result data\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n # --- Assertions based on expected result ---\n> assert len(assistant_message.tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(assistant_message.tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len((None or []))\nE + where None = ChatCompletionMessage(content='```\\n{\\n \"name\": \"get_weather\",\\n \"parameters\": {\\n \"description\": \"Get the current weather\",\\n \"parameters\": {\\n \"location\": {\\n \"description\": \"The city and state (both required)\",\\n \"type\": \"object\",\\n \"properties\": {\\n \"location\": {\\n \"description\": \"The city and state, e.g. San Francisco, CA.\",\\n \"type\": \"string\"\\n }\\n }\\n }\\n },\\n \"type\": \"object\",\\n \"properties\": {\\n \"location\": \"San Francisco, CA.\"\\n }\\n }\\n}\\n```', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None).tool_calls\n\ntests/verifications/openai_api/test_chat_completion.py:418: AssertionError" + "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-scout-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'text_then_weather_tool', 'expected': [{'answer': ['sol'], 'num_tool_calls': 0}, {'num_tool_calls': 1, 'to...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_non_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\"\n Test cases for multi-turn tool calling.\n Tool calls are asserted.\n Tool responses are provided in the test case.\n Final response is asserted.\n \"\"\"\n \n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n # Create a copy of the messages list to avoid modifying the original\n messages = []\n tools = case[\"input\"][\"tools\"]\n # Use 
deepcopy to prevent modification across runs/parametrization\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n # keep going until either\n # 1. we have messages to test in multi-turn\n # 2. no messages but last message is tool response\n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n # do not take new messages if last message is tool response\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n # Ensure new_messages is a list of message objects\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n # If it's a single message object, add it directly\n messages.append(new_messages)\n \n # --- API Call ---\n response = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=False,\n )\n \n # --- Process Response ---\n assistant_message = response.choices[0].message\n messages.append(assistant_message.model_dump(exclude_unset=True))\n \n assert assistant_message.role == \"assistant\"\n \n # Get the expected result data\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n # --- Assertions based on expected result ---\n assert len(assistant_message.tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(assistant_message.tool_calls or [])}\"\n )\n \n if num_tool_calls > 0:\n tool_call = assistant_message.tool_calls[0]\n assert tool_call.function.name == expected[\"tool_name\"], (\n f\"Expected tool '{expected['tool_name']}', got '{tool_call.function.name}'\"\n )\n # Parse the JSON string arguments before comparing\n actual_arguments = json.loads(tool_call.function.arguments)\n assert actual_arguments == expected[\"tool_arguments\"], (\n f\"Expected arguments '{expected['tool_arguments']}', got '{actual_arguments}'\"\n )\n \n # Prepare and append the tool response for the next turn\n tool_response = tool_responses.pop(0)\n messages.append(\n {\n \"role\": \"tool\",\n \"tool_call_id\": tool_call.id,\n \"content\": tool_response[\"response\"],\n }\n )\n else:\n assert assistant_message.content is not None, \"Expected content, but none received.\"\n expected_answers = expected[\"answer\"] # This is now a list\n content_lower = assistant_message.content.lower()\n> assert any(ans.lower() in content_lower for ans in expected_answers), (\n f\"Expected one of {expected_answers} in content, but got: '{assistant_message.content}'\"\n )\nE AssertionError: Expected one of ['sol'] in content, but got: '{\"name\": \"get_weather\", \"parameters\": {\"description\": \"Get the current weather\", \"parameters\": {\"type\": \"object\", \"properties\": {\"location\": {\"description\": \"The city and state (both required). e.g. San Francisco, CA.\", \"type\": \"string\"}}}}'\nE assert False\nE + where False = any(. 
at 0x7f1acdad8970>)\n\ntests/verifications/openai_api/test_chat_completion.py:467: AssertionError" }, "teardown": { - "duration": 0.00045741605572402477, + "duration": 0.00032442156225442886, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-weather_tool_then_text]", - "lineno": 359, + "lineno": 380, "outcome": "failed", "keywords": [ "test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-weather_tool_then_text]", @@ -2433,34 +2463,34 @@ "case_id": "weather_tool_then_text" }, "setup": { - "duration": 0.011042665923014283, + "duration": 0.07257637288421392, "outcome": "passed" }, "call": { - "duration": 3.372867708094418, + "duration": 1.1364115234464407, "outcome": "failed", "crash": { - "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 418, + "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", + "lineno": 439, "message": "AssertionError: Expected 1 tool calls, but got 0\nassert 0 == 1\n + where 0 = len((None or []))\n + where None = ChatCompletionMessage(content='{\"name\": \"get_weather\", \"parameters\": {\"description\": \"Get the current weather\", \"parameters\": {\"type\": \"object\", \"properties\": {\"location\": {\"description\": \"The city and state (both required)\", \"type\": \"string\"}}}, \"required\": [\"location\"]}}', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None).tool_calls" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 418, + "lineno": 439, "message": "AssertionError" } ], - "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-scout-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'weather_tool_then_text', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'location': 'San Francisco...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_non_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\"\n Test cases for multi-turn tool calling.\n Tool calls are asserted.\n Tool responses are provided in the test case.\n Final response is asserted.\n \"\"\"\n \n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n # Create a copy of the messages list to avoid modifying the original\n messages = []\n tools = case[\"input\"][\"tools\"]\n # Use deepcopy to prevent modification across runs/parametrization\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = 
copy.deepcopy(case[\"input\"][\"messages\"])\n \n # keep going until either\n # 1. we have messages to test in multi-turn\n # 2. no messages but last message is tool response\n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n # do not take new messages if last message is tool response\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n # Ensure new_messages is a list of message objects\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n # If it's a single message object, add it directly\n messages.append(new_messages)\n \n # --- API Call ---\n response = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=False,\n )\n \n # --- Process Response ---\n assistant_message = response.choices[0].message\n messages.append(assistant_message.model_dump(exclude_unset=True))\n \n assert assistant_message.role == \"assistant\"\n \n # Get the expected result data\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n # --- Assertions based on expected result ---\n> assert len(assistant_message.tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(assistant_message.tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len((None or []))\nE + where None = ChatCompletionMessage(content='{\"name\": \"get_weather\", \"parameters\": {\"description\": \"Get the current weather\", \"parameters\": {\"type\": \"object\", \"properties\": {\"location\": {\"description\": \"The city and state (both required)\", \"type\": \"string\"}}}, \"required\": [\"location\"]}}', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None).tool_calls\n\ntests/verifications/openai_api/test_chat_completion.py:418: AssertionError" + "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-scout-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'weather_tool_then_text', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'location': 'San Francisco...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_non_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\"\n Test cases for multi-turn tool calling.\n Tool calls are asserted.\n Tool responses are provided in the test case.\n Final response is asserted.\n \"\"\"\n \n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n # Create a copy of the messages list to avoid modifying the original\n messages = []\n tools = case[\"input\"][\"tools\"]\n # Use deepcopy to prevent modification across runs/parametrization\n expected_results = 
copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n # keep going until either\n # 1. we have messages to test in multi-turn\n # 2. no messages but last message is tool response\n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n # do not take new messages if last message is tool response\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n # Ensure new_messages is a list of message objects\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n # If it's a single message object, add it directly\n messages.append(new_messages)\n \n # --- API Call ---\n response = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=False,\n )\n \n # --- Process Response ---\n assistant_message = response.choices[0].message\n messages.append(assistant_message.model_dump(exclude_unset=True))\n \n assert assistant_message.role == \"assistant\"\n \n # Get the expected result data\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n # --- Assertions based on expected result ---\n> assert len(assistant_message.tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(assistant_message.tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len((None or []))\nE + where None = ChatCompletionMessage(content='{\"name\": \"get_weather\", \"parameters\": {\"description\": \"Get the current weather\", \"parameters\": {\"type\": \"object\", \"properties\": {\"location\": {\"description\": \"The city and state (both required)\", \"type\": \"string\"}}}, \"required\": [\"location\"]}}', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None).tool_calls\n\ntests/verifications/openai_api/test_chat_completion.py:439: AssertionError" }, "teardown": { - "duration": 0.00042333384044468403, + "duration": 0.0003107702359557152, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-add_product_tool]", - "lineno": 359, + "lineno": 380, "outcome": "failed", "keywords": [ "test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-add_product_tool]", @@ -2479,34 +2509,34 @@ "case_id": "add_product_tool" }, "setup": { - "duration": 0.01305404189042747, + "duration": 0.0716616166755557, "outcome": "passed" }, "call": { - "duration": 3.5883425418287516, + "duration": 1.6755285635590553, "outcome": "failed", "crash": { - "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 418, - "message": "AssertionError: Expected 1 tool calls, but got 0\nassert 0 == 1\n + where 0 = len((None or []))\n + where None = ChatCompletionMessage(content='{\"name\": \"addProduct\", \"parameters\": {\"description\": \"Add a new product\", \"type\": \"object\", \"properties\": {\"name\": {\"description\": \"Name of the product\", \"type\": \"string\"}, \"price\": {\"description\": \"Price of the product\", \"type\": \"number\"}, \"inStock\": {\"description\": \"Availability status of the product\", \"type\": \"boolean\"}, \"tags\": {\"description\": \"List 
of product tags\", \"type\": \"array\", \"items\": {\"type\": \"string\"}}}}, \"required\": [\"name\", \"price\", \"inStock\", \"tags\"]}', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None).tool_calls" + "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", + "lineno": 439, + "message": "AssertionError: Expected 1 tool calls, but got 0\nassert 0 == 1\n + where 0 = len((None or []))\n + where None = ChatCompletionMessage(content='{\"name\": \"addProduct\", \"parameters\": {\"name\": {\"type\": \"string\", \"value\": \"Widget\"}, \"description\": {\"type\": \"string\", \"value\": \"Name of the product\"}, \"price\": {\"type\": \"number\", \"value\": 19.99}, \"inStock\": {\"type\": \"boolean\", \"value\": true}, \"tags\": {\"type\": \"array\", \"value\": [\"new\", \"sale\"]}}}', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None).tool_calls" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 418, + "lineno": 439, "message": "AssertionError" } ], - "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-scout-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'add_product_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'inStock': True, 'name': 'Widget...}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': 'Successfully added product with id: 123'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_non_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\"\n Test cases for multi-turn tool calling.\n Tool calls are asserted.\n Tool responses are provided in the test case.\n Final response is asserted.\n \"\"\"\n \n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n # Create a copy of the messages list to avoid modifying the original\n messages = []\n tools = case[\"input\"][\"tools\"]\n # Use deepcopy to prevent modification across runs/parametrization\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n # keep going until either\n # 1. we have messages to test in multi-turn\n # 2. 
no messages but last message is tool response\n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n # do not take new messages if last message is tool response\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n # Ensure new_messages is a list of message objects\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n # If it's a single message object, add it directly\n messages.append(new_messages)\n \n # --- API Call ---\n response = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=False,\n )\n \n # --- Process Response ---\n assistant_message = response.choices[0].message\n messages.append(assistant_message.model_dump(exclude_unset=True))\n \n assert assistant_message.role == \"assistant\"\n \n # Get the expected result data\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n # --- Assertions based on expected result ---\n> assert len(assistant_message.tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(assistant_message.tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len((None or []))\nE + where None = ChatCompletionMessage(content='{\"name\": \"addProduct\", \"parameters\": {\"description\": \"Add a new product\", \"type\": \"object\", \"properties\": {\"name\": {\"description\": \"Name of the product\", \"type\": \"string\"}, \"price\": {\"description\": \"Price of the product\", \"type\": \"number\"}, \"inStock\": {\"description\": \"Availability status of the product\", \"type\": \"boolean\"}, \"tags\": {\"description\": \"List of product tags\", \"type\": \"array\", \"items\": {\"type\": \"string\"}}}}, \"required\": [\"name\", \"price\", \"inStock\", \"tags\"]}', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None).tool_calls\n\ntests/verifications/openai_api/test_chat_completion.py:418: AssertionError" + "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-scout-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'add_product_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'inStock': True, 'name': 'Widget...}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': 'Successfully added product with id: 123'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_non_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\"\n Test cases for multi-turn tool calling.\n Tool calls are asserted.\n Tool responses are provided in the test case.\n Final response is asserted.\n \"\"\"\n \n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n # Create a copy of the messages list to avoid modifying the original\n messages 
= []\n tools = case[\"input\"][\"tools\"]\n # Use deepcopy to prevent modification across runs/parametrization\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n # keep going until either\n # 1. we have messages to test in multi-turn\n # 2. no messages but last message is tool response\n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n # do not take new messages if last message is tool response\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n # Ensure new_messages is a list of message objects\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n # If it's a single message object, add it directly\n messages.append(new_messages)\n \n # --- API Call ---\n response = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=False,\n )\n \n # --- Process Response ---\n assistant_message = response.choices[0].message\n messages.append(assistant_message.model_dump(exclude_unset=True))\n \n assert assistant_message.role == \"assistant\"\n \n # Get the expected result data\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n # --- Assertions based on expected result ---\n> assert len(assistant_message.tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(assistant_message.tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len((None or []))\nE + where None = ChatCompletionMessage(content='{\"name\": \"addProduct\", \"parameters\": {\"name\": {\"type\": \"string\", \"value\": \"Widget\"}, \"description\": {\"type\": \"string\", \"value\": \"Name of the product\"}, \"price\": {\"type\": \"number\", \"value\": 19.99}, \"inStock\": {\"type\": \"boolean\", \"value\": true}, \"tags\": {\"type\": \"array\", \"value\": [\"new\", \"sale\"]}}}', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None).tool_calls\n\ntests/verifications/openai_api/test_chat_completion.py:439: AssertionError" }, "teardown": { - "duration": 0.0005818749777972698, + "duration": 0.0003323536366224289, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-get_then_create_event_tool]", - "lineno": 359, + "lineno": 380, "outcome": "failed", "keywords": [ "test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-get_then_create_event_tool]", @@ -2525,34 +2555,34 @@ "case_id": "get_then_create_event_tool" }, "setup": { - "duration": 0.01428320910781622, + "duration": 0.07031949236989021, "outcome": "passed" }, "call": { - "duration": 15.402638916159049, + "duration": 2.363899651914835, "outcome": "failed", "crash": { - "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 418, - "message": "AssertionError: Expected 1 tool calls, but got 0\nassert 0 == 1\n + where 0 = len((None or []))\n + where None = ChatCompletionMessage(content='{\"name\": \"get_event\", \"parameters\": {\"date\": {\"description\": \"Date of the event in ISO format\", \"type\": \"string\"}, \"time\": {\"description\": \"Event 
Time (HH:MM)\", \"type\": \"string\"}}}assistant\\n\\n{\"name\": \"get_event\", \"parameters\": {\"date\": {\"description\": \"Date of the event in ISO format\", \"type\": \"string\"}, \"time\": {\"description\": \"Event Time (HH:MM)\", \"type\": \"string\"}}}assistant\\n\\n{\"name\": \"get_event\", \"parameters\": {\"date\": {\"description\": \"Date of the event in ISO format\", \"type\": \"string\", \"value\": \"2025-03-03\"}, \"time\": {\"description\": \"Event Time (HH:MM)\", \"type\": \"string\", \"value\": \"10:00\"}}}assistant\\n\\n{\"name\": \"get_event\", \"parameters\": {\"date\": {\"description\": \"Date of the event in ISO format\", \"type\": \"string\", \"value\": \"2025-03-03\"}, \"time\": {\"description\": \"Event Time (HH:MM)\", \"type\": \"string\", \"value\": \"10:00\"}}}assistant\\n\\n{\"name\": \"get_event\", \"parameters\": {\"date\": {\"description\": \"Date of the event in ISO format\", \"type\": \"string\", \"value\": \"2025-03-03\"}, \"time\": {\"description\": \"Event Time (HH:MM)\", \"type\": \"string\", \"value\": \"10:00\"}}}assistant\\n\\n{\"name\": \"get_event\", \"parameters\": {\"date\": {\"description\": \"Date of the event...: \"Date of the event in ISO format\", \"type\": \"string\", \"value\": \"2025-03-03\"}, \"time\": {\"description\": \"Event Time (HH:MM)\", \"type\": \"string\", \"value\": \"10:00\"}}}assistant\\n\\n{\"name\": \"get_event\", \"parameters\": {\"date\": {\"description\": \"Date of the event in ISO format\", \"type\": \"string\", \"value\": \"2025-03-03\"}, \"time\": {\"description\": \"Event Time (HH:MM)\", \"type\": \"string\", \"value\": \"10:00\"}}}assistant\\n\\n{\"name\": \"get_event\", \"parameters\": {\"date\": {\"description\": \"Date of the event in ISO format\", \"type\": \"string\", \"value\": \"2025-03-03\"}, \"time\": {\"description\": \"Event Time (HH:MM)\", \"type\": \"string\", \"value\": \"10:00\"}}}assistant\\n\\n{\"name\": \"get_event\", \"parameters\": {\"date\": {\"description\": \"Date of the event in ISO format\", \"type\": \"string\", \"value\": \"2025-03-03\"}, \"time\": {\"description\": \"Event Time (HH:MM)\", \"type\": \"string\", \"value\": \"10:00\"}}}assistant\\n\\n{\"name\": \"get_event\", \"parameters\": {\"date\": {\"description\": \"Date of the event in ISO format\", \"type\": \"string\", \"value\": \"2025-03-03\"}, \"time\": {\"description\": \"Event Time (HH:MM)\", \"type\": \"string\", \"value\": \"10:00\"}}}', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None).tool_calls" + "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", + "lineno": 439, + "message": "AssertionError: Expected 1 tool calls, but got 0\nassert 0 == 1\n + where 0 = len((None or []))\n + where None = ChatCompletionMessage(content='{\"name\": \"get_event\", \"parameters\": {\"date\": {\"date\": \"March 3rd\"}, \"time\": {\"time\": \"10 am\"}}}assistant\\n\\n{\"name\": \"get_event\", \"parameters\": {\"date\": {\"date\": \"2025-03-03\"}, \"time\": {\"time\": \"10:00\"}}}assistant\\n\\n{\"name\": \"get_event\", \"parameters\": {\"date\": {\"date\": \"2025-03-03\"}, \"time\": {\"time\": \"10:00\"}}}assistant\\n\\n{\"name\": \"get_event\", \"parameters\": {\"date\": {\"date\": \"2025-03-03\"}, \"time\": {\"time\": \"10:00\"}}}assistant\\n\\nThe function provided is not sufficient for me to answer the question.assistant\\n\\n{\"name\": \"get_event\", \"parameters\": {\"date\": {\"date\": \"2025-03-03\"}, \"time\": {\"time\": 
\"10:00\"}}}assistant\\n\\nThe function provided is not sufficient for me to answer the question.', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None).tool_calls" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 418, + "lineno": 439, "message": "AssertionError" } ], - "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-scout-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'get_then_create_event_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'date': '2025-03-03', ...ents found for 2025-03-03 at 10:00'}\"}, {'response': \"{'response': 'Successfully created new event with id: e_123'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_non_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\"\n Test cases for multi-turn tool calling.\n Tool calls are asserted.\n Tool responses are provided in the test case.\n Final response is asserted.\n \"\"\"\n \n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n # Create a copy of the messages list to avoid modifying the original\n messages = []\n tools = case[\"input\"][\"tools\"]\n # Use deepcopy to prevent modification across runs/parametrization\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n # keep going until either\n # 1. we have messages to test in multi-turn\n # 2. 
no messages but last message is tool response\n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n # do not take new messages if last message is tool response\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n # Ensure new_messages is a list of message objects\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n # If it's a single message object, add it directly\n messages.append(new_messages)\n \n # --- API Call ---\n response = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=False,\n )\n \n # --- Process Response ---\n assistant_message = response.choices[0].message\n messages.append(assistant_message.model_dump(exclude_unset=True))\n \n assert assistant_message.role == \"assistant\"\n \n # Get the expected result data\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n # --- Assertions based on expected result ---\n> assert len(assistant_message.tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(assistant_message.tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len((None or []))\nE + where None = ChatCompletionMessage(content='{\"name\": \"get_event\", \"parameters\": {\"date\": {\"description\": \"Date of the event in ISO format\", \"type\": \"string\"}, \"time\": {\"description\": \"Event Time (HH:MM)\", \"type\": \"string\"}}}assistant\\n\\n{\"name\": \"get_event\", \"parameters\": {\"date\": {\"description\": \"Date of the event in ISO format\", \"type\": \"string\"}, \"time\": {\"description\": \"Event Time (HH:MM)\", \"type\": \"string\"}}}assistant\\n\\n{\"name\": \"get_event\", \"parameters\": {\"date\": {\"description\": \"Date of the event in ISO format\", \"type\": \"string\", \"value\": \"2025-03-03\"}, \"time\": {\"description\": \"Event Time (HH:MM)\", \"type\": \"string\", \"value\": \"10:00\"}}}assistant\\n\\n{\"name\": \"get_event\", \"parameters\": {\"date\": {\"description\": \"Date of the event in ISO format\", \"type\": \"string\", \"value\": \"2025-03-03\"}, \"time\": {\"description\": \"Event Time (HH:MM)\", \"type\": \"string\", \"value\": \"10:00\"}}}assistant\\n\\n{\"name\": \"get_event\", \"parameters\": {\"date\": {\"description\": \"Date of the event in ISO format\", \"type\": \"string\", \"value\": \"2025-03-03\"}, \"time\": {\"description\": \"Event Time (HH:MM)\", \"type\": \"string\", \"value\": \"10:00\"}}}assistant\\n\\n{\"name\": \"get_event\", \"parameters\": {\"date\": {\"description\": \"Date of the event...: \"Date of the event in ISO format\", \"type\": \"string\", \"value\": \"2025-03-03\"}, \"time\": {\"description\": \"Event Time (HH:MM)\", \"type\": \"string\", \"value\": \"10:00\"}}}assistant\\n\\n{\"name\": \"get_event\", \"parameters\": {\"date\": {\"description\": \"Date of the event in ISO format\", \"type\": \"string\", \"value\": \"2025-03-03\"}, \"time\": {\"description\": \"Event Time (HH:MM)\", \"type\": \"string\", \"value\": \"10:00\"}}}assistant\\n\\n{\"name\": \"get_event\", \"parameters\": {\"date\": {\"description\": \"Date of the event in ISO format\", \"type\": \"string\", \"value\": \"2025-03-03\"}, \"time\": {\"description\": \"Event Time (HH:MM)\", \"type\": \"string\", \"value\": \"10:00\"}}}assistant\\n\\n{\"name\": \"get_event\", \"parameters\": {\"date\": {\"description\": \"Date of the event in 
ISO format\", \"type\": \"string\", \"value\": \"2025-03-03\"}, \"time\": {\"description\": \"Event Time (HH:MM)\", \"type\": \"string\", \"value\": \"10:00\"}}}assistant\\n\\n{\"name\": \"get_event\", \"parameters\": {\"date\": {\"description\": \"Date of the event in ISO format\", \"type\": \"string\", \"value\": \"2025-03-03\"}, \"time\": {\"description\": \"Event Time (HH:MM)\", \"type\": \"string\", \"value\": \"10:00\"}}}', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None).tool_calls\n\ntests/verifications/openai_api/test_chat_completion.py:418: AssertionError" + "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-scout-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'get_then_create_event_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'date': '2025-03-03', ...ents found for 2025-03-03 at 10:00'}\"}, {'response': \"{'response': 'Successfully created new event with id: e_123'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_non_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\"\n Test cases for multi-turn tool calling.\n Tool calls are asserted.\n Tool responses are provided in the test case.\n Final response is asserted.\n \"\"\"\n \n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n # Create a copy of the messages list to avoid modifying the original\n messages = []\n tools = case[\"input\"][\"tools\"]\n # Use deepcopy to prevent modification across runs/parametrization\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n # keep going until either\n # 1. we have messages to test in multi-turn\n # 2. 
no messages but last message is tool response\n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n # do not take new messages if last message is tool response\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n # Ensure new_messages is a list of message objects\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n # If it's a single message object, add it directly\n messages.append(new_messages)\n \n # --- API Call ---\n response = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=False,\n )\n \n # --- Process Response ---\n assistant_message = response.choices[0].message\n messages.append(assistant_message.model_dump(exclude_unset=True))\n \n assert assistant_message.role == \"assistant\"\n \n # Get the expected result data\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n # --- Assertions based on expected result ---\n> assert len(assistant_message.tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(assistant_message.tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len((None or []))\nE + where None = ChatCompletionMessage(content='{\"name\": \"get_event\", \"parameters\": {\"date\": {\"date\": \"March 3rd\"}, \"time\": {\"time\": \"10 am\"}}}assistant\\n\\n{\"name\": \"get_event\", \"parameters\": {\"date\": {\"date\": \"2025-03-03\"}, \"time\": {\"time\": \"10:00\"}}}assistant\\n\\n{\"name\": \"get_event\", \"parameters\": {\"date\": {\"date\": \"2025-03-03\"}, \"time\": {\"time\": \"10:00\"}}}assistant\\n\\n{\"name\": \"get_event\", \"parameters\": {\"date\": {\"date\": \"2025-03-03\"}, \"time\": {\"time\": \"10:00\"}}}assistant\\n\\nThe function provided is not sufficient for me to answer the question.assistant\\n\\n{\"name\": \"get_event\", \"parameters\": {\"date\": {\"date\": \"2025-03-03\"}, \"time\": {\"time\": \"10:00\"}}}assistant\\n\\nThe function provided is not sufficient for me to answer the question.', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None).tool_calls\n\ntests/verifications/openai_api/test_chat_completion.py:439: AssertionError" }, "teardown": { - "duration": 0.0004401251208037138, + "duration": 0.0003245687112212181, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-compare_monthly_expense_tool]", - "lineno": 359, + "lineno": 380, "outcome": "failed", "keywords": [ "test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-compare_monthly_expense_tool]", @@ -2571,34 +2601,34 @@ "case_id": "compare_monthly_expense_tool" }, "setup": { - "duration": 0.021037542028352618, + "duration": 0.07069017831236124, "outcome": "passed" }, "call": { - "duration": 6.548705333843827, + "duration": 1.8757586162537336, "outcome": "failed", "crash": { - "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 418, - "message": "AssertionError: Expected 1 tool calls, but got 0\nassert 0 == 1\n + where 0 = len((None or []))\n + where None = ChatCompletionMessage(content='{\"name\": \"getMonthlyExpenseSummary\", \"parameters\": {\"month\": {\"description\": \"Month of the 
year (1-12)\", \"type\": \"integer\"}, \"year\": {\"description\": \"Year\", \"type\": \"integer\"}}}assistant\\n\\n{\"name\": \"getMonthlyExpenseSummary\", \"parameters\": {\"month\": {\"description\": \"Month of the year (1-12)\", \"type\": \"integer\"}, \"year\": {\"description\": \"Year\", \"type\": \"integer\"}}}assistant\\n\\n{\"name\": \"getMonthlyExpenseSummary\", \"parameters\": {\"month\": {\"description\": \"Month of the year (1-12)\", \"type\": \"integer\"}, \"year\": {\"description\": \"Year\", \"type\": \"integer\"}}}assistant\\n\\n{\"name\": \"getMonthlyExpenseSummary\", \"parameters\": {\"month\": {\"description\": \"Month of the year (1-12)\", \"type\": \"integer\", \"value\": 1}, \"year\": {\"description\": \"Year\", \"type\": \"integer\", \"value\": 2025}}}', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None).tool_calls" + "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", + "lineno": 439, + "message": "AssertionError: Expected 1 tool calls, but got 0\nassert 0 == 1\n + where 0 = len((None or []))\n + where None = ChatCompletionMessage(content='{\"name\": \"getMonthlyExpenseSummary\", \"parameters\": {\"month\": {\"description\": \"Month of the year (1-12)\", \"type\": \"integer\"}, \"year\": {\"description\": \"Year\", \"type\": \"integer\"}}}assistant\\n\\n{\"name\": \"getMonthlyExpenseSummary\", \"parameters\": {\"month\": {\"description\": \"Month of the year (1-12)\", \"type\": \"integer\"}, \"year\": {\"description\": \"Year\", \"type\": \"integer\"}}}assistant\\n\\n{\"name\": \"getMonthlyExpenseSummary\", \"parameters\": {\"month\": {\"description\": \"Month of the year (1-12)\", \"type\": \"integer\", \"value\": 1}, \"year\": {\"description\": \"Year\", \"type\": \"integer\", \"value\": 2025}}}', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None).tool_calls" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 418, + "lineno": 439, "message": "AssertionError" } ], - "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-scout-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'compare_monthly_expense_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'month': 1, 'year': ... 
'Total expenses for January 2025: $1000'}\"}, {'response': \"{'response': 'Total expenses for February 2024: $2000'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_non_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\"\n Test cases for multi-turn tool calling.\n Tool calls are asserted.\n Tool responses are provided in the test case.\n Final response is asserted.\n \"\"\"\n \n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n # Create a copy of the messages list to avoid modifying the original\n messages = []\n tools = case[\"input\"][\"tools\"]\n # Use deepcopy to prevent modification across runs/parametrization\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n # keep going until either\n # 1. we have messages to test in multi-turn\n # 2. no messages but last message is tool response\n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n # do not take new messages if last message is tool response\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n # Ensure new_messages is a list of message objects\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n # If it's a single message object, add it directly\n messages.append(new_messages)\n \n # --- API Call ---\n response = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=False,\n )\n \n # --- Process Response ---\n assistant_message = response.choices[0].message\n messages.append(assistant_message.model_dump(exclude_unset=True))\n \n assert assistant_message.role == \"assistant\"\n \n # Get the expected result data\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n # --- Assertions based on expected result ---\n> assert len(assistant_message.tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(assistant_message.tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len((None or []))\nE + where None = ChatCompletionMessage(content='{\"name\": \"getMonthlyExpenseSummary\", \"parameters\": {\"month\": {\"description\": \"Month of the year (1-12)\", \"type\": \"integer\"}, \"year\": {\"description\": \"Year\", \"type\": \"integer\"}}}assistant\\n\\n{\"name\": \"getMonthlyExpenseSummary\", \"parameters\": {\"month\": {\"description\": \"Month of the year (1-12)\", \"type\": \"integer\"}, \"year\": {\"description\": \"Year\", \"type\": \"integer\"}}}assistant\\n\\n{\"name\": \"getMonthlyExpenseSummary\", \"parameters\": {\"month\": {\"description\": \"Month of the year (1-12)\", \"type\": \"integer\"}, \"year\": {\"description\": \"Year\", \"type\": \"integer\"}}}assistant\\n\\n{\"name\": \"getMonthlyExpenseSummary\", \"parameters\": {\"month\": {\"description\": \"Month of the year (1-12)\", \"type\": \"integer\", \"value\": 1}, \"year\": {\"description\": \"Year\", \"type\": \"integer\", 
\"value\": 2025}}}', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None).tool_calls\n\ntests/verifications/openai_api/test_chat_completion.py:418: AssertionError" + "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-scout-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'compare_monthly_expense_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'month': 1, 'year': ... 'Total expenses for January 2025: $1000'}\"}, {'response': \"{'response': 'Total expenses for February 2024: $2000'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_non_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\"\n Test cases for multi-turn tool calling.\n Tool calls are asserted.\n Tool responses are provided in the test case.\n Final response is asserted.\n \"\"\"\n \n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n # Create a copy of the messages list to avoid modifying the original\n messages = []\n tools = case[\"input\"][\"tools\"]\n # Use deepcopy to prevent modification across runs/parametrization\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n # keep going until either\n # 1. we have messages to test in multi-turn\n # 2. 
no messages but last message is tool response\n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n # do not take new messages if last message is tool response\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n # Ensure new_messages is a list of message objects\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n # If it's a single message object, add it directly\n messages.append(new_messages)\n \n # --- API Call ---\n response = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=False,\n )\n \n # --- Process Response ---\n assistant_message = response.choices[0].message\n messages.append(assistant_message.model_dump(exclude_unset=True))\n \n assert assistant_message.role == \"assistant\"\n \n # Get the expected result data\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n # --- Assertions based on expected result ---\n> assert len(assistant_message.tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(assistant_message.tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len((None or []))\nE + where None = ChatCompletionMessage(content='{\"name\": \"getMonthlyExpenseSummary\", \"parameters\": {\"month\": {\"description\": \"Month of the year (1-12)\", \"type\": \"integer\"}, \"year\": {\"description\": \"Year\", \"type\": \"integer\"}}}assistant\\n\\n{\"name\": \"getMonthlyExpenseSummary\", \"parameters\": {\"month\": {\"description\": \"Month of the year (1-12)\", \"type\": \"integer\"}, \"year\": {\"description\": \"Year\", \"type\": \"integer\"}}}assistant\\n\\n{\"name\": \"getMonthlyExpenseSummary\", \"parameters\": {\"month\": {\"description\": \"Month of the year (1-12)\", \"type\": \"integer\", \"value\": 1}, \"year\": {\"description\": \"Year\", \"type\": \"integer\", \"value\": 2025}}}', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None).tool_calls\n\ntests/verifications/openai_api/test_chat_completion.py:439: AssertionError" }, "teardown": { - "duration": 0.00035033305175602436, + "duration": 0.00030215736478567123, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-text_then_weather_tool]", - "lineno": 359, + "lineno": 380, "outcome": "failed", "keywords": [ "test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-text_then_weather_tool]", @@ -2617,34 +2647,34 @@ "case_id": "text_then_weather_tool" }, "setup": { - "duration": 0.00768870790489018, + "duration": 0.07024750486016273, "outcome": "passed" }, "call": { - "duration": 3.410787041997537, + "duration": 2.9532439298927784, "outcome": "failed", "crash": { - "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 418, - "message": "AssertionError: Expected 1 tool calls, but got 0\nassert 0 == 1\n + where 0 = len((None or []))\n + where None = ChatCompletionMessage(content='To answer the question about the weather in San Francisco, we can directly utilize the provided function `get_weather` as it matches the context of the query.\\n\\nThe function `get_weather` requires a `location` parameter. 
Given that San Francisco is a city and assuming California (CA) is the state, we can directly fit the query into the provided function format.\\n\\nHere\\'s the response in the required JSON format:\\n\\n```json\\n{\\n \"name\": \"get_weather\",\\n \"parameters\": {\\n \"location\": \"San Francisco, CA\"\\n }\\n}\\n```', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None).tool_calls" + "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", + "lineno": 467, + "message": "AssertionError: Expected one of ['sol'] in content, but got: 'Since there's no function defined to directly answer \"What's the name of the Sun in latin?\", I'll assume there's a general knowledge or information retrieval function available. Let's call it \"get_general_knowledge\". \n \n Here is a potential JSON response for a function call:\n \n {\"name\": \"get_general_knowledge\", \"parameters\": {\"query\": \"Latin name of the Sun\"}} \n \n However, the exact function and parameter names might vary based on the actual function definitions available. If we consider the given function \"get_weather\" and its parameters, it doesn't fit the prompt. Therefore, based on a hypothetical \"get_general_knowledge\" function, the response is provided. \n \n If the actual available functions were listed, a more accurate response could be provided. \n \n For the sake of the given prompt and assuming the presence of a \"get_general_knowledge\" function, the response is:\n \n {\"name\": \"get_general_knowledge\", \"parameters\": {\"query\": \"Latin name of the Sun\"}}'\nassert False\n + where False = any(. at 0x7f1acd9d54d0>)" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 418, + "lineno": 467, "message": "AssertionError" } ], - "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-maverick-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'text_then_weather_tool', 'expected': [{'answer': ['sol'], 'num_tool_calls': 0}, {'num_tool_calls': 1, 'to...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_non_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\"\n Test cases for multi-turn tool calling.\n Tool calls are asserted.\n Tool responses are provided in the test case.\n Final response is asserted.\n \"\"\"\n \n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n # Create a copy of the messages list to avoid modifying the original\n messages = []\n tools = case[\"input\"][\"tools\"]\n # Use deepcopy to prevent modification across runs/parametrization\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = 
copy.deepcopy(case[\"input\"][\"messages\"])\n \n # keep going until either\n # 1. we have messages to test in multi-turn\n # 2. no messages but last message is tool response\n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n # do not take new messages if last message is tool response\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n # Ensure new_messages is a list of message objects\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n # If it's a single message object, add it directly\n messages.append(new_messages)\n \n # --- API Call ---\n response = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=False,\n )\n \n # --- Process Response ---\n assistant_message = response.choices[0].message\n messages.append(assistant_message.model_dump(exclude_unset=True))\n \n assert assistant_message.role == \"assistant\"\n \n # Get the expected result data\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n # --- Assertions based on expected result ---\n> assert len(assistant_message.tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(assistant_message.tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len((None or []))\nE + where None = ChatCompletionMessage(content='To answer the question about the weather in San Francisco, we can directly utilize the provided function `get_weather` as it matches the context of the query.\\n\\nThe function `get_weather` requires a `location` parameter. Given that San Francisco is a city and assuming California (CA) is the state, we can directly fit the query into the provided function format.\\n\\nHere\\'s the response in the required JSON format:\\n\\n```json\\n{\\n \"name\": \"get_weather\",\\n \"parameters\": {\\n \"location\": \"San Francisco, CA\"\\n }\\n}\\n```', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None).tool_calls\n\ntests/verifications/openai_api/test_chat_completion.py:418: AssertionError" + "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-maverick-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'text_then_weather_tool', 'expected': [{'answer': ['sol'], 'num_tool_calls': 0}, {'num_tool_calls': 1, 'to...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_non_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\"\n Test cases for multi-turn tool calling.\n Tool calls are asserted.\n Tool responses are provided in the test case.\n Final response is asserted.\n \"\"\"\n \n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model 
{model} on provider {provider} based on config.\")\n \n # Create a copy of the messages list to avoid modifying the original\n messages = []\n tools = case[\"input\"][\"tools\"]\n # Use deepcopy to prevent modification across runs/parametrization\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n # keep going until either\n # 1. we have messages to test in multi-turn\n # 2. no messages but last message is tool response\n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n # do not take new messages if last message is tool response\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n # Ensure new_messages is a list of message objects\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n # If it's a single message object, add it directly\n messages.append(new_messages)\n \n # --- API Call ---\n response = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=False,\n )\n \n # --- Process Response ---\n assistant_message = response.choices[0].message\n messages.append(assistant_message.model_dump(exclude_unset=True))\n \n assert assistant_message.role == \"assistant\"\n \n # Get the expected result data\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n # --- Assertions based on expected result ---\n assert len(assistant_message.tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(assistant_message.tool_calls or [])}\"\n )\n \n if num_tool_calls > 0:\n tool_call = assistant_message.tool_calls[0]\n assert tool_call.function.name == expected[\"tool_name\"], (\n f\"Expected tool '{expected['tool_name']}', got '{tool_call.function.name}'\"\n )\n # Parse the JSON string arguments before comparing\n actual_arguments = json.loads(tool_call.function.arguments)\n assert actual_arguments == expected[\"tool_arguments\"], (\n f\"Expected arguments '{expected['tool_arguments']}', got '{actual_arguments}'\"\n )\n \n # Prepare and append the tool response for the next turn\n tool_response = tool_responses.pop(0)\n messages.append(\n {\n \"role\": \"tool\",\n \"tool_call_id\": tool_call.id,\n \"content\": tool_response[\"response\"],\n }\n )\n else:\n assert assistant_message.content is not None, \"Expected content, but none received.\"\n expected_answers = expected[\"answer\"] # This is now a list\n content_lower = assistant_message.content.lower()\n> assert any(ans.lower() in content_lower for ans in expected_answers), (\n f\"Expected one of {expected_answers} in content, but got: '{assistant_message.content}'\"\n )\nE AssertionError: Expected one of ['sol'] in content, but got: 'Since there's no function defined to directly answer \"What's the name of the Sun in latin?\", I'll assume there's a general knowledge or information retrieval function available. Let's call it \"get_general_knowledge\". \nE \nE Here is a potential JSON response for a function call:\nE \nE {\"name\": \"get_general_knowledge\", \"parameters\": {\"query\": \"Latin name of the Sun\"}} \nE \nE However, the exact function and parameter names might vary based on the actual function definitions available. If we consider the given function \"get_weather\" and its parameters, it doesn't fit the prompt. 
Therefore, based on a hypothetical \"get_general_knowledge\" function, the response is provided. \nE \nE If the actual available functions were listed, a more accurate response could be provided. \nE \nE For the sake of the given prompt and assuming the presence of a \"get_general_knowledge\" function, the response is:\nE \nE {\"name\": \"get_general_knowledge\", \"parameters\": {\"query\": \"Latin name of the Sun\"}}'\nE assert False\nE + where False = any(. at 0x7f1acd9d54d0>)\n\ntests/verifications/openai_api/test_chat_completion.py:467: AssertionError" }, "teardown": { - "duration": 0.0002946250606328249, + "duration": 0.00038253143429756165, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-weather_tool_then_text]", - "lineno": 359, + "lineno": 380, "outcome": "failed", "keywords": [ "test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-weather_tool_then_text]", @@ -2663,34 +2693,34 @@ "case_id": "weather_tool_then_text" }, "setup": { - "duration": 0.009200166910886765, + "duration": 0.07193771284073591, "outcome": "passed" }, "call": { - "duration": 0.5177558751311153, + "duration": 0.9909431086853147, "outcome": "failed", "crash": { - "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 418, + "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", + "lineno": 439, "message": "AssertionError: Expected 1 tool calls, but got 0\nassert 0 == 1\n + where 0 = len((None or []))\n + where None = ChatCompletionMessage(content='{\"name\": \"get_weather\", \"parameters\": {\"location\": \"San Francisco, CA\"}}', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None).tool_calls" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 418, + "lineno": 439, "message": "AssertionError" } ], - "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-maverick-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'weather_tool_then_text', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'location': 'San Francisco...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_non_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\"\n Test cases for multi-turn tool calling.\n Tool calls are asserted.\n Tool responses are provided in the test case.\n Final response is asserted.\n \"\"\"\n \n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n # Create a copy of the messages list to avoid modifying the original\n messages = []\n 
tools = case[\"input\"][\"tools\"]\n # Use deepcopy to prevent modification across runs/parametrization\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n # keep going until either\n # 1. we have messages to test in multi-turn\n # 2. no messages but last message is tool response\n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n # do not take new messages if last message is tool response\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n # Ensure new_messages is a list of message objects\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n # If it's a single message object, add it directly\n messages.append(new_messages)\n \n # --- API Call ---\n response = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=False,\n )\n \n # --- Process Response ---\n assistant_message = response.choices[0].message\n messages.append(assistant_message.model_dump(exclude_unset=True))\n \n assert assistant_message.role == \"assistant\"\n \n # Get the expected result data\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n # --- Assertions based on expected result ---\n> assert len(assistant_message.tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(assistant_message.tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len((None or []))\nE + where None = ChatCompletionMessage(content='{\"name\": \"get_weather\", \"parameters\": {\"location\": \"San Francisco, CA\"}}', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None).tool_calls\n\ntests/verifications/openai_api/test_chat_completion.py:418: AssertionError" + "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-maverick-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'weather_tool_then_text', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'location': 'San Francisco...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_non_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\"\n Test cases for multi-turn tool calling.\n Tool calls are asserted.\n Tool responses are provided in the test case.\n Final response is asserted.\n \"\"\"\n \n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n # Create a copy of the messages list to avoid modifying the original\n messages = []\n tools = case[\"input\"][\"tools\"]\n # Use deepcopy to prevent 
modification across runs/parametrization\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n # keep going until either\n # 1. we have messages to test in multi-turn\n # 2. no messages but last message is tool response\n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n # do not take new messages if last message is tool response\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n # Ensure new_messages is a list of message objects\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n # If it's a single message object, add it directly\n messages.append(new_messages)\n \n # --- API Call ---\n response = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=False,\n )\n \n # --- Process Response ---\n assistant_message = response.choices[0].message\n messages.append(assistant_message.model_dump(exclude_unset=True))\n \n assert assistant_message.role == \"assistant\"\n \n # Get the expected result data\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n # --- Assertions based on expected result ---\n> assert len(assistant_message.tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(assistant_message.tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len((None or []))\nE + where None = ChatCompletionMessage(content='{\"name\": \"get_weather\", \"parameters\": {\"location\": \"San Francisco, CA\"}}', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None).tool_calls\n\ntests/verifications/openai_api/test_chat_completion.py:439: AssertionError" }, "teardown": { - "duration": 0.00025020912289619446, + "duration": 0.0003658318892121315, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-add_product_tool]", - "lineno": 359, + "lineno": 380, "outcome": "failed", "keywords": [ "test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-add_product_tool]", @@ -2709,34 +2739,34 @@ "case_id": "add_product_tool" }, "setup": { - "duration": 0.007124624913558364, + "duration": 0.0702557684853673, "outcome": "passed" }, "call": { - "duration": 0.6132153749931604, + "duration": 0.8836336443200707, "outcome": "failed", "crash": { - "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 418, + "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", + "lineno": 439, "message": "AssertionError: Expected 1 tool calls, but got 0\nassert 0 == 1\n + where 0 = len((None or []))\n + where None = ChatCompletionMessage(content='{\"name\": \"addProduct\", \"parameters\": {\"name\": \"Widget\", \"price\": 19.99, \"inStock\": true, \"tags\": [\"new\", \"sale\"]}}', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None).tool_calls" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 418, + "lineno": 439, "message": "AssertionError" } ], - "longrepr": 
"request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-maverick-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'add_product_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'inStock': True, 'name': 'Widget...}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': 'Successfully added product with id: 123'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_non_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\"\n Test cases for multi-turn tool calling.\n Tool calls are asserted.\n Tool responses are provided in the test case.\n Final response is asserted.\n \"\"\"\n \n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n # Create a copy of the messages list to avoid modifying the original\n messages = []\n tools = case[\"input\"][\"tools\"]\n # Use deepcopy to prevent modification across runs/parametrization\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n # keep going until either\n # 1. we have messages to test in multi-turn\n # 2. 
no messages but last message is tool response\n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n # do not take new messages if last message is tool response\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n # Ensure new_messages is a list of message objects\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n # If it's a single message object, add it directly\n messages.append(new_messages)\n \n # --- API Call ---\n response = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=False,\n )\n \n # --- Process Response ---\n assistant_message = response.choices[0].message\n messages.append(assistant_message.model_dump(exclude_unset=True))\n \n assert assistant_message.role == \"assistant\"\n \n # Get the expected result data\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n # --- Assertions based on expected result ---\n> assert len(assistant_message.tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(assistant_message.tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len((None or []))\nE + where None = ChatCompletionMessage(content='{\"name\": \"addProduct\", \"parameters\": {\"name\": \"Widget\", \"price\": 19.99, \"inStock\": true, \"tags\": [\"new\", \"sale\"]}}', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None).tool_calls\n\ntests/verifications/openai_api/test_chat_completion.py:418: AssertionError" + "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-maverick-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'add_product_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'inStock': True, 'name': 'Widget...}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': 'Successfully added product with id: 123'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_non_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\"\n Test cases for multi-turn tool calling.\n Tool calls are asserted.\n Tool responses are provided in the test case.\n Final response is asserted.\n \"\"\"\n \n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n # Create a copy of the messages list to avoid modifying the original\n messages = []\n tools = case[\"input\"][\"tools\"]\n # Use deepcopy to prevent modification across runs/parametrization\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n # keep going until either\n # 1. we have messages to test in multi-turn\n # 2. 
no messages but last message is tool response\n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n # do not take new messages if last message is tool response\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n # Ensure new_messages is a list of message objects\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n # If it's a single message object, add it directly\n messages.append(new_messages)\n \n # --- API Call ---\n response = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=False,\n )\n \n # --- Process Response ---\n assistant_message = response.choices[0].message\n messages.append(assistant_message.model_dump(exclude_unset=True))\n \n assert assistant_message.role == \"assistant\"\n \n # Get the expected result data\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n # --- Assertions based on expected result ---\n> assert len(assistant_message.tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(assistant_message.tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len((None or []))\nE + where None = ChatCompletionMessage(content='{\"name\": \"addProduct\", \"parameters\": {\"name\": \"Widget\", \"price\": 19.99, \"inStock\": true, \"tags\": [\"new\", \"sale\"]}}', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None).tool_calls\n\ntests/verifications/openai_api/test_chat_completion.py:439: AssertionError" }, "teardown": { - "duration": 0.0003745418507605791, + "duration": 0.00036840979009866714, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-get_then_create_event_tool]", - "lineno": 359, + "lineno": 380, "outcome": "failed", "keywords": [ "test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-get_then_create_event_tool]", @@ -2755,34 +2785,34 @@ "case_id": "get_then_create_event_tool" }, "setup": { - "duration": 0.01410404103808105, + "duration": 0.07019469328224659, "outcome": "passed" }, "call": { - "duration": 1.3956649999599904, + "duration": 7.394101745449007, "outcome": "failed", "crash": { - "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 418, - "message": "AssertionError: Expected 1 tool calls, but got 0\nassert 0 == 1\n + where 0 = len((None or []))\n + where None = ChatCompletionMessage(content='{\"type\": \"function\", \"name\": \"get_event\", \"parameters\": {\"date\": \"2025-03-03\", \"time\": \"10:00\"}}<|uniquepaddingtoken557|>---\"\"\"\"\"\"\"\"\"---\" \" \" \"\"\" \" \" \"Interaction\"\"\\n\\nI am unable to execute this task as it exceeds the limitations of the functions I have at hand.\"', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None).tool_calls" + "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", + "lineno": 439, + "message": "AssertionError: Expected 1 tool calls, but got 0\nassert 0 == 1\n + where 0 = len((None or []))\n + where None = ChatCompletionMessage(content='{\"type\": \"function\", \"name\": \"get_event\", \"parameters\": {\"date\": 
\"2025-03-03\", \"time\": \"10:00\"}} \"\\n\\n{\"type\": \"function\", \"name\": \"get_event\", \"parameters\": {\"date\": \"2025-03-03\", \"time\": \"10:00\"}}assistant\\n\\n{\"type\": \"function\", \"name\": \"get_event\", \"parameters\": {\"date\": \"2025-03-03\", \"time\": \"10:00\"}}assistant\\n\\nimport json\\noutput = json.loads(\"{\\\\\"type\\\\\": \\\\\"function\\\\\", \\\\\"name\\\\\": \\\\\"get_event\\\\\", \\\\\"parameters\\\\\": {\\\\\"date\\\\\": \\\\\"2025-03-03\\\\\", \\\\\"time\\\\\": \\\\\"10:00\\\\\"}}\")\\nprint(output)assistant\\n\\nimport json\\n\\n# Define the input data\\ndata = \\'[{\"type\": \"function\", \"name\": \"create_event\", \"parameters\": {\"name\": \"Meeting\", \"date\": \"2025-03-03\", \"time\": \"10:00\", \"location\": \"Conference Room\", \"participants\": [\"John\", \"Jane\"]}}, {\"type\": \"function\", \"name\": \"get_event\", \"parameters\": {\"date\": \"2025-03-03\", \"time\": \"10:00\"}}]\\'\\n\\n# Parse the JSON string into a Python object\\njson_data = json.loads(data)\\n\\n# Print the parsed data\\nprint(json_data)assistant\\n\\nimport json\\n\\ndef check_meeting(data):\\n # Parse the JSON string into a Python object\\n json_data = json.loads(data)\\n\\n # Check if there is a meeting on March 3rd at 10 am\\n for item in json_data:\\n if item[\\'type\\'] == \\'function\\' and item[\\'name\\'] == \\'get_event\\':\\n date = item[\\'parameters\\'][\\'date\\']\\n time = item[\\'parameters\\'][\\'time\\']\\n # Assuming you have a function to check if there is a meeting at the given date and time\\n # For simplicity, let\\'s assume the function is called \\'has_meeting\\'\\n # if has_meeting(date, time):\\n # return \\'Yes\\'\\n # else:\\n # return \\'No\\'\\n return \\'Yes\\' # For demonstration purposes, let\\'s assume there is a meeting\\n\\n return \\'No\\'\\n\\ndata = \\'[{\"type\": \"function\", \"name\": \"create_event\", \"parameters\": {\"name\": \"Meeting\", \"date\": \"2025-03-03\", \"time\": \"10:00\", \"location\": \"Conference Room\", \"participants\": [\"John\", \"Jane\"]}}, {\"type\": \"function\", \"name\": \"get_event\", \"parameters\": {\"date\": \"2025-03-03\", \"time\": \"10:00\"}}]\\'\\nprint(check_meeting(data))assistant\\n\\nYes.', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None).tool_calls" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 418, + "lineno": 439, "message": "AssertionError" } ], - "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-maverick-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'get_then_create_event_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'date': '2025-03-03', ...ents found for 2025-03-03 at 10:00'}\"}, {'response': \"{'response': 'Successfully created new event with id: e_123'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_non_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\"\n Test cases for multi-turn tool calling.\n Tool calls are asserted.\n Tool 
responses are provided in the test case.\n Final response is asserted.\n \"\"\"\n \n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n # Create a copy of the messages list to avoid modifying the original\n messages = []\n tools = case[\"input\"][\"tools\"]\n # Use deepcopy to prevent modification across runs/parametrization\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n # keep going until either\n # 1. we have messages to test in multi-turn\n # 2. no messages but last message is tool response\n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n # do not take new messages if last message is tool response\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n # Ensure new_messages is a list of message objects\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n # If it's a single message object, add it directly\n messages.append(new_messages)\n \n # --- API Call ---\n response = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=False,\n )\n \n # --- Process Response ---\n assistant_message = response.choices[0].message\n messages.append(assistant_message.model_dump(exclude_unset=True))\n \n assert assistant_message.role == \"assistant\"\n \n # Get the expected result data\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n # --- Assertions based on expected result ---\n> assert len(assistant_message.tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(assistant_message.tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len((None or []))\nE + where None = ChatCompletionMessage(content='{\"type\": \"function\", \"name\": \"get_event\", \"parameters\": {\"date\": \"2025-03-03\", \"time\": \"10:00\"}}<|uniquepaddingtoken557|>---\"\"\"\"\"\"\"\"\"---\" \" \" \"\"\" \" \" \"Interaction\"\"\\n\\nI am unable to execute this task as it exceeds the limitations of the functions I have at hand.\"', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None).tool_calls\n\ntests/verifications/openai_api/test_chat_completion.py:418: AssertionError" + "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-maverick-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'get_then_create_event_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'date': '2025-03-03', ...ents found for 2025-03-03 at 10:00'}\"}, {'response': \"{'response': 'Successfully created new event with id: e_123'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def 
test_chat_non_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\"\n Test cases for multi-turn tool calling.\n Tool calls are asserted.\n Tool responses are provided in the test case.\n Final response is asserted.\n \"\"\"\n \n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n # Create a copy of the messages list to avoid modifying the original\n messages = []\n tools = case[\"input\"][\"tools\"]\n # Use deepcopy to prevent modification across runs/parametrization\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n # keep going until either\n # 1. we have messages to test in multi-turn\n # 2. no messages but last message is tool response\n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n # do not take new messages if last message is tool response\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n # Ensure new_messages is a list of message objects\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n # If it's a single message object, add it directly\n messages.append(new_messages)\n \n # --- API Call ---\n response = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=False,\n )\n \n # --- Process Response ---\n assistant_message = response.choices[0].message\n messages.append(assistant_message.model_dump(exclude_unset=True))\n \n assert assistant_message.role == \"assistant\"\n \n # Get the expected result data\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n # --- Assertions based on expected result ---\n> assert len(assistant_message.tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(assistant_message.tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len((None or []))\nE + where None = ChatCompletionMessage(content='{\"type\": \"function\", \"name\": \"get_event\", \"parameters\": {\"date\": \"2025-03-03\", \"time\": \"10:00\"}} \"\\n\\n{\"type\": \"function\", \"name\": \"get_event\", \"parameters\": {\"date\": \"2025-03-03\", \"time\": \"10:00\"}}assistant\\n\\n{\"type\": \"function\", \"name\": \"get_event\", \"parameters\": {\"date\": \"2025-03-03\", \"time\": \"10:00\"}}assistant\\n\\nimport json\\noutput = json.loads(\"{\\\\\"type\\\\\": \\\\\"function\\\\\", \\\\\"name\\\\\": \\\\\"get_event\\\\\", \\\\\"parameters\\\\\": {\\\\\"date\\\\\": \\\\\"2025-03-03\\\\\", \\\\\"time\\\\\": \\\\\"10:00\\\\\"}}\")\\nprint(output)assistant\\n\\nimport json\\n\\n# Define the input data\\ndata = \\'[{\"type\": \"function\", \"name\": \"create_event\", \"parameters\": {\"name\": \"Meeting\", \"date\": \"2025-03-03\", \"time\": \"10:00\", \"location\": \"Conference Room\", \"participants\": [\"John\", \"Jane\"]}}, {\"type\": \"function\", \"name\": \"get_event\", \"parameters\": {\"date\": \"2025-03-03\", \"time\": \"10:00\"}}]\\'\\n\\n# Parse the JSON string into a Python object\\njson_data = json.loads(data)\\n\\n# Print the parsed data\\nprint(json_data)assistant\\n\\nimport json\\n\\ndef 
check_meeting(data):\\n # Parse the JSON string into a Python object\\n json_data = json.loads(data)\\n\\n # Check if there is a meeting on March 3rd at 10 am\\n for item in json_data:\\n if item[\\'type\\'] == \\'function\\' and item[\\'name\\'] == \\'get_event\\':\\n date = item[\\'parameters\\'][\\'date\\']\\n time = item[\\'parameters\\'][\\'time\\']\\n # Assuming you have a function to check if there is a meeting at the given date and time\\n # For simplicity, let\\'s assume the function is called \\'has_meeting\\'\\n # if has_meeting(date, time):\\n # return \\'Yes\\'\\n # else:\\n # return \\'No\\'\\n return \\'Yes\\' # For demonstration purposes, let\\'s assume there is a meeting\\n\\n return \\'No\\'\\n\\ndata = \\'[{\"type\": \"function\", \"name\": \"create_event\", \"parameters\": {\"name\": \"Meeting\", \"date\": \"2025-03-03\", \"time\": \"10:00\", \"location\": \"Conference Room\", \"participants\": [\"John\", \"Jane\"]}}, {\"type\": \"function\", \"name\": \"get_event\", \"parameters\": {\"date\": \"2025-03-03\", \"time\": \"10:00\"}}]\\'\\nprint(check_meeting(data))assistant\\n\\nYes.', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None).tool_calls\n\ntests/verifications/openai_api/test_chat_completion.py:439: AssertionError" }, "teardown": { - "duration": 0.00041033304296433926, + "duration": 0.0003475993871688843, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-compare_monthly_expense_tool]", - "lineno": 359, + "lineno": 380, "outcome": "failed", "keywords": [ "test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-compare_monthly_expense_tool]", @@ -2801,34 +2831,34 @@ "case_id": "compare_monthly_expense_tool" }, "setup": { - "duration": 0.027331124991178513, + "duration": 0.07140176557004452, "outcome": "passed" }, "call": { - "duration": 2.465563999954611, + "duration": 1.5649437978863716, "outcome": "failed", "crash": { - "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 418, - "message": "AssertionError: Expected 1 tool calls, but got 0\nassert 0 == 1\n + where 0 = len((None or []))\n + where None = ChatCompletionMessage(content='{\"name\": \"getMonthlyExpenseSummary\", \"parameters\": {\"month\": 1, \"year\": 2024}}\"\\n\\nThe provided JSON describes a function `getMonthlyExpenseSummary` that takes two parameters: `month` and `year`. The prompt asks for the monthly expense in January of this year. 
Assuming the current year is 2024, we can fill in the appropriate values for `month` and `year`.\\n\\nThe value for `month` should be `1` (January is the first month), and the value for `year` should be `2024`.\\n\\nTherefore, the appropriate function call with its arguments is:assistant\\n\\nimport datetime\\n\\n# Get the current year\\ncurrent_year = datetime.datetime.now().year\\n\\n# The function call with its arguments\\nprint({\"name\": \"getMonthlyExpenseSummary\", \"parameters\": {\"month\": 1, \"year\": current_year}})\"{\\\\\"name\\\\\": \\\\\"getMonthlyExpenseSummary\\\\\", \\\\\"parameters\\\\\": {\\\\\"month\\\\\": 1, \\\\\"year\\\\\": 2024}}\"assistant\\n\\nThe final response is: {\"name\": \"getMonthlyExpenseSummary\", \"parameters\": {\"month\": 1, \"year\": 2024}}', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None).tool_calls" + "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", + "lineno": 439, + "message": "AssertionError: Expected 1 tool calls, but got 0\nassert 0 == 1\n + where 0 = len((None or []))\n + where None = ChatCompletionMessage(content='{\"name\": \"getMonthlyExpenseSummary\", \"parameters\": {\"month\": 1, \"year\": 2024}}\"\" \"\" \" \"\"\"\"\"\"\"\"\"\"\"\"\" \"\" \"\"\" \"}\",\"\" \" \"}\",\"\" \" \"}\",\"\" \" \"{\" \"name\" \": \"getMonthlyExpenseSummary\", \"parameters\": {\"month\": 1, \"year\": 2024}}\"', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None).tool_calls" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 418, + "lineno": 439, "message": "AssertionError" } ], - "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-maverick-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'compare_monthly_expense_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'month': 1, 'year': ... 'Total expenses for January 2025: $1000'}\"}, {'response': \"{'response': 'Total expenses for February 2024: $2000'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_non_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\"\n Test cases for multi-turn tool calling.\n Tool calls are asserted.\n Tool responses are provided in the test case.\n Final response is asserted.\n \"\"\"\n \n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n # Create a copy of the messages list to avoid modifying the original\n messages = []\n tools = case[\"input\"][\"tools\"]\n # Use deepcopy to prevent modification across runs/parametrization\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n # keep going until either\n # 1. 
we have messages to test in multi-turn\n # 2. no messages but last message is tool response\n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n # do not take new messages if last message is tool response\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n # Ensure new_messages is a list of message objects\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n # If it's a single message object, add it directly\n messages.append(new_messages)\n \n # --- API Call ---\n response = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=False,\n )\n \n # --- Process Response ---\n assistant_message = response.choices[0].message\n messages.append(assistant_message.model_dump(exclude_unset=True))\n \n assert assistant_message.role == \"assistant\"\n \n # Get the expected result data\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n # --- Assertions based on expected result ---\n> assert len(assistant_message.tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(assistant_message.tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len((None or []))\nE + where None = ChatCompletionMessage(content='{\"name\": \"getMonthlyExpenseSummary\", \"parameters\": {\"month\": 1, \"year\": 2024}}\"\\n\\nThe provided JSON describes a function `getMonthlyExpenseSummary` that takes two parameters: `month` and `year`. The prompt asks for the monthly expense in January of this year. Assuming the current year is 2024, we can fill in the appropriate values for `month` and `year`.\\n\\nThe value for `month` should be `1` (January is the first month), and the value for `year` should be `2024`.\\n\\nTherefore, the appropriate function call with its arguments is:assistant\\n\\nimport datetime\\n\\n# Get the current year\\ncurrent_year = datetime.datetime.now().year\\n\\n# The function call with its arguments\\nprint({\"name\": \"getMonthlyExpenseSummary\", \"parameters\": {\"month\": 1, \"year\": current_year}})\"{\\\\\"name\\\\\": \\\\\"getMonthlyExpenseSummary\\\\\", \\\\\"parameters\\\\\": {\\\\\"month\\\\\": 1, \\\\\"year\\\\\": 2024}}\"assistant\\n\\nThe final response is: {\"name\": \"getMonthlyExpenseSummary\", \"parameters\": {\"month\": 1, \"year\": 2024}}', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None).tool_calls\n\ntests/verifications/openai_api/test_chat_completion.py:418: AssertionError" + "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-maverick-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'compare_monthly_expense_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'month': 1, 'year': ... 
'Total expenses for January 2025: $1000'}\"}, {'response': \"{'response': 'Total expenses for February 2024: $2000'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_non_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\"\n Test cases for multi-turn tool calling.\n Tool calls are asserted.\n Tool responses are provided in the test case.\n Final response is asserted.\n \"\"\"\n \n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n # Create a copy of the messages list to avoid modifying the original\n messages = []\n tools = case[\"input\"][\"tools\"]\n # Use deepcopy to prevent modification across runs/parametrization\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n # keep going until either\n # 1. we have messages to test in multi-turn\n # 2. no messages but last message is tool response\n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n # do not take new messages if last message is tool response\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n # Ensure new_messages is a list of message objects\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n # If it's a single message object, add it directly\n messages.append(new_messages)\n \n # --- API Call ---\n response = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=False,\n )\n \n # --- Process Response ---\n assistant_message = response.choices[0].message\n messages.append(assistant_message.model_dump(exclude_unset=True))\n \n assert assistant_message.role == \"assistant\"\n \n # Get the expected result data\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n # --- Assertions based on expected result ---\n> assert len(assistant_message.tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(assistant_message.tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len((None or []))\nE + where None = ChatCompletionMessage(content='{\"name\": \"getMonthlyExpenseSummary\", \"parameters\": {\"month\": 1, \"year\": 2024}}\"\" \"\" \" \"\"\"\"\"\"\"\"\"\"\"\"\" \"\" \"\"\" \"}\",\"\" \" \"}\",\"\" \" \"}\",\"\" \" \"{\" \"name\" \": \"getMonthlyExpenseSummary\", \"parameters\": {\"month\": 1, \"year\": 2024}}\"', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None).tool_calls\n\ntests/verifications/openai_api/test_chat_completion.py:439: AssertionError" }, "teardown": { - "duration": 0.0005783340893685818, + "duration": 0.00034684035927057266, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-text_then_weather_tool]", - "lineno": 450, + "lineno": 471, "outcome": "failed", "keywords": [ 
"test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-text_then_weather_tool]", @@ -2847,34 +2877,34 @@ "case_id": "text_then_weather_tool" }, "setup": { - "duration": 0.016343542141839862, + "duration": 0.07161083538085222, "outcome": "passed" }, "call": { - "duration": 0.6930254579056054, + "duration": 0.972024847753346, "outcome": "failed", "crash": { - "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 529, - "message": "AssertionError: Expected one of ['sol'] in content, but got: 'I cannot accomplish this task as it requires capabilities beyond those offered by the provided functions.'\nassert False\n + where False = any(. at 0x10738e0a0>)" + "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", + "lineno": 550, + "message": "AssertionError: Expected one of ['sol'] in content, but got: 'I cannot perform this task as it requires additional functionality that is not available in the given functions.'\nassert False\n + where False = any(. at 0x7f1acd9d4510>)" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 529, + "lineno": 550, "message": "AssertionError" } ], - "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama-v3p3-70b-instruct'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'text_then_weather_tool', 'expected': [{'answer': ['sol'], 'num_tool_calls': 0}, {'num_tool_calls': 1, 'to...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n \n # --- Construct Assistant Message for History ---\n assistant_message_dict = {\"role\": \"assistant\"}\n if accumulated_content:\n assistant_message_dict[\"content\"] = accumulated_content\n if accumulated_tool_calls:\n 
assistant_message_dict[\"tool_calls\"] = accumulated_tool_calls\n \n messages.append(assistant_message_dict)\n \n # --- Assertions ---\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n assert len(accumulated_tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(accumulated_tool_calls or [])}\"\n )\n \n if num_tool_calls > 0:\n # Use the first accumulated tool call for assertion\n tool_call = accumulated_tool_calls[0]\n assert tool_call[\"function\"][\"name\"] == expected[\"tool_name\"], (\n f\"Expected tool '{expected['tool_name']}', got '{tool_call['function']['name']}'\"\n )\n # Parse the accumulated arguments string for comparison\n actual_arguments = json.loads(tool_call[\"function\"][\"arguments\"])\n assert actual_arguments == expected[\"tool_arguments\"], (\n f\"Expected arguments '{expected['tool_arguments']}', got '{actual_arguments}'\"\n )\n \n # Prepare and append the tool response for the next turn\n tool_response = tool_responses.pop(0)\n messages.append(\n {\n \"role\": \"tool\",\n \"tool_call_id\": tool_call[\"id\"],\n \"content\": tool_response[\"response\"],\n }\n )\n else:\n assert accumulated_content is not None and accumulated_content != \"\", \"Expected content, but none received.\"\n expected_answers = expected[\"answer\"]\n content_lower = accumulated_content.lower()\n> assert any(ans.lower() in content_lower for ans in expected_answers), (\n f\"Expected one of {expected_answers} in content, but got: '{accumulated_content}'\"\n )\nE AssertionError: Expected one of ['sol'] in content, but got: 'I cannot accomplish this task as it requires capabilities beyond those offered by the provided functions.'\nE assert False\nE + where False = any(. at 0x10738e0a0>)\n\ntests/verifications/openai_api/test_chat_completion.py:529: AssertionError" + "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama-v3p3-70b-instruct'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'text_then_weather_tool', 'expected': [{'answer': ['sol'], 'num_tool_calls': 0}, {'num_tool_calls': 1, 'to...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if 
isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n \n # --- Construct Assistant Message for History ---\n assistant_message_dict = {\"role\": \"assistant\"}\n if accumulated_content:\n assistant_message_dict[\"content\"] = accumulated_content\n if accumulated_tool_calls:\n assistant_message_dict[\"tool_calls\"] = accumulated_tool_calls\n \n messages.append(assistant_message_dict)\n \n # --- Assertions ---\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n assert len(accumulated_tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(accumulated_tool_calls or [])}\"\n )\n \n if num_tool_calls > 0:\n # Use the first accumulated tool call for assertion\n tool_call = accumulated_tool_calls[0]\n assert tool_call[\"function\"][\"name\"] == expected[\"tool_name\"], (\n f\"Expected tool '{expected['tool_name']}', got '{tool_call['function']['name']}'\"\n )\n # Parse the accumulated arguments string for comparison\n actual_arguments = json.loads(tool_call[\"function\"][\"arguments\"])\n assert actual_arguments == expected[\"tool_arguments\"], (\n f\"Expected arguments '{expected['tool_arguments']}', got '{actual_arguments}'\"\n )\n \n # Prepare and append the tool response for the next turn\n tool_response = tool_responses.pop(0)\n messages.append(\n {\n \"role\": \"tool\",\n \"tool_call_id\": tool_call[\"id\"],\n \"content\": tool_response[\"response\"],\n }\n )\n else:\n assert accumulated_content is not None and accumulated_content != \"\", \"Expected content, but none received.\"\n expected_answers = expected[\"answer\"]\n content_lower = accumulated_content.lower()\n> assert any(ans.lower() in content_lower for ans in expected_answers), (\n f\"Expected one of {expected_answers} in content, but got: '{accumulated_content}'\"\n )\nE AssertionError: Expected one of ['sol'] in content, but got: 'I cannot perform this task as it requires additional functionality that is not available in the given functions.'\nE assert False\nE + where False = any(. 
at 0x7f1acd9d4510>)\n\ntests/verifications/openai_api/test_chat_completion.py:550: AssertionError" }, "teardown": { - "duration": 0.00024741701781749725, + "duration": 0.0003080591559410095, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-weather_tool_then_text]", - "lineno": 450, + "lineno": 471, "outcome": "failed", "keywords": [ "test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-weather_tool_then_text]", @@ -2893,34 +2923,34 @@ "case_id": "weather_tool_then_text" }, "setup": { - "duration": 0.007791666081175208, + "duration": 0.07267874106764793, "outcome": "passed" }, "call": { - "duration": 0.4420052089262754, + "duration": 0.632216920144856, "outcome": "failed", "crash": { - "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 500, + "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", + "lineno": 521, "message": "AssertionError: Expected 1 tool calls, but got 0\nassert 0 == 1\n + where 0 = len(([] or []))" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 500, + "lineno": 521, "message": "AssertionError" } ], - "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama-v3p3-70b-instruct'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'weather_tool_then_text', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'location': 'San Francisco...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n \n # --- Construct Assistant Message for History ---\n assistant_message_dict = {\"role\": \"assistant\"}\n if accumulated_content:\n 
assistant_message_dict[\"content\"] = accumulated_content\n if accumulated_tool_calls:\n assistant_message_dict[\"tool_calls\"] = accumulated_tool_calls\n \n messages.append(assistant_message_dict)\n \n # --- Assertions ---\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n> assert len(accumulated_tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(accumulated_tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len(([] or []))\n\ntests/verifications/openai_api/test_chat_completion.py:500: AssertionError" + "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama-v3p3-70b-instruct'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'weather_tool_then_text', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'location': 'San Francisco...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n \n # --- Construct Assistant Message for History ---\n assistant_message_dict = {\"role\": \"assistant\"}\n if accumulated_content:\n assistant_message_dict[\"content\"] = accumulated_content\n if accumulated_tool_calls:\n assistant_message_dict[\"tool_calls\"] = accumulated_tool_calls\n \n messages.append(assistant_message_dict)\n \n # --- Assertions ---\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n> assert len(accumulated_tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(accumulated_tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len(([] or []))\n\ntests/verifications/openai_api/test_chat_completion.py:521: AssertionError" }, "teardown": { - "duration": 0.000628374982625246, + "duration": 
0.0003350367769598961, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-add_product_tool]", - "lineno": 450, + "lineno": 471, "outcome": "failed", "keywords": [ "test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-add_product_tool]", @@ -2939,34 +2969,34 @@ "case_id": "add_product_tool" }, "setup": { - "duration": 0.013015333097428083, + "duration": 0.0707720061764121, "outcome": "passed" }, "call": { - "duration": 0.6754761249758303, + "duration": 0.9429405080154538, "outcome": "failed", "crash": { - "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 500, + "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", + "lineno": 521, "message": "AssertionError: Expected 1 tool calls, but got 0\nassert 0 == 1\n + where 0 = len(([] or []))" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 500, + "lineno": 521, "message": "AssertionError" } ], - "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama-v3p3-70b-instruct'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'add_product_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'inStock': True, 'name': 'Widget...}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': 'Successfully added product with id: 123'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n \n # --- Construct Assistant Message for History ---\n assistant_message_dict = {\"role\": \"assistant\"}\n if accumulated_content:\n assistant_message_dict[\"content\"] = accumulated_content\n if accumulated_tool_calls:\n assistant_message_dict[\"tool_calls\"] = accumulated_tool_calls\n \n 
messages.append(assistant_message_dict)\n \n # --- Assertions ---\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n> assert len(accumulated_tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(accumulated_tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len(([] or []))\n\ntests/verifications/openai_api/test_chat_completion.py:500: AssertionError" + "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama-v3p3-70b-instruct'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'add_product_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'inStock': True, 'name': 'Widget...}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': 'Successfully added product with id: 123'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n \n # --- Construct Assistant Message for History ---\n assistant_message_dict = {\"role\": \"assistant\"}\n if accumulated_content:\n assistant_message_dict[\"content\"] = accumulated_content\n if accumulated_tool_calls:\n assistant_message_dict[\"tool_calls\"] = accumulated_tool_calls\n \n messages.append(assistant_message_dict)\n \n # --- Assertions ---\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n> assert len(accumulated_tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(accumulated_tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len(([] or []))\n\ntests/verifications/openai_api/test_chat_completion.py:521: AssertionError" }, "teardown": { - "duration": 0.000581083819270134, + "duration": 0.0002858620136976242, "outcome": "passed" } }, { "nodeid": 
"tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-get_then_create_event_tool]", - "lineno": 450, + "lineno": 471, "outcome": "failed", "keywords": [ "test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-get_then_create_event_tool]", @@ -2985,34 +3015,34 @@ "case_id": "get_then_create_event_tool" }, "setup": { - "duration": 0.0128930420614779, + "duration": 0.06923680566251278, "outcome": "passed" }, "call": { - "duration": 0.367436750093475, + "duration": 0.7107308339327574, "outcome": "failed", "crash": { - "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 500, + "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", + "lineno": 521, "message": "AssertionError: Expected 1 tool calls, but got 0\nassert 0 == 1\n + where 0 = len(([] or []))" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 500, + "lineno": 521, "message": "AssertionError" } ], - "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama-v3p3-70b-instruct'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'get_then_create_event_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'date': '2025-03-03', ...ents found for 2025-03-03 at 10:00'}\"}, {'response': \"{'response': 'Successfully created new event with id: e_123'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n \n # --- Construct Assistant Message for History ---\n assistant_message_dict = {\"role\": \"assistant\"}\n if accumulated_content:\n assistant_message_dict[\"content\"] = accumulated_content\n if accumulated_tool_calls:\n assistant_message_dict[\"tool_calls\"] = accumulated_tool_calls\n \n messages.append(assistant_message_dict)\n \n # --- Assertions 
---\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n> assert len(accumulated_tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(accumulated_tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len(([] or []))\n\ntests/verifications/openai_api/test_chat_completion.py:500: AssertionError" + "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama-v3p3-70b-instruct'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'get_then_create_event_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'date': '2025-03-03', ...ents found for 2025-03-03 at 10:00'}\"}, {'response': \"{'response': 'Successfully created new event with id: e_123'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n \n # --- Construct Assistant Message for History ---\n assistant_message_dict = {\"role\": \"assistant\"}\n if accumulated_content:\n assistant_message_dict[\"content\"] = accumulated_content\n if accumulated_tool_calls:\n assistant_message_dict[\"tool_calls\"] = accumulated_tool_calls\n \n messages.append(assistant_message_dict)\n \n # --- Assertions ---\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n> assert len(accumulated_tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(accumulated_tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len(([] or []))\n\ntests/verifications/openai_api/test_chat_completion.py:521: AssertionError" }, "teardown": { - "duration": 0.00024812505580484867, + "duration": 0.0003181472420692444, "outcome": "passed" } }, { "nodeid": 
"tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-compare_monthly_expense_tool]", - "lineno": 450, + "lineno": 471, "outcome": "failed", "keywords": [ "test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-compare_monthly_expense_tool]", @@ -3031,34 +3061,34 @@ "case_id": "compare_monthly_expense_tool" }, "setup": { - "duration": 0.006677915807813406, + "duration": 0.07021687645465136, "outcome": "passed" }, "call": { - "duration": 0.5142939588986337, + "duration": 0.7717038569971919, "outcome": "failed", "crash": { - "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 500, + "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", + "lineno": 521, "message": "AssertionError: Expected 1 tool calls, but got 0\nassert 0 == 1\n + where 0 = len(([] or []))" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 500, + "lineno": 521, "message": "AssertionError" } ], - "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama-v3p3-70b-instruct'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'compare_monthly_expense_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'month': 1, 'year': ... 'Total expenses for January 2025: $1000'}\"}, {'response': \"{'response': 'Total expenses for February 2024: $2000'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n \n # --- Construct Assistant Message for History ---\n assistant_message_dict = {\"role\": \"assistant\"}\n if accumulated_content:\n assistant_message_dict[\"content\"] = accumulated_content\n if accumulated_tool_calls:\n assistant_message_dict[\"tool_calls\"] = accumulated_tool_calls\n \n messages.append(assistant_message_dict)\n \n # --- 
Assertions ---\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n> assert len(accumulated_tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(accumulated_tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len(([] or []))\n\ntests/verifications/openai_api/test_chat_completion.py:500: AssertionError" + "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama-v3p3-70b-instruct'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'compare_monthly_expense_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'month': 1, 'year': ... 'Total expenses for January 2025: $1000'}\"}, {'response': \"{'response': 'Total expenses for February 2024: $2000'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n \n # --- Construct Assistant Message for History ---\n assistant_message_dict = {\"role\": \"assistant\"}\n if accumulated_content:\n assistant_message_dict[\"content\"] = accumulated_content\n if accumulated_tool_calls:\n assistant_message_dict[\"tool_calls\"] = accumulated_tool_calls\n \n messages.append(assistant_message_dict)\n \n # --- Assertions ---\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n> assert len(accumulated_tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(accumulated_tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len(([] or []))\n\ntests/verifications/openai_api/test_chat_completion.py:521: AssertionError" }, "teardown": { - "duration": 0.0002248329110443592, + "duration": 0.00030398648232221603, "outcome": "passed" } }, { "nodeid": 
"tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-text_then_weather_tool]", - "lineno": 450, + "lineno": 471, "outcome": "failed", "keywords": [ "test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-text_then_weather_tool]", @@ -3077,34 +3107,34 @@ "case_id": "text_then_weather_tool" }, "setup": { - "duration": 0.008392333984375, + "duration": 0.07320436742156744, "outcome": "passed" }, "call": { - "duration": 9.519045708002523, + "duration": 1.2869794629514217, "outcome": "failed", "crash": { - "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 500, - "message": "AssertionError: Expected 1 tool calls, but got 0\nassert 0 == 1\n + where 0 = len(([] or []))" + "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", + "lineno": 550, + "message": "AssertionError: Expected one of ['sol'] in content, but got: '{\"name\": \"get_weather\", \"parameters\": {\"description\": \"Get the current weather\", \"parameters\": {\"type\": \"object\", \"properties\": {\"location\": {\"description\": \"The city and state (both required) (e.g. San Francisco, CA.\", \"type\": \"string\"}}}, \"required\": [\"location\"]}}'\nassert False\n + where False = any(. at 0x7f1acd9b8e40>)" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 500, + "lineno": 550, "message": "AssertionError" } ], - "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-scout-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'text_then_weather_tool', 'expected': [{'answer': ['sol'], 'num_tool_calls': 0}, {'num_tool_calls': 1, 'to...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n 
accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n \n # --- Construct Assistant Message for History ---\n assistant_message_dict = {\"role\": \"assistant\"}\n if accumulated_content:\n assistant_message_dict[\"content\"] = accumulated_content\n if accumulated_tool_calls:\n assistant_message_dict[\"tool_calls\"] = accumulated_tool_calls\n \n messages.append(assistant_message_dict)\n \n # --- Assertions ---\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n> assert len(accumulated_tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(accumulated_tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len(([] or []))\n\ntests/verifications/openai_api/test_chat_completion.py:500: AssertionError" + "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-scout-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'text_then_weather_tool', 'expected': [{'answer': ['sol'], 'num_tool_calls': 0}, {'num_tool_calls': 1, 'to...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n \n # --- Construct Assistant Message for History ---\n assistant_message_dict = {\"role\": \"assistant\"}\n if accumulated_content:\n assistant_message_dict[\"content\"] = accumulated_content\n if accumulated_tool_calls:\n assistant_message_dict[\"tool_calls\"] = accumulated_tool_calls\n \n messages.append(assistant_message_dict)\n \n # --- Assertions ---\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n assert len(accumulated_tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(accumulated_tool_calls or [])}\"\n )\n \n if num_tool_calls > 0:\n 
# Use the first accumulated tool call for assertion\n tool_call = accumulated_tool_calls[0]\n assert tool_call[\"function\"][\"name\"] == expected[\"tool_name\"], (\n f\"Expected tool '{expected['tool_name']}', got '{tool_call['function']['name']}'\"\n )\n # Parse the accumulated arguments string for comparison\n actual_arguments = json.loads(tool_call[\"function\"][\"arguments\"])\n assert actual_arguments == expected[\"tool_arguments\"], (\n f\"Expected arguments '{expected['tool_arguments']}', got '{actual_arguments}'\"\n )\n \n # Prepare and append the tool response for the next turn\n tool_response = tool_responses.pop(0)\n messages.append(\n {\n \"role\": \"tool\",\n \"tool_call_id\": tool_call[\"id\"],\n \"content\": tool_response[\"response\"],\n }\n )\n else:\n assert accumulated_content is not None and accumulated_content != \"\", \"Expected content, but none received.\"\n expected_answers = expected[\"answer\"]\n content_lower = accumulated_content.lower()\n> assert any(ans.lower() in content_lower for ans in expected_answers), (\n f\"Expected one of {expected_answers} in content, but got: '{accumulated_content}'\"\n )\nE AssertionError: Expected one of ['sol'] in content, but got: '{\"name\": \"get_weather\", \"parameters\": {\"description\": \"Get the current weather\", \"parameters\": {\"type\": \"object\", \"properties\": {\"location\": {\"description\": \"The city and state (both required) (e.g. San Francisco, CA.\", \"type\": \"string\"}}}, \"required\": [\"location\"]}}'\nE assert False\nE + where False = any(. at 0x7f1acd9b8e40>)\n\ntests/verifications/openai_api/test_chat_completion.py:550: AssertionError" }, "teardown": { - "duration": 0.00019570882432162762, + "duration": 0.0003076540306210518, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-weather_tool_then_text]", - "lineno": 450, + "lineno": 471, "outcome": "failed", "keywords": [ "test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-weather_tool_then_text]", @@ -3123,34 +3153,34 @@ "case_id": "weather_tool_then_text" }, "setup": { - "duration": 0.009688499849289656, + "duration": 0.0732570867985487, "outcome": "passed" }, "call": { - "duration": 0.9869634578935802, + "duration": 0.9204158475622535, "outcome": "failed", "crash": { - "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 500, + "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", + "lineno": 521, "message": "AssertionError: Expected 1 tool calls, but got 0\nassert 0 == 1\n + where 0 = len(([] or []))" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 500, + "lineno": 521, "message": "AssertionError" } ], - "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-scout-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'weather_tool_then_text', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'location': 'San Francisco...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 
degrees and foggy'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n \n # --- Construct Assistant Message for History ---\n assistant_message_dict = {\"role\": \"assistant\"}\n if accumulated_content:\n assistant_message_dict[\"content\"] = accumulated_content\n if accumulated_tool_calls:\n assistant_message_dict[\"tool_calls\"] = accumulated_tool_calls\n \n messages.append(assistant_message_dict)\n \n # --- Assertions ---\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n> assert len(accumulated_tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(accumulated_tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len(([] or []))\n\ntests/verifications/openai_api/test_chat_completion.py:500: AssertionError" + "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-scout-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'weather_tool_then_text', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'location': 'San Francisco...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = 
copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n \n # --- Construct Assistant Message for History ---\n assistant_message_dict = {\"role\": \"assistant\"}\n if accumulated_content:\n assistant_message_dict[\"content\"] = accumulated_content\n if accumulated_tool_calls:\n assistant_message_dict[\"tool_calls\"] = accumulated_tool_calls\n \n messages.append(assistant_message_dict)\n \n # --- Assertions ---\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n> assert len(accumulated_tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(accumulated_tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len(([] or []))\n\ntests/verifications/openai_api/test_chat_completion.py:521: AssertionError" }, "teardown": { - "duration": 0.0002135841641575098, + "duration": 0.000310627743601799, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-add_product_tool]", - "lineno": 450, + "lineno": 471, "outcome": "failed", "keywords": [ "test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-add_product_tool]", @@ -3169,34 +3199,34 @@ "case_id": "add_product_tool" }, "setup": { - "duration": 0.007028624881058931, + "duration": 0.07232664246112108, "outcome": "passed" }, "call": { - "duration": 4.688094082986936, + "duration": 3.829266043379903, "outcome": "failed", "crash": { - "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 500, + "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", + "lineno": 521, "message": "AssertionError: Expected 1 tool calls, but got 0\nassert 0 == 1\n + where 0 = len(([] or []))" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 500, + "lineno": 521, "message": "AssertionError" } ], - "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-scout-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'add_product_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'inStock': True, 'name': 'Widget...}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': 'Successfully added product with id: 123'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", 
{}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n \n # --- Construct Assistant Message for History ---\n assistant_message_dict = {\"role\": \"assistant\"}\n if accumulated_content:\n assistant_message_dict[\"content\"] = accumulated_content\n if accumulated_tool_calls:\n assistant_message_dict[\"tool_calls\"] = accumulated_tool_calls\n \n messages.append(assistant_message_dict)\n \n # --- Assertions ---\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n> assert len(accumulated_tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(accumulated_tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len(([] or []))\n\ntests/verifications/openai_api/test_chat_completion.py:500: AssertionError" + "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-scout-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'add_product_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'inStock': True, 'name': 'Widget...}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': 'Successfully added product with id: 123'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) 
> 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n \n # --- Construct Assistant Message for History ---\n assistant_message_dict = {\"role\": \"assistant\"}\n if accumulated_content:\n assistant_message_dict[\"content\"] = accumulated_content\n if accumulated_tool_calls:\n assistant_message_dict[\"tool_calls\"] = accumulated_tool_calls\n \n messages.append(assistant_message_dict)\n \n # --- Assertions ---\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n> assert len(accumulated_tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(accumulated_tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len(([] or []))\n\ntests/verifications/openai_api/test_chat_completion.py:521: AssertionError" }, "teardown": { - "duration": 0.00026954198256134987, + "duration": 0.00034091807901859283, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-get_then_create_event_tool]", - "lineno": 450, + "lineno": 471, "outcome": "failed", "keywords": [ "test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-get_then_create_event_tool]", @@ -3215,34 +3245,34 @@ "case_id": "get_then_create_event_tool" }, "setup": { - "duration": 0.006646708119660616, + "duration": 0.07045515719801188, "outcome": "passed" }, "call": { - "duration": 15.899775499943644, + "duration": 6.550140863284469, "outcome": "failed", "crash": { - "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 500, + "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", + "lineno": 521, "message": "AssertionError: Expected 1 tool calls, but got 0\nassert 0 == 1\n + where 0 = len(([] or []))" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 500, + "lineno": 521, "message": "AssertionError" } ], - "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-scout-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'get_then_create_event_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'date': '2025-03-03', ...ents found for 2025-03-03 at 10:00'}\"}, {'response': \"{'response': 'Successfully created new event with id: e_123'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, 
verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n \n # --- Construct Assistant Message for History ---\n assistant_message_dict = {\"role\": \"assistant\"}\n if accumulated_content:\n assistant_message_dict[\"content\"] = accumulated_content\n if accumulated_tool_calls:\n assistant_message_dict[\"tool_calls\"] = accumulated_tool_calls\n \n messages.append(assistant_message_dict)\n \n # --- Assertions ---\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n> assert len(accumulated_tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(accumulated_tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len(([] or []))\n\ntests/verifications/openai_api/test_chat_completion.py:500: AssertionError" + "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-scout-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'get_then_create_event_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'date': '2025-03-03', ...ents found for 2025-03-03 at 10:00'}\"}, {'response': \"{'response': 'Successfully created new event with id: e_123'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = 
input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n \n # --- Construct Assistant Message for History ---\n assistant_message_dict = {\"role\": \"assistant\"}\n if accumulated_content:\n assistant_message_dict[\"content\"] = accumulated_content\n if accumulated_tool_calls:\n assistant_message_dict[\"tool_calls\"] = accumulated_tool_calls\n \n messages.append(assistant_message_dict)\n \n # --- Assertions ---\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n> assert len(accumulated_tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(accumulated_tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len(([] or []))\n\ntests/verifications/openai_api/test_chat_completion.py:521: AssertionError" }, "teardown": { - "duration": 0.0004787910729646683, + "duration": 0.0003092316910624504, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-compare_monthly_expense_tool]", - "lineno": 450, + "lineno": 471, "outcome": "failed", "keywords": [ "test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-compare_monthly_expense_tool]", @@ -3261,34 +3291,34 @@ "case_id": "compare_monthly_expense_tool" }, "setup": { - "duration": 0.016487207962200046, + "duration": 0.07400601450353861, "outcome": "passed" }, "call": { - "duration": 3.922360667027533, + "duration": 3.142588397487998, "outcome": "failed", "crash": { - "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 500, + "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", + "lineno": 521, "message": "AssertionError: Expected 1 tool calls, but got 0\nassert 0 == 1\n + where 0 = len(([] or []))" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 500, + "lineno": 521, "message": "AssertionError" } ], - "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-scout-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'compare_monthly_expense_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'month': 1, 'year': ... 
'Total expenses for January 2025: $1000'}\"}, {'response': \"{'response': 'Total expenses for February 2024: $2000'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n \n # --- Construct Assistant Message for History ---\n assistant_message_dict = {\"role\": \"assistant\"}\n if accumulated_content:\n assistant_message_dict[\"content\"] = accumulated_content\n if accumulated_tool_calls:\n assistant_message_dict[\"tool_calls\"] = accumulated_tool_calls\n \n messages.append(assistant_message_dict)\n \n # --- Assertions ---\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n> assert len(accumulated_tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(accumulated_tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len(([] or []))\n\ntests/verifications/openai_api/test_chat_completion.py:500: AssertionError" + "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-scout-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'compare_monthly_expense_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'month': 1, 'year': ... 
'Total expenses for January 2025: $1000'}\"}, {'response': \"{'response': 'Total expenses for February 2024: $2000'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n \n # --- Construct Assistant Message for History ---\n assistant_message_dict = {\"role\": \"assistant\"}\n if accumulated_content:\n assistant_message_dict[\"content\"] = accumulated_content\n if accumulated_tool_calls:\n assistant_message_dict[\"tool_calls\"] = accumulated_tool_calls\n \n messages.append(assistant_message_dict)\n \n # --- Assertions ---\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n> assert len(accumulated_tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(accumulated_tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len(([] or []))\n\ntests/verifications/openai_api/test_chat_completion.py:521: AssertionError" }, "teardown": { - "duration": 0.00043979217298328876, + "duration": 0.0003124792128801346, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-text_then_weather_tool]", - "lineno": 450, + "lineno": 471, "outcome": "failed", "keywords": [ "test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-text_then_weather_tool]", @@ -3307,34 +3337,34 @@ "case_id": "text_then_weather_tool" }, "setup": { - "duration": 0.013401374919340014, + "duration": 0.07049713470041752, "outcome": "passed" }, "call": { - "duration": 2.2223200001753867, + "duration": 4.074657499790192, "outcome": "failed", "crash": { - "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 529, - "message": "AssertionError: Expected one of ['sol'] in content, but got: '{\"name\": \"get_weather\", \"parameters\": {\"location\": \"Rome, Italy\"}} is not the best response here.\n \n Since we don't have a function that directly answers \"What's the name of the Sun in latin?\", a more appropriate response would be to say that 
there's no function available to answer this question. However, to follow the given format and assuming there's an implicit expectation to still attempt an answer or provide a closest match:\n \n {\"name\": \"get_weather\", \"parameters\": {\"location\": \"Invalid input, no relation to weather\"}} is still not a valid response.\n \n A correct response according to the given constraints isn't feasible. However, to fit the required format and indicating a function that could be related or a default, if there was a \"get_fact\" function:\n \n {\"name\": \"get_fact\", \"parameters\": {\"query\": \"Latin name of the Sun\"}} \n \n But since \"get_fact\" isn't defined in the prompt, and sticking strictly to the given function:\n \n There isn't a proper function to call.\n \n For the sake of compliance, let's assume an unrelated function was to be used due to lack of information.\n \n The best course of action is to indicate that the provided function definitions don't directly support answering the question about the Latin name of the Sun.'\nassert False\n + where False = any(. at 0x1074b9bd0>)" + "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", + "lineno": 550, + "message": "AssertionError: Expected one of ['sol'] in content, but got: 'Since the provided text describes a JSON schema for a function call to get the weather, and the prompt asks for the name of the Sun in Latin, we need to identify a suitable function that can provide this information. However, the given schema is for a \"get_weather\" function, which doesn't directly relate to the question about the Sun's name in Latin.\n \n Assuming there's another function available that can provide information about celestial bodies or their names in different languages, we might look for something like \"get_celestial_body_info\" or a similar function.\n \n However, based on the given format and the information provided, it seems there's an implication that we should directly provide a response in the specified JSON format for a hypothetical or related function. Let's assume a function named \"get_celestial_body_name\" that takes parameters like \"body\" and \"language\".\n \n Given the constraint of the format and assuming a function that fits, we might construct a response like:\n \n ```json\n {\n \"name\": \"get_celestial_body_name\",\n \"parameters\": {\n \"body\": \"Sun\",\n \"language\": \"Latin\"\n }\n }\n ```\n \n This response implies the existence of a function \"get_celestial_body_name\" that can take the name of a celestial body and a language as input and return the name of the celestial body in that language. \n \n So, the response is:\n {\"name\": \"get_celestial_body_name\", \"parameters\": {\"body\": \"Sun\", \"language\": \"Latin\"}}'\nassert False\n + where False = any(. 
at 0x7f1acdaba030>)" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 529, + "lineno": 550, "message": "AssertionError" } ], - "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-maverick-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'text_then_weather_tool', 'expected': [{'answer': ['sol'], 'num_tool_calls': 0}, {'num_tool_calls': 1, 'to...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n \n # --- Construct Assistant Message for History ---\n assistant_message_dict = {\"role\": \"assistant\"}\n if accumulated_content:\n assistant_message_dict[\"content\"] = accumulated_content\n if accumulated_tool_calls:\n assistant_message_dict[\"tool_calls\"] = accumulated_tool_calls\n \n messages.append(assistant_message_dict)\n \n # --- Assertions ---\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n assert len(accumulated_tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(accumulated_tool_calls or [])}\"\n )\n \n if num_tool_calls > 0:\n # Use the first accumulated tool call for assertion\n tool_call = accumulated_tool_calls[0]\n assert tool_call[\"function\"][\"name\"] == expected[\"tool_name\"], (\n f\"Expected tool '{expected['tool_name']}', got '{tool_call['function']['name']}'\"\n )\n # Parse the accumulated arguments string for comparison\n actual_arguments = json.loads(tool_call[\"function\"][\"arguments\"])\n assert actual_arguments == expected[\"tool_arguments\"], (\n f\"Expected arguments '{expected['tool_arguments']}', got '{actual_arguments}'\"\n )\n \n # Prepare and append the tool response for the next turn\n tool_response = tool_responses.pop(0)\n messages.append(\n {\n \"role\": \"tool\",\n 
\"tool_call_id\": tool_call[\"id\"],\n \"content\": tool_response[\"response\"],\n }\n )\n else:\n assert accumulated_content is not None and accumulated_content != \"\", \"Expected content, but none received.\"\n expected_answers = expected[\"answer\"]\n content_lower = accumulated_content.lower()\n> assert any(ans.lower() in content_lower for ans in expected_answers), (\n f\"Expected one of {expected_answers} in content, but got: '{accumulated_content}'\"\n )\nE AssertionError: Expected one of ['sol'] in content, but got: '{\"name\": \"get_weather\", \"parameters\": {\"location\": \"Rome, Italy\"}} is not the best response here.\nE \nE Since we don't have a function that directly answers \"What's the name of the Sun in latin?\", a more appropriate response would be to say that there's no function available to answer this question. However, to follow the given format and assuming there's an implicit expectation to still attempt an answer or provide a closest match:\nE \nE {\"name\": \"get_weather\", \"parameters\": {\"location\": \"Invalid input, no relation to weather\"}} is still not a valid response.\nE \nE A correct response according to the given constraints isn't feasible. However, to fit the required format and indicating a function that could be related or a default, if there was a \"get_fact\" function:\nE \nE {\"name\": \"get_fact\", \"parameters\": {\"query\": \"Latin name of the Sun\"}} \nE \nE But since \"get_fact\" isn't defined in the prompt, and sticking strictly to the given function:\nE \nE There isn't a proper function to call.\nE \nE For the sake of compliance, let's assume an unrelated function was to be used due to lack of information.\nE \nE The best course of action is to indicate that the provided function definitions don't directly support answering the question about the Latin name of the Sun.'\nE assert False\nE + where False = any(. 
at 0x1074b9bd0>)\n\ntests/verifications/openai_api/test_chat_completion.py:529: AssertionError" + "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-maverick-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'text_then_weather_tool', 'expected': [{'answer': ['sol'], 'num_tool_calls': 0}, {'num_tool_calls': 1, 'to...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n \n # --- Construct Assistant Message for History ---\n assistant_message_dict = {\"role\": \"assistant\"}\n if accumulated_content:\n assistant_message_dict[\"content\"] = accumulated_content\n if accumulated_tool_calls:\n assistant_message_dict[\"tool_calls\"] = accumulated_tool_calls\n \n messages.append(assistant_message_dict)\n \n # --- Assertions ---\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n assert len(accumulated_tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(accumulated_tool_calls or [])}\"\n )\n \n if num_tool_calls > 0:\n # Use the first accumulated tool call for assertion\n tool_call = accumulated_tool_calls[0]\n assert tool_call[\"function\"][\"name\"] == expected[\"tool_name\"], (\n f\"Expected tool '{expected['tool_name']}', got '{tool_call['function']['name']}'\"\n )\n # Parse the accumulated arguments string for comparison\n actual_arguments = json.loads(tool_call[\"function\"][\"arguments\"])\n assert actual_arguments == expected[\"tool_arguments\"], (\n f\"Expected arguments '{expected['tool_arguments']}', got '{actual_arguments}'\"\n )\n \n # Prepare and append the tool response for the next turn\n tool_response = tool_responses.pop(0)\n messages.append(\n {\n \"role\": \"tool\",\n \"tool_call_id\": tool_call[\"id\"],\n \"content\": tool_response[\"response\"],\n }\n )\n 
else:\n assert accumulated_content is not None and accumulated_content != \"\", \"Expected content, but none received.\"\n expected_answers = expected[\"answer\"]\n content_lower = accumulated_content.lower()\n> assert any(ans.lower() in content_lower for ans in expected_answers), (\n f\"Expected one of {expected_answers} in content, but got: '{accumulated_content}'\"\n )\nE AssertionError: Expected one of ['sol'] in content, but got: 'Since the provided text describes a JSON schema for a function call to get the weather, and the prompt asks for the name of the Sun in Latin, we need to identify a suitable function that can provide this information. However, the given schema is for a \"get_weather\" function, which doesn't directly relate to the question about the Sun's name in Latin.\nE \nE Assuming there's another function available that can provide information about celestial bodies or their names in different languages, we might look for something like \"get_celestial_body_info\" or a similar function.\nE \nE However, based on the given format and the information provided, it seems there's an implication that we should directly provide a response in the specified JSON format for a hypothetical or related function. Let's assume a function named \"get_celestial_body_name\" that takes parameters like \"body\" and \"language\".\nE \nE Given the constraint of the format and assuming a function that fits, we might construct a response like:\nE \nE ```json\nE {\nE \"name\": \"get_celestial_body_name\",\nE \"parameters\": {\nE \"body\": \"Sun\",\nE \"language\": \"Latin\"\nE }\nE }\nE ```\nE \nE This response implies the existence of a function \"get_celestial_body_name\" that can take the name of a celestial body and a language as input and return the name of the celestial body in that language. \nE \nE So, the response is:\nE {\"name\": \"get_celestial_body_name\", \"parameters\": {\"body\": \"Sun\", \"language\": \"Latin\"}}'\nE assert False\nE + where False = any(. 
at 0x7f1acdaba030>)\n\ntests/verifications/openai_api/test_chat_completion.py:550: AssertionError" }, "teardown": { - "duration": 0.00047154095955193043, + "duration": 0.00031174439936876297, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-weather_tool_then_text]", - "lineno": 450, + "lineno": 471, "outcome": "failed", "keywords": [ "test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-weather_tool_then_text]", @@ -3353,34 +3383,34 @@ "case_id": "weather_tool_then_text" }, "setup": { - "duration": 0.01485933386720717, + "duration": 0.07156828418374062, "outcome": "passed" }, "call": { - "duration": 0.6193458330817521, + "duration": 0.6585372854024172, "outcome": "failed", "crash": { - "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 500, + "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", + "lineno": 521, "message": "AssertionError: Expected 1 tool calls, but got 0\nassert 0 == 1\n + where 0 = len(([] or []))" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 500, + "lineno": 521, "message": "AssertionError" } ], - "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-maverick-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'weather_tool_then_text', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'location': 'San Francisco...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n \n # --- Construct Assistant Message for History ---\n assistant_message_dict = {\"role\": \"assistant\"}\n if 
accumulated_content:\n assistant_message_dict[\"content\"] = accumulated_content\n if accumulated_tool_calls:\n assistant_message_dict[\"tool_calls\"] = accumulated_tool_calls\n \n messages.append(assistant_message_dict)\n \n # --- Assertions ---\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n> assert len(accumulated_tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(accumulated_tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len(([] or []))\n\ntests/verifications/openai_api/test_chat_completion.py:500: AssertionError" + "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-maverick-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'weather_tool_then_text', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'location': 'San Francisco...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n \n # --- Construct Assistant Message for History ---\n assistant_message_dict = {\"role\": \"assistant\"}\n if accumulated_content:\n assistant_message_dict[\"content\"] = accumulated_content\n if accumulated_tool_calls:\n assistant_message_dict[\"tool_calls\"] = accumulated_tool_calls\n \n messages.append(assistant_message_dict)\n \n # --- Assertions ---\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n> assert len(accumulated_tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(accumulated_tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len(([] or []))\n\ntests/verifications/openai_api/test_chat_completion.py:521: AssertionError" }, "teardown": { - "duration": 
0.000300833024084568, + "duration": 0.0003233151510357857, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-add_product_tool]", - "lineno": 450, + "lineno": 471, "outcome": "failed", "keywords": [ "test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-add_product_tool]", @@ -3399,34 +3429,34 @@ "case_id": "add_product_tool" }, "setup": { - "duration": 0.012684250017628074, + "duration": 0.07135927956551313, "outcome": "passed" }, "call": { - "duration": 0.5173197500407696, + "duration": 1.0483367526903749, "outcome": "failed", "crash": { - "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 500, + "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", + "lineno": 521, "message": "AssertionError: Expected 1 tool calls, but got 0\nassert 0 == 1\n + where 0 = len(([] or []))" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 500, + "lineno": 521, "message": "AssertionError" } ], - "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-maverick-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'add_product_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'inStock': True, 'name': 'Widget...}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': 'Successfully added product with id: 123'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n \n # --- Construct Assistant Message for History ---\n assistant_message_dict = {\"role\": \"assistant\"}\n if accumulated_content:\n assistant_message_dict[\"content\"] = accumulated_content\n if accumulated_tool_calls:\n assistant_message_dict[\"tool_calls\"] = 
accumulated_tool_calls\n \n messages.append(assistant_message_dict)\n \n # --- Assertions ---\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n> assert len(accumulated_tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(accumulated_tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len(([] or []))\n\ntests/verifications/openai_api/test_chat_completion.py:500: AssertionError" + "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-maverick-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'add_product_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'inStock': True, 'name': 'Widget...}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': 'Successfully added product with id: 123'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n \n # --- Construct Assistant Message for History ---\n assistant_message_dict = {\"role\": \"assistant\"}\n if accumulated_content:\n assistant_message_dict[\"content\"] = accumulated_content\n if accumulated_tool_calls:\n assistant_message_dict[\"tool_calls\"] = accumulated_tool_calls\n \n messages.append(assistant_message_dict)\n \n # --- Assertions ---\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n> assert len(accumulated_tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(accumulated_tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len(([] or []))\n\ntests/verifications/openai_api/test_chat_completion.py:521: AssertionError" }, "teardown": { - "duration": 0.00047266692854464054, + "duration": 0.00028971116989851, "outcome": "passed" } }, { "nodeid": 
"tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-get_then_create_event_tool]", - "lineno": 450, + "lineno": 471, "outcome": "failed", "keywords": [ "test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-get_then_create_event_tool]", @@ -3445,34 +3475,34 @@ "case_id": "get_then_create_event_tool" }, "setup": { - "duration": 0.01282945810817182, + "duration": 0.07051362749189138, "outcome": "passed" }, "call": { - "duration": 2.990155333885923, + "duration": 4.592376064509153, "outcome": "failed", "crash": { - "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 500, + "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", + "lineno": 521, "message": "AssertionError: Expected 1 tool calls, but got 0\nassert 0 == 1\n + where 0 = len(([] or []))" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 500, + "lineno": 521, "message": "AssertionError" } ], - "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-maverick-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'get_then_create_event_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'date': '2025-03-03', ...ents found for 2025-03-03 at 10:00'}\"}, {'response': \"{'response': 'Successfully created new event with id: e_123'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n \n # --- Construct Assistant Message for History ---\n assistant_message_dict = {\"role\": \"assistant\"}\n if accumulated_content:\n assistant_message_dict[\"content\"] = accumulated_content\n if accumulated_tool_calls:\n assistant_message_dict[\"tool_calls\"] = accumulated_tool_calls\n \n messages.append(assistant_message_dict)\n \n 
# --- Assertions ---\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n> assert len(accumulated_tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(accumulated_tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len(([] or []))\n\ntests/verifications/openai_api/test_chat_completion.py:500: AssertionError" + "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-maverick-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'get_then_create_event_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'date': '2025-03-03', ...ents found for 2025-03-03 at 10:00'}\"}, {'response': \"{'response': 'Successfully created new event with id: e_123'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n \n # --- Construct Assistant Message for History ---\n assistant_message_dict = {\"role\": \"assistant\"}\n if accumulated_content:\n assistant_message_dict[\"content\"] = accumulated_content\n if accumulated_tool_calls:\n assistant_message_dict[\"tool_calls\"] = accumulated_tool_calls\n \n messages.append(assistant_message_dict)\n \n # --- Assertions ---\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n> assert len(accumulated_tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(accumulated_tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len(([] or []))\n\ntests/verifications/openai_api/test_chat_completion.py:521: AssertionError" }, "teardown": { - "duration": 0.00027558300644159317, + "duration": 0.00029074493795633316, "outcome": "passed" } }, { "nodeid": 
"tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-compare_monthly_expense_tool]", - "lineno": 450, + "lineno": 471, "outcome": "failed", "keywords": [ "test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-compare_monthly_expense_tool]", @@ -3491,31 +3521,231 @@ "case_id": "compare_monthly_expense_tool" }, "setup": { - "duration": 0.008087666006758809, + "duration": 0.07347700279206038, "outcome": "passed" }, "call": { - "duration": 3.6024099169299006, + "duration": 1.5335856154561043, "outcome": "failed", "crash": { - "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 500, + "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", + "lineno": 521, "message": "AssertionError: Expected 1 tool calls, but got 0\nassert 0 == 1\n + where 0 = len(([] or []))" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 500, + "lineno": 521, "message": "AssertionError" } ], - "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-maverick-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'compare_monthly_expense_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'month': 1, 'year': ... 'Total expenses for January 2025: $1000'}\"}, {'response': \"{'response': 'Total expenses for February 2024: $2000'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n \n # --- Construct Assistant Message for History ---\n assistant_message_dict = {\"role\": \"assistant\"}\n if accumulated_content:\n assistant_message_dict[\"content\"] = accumulated_content\n if accumulated_tool_calls:\n assistant_message_dict[\"tool_calls\"] = accumulated_tool_calls\n \n 
messages.append(assistant_message_dict)\n \n # --- Assertions ---\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n> assert len(accumulated_tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(accumulated_tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len(([] or []))\n\ntests/verifications/openai_api/test_chat_completion.py:500: AssertionError" + "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-maverick-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'compare_monthly_expense_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'month': 1, 'year': ... 'Total expenses for January 2025: $1000'}\"}, {'response': \"{'response': 'Total expenses for February 2024: $2000'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n \n # --- Construct Assistant Message for History ---\n assistant_message_dict = {\"role\": \"assistant\"}\n if accumulated_content:\n assistant_message_dict[\"content\"] = accumulated_content\n if accumulated_tool_calls:\n assistant_message_dict[\"tool_calls\"] = accumulated_tool_calls\n \n messages.append(assistant_message_dict)\n \n # --- Assertions ---\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n> assert len(accumulated_tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(accumulated_tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len(([] or []))\n\ntests/verifications/openai_api/test_chat_completion.py:521: AssertionError" }, "teardown": { - "duration": 0.0010035419836640358, + "duration": 0.0003180811181664467, + "outcome": "passed" + } + }, + { + "nodeid": 
"tests/verifications/openai_api/test_chat_completion.py::test_chat_multi_turn_multiple_images[accounts/fireworks/models/llama-v3p3-70b-instruct-stream=False]", + "lineno": 554, + "outcome": "skipped", + "keywords": [ + "test_chat_multi_turn_multiple_images[accounts/fireworks/models/llama-v3p3-70b-instruct-stream=False]", + "parametrize", + "pytestmark", + "accounts/fireworks/models/llama-v3p3-70b-instruct-stream=False", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "accounts/fireworks/models/llama-v3p3-70b-instruct", + "case_id": "stream=False" + }, + "setup": { + "duration": 0.07250582799315453, + "outcome": "passed" + }, + "call": { + "duration": 0.00022417306900024414, + "outcome": "skipped", + "longrepr": "('/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py', 561, 'Skipped: Skipping test_chat_multi_turn_multiple_images for model accounts/fireworks/models/llama-v3p3-70b-instruct on provider fireworks based on config.')" + }, + "teardown": { + "duration": 0.0036543207243084908, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_multi_turn_multiple_images[accounts/fireworks/models/llama-v3p3-70b-instruct-stream=True]", + "lineno": 554, + "outcome": "skipped", + "keywords": [ + "test_chat_multi_turn_multiple_images[accounts/fireworks/models/llama-v3p3-70b-instruct-stream=True]", + "parametrize", + "pytestmark", + "accounts/fireworks/models/llama-v3p3-70b-instruct-stream=True", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "accounts/fireworks/models/llama-v3p3-70b-instruct", + "case_id": "stream=True" + }, + "setup": { + "duration": 0.07320290431380272, + "outcome": "passed" + }, + "call": { + "duration": 0.0002203313633799553, + "outcome": "skipped", + "longrepr": "('/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py', 561, 'Skipped: Skipping test_chat_multi_turn_multiple_images for model accounts/fireworks/models/llama-v3p3-70b-instruct on provider fireworks based on config.')" + }, + "teardown": { + "duration": 0.00035103876143693924, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_multi_turn_multiple_images[accounts/fireworks/models/llama4-scout-instruct-basic-stream=False]", + "lineno": 554, + "outcome": "passed", + "keywords": [ + "test_chat_multi_turn_multiple_images[accounts/fireworks/models/llama4-scout-instruct-basic-stream=False]", + "parametrize", + "pytestmark", + "accounts/fireworks/models/llama4-scout-instruct-basic-stream=False", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "accounts/fireworks/models/llama4-scout-instruct-basic", + "case_id": "stream=False" + }, + "setup": { + "duration": 0.07001570798456669, + "outcome": "passed" + }, + "call": { + "duration": 6.779760396108031, + "outcome": "passed" + }, + "teardown": { + "duration": 0.00023057777434587479, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_multi_turn_multiple_images[accounts/fireworks/models/llama4-scout-instruct-basic-stream=True]", + "lineno": 554, + "outcome": "passed", + "keywords": [ + "test_chat_multi_turn_multiple_images[accounts/fireworks/models/llama4-scout-instruct-basic-stream=True]", + 
"parametrize", + "pytestmark", + "accounts/fireworks/models/llama4-scout-instruct-basic-stream=True", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "accounts/fireworks/models/llama4-scout-instruct-basic", + "case_id": "stream=True" + }, + "setup": { + "duration": 0.07039657514542341, + "outcome": "passed" + }, + "call": { + "duration": 4.335017805919051, + "outcome": "passed" + }, + "teardown": { + "duration": 0.00023656059056520462, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_multi_turn_multiple_images[accounts/fireworks/models/llama4-maverick-instruct-basic-stream=False]", + "lineno": 554, + "outcome": "passed", + "keywords": [ + "test_chat_multi_turn_multiple_images[accounts/fireworks/models/llama4-maverick-instruct-basic-stream=False]", + "parametrize", + "pytestmark", + "accounts/fireworks/models/llama4-maverick-instruct-basic-stream=False", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "accounts/fireworks/models/llama4-maverick-instruct-basic", + "case_id": "stream=False" + }, + "setup": { + "duration": 0.07107001543045044, + "outcome": "passed" + }, + "call": { + "duration": 5.857806807383895, + "outcome": "passed" + }, + "teardown": { + "duration": 0.00028312671929597855, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_multi_turn_multiple_images[accounts/fireworks/models/llama4-maverick-instruct-basic-stream=True]", + "lineno": 554, + "outcome": "passed", + "keywords": [ + "test_chat_multi_turn_multiple_images[accounts/fireworks/models/llama4-maverick-instruct-basic-stream=True]", + "parametrize", + "pytestmark", + "accounts/fireworks/models/llama4-maverick-instruct-basic-stream=True", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "accounts/fireworks/models/llama4-maverick-instruct-basic", + "case_id": "stream=True" + }, + "setup": { + "duration": 0.07257402781397104, + "outcome": "passed" + }, + "call": { + "duration": 5.412369452416897, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0018147435039281845, "outcome": "passed" } } ], - "run_timestamp": 1744679046 + "run_timestamp": 1744918193 } diff --git a/tests/verifications/test_results/meta_reference.json b/tests/verifications/test_results/meta_reference.json new file mode 100644 index 000000000..9f9a6de82 --- /dev/null +++ b/tests/verifications/test_results/meta_reference.json @@ -0,0 +1,1097 @@ +{ + "created": 1744918847.712677, + "duration": 215.2132911682129, + "exitcode": 0, + "root": "/home/erichuang/llama-stack", + "environment": {}, + "summary": { + "passed": 28, + "total": 28, + "collected": 28 + }, + "collectors": [ + { + "nodeid": "", + "outcome": "passed", + "result": [ + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py", + "type": "Module" + } + ] + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py", + "outcome": "passed", + "result": [ + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-earth]", + "type": "Function", + "lineno": 95 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-saturn]", + 
"type": "Function", + "lineno": 95 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-earth]", + "type": "Function", + "lineno": 114 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-saturn]", + "type": "Function", + "lineno": 114 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", + "type": "Function", + "lineno": 138 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", + "type": "Function", + "lineno": 157 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-calendar]", + "type": "Function", + "lineno": 181 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-math]", + "type": "Function", + "lineno": 181 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-calendar]", + "type": "Function", + "lineno": 204 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-math]", + "type": "Function", + "lineno": 204 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", + "type": "Function", + "lineno": 226 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", + "type": "Function", + "lineno": 250 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_required[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", + "type": "Function", + "lineno": 278 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_required[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", + "type": "Function", + "lineno": 302 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_none[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", + "type": "Function", + "lineno": 329 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_none[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", + "type": "Function", + "lineno": 352 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-text_then_weather_tool]", + "type": "Function", + "lineno": 380 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-weather_tool_then_text]", + "type": "Function", + "lineno": 380 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-add_product_tool]", + "type": "Function", 
+ "lineno": 380 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-get_then_create_event_tool]", + "type": "Function", + "lineno": 380 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-compare_monthly_expense_tool]", + "type": "Function", + "lineno": 380 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-text_then_weather_tool]", + "type": "Function", + "lineno": 471 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-weather_tool_then_text]", + "type": "Function", + "lineno": 471 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-add_product_tool]", + "type": "Function", + "lineno": 471 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-get_then_create_event_tool]", + "type": "Function", + "lineno": 471 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-compare_monthly_expense_tool]", + "type": "Function", + "lineno": 471 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_multi_turn_multiple_images[meta-llama/Llama-4-Scout-17B-16E-Instruct-stream=False]", + "type": "Function", + "lineno": 554 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_multi_turn_multiple_images[meta-llama/Llama-4-Scout-17B-16E-Instruct-stream=True]", + "type": "Function", + "lineno": 554 + } + ] + } + ], + "tests": [ + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-earth]", + "lineno": 95, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-earth]", + "parametrize", + "pytestmark", + "meta-llama/Llama-4-Scout-17B-16E-Instruct-earth", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "case_id": "earth" + }, + "setup": { + "duration": 0.09800294879823923, + "outcome": "passed" + }, + "call": { + "duration": 4.066351721994579, + "outcome": "passed" + }, + "teardown": { + "duration": 0.00025077443569898605, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-saturn]", + "lineno": 95, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-saturn]", + "parametrize", + "pytestmark", + "meta-llama/Llama-4-Scout-17B-16E-Instruct-saturn", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "case_id": "saturn" + }, + "setup": { + "duration": 0.07197055127471685, + "outcome": "passed" + }, 
+ "call": { + "duration": 1.1918699434027076, + "outcome": "passed" + }, + "teardown": { + "duration": 0.00027959980070590973, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-earth]", + "lineno": 114, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-earth]", + "parametrize", + "pytestmark", + "meta-llama/Llama-4-Scout-17B-16E-Instruct-earth", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "case_id": "earth" + }, + "setup": { + "duration": 0.07294174749404192, + "outcome": "passed" + }, + "call": { + "duration": 2.027987685985863, + "outcome": "passed" + }, + "teardown": { + "duration": 0.00026049185544252396, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-saturn]", + "lineno": 114, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-saturn]", + "parametrize", + "pytestmark", + "meta-llama/Llama-4-Scout-17B-16E-Instruct-saturn", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "case_id": "saturn" + }, + "setup": { + "duration": 0.0741243390366435, + "outcome": "passed" + }, + "call": { + "duration": 1.2185465842485428, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0002712178975343704, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", + "lineno": 138, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_image[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", + "parametrize", + "pytestmark", + "meta-llama/Llama-4-Scout-17B-16E-Instruct-case0", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "case_id": "case0" + }, + "setup": { + "duration": 0.07473955396562815, + "outcome": "passed" + }, + "call": { + "duration": 10.396870554424822, + "outcome": "passed" + }, + "teardown": { + "duration": 0.00025566015392541885, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", + "lineno": 157, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_image[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", + "parametrize", + "pytestmark", + "meta-llama/Llama-4-Scout-17B-16E-Instruct-case0", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "case_id": "case0" + }, + "setup": { + "duration": 0.07153997663408518, + "outcome": "passed" + }, + "call": { + "duration": 10.59731453191489, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0002689240500330925, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-calendar]", 
+ "lineno": 181, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-calendar]", + "parametrize", + "pytestmark", + "meta-llama/Llama-4-Scout-17B-16E-Instruct-calendar", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "case_id": "calendar" + }, + "setup": { + "duration": 0.07629724312573671, + "outcome": "passed" + }, + "call": { + "duration": 5.293915126472712, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0002626115456223488, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-math]", + "lineno": 181, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-math]", + "parametrize", + "pytestmark", + "meta-llama/Llama-4-Scout-17B-16E-Instruct-math", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "case_id": "math" + }, + "setup": { + "duration": 0.07231003511697054, + "outcome": "passed" + }, + "call": { + "duration": 19.020215207710862, + "outcome": "passed" + }, + "teardown": { + "duration": 0.00025262776762247086, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-calendar]", + "lineno": 204, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-calendar]", + "parametrize", + "pytestmark", + "meta-llama/Llama-4-Scout-17B-16E-Instruct-calendar", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "case_id": "calendar" + }, + "setup": { + "duration": 0.07291634101420641, + "outcome": "passed" + }, + "call": { + "duration": 6.105666604824364, + "outcome": "passed" + }, + "teardown": { + "duration": 0.00027642492204904556, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-math]", + "lineno": 204, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-math]", + "parametrize", + "pytestmark", + "meta-llama/Llama-4-Scout-17B-16E-Instruct-math", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "case_id": "math" + }, + "setup": { + "duration": 0.07050449773669243, + "outcome": "passed" + }, + "call": { + "duration": 19.080777555704117, + "outcome": "passed" + }, + "teardown": { + "duration": 0.000232757069170475, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", + "lineno": 226, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", + "parametrize", + "pytestmark", + 
"meta-llama/Llama-4-Scout-17B-16E-Instruct-case0", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "case_id": "case0" + }, + "setup": { + "duration": 0.07927203364670277, + "outcome": "passed" + }, + "call": { + "duration": 0.7760327504947782, + "outcome": "passed" + }, + "teardown": { + "duration": 0.00024862587451934814, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", + "lineno": 250, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", + "parametrize", + "pytestmark", + "meta-llama/Llama-4-Scout-17B-16E-Instruct-case0", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "case_id": "case0" + }, + "setup": { + "duration": 0.07514432724565268, + "outcome": "passed" + }, + "call": { + "duration": 0.7971448050811887, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0002687377855181694, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_required[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", + "lineno": 278, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_tool_choice_required[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", + "parametrize", + "pytestmark", + "meta-llama/Llama-4-Scout-17B-16E-Instruct-case0", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "case_id": "case0" + }, + "setup": { + "duration": 0.07167623657733202, + "outcome": "passed" + }, + "call": { + "duration": 0.6906132427975535, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0003270544111728668, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_required[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", + "lineno": 302, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_tool_choice_required[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", + "parametrize", + "pytestmark", + "meta-llama/Llama-4-Scout-17B-16E-Instruct-case0", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "case_id": "case0" + }, + "setup": { + "duration": 0.0725558316335082, + "outcome": "passed" + }, + "call": { + "duration": 0.9245227407664061, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0002602478489279747, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_none[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", + "lineno": 329, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_tool_choice_none[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", + "parametrize", + "pytestmark", + "meta-llama/Llama-4-Scout-17B-16E-Instruct-case0", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + 
"case_id": "case0" + }, + "setup": { + "duration": 0.07299680262804031, + "outcome": "passed" + }, + "call": { + "duration": 31.90802155341953, + "outcome": "passed" + }, + "teardown": { + "duration": 0.00023696757853031158, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_none[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", + "lineno": 352, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_tool_choice_none[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", + "parametrize", + "pytestmark", + "meta-llama/Llama-4-Scout-17B-16E-Instruct-case0", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "case_id": "case0" + }, + "setup": { + "duration": 0.07331038825213909, + "outcome": "passed" + }, + "call": { + "duration": 39.341348845511675, + "outcome": "passed" + }, + "teardown": { + "duration": 0.00022847391664981842, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-text_then_weather_tool]", + "lineno": 380, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-text_then_weather_tool]", + "parametrize", + "pytestmark", + "meta-llama/Llama-4-Scout-17B-16E-Instruct-text_then_weather_tool", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "case_id": "text_then_weather_tool" + }, + "setup": { + "duration": 0.10512833576649427, + "outcome": "passed" + }, + "call": { + "duration": 2.9590865215286613, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0002405792474746704, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-weather_tool_then_text]", + "lineno": 380, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-weather_tool_then_text]", + "parametrize", + "pytestmark", + "meta-llama/Llama-4-Scout-17B-16E-Instruct-weather_tool_then_text", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "case_id": "weather_tool_then_text" + }, + "setup": { + "duration": 0.07294358871877193, + "outcome": "passed" + }, + "call": { + "duration": 1.7672317335382104, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0003217160701751709, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-add_product_tool]", + "lineno": 380, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-add_product_tool]", + "parametrize", + "pytestmark", + "meta-llama/Llama-4-Scout-17B-16E-Instruct-add_product_tool", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "case_id": 
"add_product_tool" + }, + "setup": { + "duration": 0.11179900728166103, + "outcome": "passed" + }, + "call": { + "duration": 2.411543940193951, + "outcome": "passed" + }, + "teardown": { + "duration": 0.00023025460541248322, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-get_then_create_event_tool]", + "lineno": 380, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-get_then_create_event_tool]", + "parametrize", + "pytestmark", + "meta-llama/Llama-4-Scout-17B-16E-Instruct-get_then_create_event_tool", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "case_id": "get_then_create_event_tool" + }, + "setup": { + "duration": 0.07234534807503223, + "outcome": "passed" + }, + "call": { + "duration": 4.438527720049024, + "outcome": "passed" + }, + "teardown": { + "duration": 0.00028106197714805603, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-compare_monthly_expense_tool]", + "lineno": 380, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-compare_monthly_expense_tool]", + "parametrize", + "pytestmark", + "meta-llama/Llama-4-Scout-17B-16E-Instruct-compare_monthly_expense_tool", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "case_id": "compare_monthly_expense_tool" + }, + "setup": { + "duration": 0.06979168020188808, + "outcome": "passed" + }, + "call": { + "duration": 3.186668715439737, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0002599591389298439, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-text_then_weather_tool]", + "lineno": 471, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-text_then_weather_tool]", + "parametrize", + "pytestmark", + "meta-llama/Llama-4-Scout-17B-16E-Instruct-text_then_weather_tool", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "case_id": "text_then_weather_tool" + }, + "setup": { + "duration": 0.07083943020552397, + "outcome": "passed" + }, + "call": { + "duration": 2.31697681453079, + "outcome": "passed" + }, + "teardown": { + "duration": 0.00029378384351730347, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-weather_tool_then_text]", + "lineno": 471, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-weather_tool_then_text]", + "parametrize", + "pytestmark", + "meta-llama/Llama-4-Scout-17B-16E-Instruct-weather_tool_then_text", + "test_chat_completion.py", + "openai_api", + 
"verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "case_id": "weather_tool_then_text" + }, + "setup": { + "duration": 0.07374998275190592, + "outcome": "passed" + }, + "call": { + "duration": 1.7863417640328407, + "outcome": "passed" + }, + "teardown": { + "duration": 0.00025129225105047226, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-add_product_tool]", + "lineno": 471, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-add_product_tool]", + "parametrize", + "pytestmark", + "meta-llama/Llama-4-Scout-17B-16E-Instruct-add_product_tool", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "case_id": "add_product_tool" + }, + "setup": { + "duration": 0.07009322382509708, + "outcome": "passed" + }, + "call": { + "duration": 2.248749589547515, + "outcome": "passed" + }, + "teardown": { + "duration": 0.00022566411644220352, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-get_then_create_event_tool]", + "lineno": 471, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-get_then_create_event_tool]", + "parametrize", + "pytestmark", + "meta-llama/Llama-4-Scout-17B-16E-Instruct-get_then_create_event_tool", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "case_id": "get_then_create_event_tool" + }, + "setup": { + "duration": 0.10290939453989267, + "outcome": "passed" + }, + "call": { + "duration": 4.644147016108036, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0002319561317563057, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-compare_monthly_expense_tool]", + "lineno": 471, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-compare_monthly_expense_tool]", + "parametrize", + "pytestmark", + "meta-llama/Llama-4-Scout-17B-16E-Instruct-compare_monthly_expense_tool", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "case_id": "compare_monthly_expense_tool" + }, + "setup": { + "duration": 0.07125874608755112, + "outcome": "passed" + }, + "call": { + "duration": 3.2340452317148447, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0002202410250902176, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_multi_turn_multiple_images[meta-llama/Llama-4-Scout-17B-16E-Instruct-stream=False]", + "lineno": 554, + "outcome": "passed", + "keywords": [ + "test_chat_multi_turn_multiple_images[meta-llama/Llama-4-Scout-17B-16E-Instruct-stream=False]", + "parametrize", + "pytestmark", + 
"meta-llama/Llama-4-Scout-17B-16E-Instruct-stream=False", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "case_id": "stream=False" + }, + "setup": { + "duration": 0.07085523661226034, + "outcome": "passed" + }, + "call": { + "duration": 17.7453119084239, + "outcome": "passed" + }, + "teardown": { + "duration": 0.00037308502942323685, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_multi_turn_multiple_images[meta-llama/Llama-4-Scout-17B-16E-Instruct-stream=True]", + "lineno": 554, + "outcome": "passed", + "keywords": [ + "test_chat_multi_turn_multiple_images[meta-llama/Llama-4-Scout-17B-16E-Instruct-stream=True]", + "parametrize", + "pytestmark", + "meta-llama/Llama-4-Scout-17B-16E-Instruct-stream=True", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "case_id": "stream=True" + }, + "setup": { + "duration": 0.07670701760798693, + "outcome": "passed" + }, + "call": { + "duration": 12.663874679245055, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0008251797407865524, + "outcome": "passed" + } + } + ], + "run_timestamp": 1744918631 +} diff --git a/tests/verifications/test_results/openai.json b/tests/verifications/test_results/openai.json index 32a2a2b82..f40b8f532 100644 --- a/tests/verifications/test_results/openai.json +++ b/tests/verifications/test_results/openai.json @@ -1,13 +1,13 @@ { - "created": 1744679497.440863, - "duration": 102.70424389839172, + "created": 1744918586.2136743, + "duration": 136.56194758415222, "exitcode": 0, - "root": "/Users/erichuang/projects/llama-stack", + "root": "/home/erichuang/llama-stack", "environment": {}, "summary": { - "passed": 52, - "total": 52, - "collected": 52 + "passed": 56, + "total": 56, + "collected": 56 }, "collectors": [ { @@ -27,262 +27,282 @@ { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[gpt-4o-earth]", "type": "Function", - "lineno": 74 + "lineno": 95 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[gpt-4o-saturn]", "type": "Function", - "lineno": 74 + "lineno": 95 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[gpt-4o-mini-earth]", "type": "Function", - "lineno": 74 + "lineno": 95 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[gpt-4o-mini-saturn]", "type": "Function", - "lineno": 74 + "lineno": 95 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[gpt-4o-earth]", "type": "Function", - "lineno": 93 + "lineno": 114 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[gpt-4o-saturn]", "type": "Function", - "lineno": 93 + "lineno": 114 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[gpt-4o-mini-earth]", "type": "Function", - "lineno": 93 + "lineno": 114 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[gpt-4o-mini-saturn]", "type": "Function", - "lineno": 93 + "lineno": 114 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[gpt-4o-case0]", "type": "Function", - 
"lineno": 117 + "lineno": 138 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[gpt-4o-mini-case0]", "type": "Function", - "lineno": 117 + "lineno": 138 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[gpt-4o-case0]", "type": "Function", - "lineno": 136 + "lineno": 157 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[gpt-4o-mini-case0]", "type": "Function", - "lineno": 136 + "lineno": 157 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[gpt-4o-calendar]", "type": "Function", - "lineno": 160 + "lineno": 181 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[gpt-4o-math]", "type": "Function", - "lineno": 160 + "lineno": 181 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[gpt-4o-mini-calendar]", "type": "Function", - "lineno": 160 + "lineno": 181 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[gpt-4o-mini-math]", "type": "Function", - "lineno": 160 + "lineno": 181 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[gpt-4o-calendar]", "type": "Function", - "lineno": 183 + "lineno": 204 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[gpt-4o-math]", "type": "Function", - "lineno": 183 + "lineno": 204 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[gpt-4o-mini-calendar]", "type": "Function", - "lineno": 183 + "lineno": 204 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[gpt-4o-mini-math]", "type": "Function", - "lineno": 183 + "lineno": 204 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[gpt-4o-case0]", "type": "Function", - "lineno": 205 + "lineno": 226 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[gpt-4o-mini-case0]", "type": "Function", - "lineno": 205 + "lineno": 226 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[gpt-4o-case0]", "type": "Function", - "lineno": 229 + "lineno": 250 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[gpt-4o-mini-case0]", "type": "Function", - "lineno": 229 + "lineno": 250 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_required[gpt-4o-case0]", "type": "Function", - "lineno": 257 + "lineno": 278 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_required[gpt-4o-mini-case0]", "type": "Function", - "lineno": 257 + "lineno": 278 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_required[gpt-4o-case0]", "type": "Function", - "lineno": 281 + "lineno": 302 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_required[gpt-4o-mini-case0]", "type": "Function", - "lineno": 281 + "lineno": 302 }, { "nodeid": 
"tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_none[gpt-4o-case0]", "type": "Function", - "lineno": 308 + "lineno": 329 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_none[gpt-4o-mini-case0]", "type": "Function", - "lineno": 308 + "lineno": 329 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_none[gpt-4o-case0]", "type": "Function", - "lineno": 331 + "lineno": 352 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_none[gpt-4o-mini-case0]", "type": "Function", - "lineno": 331 + "lineno": 352 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[gpt-4o-text_then_weather_tool]", "type": "Function", - "lineno": 359 + "lineno": 380 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[gpt-4o-weather_tool_then_text]", "type": "Function", - "lineno": 359 + "lineno": 380 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[gpt-4o-add_product_tool]", "type": "Function", - "lineno": 359 + "lineno": 380 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[gpt-4o-get_then_create_event_tool]", "type": "Function", - "lineno": 359 + "lineno": 380 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[gpt-4o-compare_monthly_expense_tool]", "type": "Function", - "lineno": 359 + "lineno": 380 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[gpt-4o-mini-text_then_weather_tool]", "type": "Function", - "lineno": 359 + "lineno": 380 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[gpt-4o-mini-weather_tool_then_text]", "type": "Function", - "lineno": 359 + "lineno": 380 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[gpt-4o-mini-add_product_tool]", "type": "Function", - "lineno": 359 + "lineno": 380 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[gpt-4o-mini-get_then_create_event_tool]", "type": "Function", - "lineno": 359 + "lineno": 380 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[gpt-4o-mini-compare_monthly_expense_tool]", "type": "Function", - "lineno": 359 + "lineno": 380 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[gpt-4o-text_then_weather_tool]", "type": "Function", - "lineno": 450 + "lineno": 471 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[gpt-4o-weather_tool_then_text]", "type": "Function", - "lineno": 450 + "lineno": 471 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[gpt-4o-add_product_tool]", "type": "Function", - "lineno": 450 + "lineno": 471 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[gpt-4o-get_then_create_event_tool]", "type": 
"Function", - "lineno": 450 + "lineno": 471 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[gpt-4o-compare_monthly_expense_tool]", "type": "Function", - "lineno": 450 + "lineno": 471 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[gpt-4o-mini-text_then_weather_tool]", "type": "Function", - "lineno": 450 + "lineno": 471 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[gpt-4o-mini-weather_tool_then_text]", "type": "Function", - "lineno": 450 + "lineno": 471 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[gpt-4o-mini-add_product_tool]", "type": "Function", - "lineno": 450 + "lineno": 471 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[gpt-4o-mini-get_then_create_event_tool]", "type": "Function", - "lineno": 450 + "lineno": 471 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[gpt-4o-mini-compare_monthly_expense_tool]", "type": "Function", - "lineno": 450 + "lineno": 471 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_multi_turn_multiple_images[gpt-4o-stream=False]", + "type": "Function", + "lineno": 554 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_multi_turn_multiple_images[gpt-4o-stream=True]", + "type": "Function", + "lineno": 554 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_multi_turn_multiple_images[gpt-4o-mini-stream=False]", + "type": "Function", + "lineno": 554 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_multi_turn_multiple_images[gpt-4o-mini-stream=True]", + "type": "Function", + "lineno": 554 } ] } @@ -290,7 +310,7 @@ "tests": [ { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[gpt-4o-earth]", - "lineno": 74, + "lineno": 95, "outcome": "passed", "keywords": [ "test_chat_non_streaming_basic[gpt-4o-earth]", @@ -309,21 +329,21 @@ "case_id": "earth" }, "setup": { - "duration": 0.09044458298012614, + "duration": 0.09683514852076769, "outcome": "passed" }, "call": { - "duration": 1.3071064590476453, + "duration": 1.2521671634167433, "outcome": "passed" }, "teardown": { - "duration": 0.0003990421537309885, + "duration": 0.0002309884876012802, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[gpt-4o-saturn]", - "lineno": 74, + "lineno": 95, "outcome": "passed", "keywords": [ "test_chat_non_streaming_basic[gpt-4o-saturn]", @@ -342,21 +362,21 @@ "case_id": "saturn" }, "setup": { - "duration": 0.015266708098351955, + "duration": 0.08609516825526953, "outcome": "passed" }, "call": { - "duration": 1.3942135840188712, + "duration": 0.8818014115095139, "outcome": "passed" }, "teardown": { - "duration": 0.0006840829737484455, + "duration": 0.0002558426931500435, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[gpt-4o-mini-earth]", - "lineno": 74, + "lineno": 95, "outcome": "passed", "keywords": [ "test_chat_non_streaming_basic[gpt-4o-mini-earth]", @@ -375,21 +395,21 @@ "case_id": "earth" }, "setup": { - "duration": 0.028802334098145366, + 
"duration": 0.07237763796001673, "outcome": "passed" }, "call": { - "duration": 0.40633770800195634, + "duration": 0.44337860122323036, "outcome": "passed" }, "teardown": { - "duration": 0.0006945421919226646, + "duration": 0.00027293339371681213, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[gpt-4o-mini-saturn]", - "lineno": 74, + "lineno": 95, "outcome": "passed", "keywords": [ "test_chat_non_streaming_basic[gpt-4o-mini-saturn]", @@ -408,21 +428,21 @@ "case_id": "saturn" }, "setup": { - "duration": 0.01865937514230609, + "duration": 0.07486020587384701, "outcome": "passed" }, "call": { - "duration": 0.7515070410445333, + "duration": 0.7754815155640244, "outcome": "passed" }, "teardown": { - "duration": 0.0002985831815749407, + "duration": 0.00026193633675575256, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[gpt-4o-earth]", - "lineno": 93, + "lineno": 114, "outcome": "passed", "keywords": [ "test_chat_streaming_basic[gpt-4o-earth]", @@ -441,21 +461,21 @@ "case_id": "earth" }, "setup": { - "duration": 0.011108374921604991, + "duration": 0.07270221784710884, "outcome": "passed" }, "call": { - "duration": 0.3914629169739783, + "duration": 0.5725504904985428, "outcome": "passed" }, "teardown": { - "duration": 0.0006979589816182852, + "duration": 0.00025644712150096893, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[gpt-4o-saturn]", - "lineno": 93, + "lineno": 114, "outcome": "passed", "keywords": [ "test_chat_streaming_basic[gpt-4o-saturn]", @@ -474,21 +494,21 @@ "case_id": "saturn" }, "setup": { - "duration": 0.02875337516888976, + "duration": 0.07263980247080326, "outcome": "passed" }, "call": { - "duration": 0.5632798750884831, + "duration": 0.6277077253907919, "outcome": "passed" }, "teardown": { - "duration": 0.004012458026409149, + "duration": 0.0002706516534090042, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[gpt-4o-mini-earth]", - "lineno": 93, + "lineno": 114, "outcome": "passed", "keywords": [ "test_chat_streaming_basic[gpt-4o-mini-earth]", @@ -507,21 +527,21 @@ "case_id": "earth" }, "setup": { - "duration": 0.0143584581092, + "duration": 0.07290142774581909, "outcome": "passed" }, "call": { - "duration": 0.36101250001229346, + "duration": 0.45955433789640665, "outcome": "passed" }, "teardown": { - "duration": 0.0005384159740060568, + "duration": 0.0002704532817006111, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[gpt-4o-mini-saturn]", - "lineno": 93, + "lineno": 114, "outcome": "passed", "keywords": [ "test_chat_streaming_basic[gpt-4o-mini-saturn]", @@ -540,21 +560,21 @@ "case_id": "saturn" }, "setup": { - "duration": 0.017127499915659428, + "duration": 0.0736015671864152, "outcome": "passed" }, "call": { - "duration": 0.8120857500471175, + "duration": 1.1738686058670282, "outcome": "passed" }, "teardown": { - "duration": 0.0005928750615566969, + "duration": 0.00026966072618961334, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[gpt-4o-case0]", - "lineno": 117, + "lineno": 138, "outcome": "passed", "keywords": [ "test_chat_non_streaming_image[gpt-4o-case0]", @@ -573,21 +593,21 @@ "case_id": "case0" }, "setup": { - "duration": 
0.023183667100965977, + "duration": 0.07560365367680788, "outcome": "passed" }, "call": { - "duration": 2.8612758750095963, + "duration": 2.4073661137372255, "outcome": "passed" }, "teardown": { - "duration": 0.0005042918492108583, + "duration": 0.0002443268895149231, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[gpt-4o-mini-case0]", - "lineno": 117, + "lineno": 138, "outcome": "passed", "keywords": [ "test_chat_non_streaming_image[gpt-4o-mini-case0]", @@ -606,21 +626,21 @@ "case_id": "case0" }, "setup": { - "duration": 0.007410250138491392, + "duration": 0.06925276480615139, "outcome": "passed" }, "call": { - "duration": 2.3748936660122126, + "duration": 2.777276105247438, "outcome": "passed" }, "teardown": { - "duration": 0.00045658298768103123, + "duration": 0.0002748873084783554, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[gpt-4o-case0]", - "lineno": 136, + "lineno": 157, "outcome": "passed", "keywords": [ "test_chat_streaming_image[gpt-4o-case0]", @@ -639,21 +659,21 @@ "case_id": "case0" }, "setup": { - "duration": 0.023792708991095424, + "duration": 0.07098669931292534, "outcome": "passed" }, "call": { - "duration": 3.1502402499318123, + "duration": 3.0149426590651274, "outcome": "passed" }, "teardown": { - "duration": 0.0010152498725801706, + "duration": 0.0002702716737985611, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[gpt-4o-mini-case0]", - "lineno": 136, + "lineno": 157, "outcome": "passed", "keywords": [ "test_chat_streaming_image[gpt-4o-mini-case0]", @@ -672,21 +692,21 @@ "case_id": "case0" }, "setup": { - "duration": 0.01887162495404482, + "duration": 0.07316321693360806, "outcome": "passed" }, "call": { - "duration": 2.070013999938965, + "duration": 2.401849321089685, "outcome": "passed" }, "teardown": { - "duration": 0.0005797501653432846, + "duration": 0.0003180522471666336, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[gpt-4o-calendar]", - "lineno": 160, + "lineno": 181, "outcome": "passed", "keywords": [ "test_chat_non_streaming_structured_output[gpt-4o-calendar]", @@ -705,21 +725,21 @@ "case_id": "calendar" }, "setup": { - "duration": 0.017477875109761953, + "duration": 0.07038832642138004, "outcome": "passed" }, "call": { - "duration": 0.7350135410670191, + "duration": 1.0188098661601543, "outcome": "passed" }, "teardown": { - "duration": 0.00046616699546575546, + "duration": 0.00027244072407484055, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[gpt-4o-math]", - "lineno": 160, + "lineno": 181, "outcome": "passed", "keywords": [ "test_chat_non_streaming_structured_output[gpt-4o-math]", @@ -738,21 +758,21 @@ "case_id": "math" }, "setup": { - "duration": 0.033007249934598804, + "duration": 0.07331131957471371, "outcome": "passed" }, "call": { - "duration": 5.031138291116804, + "duration": 7.0907115917652845, "outcome": "passed" }, "teardown": { - "duration": 0.00032295798882842064, + "duration": 0.0003256639465689659, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[gpt-4o-mini-calendar]", - "lineno": 160, + "lineno": 181, "outcome": "passed", "keywords": [ 
"test_chat_non_streaming_structured_output[gpt-4o-mini-calendar]", @@ -771,21 +791,21 @@ "case_id": "calendar" }, "setup": { - "duration": 0.014672457939013839, + "duration": 0.0749899847432971, "outcome": "passed" }, "call": { - "duration": 0.7515842081047595, + "duration": 0.6721736947074533, "outcome": "passed" }, "teardown": { - "duration": 0.00034395791590213776, + "duration": 0.0002617714926600456, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[gpt-4o-mini-math]", - "lineno": 160, + "lineno": 181, "outcome": "passed", "keywords": [ "test_chat_non_streaming_structured_output[gpt-4o-mini-math]", @@ -804,21 +824,21 @@ "case_id": "math" }, "setup": { - "duration": 0.02985133300535381, + "duration": 0.07268172968178988, "outcome": "passed" }, "call": { - "duration": 2.388004041975364, + "duration": 2.6800331017002463, "outcome": "passed" }, "teardown": { - "duration": 0.00038116704672574997, + "duration": 0.0002518612891435623, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[gpt-4o-calendar]", - "lineno": 183, + "lineno": 204, "outcome": "passed", "keywords": [ "test_chat_streaming_structured_output[gpt-4o-calendar]", @@ -837,21 +857,21 @@ "case_id": "calendar" }, "setup": { - "duration": 0.017887332942336798, + "duration": 0.07150284852832556, "outcome": "passed" }, "call": { - "duration": 1.0018641669303179, + "duration": 0.6667193034663796, "outcome": "passed" }, "teardown": { - "duration": 0.0005486670415848494, + "duration": 0.00025727134197950363, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[gpt-4o-math]", - "lineno": 183, + "lineno": 204, "outcome": "passed", "keywords": [ "test_chat_streaming_structured_output[gpt-4o-math]", @@ -870,21 +890,21 @@ "case_id": "math" }, "setup": { - "duration": 0.0158015841152519, + "duration": 0.07039738819003105, "outcome": "passed" }, "call": { - "duration": 7.285852208966389, + "duration": 4.870940984226763, "outcome": "passed" }, "teardown": { - "duration": 0.0003417080733925104, + "duration": 0.00025987718254327774, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[gpt-4o-mini-calendar]", - "lineno": 183, + "lineno": 204, "outcome": "passed", "keywords": [ "test_chat_streaming_structured_output[gpt-4o-mini-calendar]", @@ -903,21 +923,21 @@ "case_id": "calendar" }, "setup": { - "duration": 0.014434333890676498, + "duration": 0.07166357431560755, "outcome": "passed" }, "call": { - "duration": 0.9268912919797003, + "duration": 0.9911826532334089, "outcome": "passed" }, "teardown": { - "duration": 0.00046200002543628216, + "duration": 0.00028301775455474854, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[gpt-4o-mini-math]", - "lineno": 183, + "lineno": 204, "outcome": "passed", "keywords": [ "test_chat_streaming_structured_output[gpt-4o-mini-math]", @@ -936,21 +956,21 @@ "case_id": "math" }, "setup": { - "duration": 0.01635808404535055, + "duration": 0.07489973120391369, "outcome": "passed" }, "call": { - "duration": 3.7341703751590103, + "duration": 5.81621040776372, "outcome": "passed" }, "teardown": { - "duration": 0.0004277920816093683, + "duration": 0.00027776509523391724, "outcome": "passed" } }, { "nodeid": 
"tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[gpt-4o-case0]", - "lineno": 205, + "lineno": 226, "outcome": "passed", "keywords": [ "test_chat_non_streaming_tool_calling[gpt-4o-case0]", @@ -969,21 +989,21 @@ "case_id": "case0" }, "setup": { - "duration": 0.021756208036094904, + "duration": 0.0709689250215888, "outcome": "passed" }, "call": { - "duration": 0.6105514578521252, + "duration": 0.6838962603360415, "outcome": "passed" }, "teardown": { - "duration": 0.0004747910425066948, + "duration": 0.00038875360041856766, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[gpt-4o-mini-case0]", - "lineno": 205, + "lineno": 226, "outcome": "passed", "keywords": [ "test_chat_non_streaming_tool_calling[gpt-4o-mini-case0]", @@ -1002,21 +1022,21 @@ "case_id": "case0" }, "setup": { - "duration": 0.015522167086601257, + "duration": 0.07440952491015196, "outcome": "passed" }, "call": { - "duration": 0.9731334580574185, + "duration": 0.6124099707230926, "outcome": "passed" }, "teardown": { - "duration": 0.0003415420651435852, + "duration": 0.00031805597245693207, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[gpt-4o-case0]", - "lineno": 229, + "lineno": 250, "outcome": "passed", "keywords": [ "test_chat_streaming_tool_calling[gpt-4o-case0]", @@ -1035,21 +1055,21 @@ "case_id": "case0" }, "setup": { - "duration": 0.014343583025038242, + "duration": 0.07558728754520416, "outcome": "passed" }, "call": { - "duration": 0.5453979168087244, + "duration": 1.0413735723122954, "outcome": "passed" }, "teardown": { - "duration": 0.0011145840398967266, + "duration": 0.00026555173099040985, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[gpt-4o-mini-case0]", - "lineno": 229, + "lineno": 250, "outcome": "passed", "keywords": [ "test_chat_streaming_tool_calling[gpt-4o-mini-case0]", @@ -1068,21 +1088,21 @@ "case_id": "case0" }, "setup": { - "duration": 0.017669249791651964, + "duration": 0.07159029692411423, "outcome": "passed" }, "call": { - "duration": 0.6310562079306692, + "duration": 0.619917850010097, "outcome": "passed" }, "teardown": { - "duration": 0.0006836249958723783, + "duration": 0.00026798900216817856, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_required[gpt-4o-case0]", - "lineno": 257, + "lineno": 278, "outcome": "passed", "keywords": [ "test_chat_non_streaming_tool_choice_required[gpt-4o-case0]", @@ -1101,21 +1121,21 @@ "case_id": "case0" }, "setup": { - "duration": 0.016614832915365696, + "duration": 0.10359053406864405, "outcome": "passed" }, "call": { - "duration": 0.6914504591841251, + "duration": 0.6396236326545477, "outcome": "passed" }, "teardown": { - "duration": 0.0004829999525099993, + "duration": 0.000257750041782856, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_required[gpt-4o-mini-case0]", - "lineno": 257, + "lineno": 278, "outcome": "passed", "keywords": [ "test_chat_non_streaming_tool_choice_required[gpt-4o-mini-case0]", @@ -1134,21 +1154,21 @@ "case_id": "case0" }, "setup": { - "duration": 0.03217837493866682, + "duration": 0.07243514712899923, "outcome": "passed" }, "call": { - "duration": 0.4917086660861969, + "duration": 
0.6169720906764269, "outcome": "passed" }, "teardown": { - "duration": 0.0005399580113589764, + "duration": 0.0002462640404701233, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_required[gpt-4o-case0]", - "lineno": 281, + "lineno": 302, "outcome": "passed", "keywords": [ "test_chat_streaming_tool_choice_required[gpt-4o-case0]", @@ -1167,21 +1187,21 @@ "case_id": "case0" }, "setup": { - "duration": 0.01154208299703896, + "duration": 0.07266584690660238, "outcome": "passed" }, "call": { - "duration": 0.5663661658763885, + "duration": 0.9391414495185018, "outcome": "passed" }, "teardown": { - "duration": 0.0008221250027418137, + "duration": 0.0003280108794569969, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_required[gpt-4o-mini-case0]", - "lineno": 281, + "lineno": 302, "outcome": "passed", "keywords": [ "test_chat_streaming_tool_choice_required[gpt-4o-mini-case0]", @@ -1200,21 +1220,21 @@ "case_id": "case0" }, "setup": { - "duration": 0.013238833984360099, + "duration": 0.08437065314501524, "outcome": "passed" }, "call": { - "duration": 0.6098562499973923, + "duration": 0.6935106571763754, "outcome": "passed" }, "teardown": { - "duration": 0.00045654200948774815, + "duration": 0.00027523748576641083, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_none[gpt-4o-case0]", - "lineno": 308, + "lineno": 329, "outcome": "passed", "keywords": [ "test_chat_non_streaming_tool_choice_none[gpt-4o-case0]", @@ -1233,21 +1253,21 @@ "case_id": "case0" }, "setup": { - "duration": 0.014951375080272555, + "duration": 0.07208988349884748, "outcome": "passed" }, "call": { - "duration": 0.5425659997854382, + "duration": 0.6744982637465, "outcome": "passed" }, "teardown": { - "duration": 0.0002112078946083784, + "duration": 0.0002555781975388527, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_none[gpt-4o-mini-case0]", - "lineno": 308, + "lineno": 329, "outcome": "passed", "keywords": [ "test_chat_non_streaming_tool_choice_none[gpt-4o-mini-case0]", @@ -1266,21 +1286,21 @@ "case_id": "case0" }, "setup": { - "duration": 0.010041083907708526, + "duration": 0.07785151246935129, "outcome": "passed" }, "call": { - "duration": 0.7337456250097603, + "duration": 0.6253539212048054, "outcome": "passed" }, "teardown": { - "duration": 0.00042791711166501045, + "duration": 0.00028202030807733536, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_none[gpt-4o-case0]", - "lineno": 331, + "lineno": 352, "outcome": "passed", "keywords": [ "test_chat_streaming_tool_choice_none[gpt-4o-case0]", @@ -1299,21 +1319,21 @@ "case_id": "case0" }, "setup": { - "duration": 0.007236667210236192, + "duration": 0.0911521203815937, "outcome": "passed" }, "call": { - "duration": 0.4192167909350246, + "duration": 0.7869452070444822, "outcome": "passed" }, "teardown": { - "duration": 0.0010569579899311066, + "duration": 0.00043197907507419586, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_none[gpt-4o-mini-case0]", - "lineno": 331, + "lineno": 352, "outcome": "passed", "keywords": [ "test_chat_streaming_tool_choice_none[gpt-4o-mini-case0]", @@ -1332,21 +1352,21 @@ "case_id": 
"case0" }, "setup": { - "duration": 0.01997062494046986, + "duration": 0.10472878441214561, "outcome": "passed" }, "call": { - "duration": 0.6866283339913934, + "duration": 0.6786438375711441, "outcome": "passed" }, "teardown": { - "duration": 0.0010521251242607832, + "duration": 0.00025699567049741745, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[gpt-4o-text_then_weather_tool]", - "lineno": 359, + "lineno": 380, "outcome": "passed", "keywords": [ "test_chat_non_streaming_multi_turn_tool_calling[gpt-4o-text_then_weather_tool]", @@ -1365,21 +1385,21 @@ "case_id": "text_then_weather_tool" }, "setup": { - "duration": 0.017386124935001135, + "duration": 0.07002853509038687, "outcome": "passed" }, "call": { - "duration": 4.425433791941032, + "duration": 2.395758199505508, "outcome": "passed" }, "teardown": { - "duration": 0.00043645803816616535, + "duration": 0.0002955012023448944, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[gpt-4o-weather_tool_then_text]", - "lineno": 359, + "lineno": 380, "outcome": "passed", "keywords": [ "test_chat_non_streaming_multi_turn_tool_calling[gpt-4o-weather_tool_then_text]", @@ -1398,21 +1418,21 @@ "case_id": "weather_tool_then_text" }, "setup": { - "duration": 0.014067957876250148, + "duration": 0.07316868472844362, "outcome": "passed" }, "call": { - "duration": 1.205255625071004, + "duration": 1.3224441464990377, "outcome": "passed" }, "teardown": { - "duration": 0.0004651669878512621, + "duration": 0.0002612341195344925, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[gpt-4o-add_product_tool]", - "lineno": 359, + "lineno": 380, "outcome": "passed", "keywords": [ "test_chat_non_streaming_multi_turn_tool_calling[gpt-4o-add_product_tool]", @@ -1431,21 +1451,21 @@ "case_id": "add_product_tool" }, "setup": { - "duration": 0.016634040977805853, + "duration": 0.10713072493672371, "outcome": "passed" }, "call": { - "duration": 1.4360020828898996, + "duration": 1.0061814906075597, "outcome": "passed" }, "teardown": { - "duration": 0.0004704580642282963, + "duration": 0.0002610785886645317, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[gpt-4o-get_then_create_event_tool]", - "lineno": 359, + "lineno": 380, "outcome": "passed", "keywords": [ "test_chat_non_streaming_multi_turn_tool_calling[gpt-4o-get_then_create_event_tool]", @@ -1464,21 +1484,21 @@ "case_id": "get_then_create_event_tool" }, "setup": { - "duration": 0.015702415956184268, + "duration": 0.07267123833298683, "outcome": "passed" }, "call": { - "duration": 5.882555708056316, + "duration": 4.26907461322844, "outcome": "passed" }, "teardown": { - "duration": 0.003662874922156334, + "duration": 0.00025866832584142685, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[gpt-4o-compare_monthly_expense_tool]", - "lineno": 359, + "lineno": 380, "outcome": "passed", "keywords": [ "test_chat_non_streaming_multi_turn_tool_calling[gpt-4o-compare_monthly_expense_tool]", @@ -1497,21 +1517,21 @@ "case_id": "compare_monthly_expense_tool" }, "setup": { - "duration": 0.020038041984662414, + "duration": 0.07208938524127007, "outcome": "passed" }, "call": { - 
"duration": 2.2738899998366833, + "duration": 2.8186135441064835, "outcome": "passed" }, "teardown": { - "duration": 0.0004929169081151485, + "duration": 0.00026924535632133484, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[gpt-4o-mini-text_then_weather_tool]", - "lineno": 359, + "lineno": 380, "outcome": "passed", "keywords": [ "test_chat_non_streaming_multi_turn_tool_calling[gpt-4o-mini-text_then_weather_tool]", @@ -1530,21 +1550,21 @@ "case_id": "text_then_weather_tool" }, "setup": { - "duration": 0.007982166949659586, + "duration": 0.07148494757711887, "outcome": "passed" }, "call": { - "duration": 1.7494398748967797, + "duration": 2.1276168935000896, "outcome": "passed" }, "teardown": { - "duration": 0.0005488330498337746, + "duration": 0.00024427566677331924, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[gpt-4o-mini-weather_tool_then_text]", - "lineno": 359, + "lineno": 380, "outcome": "passed", "keywords": [ "test_chat_non_streaming_multi_turn_tool_calling[gpt-4o-mini-weather_tool_then_text]", @@ -1563,21 +1583,21 @@ "case_id": "weather_tool_then_text" }, "setup": { - "duration": 0.007455583196133375, + "duration": 0.07107946090400219, "outcome": "passed" }, "call": { - "duration": 5.338647875003517, + "duration": 1.1634307894855738, "outcome": "passed" }, "teardown": { - "duration": 0.0005507499445229769, + "duration": 0.00030216481536626816, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[gpt-4o-mini-add_product_tool]", - "lineno": 359, + "lineno": 380, "outcome": "passed", "keywords": [ "test_chat_non_streaming_multi_turn_tool_calling[gpt-4o-mini-add_product_tool]", @@ -1596,21 +1616,21 @@ "case_id": "add_product_tool" }, "setup": { - "duration": 0.01675066608004272, + "duration": 0.07261826191097498, "outcome": "passed" }, "call": { - "duration": 4.016703582834452, + "duration": 1.4525672728195786, "outcome": "passed" }, "teardown": { - "duration": 0.0005397920031100512, + "duration": 0.0002602897584438324, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[gpt-4o-mini-get_then_create_event_tool]", - "lineno": 359, + "lineno": 380, "outcome": "passed", "keywords": [ "test_chat_non_streaming_multi_turn_tool_calling[gpt-4o-mini-get_then_create_event_tool]", @@ -1629,21 +1649,21 @@ "case_id": "get_then_create_event_tool" }, "setup": { - "duration": 0.009890957968309522, + "duration": 0.0710728308185935, "outcome": "passed" }, "call": { - "duration": 3.9003724998328835, + "duration": 4.533652591519058, "outcome": "passed" }, "teardown": { - "duration": 0.0005802921950817108, + "duration": 0.0002704774960875511, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[gpt-4o-mini-compare_monthly_expense_tool]", - "lineno": 359, + "lineno": 380, "outcome": "passed", "keywords": [ "test_chat_non_streaming_multi_turn_tool_calling[gpt-4o-mini-compare_monthly_expense_tool]", @@ -1662,21 +1682,21 @@ "case_id": "compare_monthly_expense_tool" }, "setup": { - "duration": 0.021778207970783114, + "duration": 0.0781267425045371, "outcome": "passed" }, "call": { - "duration": 2.3824402918107808, + "duration": 2.160066588781774, "outcome": 
"passed" }, "teardown": { - "duration": 0.0008852919563651085, + "duration": 0.0002731531858444214, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[gpt-4o-text_then_weather_tool]", - "lineno": 450, + "lineno": 471, "outcome": "passed", "keywords": [ "test_chat_streaming_multi_turn_tool_calling[gpt-4o-text_then_weather_tool]", @@ -1695,21 +1715,21 @@ "case_id": "text_then_weather_tool" }, "setup": { - "duration": 0.021121500059962273, + "duration": 0.07118126843124628, "outcome": "passed" }, "call": { - "duration": 2.362067250069231, + "duration": 2.068133544176817, "outcome": "passed" }, "teardown": { - "duration": 0.0007184590213000774, + "duration": 0.0002514524385333061, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[gpt-4o-weather_tool_then_text]", - "lineno": 450, + "lineno": 471, "outcome": "passed", "keywords": [ "test_chat_streaming_multi_turn_tool_calling[gpt-4o-weather_tool_then_text]", @@ -1728,21 +1748,21 @@ "case_id": "weather_tool_then_text" }, "setup": { - "duration": 0.01677604205906391, + "duration": 0.07241942081600428, "outcome": "passed" }, "call": { - "duration": 1.4576394581235945, + "duration": 1.1098179938271642, "outcome": "passed" }, "teardown": { - "duration": 0.0005367500707507133, + "duration": 0.00028003379702568054, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[gpt-4o-add_product_tool]", - "lineno": 450, + "lineno": 471, "outcome": "passed", "keywords": [ "test_chat_streaming_multi_turn_tool_calling[gpt-4o-add_product_tool]", @@ -1761,21 +1781,21 @@ "case_id": "add_product_tool" }, "setup": { - "duration": 0.010623916983604431, + "duration": 0.07439264003187418, "outcome": "passed" }, "call": { - "duration": 3.295967958169058, + "duration": 1.0720843756571412, "outcome": "passed" }, "teardown": { - "duration": 0.0005429999437183142, + "duration": 0.00026407837867736816, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[gpt-4o-get_then_create_event_tool]", - "lineno": 450, + "lineno": 471, "outcome": "passed", "keywords": [ "test_chat_streaming_multi_turn_tool_calling[gpt-4o-get_then_create_event_tool]", @@ -1794,21 +1814,21 @@ "case_id": "get_then_create_event_tool" }, "setup": { - "duration": 0.014912083046510816, + "duration": 0.07028928305953741, "outcome": "passed" }, "call": { - "duration": 2.7422334579750896, + "duration": 5.23135226033628, "outcome": "passed" }, "teardown": { - "duration": 0.001017916016280651, + "duration": 0.0002559954300522804, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[gpt-4o-compare_monthly_expense_tool]", - "lineno": 450, + "lineno": 471, "outcome": "passed", "keywords": [ "test_chat_streaming_multi_turn_tool_calling[gpt-4o-compare_monthly_expense_tool]", @@ -1827,21 +1847,21 @@ "case_id": "compare_monthly_expense_tool" }, "setup": { - "duration": 0.014568000100553036, + "duration": 0.0733694015070796, "outcome": "passed" }, "call": { - "duration": 2.4006296249572188, + "duration": 2.3011497305706143, "outcome": "passed" }, "teardown": { - "duration": 0.000492083141580224, + "duration": 0.0002724975347518921, "outcome": "passed" } }, { "nodeid": 
"tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[gpt-4o-mini-text_then_weather_tool]", - "lineno": 450, + "lineno": 471, "outcome": "passed", "keywords": [ "test_chat_streaming_multi_turn_tool_calling[gpt-4o-mini-text_then_weather_tool]", @@ -1860,21 +1880,21 @@ "case_id": "text_then_weather_tool" }, "setup": { - "duration": 0.01243741693906486, + "duration": 0.07319487817585468, "outcome": "passed" }, "call": { - "duration": 1.858031083131209, + "duration": 2.060736038722098, "outcome": "passed" }, "teardown": { - "duration": 0.0012166248634457588, + "duration": 0.0002620834857225418, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[gpt-4o-mini-weather_tool_then_text]", - "lineno": 450, + "lineno": 471, "outcome": "passed", "keywords": [ "test_chat_streaming_multi_turn_tool_calling[gpt-4o-mini-weather_tool_then_text]", @@ -1893,21 +1913,21 @@ "case_id": "weather_tool_then_text" }, "setup": { - "duration": 0.017216125037521124, + "duration": 0.07086801622062922, "outcome": "passed" }, "call": { - "duration": 1.4033057920169085, + "duration": 1.1969546489417553, "outcome": "passed" }, "teardown": { - "duration": 0.00047016702592372894, + "duration": 0.00023349467664957047, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[gpt-4o-mini-add_product_tool]", - "lineno": 450, + "lineno": 471, "outcome": "passed", "keywords": [ "test_chat_streaming_multi_turn_tool_calling[gpt-4o-mini-add_product_tool]", @@ -1926,21 +1946,21 @@ "case_id": "add_product_tool" }, "setup": { - "duration": 0.019779917085543275, + "duration": 0.07276885025203228, "outcome": "passed" }, "call": { - "duration": 1.5427470421418548, + "duration": 2.2494191862642765, "outcome": "passed" }, "teardown": { - "duration": 0.0007832080591470003, + "duration": 0.0002493094652891159, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[gpt-4o-mini-get_then_create_event_tool]", - "lineno": 450, + "lineno": 471, "outcome": "passed", "keywords": [ "test_chat_streaming_multi_turn_tool_calling[gpt-4o-mini-get_then_create_event_tool]", @@ -1959,21 +1979,21 @@ "case_id": "get_then_create_event_tool" }, "setup": { - "duration": 0.019053417025133967, + "duration": 0.07039583195000887, "outcome": "passed" }, "call": { - "duration": 4.038398916134611, + "duration": 4.528189226053655, "outcome": "passed" }, "teardown": { - "duration": 0.00048545910976827145, + "duration": 0.00025649741291999817, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[gpt-4o-mini-compare_monthly_expense_tool]", - "lineno": 450, + "lineno": 471, "outcome": "passed", "keywords": [ "test_chat_streaming_multi_turn_tool_calling[gpt-4o-mini-compare_monthly_expense_tool]", @@ -1992,18 +2012,150 @@ "case_id": "compare_monthly_expense_tool" }, "setup": { - "duration": 0.01692862482741475, + "duration": 0.07187813706696033, "outcome": "passed" }, "call": { - "duration": 1.849576957989484, + "duration": 2.446169280447066, "outcome": "passed" }, "teardown": { - "duration": 0.0032055408228188753, + "duration": 0.00024812109768390656, + "outcome": "passed" + } + }, + { + "nodeid": 
"tests/verifications/openai_api/test_chat_completion.py::test_chat_multi_turn_multiple_images[gpt-4o-stream=False]", + "lineno": 554, + "outcome": "passed", + "keywords": [ + "test_chat_multi_turn_multiple_images[gpt-4o-stream=False]", + "parametrize", + "pytestmark", + "gpt-4o-stream=False", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "gpt-4o", + "case_id": "stream=False" + }, + "setup": { + "duration": 0.07299137767404318, + "outcome": "passed" + }, + "call": { + "duration": 8.35237762145698, + "outcome": "passed" + }, + "teardown": { + "duration": 0.00026817526668310165, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_multi_turn_multiple_images[gpt-4o-stream=True]", + "lineno": 554, + "outcome": "passed", + "keywords": [ + "test_chat_multi_turn_multiple_images[gpt-4o-stream=True]", + "parametrize", + "pytestmark", + "gpt-4o-stream=True", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "gpt-4o", + "case_id": "stream=True" + }, + "setup": { + "duration": 0.07363969460129738, + "outcome": "passed" + }, + "call": { + "duration": 4.653971025720239, + "outcome": "passed" + }, + "teardown": { + "duration": 0.00026602670550346375, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_multi_turn_multiple_images[gpt-4o-mini-stream=False]", + "lineno": 554, + "outcome": "passed", + "keywords": [ + "test_chat_multi_turn_multiple_images[gpt-4o-mini-stream=False]", + "parametrize", + "pytestmark", + "gpt-4o-mini-stream=False", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "gpt-4o-mini", + "case_id": "stream=False" + }, + "setup": { + "duration": 0.07377734407782555, + "outcome": "passed" + }, + "call": { + "duration": 9.776036521419883, + "outcome": "passed" + }, + "teardown": { + "duration": 0.000254971906542778, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_multi_turn_multiple_images[gpt-4o-mini-stream=True]", + "lineno": 554, + "outcome": "passed", + "keywords": [ + "test_chat_multi_turn_multiple_images[gpt-4o-mini-stream=True]", + "parametrize", + "pytestmark", + "gpt-4o-mini-stream=True", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "gpt-4o-mini", + "case_id": "stream=True" + }, + "setup": { + "duration": 0.07054048776626587, + "outcome": "passed" + }, + "call": { + "duration": 12.58133109845221, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0013354746624827385, "outcome": "passed" } } ], - "run_timestamp": 1744679391 + "run_timestamp": 1744918448 } diff --git a/tests/verifications/test_results/together.json b/tests/verifications/test_results/together.json index 44e831936..2d74b8cca 100644 --- a/tests/verifications/test_results/together.json +++ b/tests/verifications/test_results/together.json @@ -1,15 +1,15 @@ { - "created": 1744679387.346831, - "duration": 90.31976795196533, + "created": 1744918192.9299376, + "duration": 126.91354608535767, "exitcode": 1, - "root": "/Users/erichuang/projects/llama-stack", + "root": "/home/erichuang/llama-stack", "environment": {}, "summary": { - "passed": 37, - "failed": 39, - "skipped": 2, - "total": 78, - "collected": 78 + 
"passed": 40, + "failed": 40, + "skipped": 4, + "total": 84, + "collected": 84 }, "collectors": [ { @@ -29,392 +29,422 @@ { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-earth]", "type": "Function", - "lineno": 74 + "lineno": 95 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-saturn]", "type": "Function", - "lineno": 74 + "lineno": 95 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-earth]", "type": "Function", - "lineno": 74 + "lineno": 95 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-saturn]", "type": "Function", - "lineno": 74 + "lineno": 95 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-earth]", "type": "Function", - "lineno": 74 + "lineno": 95 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-saturn]", "type": "Function", - "lineno": 74 + "lineno": 95 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-earth]", "type": "Function", - "lineno": 93 + "lineno": 114 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-saturn]", "type": "Function", - "lineno": 93 + "lineno": 114 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-earth]", "type": "Function", - "lineno": 93 + "lineno": 114 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-saturn]", "type": "Function", - "lineno": 93 + "lineno": 114 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-earth]", "type": "Function", - "lineno": 93 + "lineno": 114 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-saturn]", "type": "Function", - "lineno": 93 + "lineno": 114 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", "type": "Function", - "lineno": 117 + "lineno": 138 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", "type": "Function", - "lineno": 117 + "lineno": 138 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", "type": "Function", - "lineno": 117 + "lineno": 138 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", "type": "Function", - "lineno": 136 + "lineno": 157 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", "type": 
"Function", - "lineno": 136 + "lineno": 157 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", "type": "Function", - "lineno": 136 + "lineno": 157 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-calendar]", "type": "Function", - "lineno": 160 + "lineno": 181 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-math]", "type": "Function", - "lineno": 160 + "lineno": 181 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-calendar]", "type": "Function", - "lineno": 160 + "lineno": 181 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-math]", "type": "Function", - "lineno": 160 + "lineno": 181 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-calendar]", "type": "Function", - "lineno": 160 + "lineno": 181 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-math]", "type": "Function", - "lineno": 160 + "lineno": 181 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-calendar]", "type": "Function", - "lineno": 183 + "lineno": 204 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-math]", "type": "Function", - "lineno": 183 + "lineno": 204 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-calendar]", "type": "Function", - "lineno": 183 + "lineno": 204 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-math]", "type": "Function", - "lineno": 183 + "lineno": 204 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-calendar]", "type": "Function", - "lineno": 183 + "lineno": 204 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-math]", "type": "Function", - "lineno": 183 + "lineno": 204 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", "type": "Function", - "lineno": 205 + "lineno": 226 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", "type": "Function", - "lineno": 205 + "lineno": 226 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", "type": "Function", - "lineno": 205 + "lineno": 226 }, { "nodeid": 
"tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", "type": "Function", - "lineno": 229 + "lineno": 250 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", "type": "Function", - "lineno": 229 + "lineno": 250 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", "type": "Function", - "lineno": 229 + "lineno": 250 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_required[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", "type": "Function", - "lineno": 257 + "lineno": 278 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_required[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", "type": "Function", - "lineno": 257 + "lineno": 278 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_required[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", "type": "Function", - "lineno": 257 + "lineno": 278 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_required[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", "type": "Function", - "lineno": 281 + "lineno": 302 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_required[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", "type": "Function", - "lineno": 281 + "lineno": 302 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_required[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", "type": "Function", - "lineno": 281 + "lineno": 302 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_none[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", "type": "Function", - "lineno": 308 + "lineno": 329 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_none[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", "type": "Function", - "lineno": 308 + "lineno": 329 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_none[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", "type": "Function", - "lineno": 308 + "lineno": 329 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_none[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", "type": "Function", - "lineno": 331 + "lineno": 352 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_none[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", "type": "Function", - "lineno": 331 + "lineno": 352 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_none[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", "type": "Function", - "lineno": 331 + "lineno": 352 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-text_then_weather_tool]", "type": "Function", - "lineno": 359 + "lineno": 380 }, { "nodeid": 
"tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-weather_tool_then_text]", "type": "Function", - "lineno": 359 + "lineno": 380 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-add_product_tool]", "type": "Function", - "lineno": 359 + "lineno": 380 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-get_then_create_event_tool]", "type": "Function", - "lineno": 359 + "lineno": 380 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-compare_monthly_expense_tool]", "type": "Function", - "lineno": 359 + "lineno": 380 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-text_then_weather_tool]", "type": "Function", - "lineno": 359 + "lineno": 380 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-weather_tool_then_text]", "type": "Function", - "lineno": 359 + "lineno": 380 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-add_product_tool]", "type": "Function", - "lineno": 359 + "lineno": 380 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-get_then_create_event_tool]", "type": "Function", - "lineno": 359 + "lineno": 380 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-compare_monthly_expense_tool]", "type": "Function", - "lineno": 359 + "lineno": 380 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-text_then_weather_tool]", "type": "Function", - "lineno": 359 + "lineno": 380 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-weather_tool_then_text]", "type": "Function", - "lineno": 359 + "lineno": 380 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-add_product_tool]", "type": "Function", - "lineno": 359 + "lineno": 380 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-get_then_create_event_tool]", "type": "Function", - "lineno": 359 + "lineno": 380 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-compare_monthly_expense_tool]", "type": "Function", - "lineno": 359 + "lineno": 380 }, { "nodeid": 
"tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-text_then_weather_tool]", "type": "Function", - "lineno": 450 + "lineno": 471 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-weather_tool_then_text]", "type": "Function", - "lineno": 450 + "lineno": 471 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-add_product_tool]", "type": "Function", - "lineno": 450 + "lineno": 471 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-get_then_create_event_tool]", "type": "Function", - "lineno": 450 + "lineno": 471 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-compare_monthly_expense_tool]", "type": "Function", - "lineno": 450 + "lineno": 471 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-text_then_weather_tool]", "type": "Function", - "lineno": 450 + "lineno": 471 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-weather_tool_then_text]", "type": "Function", - "lineno": 450 + "lineno": 471 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-add_product_tool]", "type": "Function", - "lineno": 450 + "lineno": 471 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-get_then_create_event_tool]", "type": "Function", - "lineno": 450 + "lineno": 471 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-compare_monthly_expense_tool]", "type": "Function", - "lineno": 450 + "lineno": 471 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-text_then_weather_tool]", "type": "Function", - "lineno": 450 + "lineno": 471 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-weather_tool_then_text]", "type": "Function", - "lineno": 450 + "lineno": 471 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-add_product_tool]", "type": "Function", - "lineno": 450 + "lineno": 471 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-get_then_create_event_tool]", "type": "Function", - "lineno": 450 + "lineno": 471 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-compare_monthly_expense_tool]", "type": "Function", - "lineno": 450 + 
"lineno": 471 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_multi_turn_multiple_images[meta-llama/Llama-3.3-70B-Instruct-Turbo-stream=False]", + "type": "Function", + "lineno": 554 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_multi_turn_multiple_images[meta-llama/Llama-3.3-70B-Instruct-Turbo-stream=True]", + "type": "Function", + "lineno": 554 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_multi_turn_multiple_images[meta-llama/Llama-4-Scout-17B-16E-Instruct-stream=False]", + "type": "Function", + "lineno": 554 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_multi_turn_multiple_images[meta-llama/Llama-4-Scout-17B-16E-Instruct-stream=True]", + "type": "Function", + "lineno": 554 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_multi_turn_multiple_images[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-stream=False]", + "type": "Function", + "lineno": 554 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_multi_turn_multiple_images[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-stream=True]", + "type": "Function", + "lineno": 554 } ] } @@ -422,7 +452,7 @@ "tests": [ { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-earth]", - "lineno": 74, + "lineno": 95, "outcome": "passed", "keywords": [ "test_chat_non_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-earth]", @@ -441,21 +471,21 @@ "case_id": "earth" }, "setup": { - "duration": 0.1559112500399351, + "duration": 0.11939296405762434, "outcome": "passed" }, "call": { - "duration": 0.3692209171131253, + "duration": 0.6422080835327506, "outcome": "passed" }, "teardown": { - "duration": 0.00021362490952014923, + "duration": 0.0002934802323579788, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-saturn]", - "lineno": 74, + "lineno": 95, "outcome": "passed", "keywords": [ "test_chat_non_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-saturn]", @@ -474,21 +504,21 @@ "case_id": "saturn" }, "setup": { - "duration": 0.007326166843995452, + "duration": 0.07340026367455721, "outcome": "passed" }, "call": { - "duration": 0.49173945817165077, + "duration": 0.6134521719068289, "outcome": "passed" }, "teardown": { - "duration": 0.00034487503580749035, + "duration": 0.00031049735844135284, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-earth]", - "lineno": 74, + "lineno": 95, "outcome": "passed", "keywords": [ "test_chat_non_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-earth]", @@ -507,21 +537,21 @@ "case_id": "earth" }, "setup": { - "duration": 0.021014458034187555, + "duration": 0.07351398840546608, "outcome": "passed" }, "call": { - "duration": 0.36956487502902746, + "duration": 0.898847377859056, "outcome": "passed" }, "teardown": { - "duration": 0.0007119579240679741, + "duration": 0.0002735760062932968, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-saturn]", - "lineno": 74, + "lineno": 95, "outcome": "passed", "keywords": [ 
"test_chat_non_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-saturn]", @@ -540,21 +570,21 @@ "case_id": "saturn" }, "setup": { - "duration": 0.011922625126317143, + "duration": 0.08612977154552937, "outcome": "passed" }, "call": { - "duration": 2.7763332079630345, + "duration": 0.6511319326236844, "outcome": "passed" }, "teardown": { - "duration": 0.0004842919297516346, + "duration": 0.0003559151664376259, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-earth]", - "lineno": 74, + "lineno": 95, "outcome": "passed", "keywords": [ "test_chat_non_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-earth]", @@ -573,21 +603,21 @@ "case_id": "earth" }, "setup": { - "duration": 0.023896750062704086, + "duration": 0.08106738794595003, "outcome": "passed" }, "call": { - "duration": 0.9817597079090774, + "duration": 1.206272155046463, "outcome": "passed" }, "teardown": { - "duration": 0.0004768748767673969, + "duration": 0.0003584325313568115, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-saturn]", - "lineno": 74, + "lineno": 95, "outcome": "passed", "keywords": [ "test_chat_non_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-saturn]", @@ -606,21 +636,21 @@ "case_id": "saturn" }, "setup": { - "duration": 0.07423937506973743, + "duration": 0.0796442786231637, "outcome": "passed" }, "call": { - "duration": 0.3721332079730928, + "duration": 0.4815350500866771, "outcome": "passed" }, "teardown": { - "duration": 0.00020033284090459347, + "duration": 0.00025806669145822525, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-earth]", - "lineno": 93, + "lineno": 114, "outcome": "passed", "keywords": [ "test_chat_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-earth]", @@ -639,21 +669,21 @@ "case_id": "earth" }, "setup": { - "duration": 0.010166750056669116, + "duration": 0.07231954019516706, "outcome": "passed" }, "call": { - "duration": 0.41266337502747774, + "duration": 1.1521263290196657, "outcome": "passed" }, "teardown": { - "duration": 0.00034358282573521137, + "duration": 0.00032721273601055145, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-saturn]", - "lineno": 93, + "lineno": 114, "outcome": "passed", "keywords": [ "test_chat_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-saturn]", @@ -672,21 +702,21 @@ "case_id": "saturn" }, "setup": { - "duration": 0.016687541967257857, + "duration": 0.07364387530833483, "outcome": "passed" }, "call": { - "duration": 0.7235856249462813, + "duration": 1.0600289879366755, "outcome": "passed" }, "teardown": { - "duration": 0.00027179205790162086, + "duration": 0.00028987880796194077, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-earth]", - "lineno": 93, + "lineno": 114, "outcome": "failed", "keywords": [ "test_chat_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-earth]", @@ -705,34 +735,34 @@ "case_id": "earth" }, "setup": { - "duration": 0.012556416913866997, + "duration": 0.07162868417799473, "outcome": "passed" 
}, "call": { - "duration": 0.27039612480439246, + "duration": 0.2930005770176649, "outcome": "failed", "crash": { - "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 111, + "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", + "lineno": 132, "message": "IndexError: list index out of range" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 111, + "lineno": 132, "message": "IndexError" } ], - "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'earth', 'input': {'messages': [{'content': 'Which planet do humans live on?', 'role': 'user'}]}, 'output': 'Earth'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_basic\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_basic(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:111: IndexError" + "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'earth', 'input': {'messages': [{'content': 'Which planet do humans live on?', 'role': 'user'}]}, 'output': 'Earth'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_basic\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_basic(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:132: IndexError" }, "teardown": { - "duration": 0.0002312080468982458, + "duration": 0.0004123607650399208, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-saturn]", - "lineno": 93, + "lineno": 114, "outcome": "failed", 
"keywords": [ "test_chat_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-saturn]", @@ -751,34 +781,34 @@ "case_id": "saturn" }, "setup": { - "duration": 0.006413874914869666, + "duration": 0.07553945016115904, "outcome": "passed" }, "call": { - "duration": 0.36463545891456306, + "duration": 0.4265708066523075, "outcome": "failed", "crash": { - "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 111, + "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", + "lineno": 132, "message": "IndexError: list index out of range" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 111, + "lineno": 132, "message": "IndexError" } ], - "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'saturn', 'input': {'messages': [{'content': 'Which planet has rings around it with a name starting with letter S?', 'role': 'user'}]}, 'output': 'Saturn'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_basic\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_basic(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:111: IndexError" + "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'saturn', 'input': {'messages': [{'content': 'Which planet has rings around it with a name starting with letter S?', 'role': 'user'}]}, 'output': 'Saturn'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_basic\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_basic(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:132: IndexError" }, 
"teardown": { - "duration": 0.00023154192604124546, + "duration": 0.0003767991438508034, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-earth]", - "lineno": 93, + "lineno": 114, "outcome": "failed", "keywords": [ "test_chat_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-earth]", @@ -797,34 +827,34 @@ "case_id": "earth" }, "setup": { - "duration": 0.015633082948625088, + "duration": 0.07143466174602509, "outcome": "passed" }, "call": { - "duration": 0.8896284159272909, + "duration": 1.0281891459599137, "outcome": "failed", "crash": { - "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 111, + "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", + "lineno": 132, "message": "IndexError: list index out of range" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 111, + "lineno": 132, "message": "IndexError" } ], - "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'earth', 'input': {'messages': [{'content': 'Which planet do humans live on?', 'role': 'user'}]}, 'output': 'Earth'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_basic\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_basic(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:111: IndexError" + "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'earth', 'input': {'messages': [{'content': 'Which planet do humans live on?', 'role': 'user'}]}, 'output': 'Earth'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_basic\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_basic(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n 
messages=case[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:132: IndexError" }, "teardown": { - "duration": 0.0006587498355656862, + "duration": 0.0003773234784603119, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-saturn]", - "lineno": 93, + "lineno": 114, "outcome": "failed", "keywords": [ "test_chat_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-saturn]", @@ -843,34 +873,34 @@ "case_id": "saturn" }, "setup": { - "duration": 0.012669583084061742, + "duration": 0.07092289440333843, "outcome": "passed" }, "call": { - "duration": 0.3499396659899503, + "duration": 0.4124102909117937, "outcome": "failed", "crash": { - "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 111, + "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", + "lineno": 132, "message": "IndexError: list index out of range" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 111, + "lineno": 132, "message": "IndexError" } ], - "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'saturn', 'input': {'messages': [{'content': 'Which planet has rings around it with a name starting with letter S?', 'role': 'user'}]}, 'output': 'Saturn'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_basic\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_basic(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:111: IndexError" + "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'saturn', 'input': {'messages': [{'content': 'Which planet has rings around it with a name starting with letter S?', 'role': 'user'}]}, 'output': 'Saturn'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_basic\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_basic(request, openai_client, 
model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:132: IndexError" }, "teardown": { - "duration": 0.00024912506341934204, + "duration": 0.0003204820677638054, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", - "lineno": 117, + "lineno": 138, "outcome": "skipped", "keywords": [ "test_chat_non_streaming_image[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", @@ -889,22 +919,22 @@ "case_id": "case0" }, "setup": { - "duration": 0.0153201250359416, + "duration": 0.07159135863184929, "outcome": "passed" }, "call": { - "duration": 0.0001901669893413782, + "duration": 0.0002104705199599266, "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py', 126, 'Skipped: Skipping test_chat_non_streaming_image for model meta-llama/Llama-3.3-70B-Instruct-Turbo on provider together based on config.')" + "longrepr": "('/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py', 147, 'Skipped: Skipping test_chat_non_streaming_image for model meta-llama/Llama-3.3-70B-Instruct-Turbo on provider together based on config.')" }, "teardown": { - "duration": 0.00012779212556779385, + "duration": 0.0003354400396347046, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", - "lineno": 117, + "lineno": 138, "outcome": "passed", "keywords": [ "test_chat_non_streaming_image[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", @@ -923,21 +953,21 @@ "case_id": "case0" }, "setup": { - "duration": 0.008855124935507774, + "duration": 0.0744061404839158, "outcome": "passed" }, "call": { - "duration": 1.37906050006859, + "duration": 2.2864254424348474, "outcome": "passed" }, "teardown": { - "duration": 0.0004904591478407383, + "duration": 0.000246487557888031, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", - "lineno": 117, + "lineno": 138, "outcome": "passed", "keywords": [ "test_chat_non_streaming_image[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", @@ -956,21 +986,21 @@ "case_id": "case0" }, "setup": { - "duration": 0.017166708130389452, + "duration": 0.07066962588578463, "outcome": "passed" }, "call": { - "duration": 4.003400916932151, + "duration": 4.47614302393049, "outcome": "passed" }, "teardown": { - "duration": 0.00042724981904029846, + "duration": 0.00034836214035749435, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", - "lineno": 136, + "lineno": 157, "outcome": "skipped", "keywords": [ "test_chat_streaming_image[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", @@ -989,22 +1019,22 @@ 
"case_id": "case0" }, "setup": { - "duration": 0.007232750067487359, + "duration": 0.09739464800804853, "outcome": "passed" }, "call": { - "duration": 0.0001449580304324627, + "duration": 0.0003191335126757622, "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py', 145, 'Skipped: Skipping test_chat_streaming_image for model meta-llama/Llama-3.3-70B-Instruct-Turbo on provider together based on config.')" + "longrepr": "('/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py', 166, 'Skipped: Skipping test_chat_streaming_image for model meta-llama/Llama-3.3-70B-Instruct-Turbo on provider together based on config.')" }, "teardown": { - "duration": 0.0001349160447716713, + "duration": 0.00026350561529397964, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", - "lineno": 136, + "lineno": 157, "outcome": "failed", "keywords": [ "test_chat_streaming_image[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", @@ -1023,34 +1053,34 @@ "case_id": "case0" }, "setup": { - "duration": 0.007052165921777487, + "duration": 0.10561292432248592, "outcome": "passed" }, "call": { - "duration": 1.4663615000899881, + "duration": 2.6175378002226353, "outcome": "failed", "crash": { - "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 154, + "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", + "lineno": 175, "message": "IndexError: list index out of range" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 154, + "lineno": 175, "message": "IndexError" } ], - "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': [{'text': 'What is in this image?', 'type': 'text'}, {'image_url': {...}, 'type': 'image_url'}], 'role': 'user'}]}, 'output': 'llama'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_image\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_image(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:154: IndexError" + "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 
'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': [{'text': 'What is in this image?', 'type': 'text'}, {'image_url': {...}, 'type': 'image_url'}], 'role': 'user'}]}, 'output': 'llama'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_image\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_image(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:175: IndexError" }, "teardown": { - "duration": 0.0005696250591427088, + "duration": 0.0003682933747768402, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", - "lineno": 136, + "lineno": 157, "outcome": "failed", "keywords": [ "test_chat_streaming_image[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", @@ -1069,34 +1099,34 @@ "case_id": "case0" }, "setup": { - "duration": 0.01214433298446238, + "duration": 0.07195662055164576, "outcome": "passed" }, "call": { - "duration": 3.902559082955122, + "duration": 3.2985631534829736, "outcome": "failed", "crash": { - "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 154, + "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", + "lineno": 175, "message": "IndexError: list index out of range" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 154, + "lineno": 175, "message": "IndexError" } ], - "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': [{'text': 'What is in this image?', 'type': 'text'}, {'image_url': {...}, 'type': 'image_url'}], 'role': 'user'}]}, 'output': 'llama'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_image\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_image(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:154: 
IndexError" + "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': [{'text': 'What is in this image?', 'type': 'text'}, {'image_url': {...}, 'type': 'image_url'}], 'role': 'user'}]}, 'output': 'llama'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_image\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_image(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:175: IndexError" }, "teardown": { - "duration": 0.000591374933719635, + "duration": 0.0003777453675866127, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-calendar]", - "lineno": 160, + "lineno": 181, "outcome": "passed", "keywords": [ "test_chat_non_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-calendar]", @@ -1115,21 +1145,21 @@ "case_id": "calendar" }, "setup": { - "duration": 0.01478054211474955, + "duration": 0.0733196372166276, "outcome": "passed" }, "call": { - "duration": 0.569845792138949, + "duration": 0.40959454514086246, "outcome": "passed" }, "teardown": { - "duration": 0.00038724998012185097, + "duration": 0.00029125437140464783, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-math]", - "lineno": 160, + "lineno": 181, "outcome": "passed", "keywords": [ "test_chat_non_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-math]", @@ -1148,21 +1178,21 @@ "case_id": "math" }, "setup": { - "duration": 0.014717916958034039, + "duration": 0.07248916011303663, "outcome": "passed" }, "call": { - "duration": 1.1819656670559198, + "duration": 3.498455540277064, "outcome": "passed" }, "teardown": { - "duration": 0.0002410421147942543, + "duration": 0.00023921672254800797, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-calendar]", - "lineno": 160, + "lineno": 181, "outcome": "passed", "keywords": [ "test_chat_non_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-calendar]", @@ -1181,21 +1211,21 @@ "case_id": "calendar" }, "setup": { - "duration": 0.006486707832664251, + "duration": 0.07911352813243866, "outcome": "passed" }, "call": { - "duration": 0.5623017910402268, + "duration": 0.6717434097081423, "outcome": "passed" }, "teardown": { - "duration": 0.00032504182308912277, + "duration": 
0.00025916099548339844, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-math]", - "lineno": 160, + "lineno": 181, "outcome": "passed", "keywords": [ "test_chat_non_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-math]", @@ -1214,21 +1244,21 @@ "case_id": "math" }, "setup": { - "duration": 0.009171125013381243, + "duration": 0.07156322989612818, "outcome": "passed" }, "call": { - "duration": 2.6005691669415683, + "duration": 3.698870756663382, "outcome": "passed" }, "teardown": { - "duration": 0.00023995805531740189, + "duration": 0.0002654632553458214, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-calendar]", - "lineno": 160, + "lineno": 181, "outcome": "passed", "keywords": [ "test_chat_non_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-calendar]", @@ -1247,21 +1277,21 @@ "case_id": "calendar" }, "setup": { - "duration": 0.009700333932414651, + "duration": 0.07457748707383871, "outcome": "passed" }, "call": { - "duration": 0.4192442081402987, + "duration": 0.8891718471422791, "outcome": "passed" }, "teardown": { - "duration": 0.00040241610258817673, + "duration": 0.0002395138144493103, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-math]", - "lineno": 160, + "lineno": 181, "outcome": "passed", "keywords": [ "test_chat_non_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-math]", @@ -1280,21 +1310,21 @@ "case_id": "math" }, "setup": { - "duration": 0.006938542006537318, + "duration": 0.07155069429427385, "outcome": "passed" }, "call": { - "duration": 2.1736337919719517, + "duration": 3.276700599119067, "outcome": "passed" }, "teardown": { - "duration": 0.00019279099069535732, + "duration": 0.0002568913623690605, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-calendar]", - "lineno": 183, + "lineno": 204, "outcome": "passed", "keywords": [ "test_chat_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-calendar]", @@ -1313,21 +1343,21 @@ "case_id": "calendar" }, "setup": { - "duration": 0.008775749942287803, + "duration": 0.07365360390394926, "outcome": "passed" }, "call": { - "duration": 0.5588400410488248, + "duration": 0.7638470390811563, "outcome": "passed" }, "teardown": { - "duration": 0.00040091690607368946, + "duration": 0.00027653202414512634, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-math]", - "lineno": 183, + "lineno": 204, "outcome": "passed", "keywords": [ "test_chat_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-math]", @@ -1346,21 +1376,21 @@ "case_id": "math" }, "setup": { - "duration": 0.01844154205173254, + "duration": 0.07424602191895247, "outcome": "passed" }, "call": { - "duration": 2.205772665794939, + "duration": 3.622116087935865, "outcome": "passed" }, "teardown": { - "duration": 0.00021091708913445473, + "duration": 0.0002861013635993004, "outcome": "passed" } }, { "nodeid": 
"tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-calendar]", - "lineno": 183, + "lineno": 204, "outcome": "failed", "keywords": [ "test_chat_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-calendar]", @@ -1379,34 +1409,34 @@ "case_id": "calendar" }, "setup": { - "duration": 0.015595750184729695, + "duration": 0.07192372716963291, "outcome": "passed" }, "call": { - "duration": 0.6904467919375747, + "duration": 0.5049019353464246, "outcome": "failed", "crash": { - "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 202, + "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", + "lineno": 223, "message": "IndexError: list index out of range" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 202, + "lineno": 223, "message": "IndexError" } ], - "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'calendar', 'input': {'messages': [{'content': 'Extract the event information.', 'role': 'system'}, {'cont...articipants'], 'title': 'CalendarEvent', 'type': 'object'}}, 'type': 'json_schema'}}, 'output': 'valid_calendar_event'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_structured_output\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_structured_output(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n response_format=case[\"input\"][\"response_format\"],\n stream=True,\n )\n maybe_json_content = \"\"\n for chunk in response:\n> maybe_json_content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:202: IndexError" + "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'calendar', 'input': {'messages': [{'content': 'Extract the event information.', 'role': 'system'}, {'cont...articipants'], 'title': 'CalendarEvent', 'type': 'object'}}, 'type': 'json_schema'}}, 'output': 'valid_calendar_event'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_structured_output\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_structured_output(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if 
should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n response_format=case[\"input\"][\"response_format\"],\n stream=True,\n )\n maybe_json_content = \"\"\n for chunk in response:\n> maybe_json_content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:223: IndexError" }, "teardown": { - "duration": 0.0002907498273998499, + "duration": 0.00036794692277908325, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-math]", - "lineno": 183, + "lineno": 204, "outcome": "failed", "keywords": [ "test_chat_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-math]", @@ -1425,34 +1455,34 @@ "case_id": "math" }, "setup": { - "duration": 0.008272957988083363, + "duration": 0.07304532174021006, "outcome": "passed" }, "call": { - "duration": 3.499622541014105, + "duration": 2.961389934644103, "outcome": "failed", "crash": { - "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 202, + "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", + "lineno": 223, "message": "IndexError: list index out of range" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 202, + "lineno": 223, "message": "IndexError" } ], - "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'math', 'input': {'messages': [{'content': 'You are a helpful math tutor. Guide the user through the solut... 
['steps', 'final_answer'], 'title': 'MathReasoning', ...}}, 'type': 'json_schema'}}, 'output': 'valid_math_reasoning'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_structured_output\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_structured_output(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n response_format=case[\"input\"][\"response_format\"],\n stream=True,\n )\n maybe_json_content = \"\"\n for chunk in response:\n> maybe_json_content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:202: IndexError" + "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'math', 'input': {'messages': [{'content': 'You are a helpful math tutor. Guide the user through the solut... ['steps', 'final_answer'], 'title': 'MathReasoning', ...}}, 'type': 'json_schema'}}, 'output': 'valid_math_reasoning'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_structured_output\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_structured_output(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n response_format=case[\"input\"][\"response_format\"],\n stream=True,\n )\n maybe_json_content = \"\"\n for chunk in response:\n> maybe_json_content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:223: IndexError" }, "teardown": { - "duration": 0.0005947079043835402, + "duration": 0.0003312695771455765, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-calendar]", - "lineno": 183, + "lineno": 204, "outcome": "failed", "keywords": [ "test_chat_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-calendar]", @@ -1471,34 +1501,34 @@ "case_id": "calendar" }, "setup": { - "duration": 0.013340875040739775, + "duration": 0.07350922282785177, "outcome": "passed" }, "call": { - "duration": 0.42789591709151864, + "duration": 0.6764275450259447, "outcome": "failed", "crash": { - "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 202, + "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", + "lineno": 223, 
"message": "IndexError: list index out of range" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 202, + "lineno": 223, "message": "IndexError" } ], - "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'calendar', 'input': {'messages': [{'content': 'Extract the event information.', 'role': 'system'}, {'cont...articipants'], 'title': 'CalendarEvent', 'type': 'object'}}, 'type': 'json_schema'}}, 'output': 'valid_calendar_event'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_structured_output\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_structured_output(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n response_format=case[\"input\"][\"response_format\"],\n stream=True,\n )\n maybe_json_content = \"\"\n for chunk in response:\n> maybe_json_content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:202: IndexError" + "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'calendar', 'input': {'messages': [{'content': 'Extract the event information.', 'role': 'system'}, {'cont...articipants'], 'title': 'CalendarEvent', 'type': 'object'}}, 'type': 'json_schema'}}, 'output': 'valid_calendar_event'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_structured_output\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_structured_output(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n response_format=case[\"input\"][\"response_format\"],\n stream=True,\n )\n maybe_json_content = \"\"\n for chunk in response:\n> maybe_json_content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:223: IndexError" }, "teardown": { - "duration": 0.0003039578441530466, + "duration": 0.0003826189786195755, "outcome": "passed" } }, { "nodeid": 
"tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-math]", - "lineno": 183, + "lineno": 204, "outcome": "failed", "keywords": [ "test_chat_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-math]", @@ -1517,34 +1547,34 @@ "case_id": "math" }, "setup": { - "duration": 0.01058275019749999, + "duration": 0.07295230869203806, "outcome": "passed" }, "call": { - "duration": 5.795635707909241, + "duration": 10.689278944395483, "outcome": "failed", "crash": { - "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 202, + "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", + "lineno": 223, "message": "IndexError: list index out of range" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 202, + "lineno": 223, "message": "IndexError" } ], - "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'math', 'input': {'messages': [{'content': 'You are a helpful math tutor. Guide the user through the solut... ['steps', 'final_answer'], 'title': 'MathReasoning', ...}}, 'type': 'json_schema'}}, 'output': 'valid_math_reasoning'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_structured_output\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_structured_output(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n response_format=case[\"input\"][\"response_format\"],\n stream=True,\n )\n maybe_json_content = \"\"\n for chunk in response:\n> maybe_json_content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:202: IndexError" + "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'math', 'input': {'messages': [{'content': 'You are a helpful math tutor. Guide the user through the solut... 
['steps', 'final_answer'], 'title': 'MathReasoning', ...}}, 'type': 'json_schema'}}, 'output': 'valid_math_reasoning'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_structured_output\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_structured_output(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n response_format=case[\"input\"][\"response_format\"],\n stream=True,\n )\n maybe_json_content = \"\"\n for chunk in response:\n> maybe_json_content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:223: IndexError" }, "teardown": { - "duration": 0.0005178749561309814, + "duration": 0.0004014279693365097, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", - "lineno": 205, + "lineno": 226, "outcome": "passed", "keywords": [ "test_chat_non_streaming_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", @@ -1563,21 +1593,21 @@ "case_id": "case0" }, "setup": { - "duration": 0.014336749911308289, + "duration": 0.09202722646296024, "outcome": "passed" }, "call": { - "duration": 0.451304541900754, + "duration": 0.8140280386433005, "outcome": "passed" }, "teardown": { - "duration": 0.0004718329291790724, + "duration": 0.0003595082089304924, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", - "lineno": 205, + "lineno": 226, "outcome": "passed", "keywords": [ "test_chat_non_streaming_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", @@ -1596,21 +1626,21 @@ "case_id": "case0" }, "setup": { - "duration": 0.01625004201196134, + "duration": 0.09484888892620802, "outcome": "passed" }, "call": { - "duration": 0.5111537908669561, + "duration": 0.3706049248576164, "outcome": "passed" }, "teardown": { - "duration": 0.00046774977818131447, + "duration": 0.0003290809690952301, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", - "lineno": 205, + "lineno": 226, "outcome": "passed", "keywords": [ "test_chat_non_streaming_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", @@ -1629,21 +1659,21 @@ "case_id": "case0" }, "setup": { - "duration": 0.015832332894206047, + "duration": 0.10521113499999046, "outcome": "passed" }, "call": { - "duration": 0.8238586660008878, + "duration": 0.36842701490968466, "outcome": "passed" }, "teardown": { - "duration": 0.0006185418460518122, + "duration": 0.00031410157680511475, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", - "lineno": 229, + "lineno": 250, "outcome": "passed", "keywords": [ "test_chat_streaming_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", @@ -1662,21 +1692,21 @@ "case_id": "case0" 
}, "setup": { - "duration": 0.007832166040316224, + "duration": 0.10422383341938257, "outcome": "passed" }, "call": { - "duration": 0.685583250131458, + "duration": 0.6454980997368693, "outcome": "passed" }, "teardown": { - "duration": 0.0004414590075612068, + "duration": 0.0002997415140271187, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", - "lineno": 229, + "lineno": 250, "outcome": "failed", "keywords": [ "test_chat_streaming_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", @@ -1695,39 +1725,39 @@ "case_id": "case0" }, "setup": { - "duration": 0.021764083998277783, + "duration": 0.09408890828490257, "outcome": "passed" }, "call": { - "duration": 0.35617320891469717, + "duration": 0.36066764686256647, "outcome": "failed", "crash": { - "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 587, + "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", + "lineno": 688, "message": "IndexError: list index out of range" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 247, + "lineno": 268, "message": "" }, { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 587, + "lineno": 688, "message": "IndexError" } ], - "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_tool_calling(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n stream = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n stream=True,\n )\n \n> _, tool_calls_buffer = _accumulate_streaming_tool_calls(stream)\n\ntests/verifications/openai_api/test_chat_completion.py:247: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\nstream = \n\n def _accumulate_streaming_tool_calls(stream):\n \"\"\"Accumulates tool calls and content from a streaming ChatCompletion response.\"\"\"\n tool_calls_buffer = {}\n current_id = None\n full_content = \"\" # Initialize content accumulator\n # Process streaming chunks\n for chunk in stream:\n> choice = chunk.choices[0]\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:587: IndexError" + "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': 
{'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_tool_calling(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n stream = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n stream=True,\n )\n \n> _, tool_calls_buffer = _accumulate_streaming_tool_calls(stream)\n\ntests/verifications/openai_api/test_chat_completion.py:268: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\nstream = \n\n def _accumulate_streaming_tool_calls(stream):\n \"\"\"Accumulates tool calls and content from a streaming ChatCompletion response.\"\"\"\n tool_calls_buffer = {}\n current_id = None\n full_content = \"\" # Initialize content accumulator\n # Process streaming chunks\n for chunk in stream:\n> choice = chunk.choices[0]\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:688: IndexError" }, "teardown": { - "duration": 0.0005425831768661737, + "duration": 0.00035039614886045456, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", - "lineno": 229, + "lineno": 250, "outcome": "failed", "keywords": [ "test_chat_streaming_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", @@ -1746,39 +1776,39 @@ "case_id": "case0" }, "setup": { - "duration": 0.016708041075617075, + "duration": 0.07232134602963924, "outcome": "passed" }, "call": { - "duration": 0.49443637509830296, + "duration": 0.4706049496307969, "outcome": "failed", "crash": { - "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 587, + "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", + "lineno": 688, "message": "IndexError: list index out of range" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 247, + "lineno": 268, "message": "" }, { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 587, + "lineno": 688, "message": "IndexError" } ], - "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 
'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_tool_calling(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n stream = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n stream=True,\n )\n \n> _, tool_calls_buffer = _accumulate_streaming_tool_calls(stream)\n\ntests/verifications/openai_api/test_chat_completion.py:247: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\nstream = \n\n def _accumulate_streaming_tool_calls(stream):\n \"\"\"Accumulates tool calls and content from a streaming ChatCompletion response.\"\"\"\n tool_calls_buffer = {}\n current_id = None\n full_content = \"\" # Initialize content accumulator\n # Process streaming chunks\n for chunk in stream:\n> choice = chunk.choices[0]\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:587: IndexError" + "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_tool_calling(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n stream = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n stream=True,\n )\n \n> _, tool_calls_buffer = _accumulate_streaming_tool_calls(stream)\n\ntests/verifications/openai_api/test_chat_completion.py:268: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\nstream = \n\n def _accumulate_streaming_tool_calls(stream):\n \"\"\"Accumulates tool calls and content from a streaming ChatCompletion response.\"\"\"\n tool_calls_buffer = {}\n current_id = None\n full_content = \"\" # Initialize content accumulator\n # Process streaming chunks\n for chunk in stream:\n> choice = chunk.choices[0]\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:688: IndexError" }, "teardown": { - "duration": 0.0002642078325152397, + "duration": 0.00039384420961141586, "outcome": "passed" } }, { "nodeid": 
"tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_required[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", - "lineno": 257, + "lineno": 278, "outcome": "passed", "keywords": [ "test_chat_non_streaming_tool_choice_required[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", @@ -1797,21 +1827,21 @@ "case_id": "case0" }, "setup": { - "duration": 0.009570583933964372, + "duration": 0.07465469185262918, "outcome": "passed" }, "call": { - "duration": 0.5232214999850839, + "duration": 0.4374591317027807, "outcome": "passed" }, "teardown": { - "duration": 0.0006591668352484703, + "duration": 0.0003099888563156128, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_required[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", - "lineno": 257, + "lineno": 278, "outcome": "passed", "keywords": [ "test_chat_non_streaming_tool_choice_required[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", @@ -1830,21 +1860,21 @@ "case_id": "case0" }, "setup": { - "duration": 0.01567283389158547, + "duration": 0.07351493183523417, "outcome": "passed" }, "call": { - "duration": 0.4465816249139607, + "duration": 0.4368853671476245, "outcome": "passed" }, "teardown": { - "duration": 0.0003922500181943178, + "duration": 0.00026369933038949966, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_required[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", - "lineno": 257, + "lineno": 278, "outcome": "passed", "keywords": [ "test_chat_non_streaming_tool_choice_required[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", @@ -1863,21 +1893,21 @@ "case_id": "case0" }, "setup": { - "duration": 0.021711332956328988, + "duration": 0.07258845027536154, "outcome": "passed" }, "call": { - "duration": 0.5361095829866827, + "duration": 0.940508272498846, "outcome": "passed" }, "teardown": { - "duration": 0.0003099590539932251, + "duration": 0.00032961275428533554, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_required[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", - "lineno": 281, + "lineno": 302, "outcome": "passed", "keywords": [ "test_chat_streaming_tool_choice_required[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", @@ -1896,21 +1926,21 @@ "case_id": "case0" }, "setup": { - "duration": 0.009334125090390444, + "duration": 0.07273276895284653, "outcome": "passed" }, "call": { - "duration": 0.5789772500284016, + "duration": 0.6150273764505982, "outcome": "passed" }, "teardown": { - "duration": 0.00037712487392127514, + "duration": 0.0002876110374927521, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_required[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", - "lineno": 281, + "lineno": 302, "outcome": "failed", "keywords": [ "test_chat_streaming_tool_choice_required[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", @@ -1929,39 +1959,39 @@ "case_id": "case0" }, "setup": { - "duration": 0.019614499993622303, + "duration": 0.07505382597446442, "outcome": "passed" }, "call": { - "duration": 0.444399792002514, + "duration": 0.5026597818359733, "outcome": "failed", "crash": { - "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 587, + "path": 
"/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", + "lineno": 688, "message": "IndexError: list index out of range" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 300, + "lineno": 321, "message": "" }, { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 587, + "lineno": 688, "message": "IndexError" } ], - "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"], # Reusing existing case for now\n ids=case_id_generator,\n )\n def test_chat_streaming_tool_choice_required(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n stream = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n tool_choice=\"required\", # Force tool call\n stream=True,\n )\n \n> _, tool_calls_buffer = _accumulate_streaming_tool_calls(stream)\n\ntests/verifications/openai_api/test_chat_completion.py:300: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\nstream = \n\n def _accumulate_streaming_tool_calls(stream):\n \"\"\"Accumulates tool calls and content from a streaming ChatCompletion response.\"\"\"\n tool_calls_buffer = {}\n current_id = None\n full_content = \"\" # Initialize content accumulator\n # Process streaming chunks\n for chunk in stream:\n> choice = chunk.choices[0]\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:587: IndexError" + "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"], # Reusing existing case for now\n ids=case_id_generator,\n )\n def test_chat_streaming_tool_choice_required(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping 
{test_name_base} for model {model} on provider {provider} based on config.\")\n \n stream = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n tool_choice=\"required\", # Force tool call\n stream=True,\n )\n \n> _, tool_calls_buffer = _accumulate_streaming_tool_calls(stream)\n\ntests/verifications/openai_api/test_chat_completion.py:321: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\nstream = \n\n def _accumulate_streaming_tool_calls(stream):\n \"\"\"Accumulates tool calls and content from a streaming ChatCompletion response.\"\"\"\n tool_calls_buffer = {}\n current_id = None\n full_content = \"\" # Initialize content accumulator\n # Process streaming chunks\n for chunk in stream:\n> choice = chunk.choices[0]\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:688: IndexError" }, "teardown": { - "duration": 0.0004192921333014965, + "duration": 0.0003487151116132736, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_required[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", - "lineno": 281, + "lineno": 302, "outcome": "failed", "keywords": [ "test_chat_streaming_tool_choice_required[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", @@ -1980,39 +2010,39 @@ "case_id": "case0" }, "setup": { - "duration": 0.012822834076359868, + "duration": 0.07343385275453329, "outcome": "passed" }, "call": { - "duration": 0.6777042911853641, + "duration": 0.720921658910811, "outcome": "failed", "crash": { - "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 587, + "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", + "lineno": 688, "message": "IndexError: list index out of range" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 300, + "lineno": 321, "message": "" }, { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 587, + "lineno": 688, "message": "IndexError" } ], - "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"], # Reusing existing case for now\n ids=case_id_generator,\n )\n def test_chat_streaming_tool_choice_required(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n stream = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n 
tool_choice=\"required\", # Force tool call\n stream=True,\n )\n \n> _, tool_calls_buffer = _accumulate_streaming_tool_calls(stream)\n\ntests/verifications/openai_api/test_chat_completion.py:300: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\nstream = \n\n def _accumulate_streaming_tool_calls(stream):\n \"\"\"Accumulates tool calls and content from a streaming ChatCompletion response.\"\"\"\n tool_calls_buffer = {}\n current_id = None\n full_content = \"\" # Initialize content accumulator\n # Process streaming chunks\n for chunk in stream:\n> choice = chunk.choices[0]\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:587: IndexError" + "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"], # Reusing existing case for now\n ids=case_id_generator,\n )\n def test_chat_streaming_tool_choice_required(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n stream = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n tool_choice=\"required\", # Force tool call\n stream=True,\n )\n \n> _, tool_calls_buffer = _accumulate_streaming_tool_calls(stream)\n\ntests/verifications/openai_api/test_chat_completion.py:321: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\nstream = \n\n def _accumulate_streaming_tool_calls(stream):\n \"\"\"Accumulates tool calls and content from a streaming ChatCompletion response.\"\"\"\n tool_calls_buffer = {}\n current_id = None\n full_content = \"\" # Initialize content accumulator\n # Process streaming chunks\n for chunk in stream:\n> choice = chunk.choices[0]\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:688: IndexError" }, "teardown": { - "duration": 0.0004483328666538, + "duration": 0.0004109758883714676, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_none[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", - "lineno": 308, + "lineno": 329, "outcome": "failed", "keywords": [ "test_chat_non_streaming_tool_choice_none[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", @@ -2031,34 +2061,34 @@ "case_id": "case0" }, "setup": { - "duration": 0.011924332939088345, + "duration": 0.07189673464745283, "outcome": "passed" }, "call": { - "duration": 0.4756374170538038, + "duration": 0.403152690269053, "outcome": "failed", "crash": { - "path": 
"/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 328, - "message": "AssertionError: Expected no tool calls when tool_choice='none'\nassert [ChatCompletionMessageToolCall(id='call_nfd8oz9wmhlwf4mqglcaokrs', function=Function(arguments='{\"location\":\"San Francisco, USA\"}', name='get_weather'), type='function', index=0)] is None\n + where [ChatCompletionMessageToolCall(id='call_nfd8oz9wmhlwf4mqglcaokrs', function=Function(arguments='{\"location\":\"San Francisco, USA\"}', name='get_weather'), type='function', index=0)] = ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_nfd8oz9wmhlwf4mqglcaokrs', function=Function(arguments='{\"location\":\"San Francisco, USA\"}', name='get_weather'), type='function', index=0)]).tool_calls\n + where ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_nfd8oz9wmhlwf4mqglcaokrs', function=Function(arguments='{\"location\":\"San Francisco, USA\"}', name='get_weather'), type='function', index=0)]) = Choice(finish_reason='tool_calls', index=0, logprobs=None, message=ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_nfd8oz9wmhlwf4mqglcaokrs', function=Function(arguments='{\"location\":\"San Francisco, USA\"}', name='get_weather'), type='function', index=0)]), seed=13421903014786785000).message" + "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", + "lineno": 349, + "message": "AssertionError: Expected no tool calls when tool_choice='none'\nassert [ChatCompletionMessageToolCall(id='call_xx4eg2o4wladhs7i0gy8d2cb', function=Function(arguments='{\"location\":\"San Francisco, USA\"}', name='get_weather'), type='function', index=0)] is None\n + where [ChatCompletionMessageToolCall(id='call_xx4eg2o4wladhs7i0gy8d2cb', function=Function(arguments='{\"location\":\"San Francisco, USA\"}', name='get_weather'), type='function', index=0)] = ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_xx4eg2o4wladhs7i0gy8d2cb', function=Function(arguments='{\"location\":\"San Francisco, USA\"}', name='get_weather'), type='function', index=0)]).tool_calls\n + where ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_xx4eg2o4wladhs7i0gy8d2cb', function=Function(arguments='{\"location\":\"San Francisco, USA\"}', name='get_weather'), type='function', index=0)]) = Choice(finish_reason='tool_calls', index=0, logprobs=None, message=ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_xx4eg2o4wladhs7i0gy8d2cb', function=Function(arguments='{\"location\":\"San Francisco, USA\"}', name='get_weather'), type='function', index=0)]), seed=4867562177231181000).message" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 328, + "lineno": 349, "message": "AssertionError" } ], - "longrepr": "request = >\nopenai_client = \nmodel = 
'meta-llama/Llama-3.3-70B-Instruct-Turbo', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"], # Reusing existing case for now\n ids=case_id_generator,\n )\n def test_chat_non_streaming_tool_choice_none(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n tool_choice=\"none\",\n stream=False,\n )\n \n assert response.choices[0].message.role == \"assistant\"\n> assert response.choices[0].message.tool_calls is None, \"Expected no tool calls when tool_choice='none'\"\nE AssertionError: Expected no tool calls when tool_choice='none'\nE assert [ChatCompletionMessageToolCall(id='call_nfd8oz9wmhlwf4mqglcaokrs', function=Function(arguments='{\"location\":\"San Francisco, USA\"}', name='get_weather'), type='function', index=0)] is None\nE + where [ChatCompletionMessageToolCall(id='call_nfd8oz9wmhlwf4mqglcaokrs', function=Function(arguments='{\"location\":\"San Francisco, USA\"}', name='get_weather'), type='function', index=0)] = ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_nfd8oz9wmhlwf4mqglcaokrs', function=Function(arguments='{\"location\":\"San Francisco, USA\"}', name='get_weather'), type='function', index=0)]).tool_calls\nE + where ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_nfd8oz9wmhlwf4mqglcaokrs', function=Function(arguments='{\"location\":\"San Francisco, USA\"}', name='get_weather'), type='function', index=0)]) = Choice(finish_reason='tool_calls', index=0, logprobs=None, message=ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_nfd8oz9wmhlwf4mqglcaokrs', function=Function(arguments='{\"location\":\"San Francisco, USA\"}', name='get_weather'), type='function', index=0)]), seed=13421903014786785000).message\n\ntests/verifications/openai_api/test_chat_completion.py:328: AssertionError" + "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-3.3-70B-Instruct-Turbo', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use 
tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"], # Reusing existing case for now\n ids=case_id_generator,\n )\n def test_chat_non_streaming_tool_choice_none(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n tool_choice=\"none\",\n stream=False,\n )\n \n assert response.choices[0].message.role == \"assistant\"\n> assert response.choices[0].message.tool_calls is None, \"Expected no tool calls when tool_choice='none'\"\nE AssertionError: Expected no tool calls when tool_choice='none'\nE assert [ChatCompletionMessageToolCall(id='call_xx4eg2o4wladhs7i0gy8d2cb', function=Function(arguments='{\"location\":\"San Francisco, USA\"}', name='get_weather'), type='function', index=0)] is None\nE + where [ChatCompletionMessageToolCall(id='call_xx4eg2o4wladhs7i0gy8d2cb', function=Function(arguments='{\"location\":\"San Francisco, USA\"}', name='get_weather'), type='function', index=0)] = ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_xx4eg2o4wladhs7i0gy8d2cb', function=Function(arguments='{\"location\":\"San Francisco, USA\"}', name='get_weather'), type='function', index=0)]).tool_calls\nE + where ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_xx4eg2o4wladhs7i0gy8d2cb', function=Function(arguments='{\"location\":\"San Francisco, USA\"}', name='get_weather'), type='function', index=0)]) = Choice(finish_reason='tool_calls', index=0, logprobs=None, message=ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_xx4eg2o4wladhs7i0gy8d2cb', function=Function(arguments='{\"location\":\"San Francisco, USA\"}', name='get_weather'), type='function', index=0)]), seed=4867562177231181000).message\n\ntests/verifications/openai_api/test_chat_completion.py:349: AssertionError" }, "teardown": { - "duration": 0.0004585420247167349, + "duration": 0.00037758704274892807, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_none[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", - "lineno": 308, + "lineno": 329, "outcome": "failed", "keywords": [ "test_chat_non_streaming_tool_choice_none[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", @@ -2077,34 +2107,34 @@ "case_id": "case0" }, "setup": { - "duration": 0.013246082933619618, + "duration": 0.07282305508852005, "outcome": "passed" }, "call": { - "duration": 0.5618870409671217, + "duration": 0.4538485202938318, "outcome": "failed", "crash": { - "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 328, - "message": "AssertionError: Expected no tool calls when 
tool_choice='none'\nassert [ChatCompletionMessageToolCall(id='call_h5u55eksczab7xtg8oot43k5', function=Function(arguments='{\"location\":\"San Francisco, United States\"}', name='get_weather'), type='function', index=0)] is None\n + where [ChatCompletionMessageToolCall(id='call_h5u55eksczab7xtg8oot43k5', function=Function(arguments='{\"location\":\"San Francisco, United States\"}', name='get_weather'), type='function', index=0)] = ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_h5u55eksczab7xtg8oot43k5', function=Function(arguments='{\"location\":\"San Francisco, United States\"}', name='get_weather'), type='function', index=0)]).tool_calls\n + where ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_h5u55eksczab7xtg8oot43k5', function=Function(arguments='{\"location\":\"San Francisco, United States\"}', name='get_weather'), type='function', index=0)]) = Choice(finish_reason='tool_calls', index=0, logprobs=None, message=ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_h5u55eksczab7xtg8oot43k5', function=Function(arguments='{\"location\":\"San Francisco, United States\"}', name='get_weather'), type='function', index=0)]), seed=None).message" + "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", + "lineno": 349, + "message": "AssertionError: Expected no tool calls when tool_choice='none'\nassert [ChatCompletionMessageToolCall(id='call_6gehr7flf4gaqu65prmi1pca', function=Function(arguments='{\"location\":\"San Francisco\"}', name='get_weather'), type='function', index=0)] is None\n + where [ChatCompletionMessageToolCall(id='call_6gehr7flf4gaqu65prmi1pca', function=Function(arguments='{\"location\":\"San Francisco\"}', name='get_weather'), type='function', index=0)] = ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_6gehr7flf4gaqu65prmi1pca', function=Function(arguments='{\"location\":\"San Francisco\"}', name='get_weather'), type='function', index=0)]).tool_calls\n + where ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_6gehr7flf4gaqu65prmi1pca', function=Function(arguments='{\"location\":\"San Francisco\"}', name='get_weather'), type='function', index=0)]) = Choice(finish_reason='tool_calls', index=0, logprobs=None, message=ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_6gehr7flf4gaqu65prmi1pca', function=Function(arguments='{\"location\":\"San Francisco\"}', name='get_weather'), type='function', index=0)]), seed=None).message" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 328, + "lineno": 349, "message": "AssertionError" } ], - "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 
'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"], # Reusing existing case for now\n ids=case_id_generator,\n )\n def test_chat_non_streaming_tool_choice_none(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n tool_choice=\"none\",\n stream=False,\n )\n \n assert response.choices[0].message.role == \"assistant\"\n> assert response.choices[0].message.tool_calls is None, \"Expected no tool calls when tool_choice='none'\"\nE AssertionError: Expected no tool calls when tool_choice='none'\nE assert [ChatCompletionMessageToolCall(id='call_h5u55eksczab7xtg8oot43k5', function=Function(arguments='{\"location\":\"San Francisco, United States\"}', name='get_weather'), type='function', index=0)] is None\nE + where [ChatCompletionMessageToolCall(id='call_h5u55eksczab7xtg8oot43k5', function=Function(arguments='{\"location\":\"San Francisco, United States\"}', name='get_weather'), type='function', index=0)] = ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_h5u55eksczab7xtg8oot43k5', function=Function(arguments='{\"location\":\"San Francisco, United States\"}', name='get_weather'), type='function', index=0)]).tool_calls\nE + where ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_h5u55eksczab7xtg8oot43k5', function=Function(arguments='{\"location\":\"San Francisco, United States\"}', name='get_weather'), type='function', index=0)]) = Choice(finish_reason='tool_calls', index=0, logprobs=None, message=ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_h5u55eksczab7xtg8oot43k5', function=Function(arguments='{\"location\":\"San Francisco, United States\"}', name='get_weather'), type='function', index=0)]), seed=None).message\n\ntests/verifications/openai_api/test_chat_completion.py:328: AssertionError" + "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 
'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"], # Reusing existing case for now\n ids=case_id_generator,\n )\n def test_chat_non_streaming_tool_choice_none(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n tool_choice=\"none\",\n stream=False,\n )\n \n assert response.choices[0].message.role == \"assistant\"\n> assert response.choices[0].message.tool_calls is None, \"Expected no tool calls when tool_choice='none'\"\nE AssertionError: Expected no tool calls when tool_choice='none'\nE assert [ChatCompletionMessageToolCall(id='call_6gehr7flf4gaqu65prmi1pca', function=Function(arguments='{\"location\":\"San Francisco\"}', name='get_weather'), type='function', index=0)] is None\nE + where [ChatCompletionMessageToolCall(id='call_6gehr7flf4gaqu65prmi1pca', function=Function(arguments='{\"location\":\"San Francisco\"}', name='get_weather'), type='function', index=0)] = ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_6gehr7flf4gaqu65prmi1pca', function=Function(arguments='{\"location\":\"San Francisco\"}', name='get_weather'), type='function', index=0)]).tool_calls\nE + where ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_6gehr7flf4gaqu65prmi1pca', function=Function(arguments='{\"location\":\"San Francisco\"}', name='get_weather'), type='function', index=0)]) = Choice(finish_reason='tool_calls', index=0, logprobs=None, message=ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_6gehr7flf4gaqu65prmi1pca', function=Function(arguments='{\"location\":\"San Francisco\"}', name='get_weather'), type='function', index=0)]), seed=None).message\n\ntests/verifications/openai_api/test_chat_completion.py:349: AssertionError" }, "teardown": { - "duration": 0.00025883293710649014, + "duration": 0.0003799665719270706, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_none[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", - "lineno": 308, + "lineno": 329, "outcome": "failed", "keywords": [ "test_chat_non_streaming_tool_choice_none[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", @@ -2123,34 +2153,34 @@ "case_id": "case0" }, "setup": { - "duration": 0.008055417099967599, + "duration": 0.07050042506307364, "outcome": "passed" }, "call": { - "duration": 0.32869591703638434, + "duration": 0.3740060832351446, "outcome": "failed", "crash": { - "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 328, - "message": "AssertionError: Expected no tool calls when tool_choice='none'\nassert [ChatCompletionMessageToolCall(id='call_s1bz9v57b8uizqy2i81869pb', function=Function(arguments='{\"location\":\"San Francisco\"}', 
name='get_weather'), type='function', index=0)] is None\n + where [ChatCompletionMessageToolCall(id='call_s1bz9v57b8uizqy2i81869pb', function=Function(arguments='{\"location\":\"San Francisco\"}', name='get_weather'), type='function', index=0)] = ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_s1bz9v57b8uizqy2i81869pb', function=Function(arguments='{\"location\":\"San Francisco\"}', name='get_weather'), type='function', index=0)]).tool_calls\n + where ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_s1bz9v57b8uizqy2i81869pb', function=Function(arguments='{\"location\":\"San Francisco\"}', name='get_weather'), type='function', index=0)]) = Choice(finish_reason='tool_calls', index=0, logprobs=None, message=ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_s1bz9v57b8uizqy2i81869pb', function=Function(arguments='{\"location\":\"San Francisco\"}', name='get_weather'), type='function', index=0)]), seed=None).message" + "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", + "lineno": 349, + "message": "AssertionError: Expected no tool calls when tool_choice='none'\nassert [ChatCompletionMessageToolCall(id='call_ngwnt1xmgxipkswdhdepisni', function=Function(arguments='{\"location\":\"San Francisco\"}', name='get_weather'), type='function', index=0)] is None\n + where [ChatCompletionMessageToolCall(id='call_ngwnt1xmgxipkswdhdepisni', function=Function(arguments='{\"location\":\"San Francisco\"}', name='get_weather'), type='function', index=0)] = ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_ngwnt1xmgxipkswdhdepisni', function=Function(arguments='{\"location\":\"San Francisco\"}', name='get_weather'), type='function', index=0)]).tool_calls\n + where ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_ngwnt1xmgxipkswdhdepisni', function=Function(arguments='{\"location\":\"San Francisco\"}', name='get_weather'), type='function', index=0)]) = Choice(finish_reason='tool_calls', index=0, logprobs=None, message=ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_ngwnt1xmgxipkswdhdepisni', function=Function(arguments='{\"location\":\"San Francisco\"}', name='get_weather'), type='function', index=0)]), seed=None).message" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 328, + "lineno": 349, "message": "AssertionError" } ], - "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get 
information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"], # Reusing existing case for now\n ids=case_id_generator,\n )\n def test_chat_non_streaming_tool_choice_none(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n tool_choice=\"none\",\n stream=False,\n )\n \n assert response.choices[0].message.role == \"assistant\"\n> assert response.choices[0].message.tool_calls is None, \"Expected no tool calls when tool_choice='none'\"\nE AssertionError: Expected no tool calls when tool_choice='none'\nE assert [ChatCompletionMessageToolCall(id='call_s1bz9v57b8uizqy2i81869pb', function=Function(arguments='{\"location\":\"San Francisco\"}', name='get_weather'), type='function', index=0)] is None\nE + where [ChatCompletionMessageToolCall(id='call_s1bz9v57b8uizqy2i81869pb', function=Function(arguments='{\"location\":\"San Francisco\"}', name='get_weather'), type='function', index=0)] = ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_s1bz9v57b8uizqy2i81869pb', function=Function(arguments='{\"location\":\"San Francisco\"}', name='get_weather'), type='function', index=0)]).tool_calls\nE + where ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_s1bz9v57b8uizqy2i81869pb', function=Function(arguments='{\"location\":\"San Francisco\"}', name='get_weather'), type='function', index=0)]) = Choice(finish_reason='tool_calls', index=0, logprobs=None, message=ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_s1bz9v57b8uizqy2i81869pb', function=Function(arguments='{\"location\":\"San Francisco\"}', name='get_weather'), type='function', index=0)]), seed=None).message\n\ntests/verifications/openai_api/test_chat_completion.py:328: AssertionError" + "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"], # Reusing existing case for now\n ids=case_id_generator,\n )\n def test_chat_non_streaming_tool_choice_none(request, openai_client, model, provider, verification_config, case):\n 
test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n tool_choice=\"none\",\n stream=False,\n )\n \n assert response.choices[0].message.role == \"assistant\"\n> assert response.choices[0].message.tool_calls is None, \"Expected no tool calls when tool_choice='none'\"\nE AssertionError: Expected no tool calls when tool_choice='none'\nE assert [ChatCompletionMessageToolCall(id='call_ngwnt1xmgxipkswdhdepisni', function=Function(arguments='{\"location\":\"San Francisco\"}', name='get_weather'), type='function', index=0)] is None\nE + where [ChatCompletionMessageToolCall(id='call_ngwnt1xmgxipkswdhdepisni', function=Function(arguments='{\"location\":\"San Francisco\"}', name='get_weather'), type='function', index=0)] = ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_ngwnt1xmgxipkswdhdepisni', function=Function(arguments='{\"location\":\"San Francisco\"}', name='get_weather'), type='function', index=0)]).tool_calls\nE + where ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_ngwnt1xmgxipkswdhdepisni', function=Function(arguments='{\"location\":\"San Francisco\"}', name='get_weather'), type='function', index=0)]) = Choice(finish_reason='tool_calls', index=0, logprobs=None, message=ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_ngwnt1xmgxipkswdhdepisni', function=Function(arguments='{\"location\":\"San Francisco\"}', name='get_weather'), type='function', index=0)]), seed=None).message\n\ntests/verifications/openai_api/test_chat_completion.py:349: AssertionError" }, "teardown": { - "duration": 0.0003937501460313797, + "duration": 0.0003066370263695717, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_none[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", - "lineno": 331, + "lineno": 352, "outcome": "failed", "keywords": [ "test_chat_streaming_tool_choice_none[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", @@ -2169,34 +2199,34 @@ "case_id": "case0" }, "setup": { - "duration": 0.013460749993100762, + "duration": 0.06983672920614481, "outcome": "passed" }, "call": { - "duration": 0.35879983310587704, + "duration": 0.6774894064292312, "outcome": "failed", "crash": { - "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 355, - "message": "AssertionError: Expected no tool call chunks when tool_choice='none'\nassert not [ChoiceDeltaToolCall(index=0, id='call_q472clmnii99ps1fxqtv8qvr', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')]\n + where [ChoiceDeltaToolCall(index=0, id='call_q472clmnii99ps1fxqtv8qvr', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')] = ChoiceDelta(content=None, function_call=None, refusal=None, role=None, tool_calls=[ChoiceDeltaToolCall(index=0, id='call_q472clmnii99ps1fxqtv8qvr', 
function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')]).tool_calls" + "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", + "lineno": 376, + "message": "AssertionError: Expected no tool call chunks when tool_choice='none'\nassert not [ChoiceDeltaToolCall(index=0, id='call_emdpbpvm77rqbzz66arrzv5w', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')]\n + where [ChoiceDeltaToolCall(index=0, id='call_emdpbpvm77rqbzz66arrzv5w', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')] = ChoiceDelta(content=None, function_call=None, refusal=None, role=None, tool_calls=[ChoiceDeltaToolCall(index=0, id='call_emdpbpvm77rqbzz66arrzv5w', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')]).tool_calls" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 355, + "lineno": 376, "message": "AssertionError" } ], - "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-3.3-70B-Instruct-Turbo', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"], # Reusing existing case for now\n ids=case_id_generator,\n )\n def test_chat_streaming_tool_choice_none(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n stream = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n tool_choice=\"none\",\n stream=True,\n )\n \n content = \"\"\n for chunk in stream:\n delta = chunk.choices[0].delta\n if delta.content:\n content += delta.content\n> assert not delta.tool_calls, \"Expected no tool call chunks when tool_choice='none'\"\nE AssertionError: Expected no tool call chunks when tool_choice='none'\nE assert not [ChoiceDeltaToolCall(index=0, id='call_q472clmnii99ps1fxqtv8qvr', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')]\nE + where [ChoiceDeltaToolCall(index=0, id='call_q472clmnii99ps1fxqtv8qvr', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')] = ChoiceDelta(content=None, function_call=None, refusal=None, role=None, tool_calls=[ChoiceDeltaToolCall(index=0, id='call_q472clmnii99ps1fxqtv8qvr', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')]).tool_calls\n\ntests/verifications/openai_api/test_chat_completion.py:355: AssertionError" + "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-3.3-70B-Instruct-Turbo', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 
'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"], # Reusing existing case for now\n ids=case_id_generator,\n )\n def test_chat_streaming_tool_choice_none(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n stream = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n tool_choice=\"none\",\n stream=True,\n )\n \n content = \"\"\n for chunk in stream:\n delta = chunk.choices[0].delta\n if delta.content:\n content += delta.content\n> assert not delta.tool_calls, \"Expected no tool call chunks when tool_choice='none'\"\nE AssertionError: Expected no tool call chunks when tool_choice='none'\nE assert not [ChoiceDeltaToolCall(index=0, id='call_emdpbpvm77rqbzz66arrzv5w', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')]\nE + where [ChoiceDeltaToolCall(index=0, id='call_emdpbpvm77rqbzz66arrzv5w', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')] = ChoiceDelta(content=None, function_call=None, refusal=None, role=None, tool_calls=[ChoiceDeltaToolCall(index=0, id='call_emdpbpvm77rqbzz66arrzv5w', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')]).tool_calls\n\ntests/verifications/openai_api/test_chat_completion.py:376: AssertionError" }, "teardown": { - "duration": 0.0002649170346558094, + "duration": 0.0003580348566174507, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_none[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", - "lineno": 331, + "lineno": 352, "outcome": "failed", "keywords": [ "test_chat_streaming_tool_choice_none[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", @@ -2215,34 +2245,34 @@ "case_id": "case0" }, "setup": { - "duration": 0.0068365419283509254, + "duration": 0.07331710867583752, "outcome": "passed" }, "call": { - "duration": 0.5351063329726458, + "duration": 0.38044120091944933, "outcome": "failed", "crash": { - "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 355, - "message": "AssertionError: Expected no tool call chunks when tool_choice='none'\nassert not [ChoiceDeltaToolCall(index=0, id='call_l3roc57o2pn9b70f0dcgil53', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')]\n + where [ChoiceDeltaToolCall(index=0, id='call_l3roc57o2pn9b70f0dcgil53', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')] = ChoiceDelta(content=None, function_call=None, refusal=None, role=None, tool_calls=[ChoiceDeltaToolCall(index=0, id='call_l3roc57o2pn9b70f0dcgil53', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), 
type='function')]).tool_calls" + "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", + "lineno": 376, + "message": "AssertionError: Expected no tool call chunks when tool_choice='none'\nassert not [ChoiceDeltaToolCall(index=0, id='call_g85q6ysacljgjczgq8r30tjv', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')]\n + where [ChoiceDeltaToolCall(index=0, id='call_g85q6ysacljgjczgq8r30tjv', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')] = ChoiceDelta(content=None, function_call=None, refusal=None, role=None, tool_calls=[ChoiceDeltaToolCall(index=0, id='call_g85q6ysacljgjczgq8r30tjv', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')]).tool_calls" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 355, + "lineno": 376, "message": "AssertionError" } ], - "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"], # Reusing existing case for now\n ids=case_id_generator,\n )\n def test_chat_streaming_tool_choice_none(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n stream = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n tool_choice=\"none\",\n stream=True,\n )\n \n content = \"\"\n for chunk in stream:\n delta = chunk.choices[0].delta\n if delta.content:\n content += delta.content\n> assert not delta.tool_calls, \"Expected no tool call chunks when tool_choice='none'\"\nE AssertionError: Expected no tool call chunks when tool_choice='none'\nE assert not [ChoiceDeltaToolCall(index=0, id='call_l3roc57o2pn9b70f0dcgil53', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')]\nE + where [ChoiceDeltaToolCall(index=0, id='call_l3roc57o2pn9b70f0dcgil53', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')] = ChoiceDelta(content=None, function_call=None, refusal=None, role=None, tool_calls=[ChoiceDeltaToolCall(index=0, id='call_l3roc57o2pn9b70f0dcgil53', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')]).tool_calls\n\ntests/verifications/openai_api/test_chat_completion.py:355: AssertionError" + "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 
'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"], # Reusing existing case for now\n ids=case_id_generator,\n )\n def test_chat_streaming_tool_choice_none(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n stream = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n tool_choice=\"none\",\n stream=True,\n )\n \n content = \"\"\n for chunk in stream:\n delta = chunk.choices[0].delta\n if delta.content:\n content += delta.content\n> assert not delta.tool_calls, \"Expected no tool call chunks when tool_choice='none'\"\nE AssertionError: Expected no tool call chunks when tool_choice='none'\nE assert not [ChoiceDeltaToolCall(index=0, id='call_g85q6ysacljgjczgq8r30tjv', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')]\nE + where [ChoiceDeltaToolCall(index=0, id='call_g85q6ysacljgjczgq8r30tjv', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')] = ChoiceDelta(content=None, function_call=None, refusal=None, role=None, tool_calls=[ChoiceDeltaToolCall(index=0, id='call_g85q6ysacljgjczgq8r30tjv', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')]).tool_calls\n\ntests/verifications/openai_api/test_chat_completion.py:376: AssertionError" }, "teardown": { - "duration": 0.0004712918307632208, + "duration": 0.0003765234723687172, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_none[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", - "lineno": 331, + "lineno": 352, "outcome": "failed", "keywords": [ "test_chat_streaming_tool_choice_none[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", @@ -2261,34 +2291,34 @@ "case_id": "case0" }, "setup": { - "duration": 0.014073874801397324, + "duration": 0.07194581907242537, "outcome": "passed" }, "call": { - "duration": 0.6729549579322338, + "duration": 0.37374384608119726, "outcome": "failed", "crash": { - "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 355, - "message": "AssertionError: Expected no tool call chunks when tool_choice='none'\nassert not [ChoiceDeltaToolCall(index=0, id='call_ktw831i0p838mzvnnaylf6fp', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')]\n + where [ChoiceDeltaToolCall(index=0, id='call_ktw831i0p838mzvnnaylf6fp', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')] = ChoiceDelta(content=None, function_call=None, refusal=None, role=None, tool_calls=[ChoiceDeltaToolCall(index=0, id='call_ktw831i0p838mzvnnaylf6fp', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')]).tool_calls" + "path": 
"/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", + "lineno": 376, + "message": "AssertionError: Expected no tool call chunks when tool_choice='none'\nassert not [ChoiceDeltaToolCall(index=0, id='call_zq6x10vfu9pkxme6pm9zxouk', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')]\n + where [ChoiceDeltaToolCall(index=0, id='call_zq6x10vfu9pkxme6pm9zxouk', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')] = ChoiceDelta(content=None, function_call=None, refusal=None, role=None, tool_calls=[ChoiceDeltaToolCall(index=0, id='call_zq6x10vfu9pkxme6pm9zxouk', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')]).tool_calls" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 355, + "lineno": 376, "message": "AssertionError" } ], - "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"], # Reusing existing case for now\n ids=case_id_generator,\n )\n def test_chat_streaming_tool_choice_none(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n stream = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n tool_choice=\"none\",\n stream=True,\n )\n \n content = \"\"\n for chunk in stream:\n delta = chunk.choices[0].delta\n if delta.content:\n content += delta.content\n> assert not delta.tool_calls, \"Expected no tool call chunks when tool_choice='none'\"\nE AssertionError: Expected no tool call chunks when tool_choice='none'\nE assert not [ChoiceDeltaToolCall(index=0, id='call_ktw831i0p838mzvnnaylf6fp', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')]\nE + where [ChoiceDeltaToolCall(index=0, id='call_ktw831i0p838mzvnnaylf6fp', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')] = ChoiceDelta(content=None, function_call=None, refusal=None, role=None, tool_calls=[ChoiceDeltaToolCall(index=0, id='call_ktw831i0p838mzvnnaylf6fp', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')]).tool_calls\n\ntests/verifications/openai_api/test_chat_completion.py:355: AssertionError" + "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 
'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"], # Reusing existing case for now\n ids=case_id_generator,\n )\n def test_chat_streaming_tool_choice_none(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n stream = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n tool_choice=\"none\",\n stream=True,\n )\n \n content = \"\"\n for chunk in stream:\n delta = chunk.choices[0].delta\n if delta.content:\n content += delta.content\n> assert not delta.tool_calls, \"Expected no tool call chunks when tool_choice='none'\"\nE AssertionError: Expected no tool call chunks when tool_choice='none'\nE assert not [ChoiceDeltaToolCall(index=0, id='call_zq6x10vfu9pkxme6pm9zxouk', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')]\nE + where [ChoiceDeltaToolCall(index=0, id='call_zq6x10vfu9pkxme6pm9zxouk', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')] = ChoiceDelta(content=None, function_call=None, refusal=None, role=None, tool_calls=[ChoiceDeltaToolCall(index=0, id='call_zq6x10vfu9pkxme6pm9zxouk', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')]).tool_calls\n\ntests/verifications/openai_api/test_chat_completion.py:376: AssertionError" }, "teardown": { - "duration": 0.000251916004344821, + "duration": 0.0003813542425632477, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-text_then_weather_tool]", - "lineno": 359, + "lineno": 380, "outcome": "failed", "keywords": [ "test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-text_then_weather_tool]", @@ -2307,34 +2337,34 @@ "case_id": "text_then_weather_tool" }, "setup": { - "duration": 0.009340125136077404, + "duration": 0.07330320309847593, "outcome": "passed" }, "call": { - "duration": 0.3328715830575675, + "duration": 0.4314677305519581, "outcome": "failed", "crash": { - "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 418, - "message": "AssertionError: Expected 0 tool calls, but got 1\nassert 1 == 0\n + where 1 = len(([ChatCompletionMessageToolCall(id='call_3rr948zuvun0533y4oyyep0z', function=Function(arguments='{\"location\":\"San Francisco, CA\"}', name='get_weather'), type='function', index=0)]))\n + where [ChatCompletionMessageToolCall(id='call_3rr948zuvun0533y4oyyep0z', function=Function(arguments='{\"location\":\"San Francisco, CA\"}', name='get_weather'), type='function', index=0)] = ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, 
tool_calls=[ChatCompletionMessageToolCall(id='call_3rr948zuvun0533y4oyyep0z', function=Function(arguments='{\"location\":\"San Francisco, CA\"}', name='get_weather'), type='function', index=0)]).tool_calls" + "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", + "lineno": 439, + "message": "AssertionError: Expected 0 tool calls, but got 1\nassert 1 == 0\n + where 1 = len(([ChatCompletionMessageToolCall(id='call_l05cckdk5mooai2iyfucg4s8', function=Function(arguments='{\"location\":\"San Francisco, CA\"}', name='get_weather'), type='function', index=0)]))\n + where [ChatCompletionMessageToolCall(id='call_l05cckdk5mooai2iyfucg4s8', function=Function(arguments='{\"location\":\"San Francisco, CA\"}', name='get_weather'), type='function', index=0)] = ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_l05cckdk5mooai2iyfucg4s8', function=Function(arguments='{\"location\":\"San Francisco, CA\"}', name='get_weather'), type='function', index=0)]).tool_calls" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 418, + "lineno": 439, "message": "AssertionError" } ], - "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-3.3-70B-Instruct-Turbo', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'text_then_weather_tool', 'expected': [{'answer': ['sol'], 'num_tool_calls': 0}, {'num_tool_calls': 1, 'to...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_non_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\"\n Test cases for multi-turn tool calling.\n Tool calls are asserted.\n Tool responses are provided in the test case.\n Final response is asserted.\n \"\"\"\n \n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n # Create a copy of the messages list to avoid modifying the original\n messages = []\n tools = case[\"input\"][\"tools\"]\n # Use deepcopy to prevent modification across runs/parametrization\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n # keep going until either\n # 1. we have messages to test in multi-turn\n # 2. 
no messages but last message is tool response\n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n # do not take new messages if last message is tool response\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n # Ensure new_messages is a list of message objects\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n # If it's a single message object, add it directly\n messages.append(new_messages)\n \n # --- API Call ---\n response = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=False,\n )\n \n # --- Process Response ---\n assistant_message = response.choices[0].message\n messages.append(assistant_message.model_dump(exclude_unset=True))\n \n assert assistant_message.role == \"assistant\"\n \n # Get the expected result data\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n # --- Assertions based on expected result ---\n> assert len(assistant_message.tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(assistant_message.tool_calls or [])}\"\n )\nE AssertionError: Expected 0 tool calls, but got 1\nE assert 1 == 0\nE + where 1 = len(([ChatCompletionMessageToolCall(id='call_3rr948zuvun0533y4oyyep0z', function=Function(arguments='{\"location\":\"San Francisco, CA\"}', name='get_weather'), type='function', index=0)]))\nE + where [ChatCompletionMessageToolCall(id='call_3rr948zuvun0533y4oyyep0z', function=Function(arguments='{\"location\":\"San Francisco, CA\"}', name='get_weather'), type='function', index=0)] = ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_3rr948zuvun0533y4oyyep0z', function=Function(arguments='{\"location\":\"San Francisco, CA\"}', name='get_weather'), type='function', index=0)]).tool_calls\n\ntests/verifications/openai_api/test_chat_completion.py:418: AssertionError" + "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-3.3-70B-Instruct-Turbo', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'text_then_weather_tool', 'expected': [{'answer': ['sol'], 'num_tool_calls': 0}, {'num_tool_calls': 1, 'to...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_non_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\"\n Test cases for multi-turn tool calling.\n Tool calls are asserted.\n Tool responses are provided in the test case.\n Final response is asserted.\n \"\"\"\n \n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n # Create a copy of the messages list to avoid modifying the original\n messages = []\n tools = 
case[\"input\"][\"tools\"]\n # Use deepcopy to prevent modification across runs/parametrization\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n # keep going until either\n # 1. we have messages to test in multi-turn\n # 2. no messages but last message is tool response\n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n # do not take new messages if last message is tool response\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n # Ensure new_messages is a list of message objects\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n # If it's a single message object, add it directly\n messages.append(new_messages)\n \n # --- API Call ---\n response = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=False,\n )\n \n # --- Process Response ---\n assistant_message = response.choices[0].message\n messages.append(assistant_message.model_dump(exclude_unset=True))\n \n assert assistant_message.role == \"assistant\"\n \n # Get the expected result data\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n # --- Assertions based on expected result ---\n> assert len(assistant_message.tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(assistant_message.tool_calls or [])}\"\n )\nE AssertionError: Expected 0 tool calls, but got 1\nE assert 1 == 0\nE + where 1 = len(([ChatCompletionMessageToolCall(id='call_l05cckdk5mooai2iyfucg4s8', function=Function(arguments='{\"location\":\"San Francisco, CA\"}', name='get_weather'), type='function', index=0)]))\nE + where [ChatCompletionMessageToolCall(id='call_l05cckdk5mooai2iyfucg4s8', function=Function(arguments='{\"location\":\"San Francisco, CA\"}', name='get_weather'), type='function', index=0)] = ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_l05cckdk5mooai2iyfucg4s8', function=Function(arguments='{\"location\":\"San Francisco, CA\"}', name='get_weather'), type='function', index=0)]).tool_calls\n\ntests/verifications/openai_api/test_chat_completion.py:439: AssertionError" }, "teardown": { - "duration": 0.00042020808905363083, + "duration": 0.00040314625948667526, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-weather_tool_then_text]", - "lineno": 359, + "lineno": 380, "outcome": "passed", "keywords": [ "test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-weather_tool_then_text]", @@ -2353,21 +2383,21 @@ "case_id": "weather_tool_then_text" }, "setup": { - "duration": 0.01490145898424089, + "duration": 0.07405277714133263, "outcome": "passed" }, "call": { - "duration": 0.8346118750050664, + "duration": 0.8350177155807614, "outcome": "passed" }, "teardown": { - "duration": 0.00034404080361127853, + "duration": 0.00023361947387456894, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-add_product_tool]", - "lineno": 359, + "lineno": 380, 
"outcome": "passed", "keywords": [ "test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-add_product_tool]", @@ -2386,21 +2416,21 @@ "case_id": "add_product_tool" }, "setup": { - "duration": 0.014493625145405531, + "duration": 0.07361320778727531, "outcome": "passed" }, "call": { - "duration": 0.8973606249783188, + "duration": 1.0619212854653597, "outcome": "passed" }, "teardown": { - "duration": 0.00021345820277929306, + "duration": 0.0002395985648036003, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-get_then_create_event_tool]", - "lineno": 359, + "lineno": 380, "outcome": "passed", "keywords": [ "test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-get_then_create_event_tool]", @@ -2419,22 +2449,22 @@ "case_id": "get_then_create_event_tool" }, "setup": { - "duration": 0.009358166949823499, + "duration": 0.07290417980402708, "outcome": "passed" }, "call": { - "duration": 4.5295154170598835, + "duration": 4.241749887354672, "outcome": "passed" }, "teardown": { - "duration": 0.0002461671829223633, + "duration": 0.00027841050177812576, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-compare_monthly_expense_tool]", - "lineno": 359, - "outcome": "failed", + "lineno": 380, + "outcome": "passed", "keywords": [ "test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-compare_monthly_expense_tool]", "parametrize", @@ -2452,34 +2482,21 @@ "case_id": "compare_monthly_expense_tool" }, "setup": { - "duration": 0.009552374947816133, + "duration": 0.07301546633243561, "outcome": "passed" }, "call": { - "duration": 0.34176899981684983, - "outcome": "failed", - "crash": { - "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 429, - "message": "AssertionError: Expected arguments '{'month': 1, 'year': 2025}', got '{'month': '1', 'year': '2025'}'\nassert {'month': '1', 'year': '2025'} == {'month': 1, 'year': 2025}\n \n Differing items:\n {'month': '1'} != {'month': 1}\n {'year': '2025'} != {'year': 2025}\n \n Full diff:\n {...\n \n ...Full output truncated (7 lines hidden), use '-vv' to show" - }, - "traceback": [ - { - "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 429, - "message": "AssertionError" - } - ], - "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-3.3-70B-Instruct-Turbo', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'compare_monthly_expense_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'month': 1, 'year': ... 
'Total expenses for January 2025: $1000'}\"}, {'response': \"{'response': 'Total expenses for February 2024: $2000'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_non_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\"\n Test cases for multi-turn tool calling.\n Tool calls are asserted.\n Tool responses are provided in the test case.\n Final response is asserted.\n \"\"\"\n \n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n # Create a copy of the messages list to avoid modifying the original\n messages = []\n tools = case[\"input\"][\"tools\"]\n # Use deepcopy to prevent modification across runs/parametrization\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n # keep going until either\n # 1. we have messages to test in multi-turn\n # 2. no messages but last message is tool response\n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n # do not take new messages if last message is tool response\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n # Ensure new_messages is a list of message objects\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n # If it's a single message object, add it directly\n messages.append(new_messages)\n \n # --- API Call ---\n response = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=False,\n )\n \n # --- Process Response ---\n assistant_message = response.choices[0].message\n messages.append(assistant_message.model_dump(exclude_unset=True))\n \n assert assistant_message.role == \"assistant\"\n \n # Get the expected result data\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n # --- Assertions based on expected result ---\n assert len(assistant_message.tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(assistant_message.tool_calls or [])}\"\n )\n \n if num_tool_calls > 0:\n tool_call = assistant_message.tool_calls[0]\n assert tool_call.function.name == expected[\"tool_name\"], (\n f\"Expected tool '{expected['tool_name']}', got '{tool_call.function.name}'\"\n )\n # Parse the JSON string arguments before comparing\n actual_arguments = json.loads(tool_call.function.arguments)\n> assert actual_arguments == expected[\"tool_arguments\"], (\n f\"Expected arguments '{expected['tool_arguments']}', got '{actual_arguments}'\"\n )\nE AssertionError: Expected arguments '{'month': 1, 'year': 2025}', got '{'month': '1', 'year': '2025'}'\nE assert {'month': '1', 'year': '2025'} == {'month': 1, 'year': 2025}\nE \nE Differing items:\nE {'month': '1'} != {'month': 1}\nE {'year': '2025'} != {'year': 2025}\nE \nE Full diff:\nE {...\nE \nE ...Full output truncated (7 lines hidden), use '-vv' to show\n\ntests/verifications/openai_api/test_chat_completion.py:429: AssertionError" + "duration": 2.0520667918026447, + "outcome": "passed" }, "teardown": { - "duration": 
0.000527665950357914, + "duration": 0.0002469858154654503, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-text_then_weather_tool]", - "lineno": 359, + "lineno": 380, "outcome": "failed", "keywords": [ "test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-text_then_weather_tool]", @@ -2498,34 +2515,34 @@ "case_id": "text_then_weather_tool" }, "setup": { - "duration": 0.012501416960731149, + "duration": 0.07405530381947756, "outcome": "passed" }, "call": { - "duration": 1.585734374821186, + "duration": 0.48041669093072414, "outcome": "failed", "crash": { - "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 418, - "message": "AssertionError: Expected 0 tool calls, but got 2\nassert 2 == 0\n + where 2 = len(([ChatCompletionMessageToolCall(id='call_4fm3kj059swz9no94n6fg54d', function=Function(arguments='{\"location\":\"Sun, NA\"}', name='get_weather'), type='function', index=0), ChatCompletionMessageToolCall(id='call_lzc5lo7y2p7wjyquvmvvzt64', function=Function(arguments='{\"name\":\"Sun\"}', name='get_latin_name'), type='function', index=1)]))\n + where [ChatCompletionMessageToolCall(id='call_4fm3kj059swz9no94n6fg54d', function=Function(arguments='{\"location\":\"Sun, NA\"}', name='get_weather'), type='function', index=0), ChatCompletionMessageToolCall(id='call_lzc5lo7y2p7wjyquvmvvzt64', function=Function(arguments='{\"name\":\"Sun\"}', name='get_latin_name'), type='function', index=1)] = ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_4fm3kj059swz9no94n6fg54d', function=Function(arguments='{\"location\":\"Sun, NA\"}', name='get_weather'), type='function', index=0), ChatCompletionMessageToolCall(id='call_lzc5lo7y2p7wjyquvmvvzt64', function=Function(arguments='{\"name\":\"Sun\"}', name='get_latin_name'), type='function', index=1)]).tool_calls" + "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", + "lineno": 467, + "message": "AssertionError: Expected one of ['sol'] in content, but got: 'I am not able to complete this task as it falls outside of the scope of the functions I have been given.'\nassert False\n + where False = any(. 
at 0x7f4274057610>)" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 418, + "lineno": 467, "message": "AssertionError" } ], - "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'text_then_weather_tool', 'expected': [{'answer': ['sol'], 'num_tool_calls': 0}, {'num_tool_calls': 1, 'to...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_non_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\"\n Test cases for multi-turn tool calling.\n Tool calls are asserted.\n Tool responses are provided in the test case.\n Final response is asserted.\n \"\"\"\n \n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n # Create a copy of the messages list to avoid modifying the original\n messages = []\n tools = case[\"input\"][\"tools\"]\n # Use deepcopy to prevent modification across runs/parametrization\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n # keep going until either\n # 1. we have messages to test in multi-turn\n # 2. 
no messages but last message is tool response\n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n # do not take new messages if last message is tool response\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n # Ensure new_messages is a list of message objects\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n # If it's a single message object, add it directly\n messages.append(new_messages)\n \n # --- API Call ---\n response = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=False,\n )\n \n # --- Process Response ---\n assistant_message = response.choices[0].message\n messages.append(assistant_message.model_dump(exclude_unset=True))\n \n assert assistant_message.role == \"assistant\"\n \n # Get the expected result data\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n # --- Assertions based on expected result ---\n> assert len(assistant_message.tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(assistant_message.tool_calls or [])}\"\n )\nE AssertionError: Expected 0 tool calls, but got 2\nE assert 2 == 0\nE + where 2 = len(([ChatCompletionMessageToolCall(id='call_4fm3kj059swz9no94n6fg54d', function=Function(arguments='{\"location\":\"Sun, NA\"}', name='get_weather'), type='function', index=0), ChatCompletionMessageToolCall(id='call_lzc5lo7y2p7wjyquvmvvzt64', function=Function(arguments='{\"name\":\"Sun\"}', name='get_latin_name'), type='function', index=1)]))\nE + where [ChatCompletionMessageToolCall(id='call_4fm3kj059swz9no94n6fg54d', function=Function(arguments='{\"location\":\"Sun, NA\"}', name='get_weather'), type='function', index=0), ChatCompletionMessageToolCall(id='call_lzc5lo7y2p7wjyquvmvvzt64', function=Function(arguments='{\"name\":\"Sun\"}', name='get_latin_name'), type='function', index=1)] = ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_4fm3kj059swz9no94n6fg54d', function=Function(arguments='{\"location\":\"Sun, NA\"}', name='get_weather'), type='function', index=0), ChatCompletionMessageToolCall(id='call_lzc5lo7y2p7wjyquvmvvzt64', function=Function(arguments='{\"name\":\"Sun\"}', name='get_latin_name'), type='function', index=1)]).tool_calls\n\ntests/verifications/openai_api/test_chat_completion.py:418: AssertionError" + "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'text_then_weather_tool', 'expected': [{'answer': ['sol'], 'num_tool_calls': 0}, {'num_tool_calls': 1, 'to...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_non_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\"\n Test cases for multi-turn 
tool calling.\n Tool calls are asserted.\n Tool responses are provided in the test case.\n Final response is asserted.\n \"\"\"\n \n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n # Create a copy of the messages list to avoid modifying the original\n messages = []\n tools = case[\"input\"][\"tools\"]\n # Use deepcopy to prevent modification across runs/parametrization\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n # keep going until either\n # 1. we have messages to test in multi-turn\n # 2. no messages but last message is tool response\n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n # do not take new messages if last message is tool response\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n # Ensure new_messages is a list of message objects\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n # If it's a single message object, add it directly\n messages.append(new_messages)\n \n # --- API Call ---\n response = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=False,\n )\n \n # --- Process Response ---\n assistant_message = response.choices[0].message\n messages.append(assistant_message.model_dump(exclude_unset=True))\n \n assert assistant_message.role == \"assistant\"\n \n # Get the expected result data\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n # --- Assertions based on expected result ---\n assert len(assistant_message.tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(assistant_message.tool_calls or [])}\"\n )\n \n if num_tool_calls > 0:\n tool_call = assistant_message.tool_calls[0]\n assert tool_call.function.name == expected[\"tool_name\"], (\n f\"Expected tool '{expected['tool_name']}', got '{tool_call.function.name}'\"\n )\n # Parse the JSON string arguments before comparing\n actual_arguments = json.loads(tool_call.function.arguments)\n assert actual_arguments == expected[\"tool_arguments\"], (\n f\"Expected arguments '{expected['tool_arguments']}', got '{actual_arguments}'\"\n )\n \n # Prepare and append the tool response for the next turn\n tool_response = tool_responses.pop(0)\n messages.append(\n {\n \"role\": \"tool\",\n \"tool_call_id\": tool_call.id,\n \"content\": tool_response[\"response\"],\n }\n )\n else:\n assert assistant_message.content is not None, \"Expected content, but none received.\"\n expected_answers = expected[\"answer\"] # This is now a list\n content_lower = assistant_message.content.lower()\n> assert any(ans.lower() in content_lower for ans in expected_answers), (\n f\"Expected one of {expected_answers} in content, but got: '{assistant_message.content}'\"\n )\nE AssertionError: Expected one of ['sol'] in content, but got: 'I am not able to complete this task as it falls outside of the scope of the functions I have been given.'\nE assert False\nE + where False = any(. 
at 0x7f4274057610>)\n\ntests/verifications/openai_api/test_chat_completion.py:467: AssertionError" }, "teardown": { - "duration": 0.0003941669128835201, + "duration": 0.00035319291055202484, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-weather_tool_then_text]", - "lineno": 359, + "lineno": 380, "outcome": "passed", "keywords": [ "test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-weather_tool_then_text]", @@ -2544,21 +2561,21 @@ "case_id": "weather_tool_then_text" }, "setup": { - "duration": 0.014057958032935858, + "duration": 0.0724497502669692, "outcome": "passed" }, "call": { - "duration": 0.7121559998486191, + "duration": 0.832760401070118, "outcome": "passed" }, "teardown": { - "duration": 0.00048266700468957424, + "duration": 0.00026283878833055496, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-add_product_tool]", - "lineno": 359, + "lineno": 380, "outcome": "passed", "keywords": [ "test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-add_product_tool]", @@ -2577,21 +2594,21 @@ "case_id": "add_product_tool" }, "setup": { - "duration": 0.02072141715325415, + "duration": 0.07180811651051044, "outcome": "passed" }, "call": { - "duration": 1.0424797078594565, + "duration": 1.4359142612665892, "outcome": "passed" }, "teardown": { - "duration": 0.0004878339823335409, + "duration": 0.0002761436626315117, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-get_then_create_event_tool]", - "lineno": 359, + "lineno": 380, "outcome": "passed", "keywords": [ "test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-get_then_create_event_tool]", @@ -2610,21 +2627,21 @@ "case_id": "get_then_create_event_tool" }, "setup": { - "duration": 0.018570583080872893, + "duration": 0.07503274269402027, "outcome": "passed" }, "call": { - "duration": 3.4340267919469625, + "duration": 1.909641013480723, "outcome": "passed" }, "teardown": { - "duration": 0.00023016706109046936, + "duration": 0.0002613905817270279, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-compare_monthly_expense_tool]", - "lineno": 359, + "lineno": 380, "outcome": "passed", "keywords": [ "test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-compare_monthly_expense_tool]", @@ -2643,21 +2660,21 @@ "case_id": "compare_monthly_expense_tool" }, "setup": { - "duration": 0.009570334106683731, + "duration": 0.07153380755335093, "outcome": "passed" }, "call": { - "duration": 2.2068665840197355, + "duration": 2.695867782458663, "outcome": "passed" }, "teardown": { - "duration": 0.00051837507635355, + "duration": 0.00032124295830726624, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-text_then_weather_tool]", - "lineno": 359, + "lineno": 380, "outcome": "failed", "keywords": [ 
"test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-text_then_weather_tool]", @@ -2676,34 +2693,34 @@ "case_id": "text_then_weather_tool" }, "setup": { - "duration": 0.01873366697691381, + "duration": 0.07275318540632725, "outcome": "passed" }, "call": { - "duration": 0.5193468749057502, + "duration": 0.34551760647445917, "outcome": "failed", "crash": { - "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 446, - "message": "AssertionError: Expected one of ['sol'] in content, but got: '{\"name\": null, \"parameters\": null}'\nassert False\n + where False = any(. at 0x10e4c0f90>)" + "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", + "lineno": 467, + "message": "AssertionError: Expected one of ['sol'] in content, but got: '{\"name\": null, \"parameters\": null}'\nassert False\n + where False = any(. at 0x7f42742dd4d0>)" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 446, + "lineno": 467, "message": "AssertionError" } ], - "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'text_then_weather_tool', 'expected': [{'answer': ['sol'], 'num_tool_calls': 0}, {'num_tool_calls': 1, 'to...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_non_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\"\n Test cases for multi-turn tool calling.\n Tool calls are asserted.\n Tool responses are provided in the test case.\n Final response is asserted.\n \"\"\"\n \n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n # Create a copy of the messages list to avoid modifying the original\n messages = []\n tools = case[\"input\"][\"tools\"]\n # Use deepcopy to prevent modification across runs/parametrization\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n # keep going until either\n # 1. we have messages to test in multi-turn\n # 2. 
no messages but last message is tool response\n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n # do not take new messages if last message is tool response\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n # Ensure new_messages is a list of message objects\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n # If it's a single message object, add it directly\n messages.append(new_messages)\n \n # --- API Call ---\n response = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=False,\n )\n \n # --- Process Response ---\n assistant_message = response.choices[0].message\n messages.append(assistant_message.model_dump(exclude_unset=True))\n \n assert assistant_message.role == \"assistant\"\n \n # Get the expected result data\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n # --- Assertions based on expected result ---\n assert len(assistant_message.tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(assistant_message.tool_calls or [])}\"\n )\n \n if num_tool_calls > 0:\n tool_call = assistant_message.tool_calls[0]\n assert tool_call.function.name == expected[\"tool_name\"], (\n f\"Expected tool '{expected['tool_name']}', got '{tool_call.function.name}'\"\n )\n # Parse the JSON string arguments before comparing\n actual_arguments = json.loads(tool_call.function.arguments)\n assert actual_arguments == expected[\"tool_arguments\"], (\n f\"Expected arguments '{expected['tool_arguments']}', got '{actual_arguments}'\"\n )\n \n # Prepare and append the tool response for the next turn\n tool_response = tool_responses.pop(0)\n messages.append(\n {\n \"role\": \"tool\",\n \"tool_call_id\": tool_call.id,\n \"content\": tool_response[\"response\"],\n }\n )\n else:\n assert assistant_message.content is not None, \"Expected content, but none received.\"\n expected_answers = expected[\"answer\"] # This is now a list\n content_lower = assistant_message.content.lower()\n> assert any(ans.lower() in content_lower for ans in expected_answers), (\n f\"Expected one of {expected_answers} in content, but got: '{assistant_message.content}'\"\n )\nE AssertionError: Expected one of ['sol'] in content, but got: '{\"name\": null, \"parameters\": null}'\nE assert False\nE + where False = any(. 
at 0x10e4c0f90>)\n\ntests/verifications/openai_api/test_chat_completion.py:446: AssertionError" + "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'text_then_weather_tool', 'expected': [{'answer': ['sol'], 'num_tool_calls': 0}, {'num_tool_calls': 1, 'to...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_non_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\"\n Test cases for multi-turn tool calling.\n Tool calls are asserted.\n Tool responses are provided in the test case.\n Final response is asserted.\n \"\"\"\n \n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n # Create a copy of the messages list to avoid modifying the original\n messages = []\n tools = case[\"input\"][\"tools\"]\n # Use deepcopy to prevent modification across runs/parametrization\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n # keep going until either\n # 1. we have messages to test in multi-turn\n # 2. 
no messages but last message is tool response\n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n # do not take new messages if last message is tool response\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n # Ensure new_messages is a list of message objects\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n # If it's a single message object, add it directly\n messages.append(new_messages)\n \n # --- API Call ---\n response = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=False,\n )\n \n # --- Process Response ---\n assistant_message = response.choices[0].message\n messages.append(assistant_message.model_dump(exclude_unset=True))\n \n assert assistant_message.role == \"assistant\"\n \n # Get the expected result data\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n # --- Assertions based on expected result ---\n assert len(assistant_message.tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(assistant_message.tool_calls or [])}\"\n )\n \n if num_tool_calls > 0:\n tool_call = assistant_message.tool_calls[0]\n assert tool_call.function.name == expected[\"tool_name\"], (\n f\"Expected tool '{expected['tool_name']}', got '{tool_call.function.name}'\"\n )\n # Parse the JSON string arguments before comparing\n actual_arguments = json.loads(tool_call.function.arguments)\n assert actual_arguments == expected[\"tool_arguments\"], (\n f\"Expected arguments '{expected['tool_arguments']}', got '{actual_arguments}'\"\n )\n \n # Prepare and append the tool response for the next turn\n tool_response = tool_responses.pop(0)\n messages.append(\n {\n \"role\": \"tool\",\n \"tool_call_id\": tool_call.id,\n \"content\": tool_response[\"response\"],\n }\n )\n else:\n assert assistant_message.content is not None, \"Expected content, but none received.\"\n expected_answers = expected[\"answer\"] # This is now a list\n content_lower = assistant_message.content.lower()\n> assert any(ans.lower() in content_lower for ans in expected_answers), (\n f\"Expected one of {expected_answers} in content, but got: '{assistant_message.content}'\"\n )\nE AssertionError: Expected one of ['sol'] in content, but got: '{\"name\": null, \"parameters\": null}'\nE assert False\nE + where False = any(. 
at 0x7f42742dd4d0>)\n\ntests/verifications/openai_api/test_chat_completion.py:467: AssertionError" }, "teardown": { - "duration": 0.0004933748859912157, + "duration": 0.0003842068836092949, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-weather_tool_then_text]", - "lineno": 359, + "lineno": 380, "outcome": "passed", "keywords": [ "test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-weather_tool_then_text]", @@ -2722,21 +2739,21 @@ "case_id": "weather_tool_then_text" }, "setup": { - "duration": 0.014272749889642, + "duration": 0.07281951513141394, "outcome": "passed" }, "call": { - "duration": 1.911199334077537, + "duration": 1.008104412816465, "outcome": "passed" }, "teardown": { - "duration": 0.00043049990199506283, + "duration": 0.00026233773678541183, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-add_product_tool]", - "lineno": 359, + "lineno": 380, "outcome": "passed", "keywords": [ "test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-add_product_tool]", @@ -2755,21 +2772,21 @@ "case_id": "add_product_tool" }, "setup": { - "duration": 0.031040542060509324, + "duration": 0.07155719958245754, "outcome": "passed" }, "call": { - "duration": 3.0026419160421938, + "duration": 2.3485742239281535, "outcome": "passed" }, "teardown": { - "duration": 0.00045104208402335644, + "duration": 0.0002629430964589119, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-get_then_create_event_tool]", - "lineno": 359, + "lineno": 380, "outcome": "failed", "keywords": [ "test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-get_then_create_event_tool]", @@ -2788,34 +2805,34 @@ "case_id": "get_then_create_event_tool" }, "setup": { - "duration": 0.016529500018805265, + "duration": 0.07251190021634102, "outcome": "passed" }, "call": { - "duration": 2.7563346249517053, + "duration": 2.9882029946893454, "outcome": "failed", "crash": { - "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 429, - "message": "AssertionError: Expected arguments '{'name': 'Team Building', 'date': '2025-03-03', 'time': '10:00', 'location': 'Main Conference Room', 'participants': ['Alice', 'Bob', 'Charlie']}', got '{'participants': '[\"Alice\", \"Bob\", \"Charlie\"]', 'location': 'Main Conference Room', 'name': 'Team Building', 'date': '2025-03-03', 'time': '10:00'}'\nassert {'date': '202...arlie\"]', ...} == {'date': '202...harlie'], ...}\n \n Omitting 4 identical items, use -vv to show\n Differing items:\n {'participants': '[\"Alice\", \"Bob\", \"Charlie\"]'} != {'participants': ['Alice', 'Bob', 'Charlie']}\n \n Full diff:\n {...\n \n ...Full output truncated (11 lines hidden), use '-vv' to show" + "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", + "lineno": 450, + "message": "AssertionError: Expected arguments '{'name': 'Team Building', 'date': '2025-03-03', 'time': '10:00', 'location': 'Main Conference Room', 'participants': ['Alice', 'Bob', 'Charlie']}', got '{'date': 
'\"2025-03-03\"', 'location': '\"Main Conference Room\"', 'name': '\"Team Building\"', 'participants': ['Alice', 'Bob', 'Charlie'], 'time': '\"10:00\"'}'\nassert {'date': '\"20...harlie'], ...} == {'date': '202...harlie'], ...}\n \n Omitting 1 identical items, use -vv to show\n Differing items:\n {'date': '\"2025-03-03\"'} != {'date': '2025-03-03'}\n {'name': '\"Team Building\"'} != {'name': 'Team Building'}\n {'time': '\"10:00\"'} != {'time': '10:00'}\n {'location': '\"Main Conference Room\"'} != {'location': 'Main Conference Room'}...\n \n ...Full output truncated (21 lines hidden), use '-vv' to show" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 429, + "lineno": 450, "message": "AssertionError" } ], - "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'get_then_create_event_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'date': '2025-03-03', ...ents found for 2025-03-03 at 10:00'}\"}, {'response': \"{'response': 'Successfully created new event with id: e_123'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_non_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\"\n Test cases for multi-turn tool calling.\n Tool calls are asserted.\n Tool responses are provided in the test case.\n Final response is asserted.\n \"\"\"\n \n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n # Create a copy of the messages list to avoid modifying the original\n messages = []\n tools = case[\"input\"][\"tools\"]\n # Use deepcopy to prevent modification across runs/parametrization\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n # keep going until either\n # 1. we have messages to test in multi-turn\n # 2. 
no messages but last message is tool response\n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n # do not take new messages if last message is tool response\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n # Ensure new_messages is a list of message objects\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n # If it's a single message object, add it directly\n messages.append(new_messages)\n \n # --- API Call ---\n response = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=False,\n )\n \n # --- Process Response ---\n assistant_message = response.choices[0].message\n messages.append(assistant_message.model_dump(exclude_unset=True))\n \n assert assistant_message.role == \"assistant\"\n \n # Get the expected result data\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n # --- Assertions based on expected result ---\n assert len(assistant_message.tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(assistant_message.tool_calls or [])}\"\n )\n \n if num_tool_calls > 0:\n tool_call = assistant_message.tool_calls[0]\n assert tool_call.function.name == expected[\"tool_name\"], (\n f\"Expected tool '{expected['tool_name']}', got '{tool_call.function.name}'\"\n )\n # Parse the JSON string arguments before comparing\n actual_arguments = json.loads(tool_call.function.arguments)\n> assert actual_arguments == expected[\"tool_arguments\"], (\n f\"Expected arguments '{expected['tool_arguments']}', got '{actual_arguments}'\"\n )\nE AssertionError: Expected arguments '{'name': 'Team Building', 'date': '2025-03-03', 'time': '10:00', 'location': 'Main Conference Room', 'participants': ['Alice', 'Bob', 'Charlie']}', got '{'participants': '[\"Alice\", \"Bob\", \"Charlie\"]', 'location': 'Main Conference Room', 'name': 'Team Building', 'date': '2025-03-03', 'time': '10:00'}'\nE assert {'date': '202...arlie\"]', ...} == {'date': '202...harlie'], ...}\nE \nE Omitting 4 identical items, use -vv to show\nE Differing items:\nE {'participants': '[\"Alice\", \"Bob\", \"Charlie\"]'} != {'participants': ['Alice', 'Bob', 'Charlie']}\nE \nE Full diff:\nE {...\nE \nE ...Full output truncated (11 lines hidden), use '-vv' to show\n\ntests/verifications/openai_api/test_chat_completion.py:429: AssertionError" + "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'get_then_create_event_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'date': '2025-03-03', ...ents found for 2025-03-03 at 10:00'}\"}, {'response': \"{'response': 'Successfully created new event with id: e_123'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_non_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\"\n Test cases for multi-turn tool calling.\n Tool calls are asserted.\n Tool responses are provided in the 
test case.\n Final response is asserted.\n \"\"\"\n \n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n # Create a copy of the messages list to avoid modifying the original\n messages = []\n tools = case[\"input\"][\"tools\"]\n # Use deepcopy to prevent modification across runs/parametrization\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n # keep going until either\n # 1. we have messages to test in multi-turn\n # 2. no messages but last message is tool response\n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n # do not take new messages if last message is tool response\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n # Ensure new_messages is a list of message objects\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n # If it's a single message object, add it directly\n messages.append(new_messages)\n \n # --- API Call ---\n response = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=False,\n )\n \n # --- Process Response ---\n assistant_message = response.choices[0].message\n messages.append(assistant_message.model_dump(exclude_unset=True))\n \n assert assistant_message.role == \"assistant\"\n \n # Get the expected result data\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n # --- Assertions based on expected result ---\n assert len(assistant_message.tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(assistant_message.tool_calls or [])}\"\n )\n \n if num_tool_calls > 0:\n tool_call = assistant_message.tool_calls[0]\n assert tool_call.function.name == expected[\"tool_name\"], (\n f\"Expected tool '{expected['tool_name']}', got '{tool_call.function.name}'\"\n )\n # Parse the JSON string arguments before comparing\n actual_arguments = json.loads(tool_call.function.arguments)\n> assert actual_arguments == expected[\"tool_arguments\"], (\n f\"Expected arguments '{expected['tool_arguments']}', got '{actual_arguments}'\"\n )\nE AssertionError: Expected arguments '{'name': 'Team Building', 'date': '2025-03-03', 'time': '10:00', 'location': 'Main Conference Room', 'participants': ['Alice', 'Bob', 'Charlie']}', got '{'date': '\"2025-03-03\"', 'location': '\"Main Conference Room\"', 'name': '\"Team Building\"', 'participants': ['Alice', 'Bob', 'Charlie'], 'time': '\"10:00\"'}'\nE assert {'date': '\"20...harlie'], ...} == {'date': '202...harlie'], ...}\nE \nE Omitting 1 identical items, use -vv to show\nE Differing items:\nE {'date': '\"2025-03-03\"'} != {'date': '2025-03-03'}\nE {'name': '\"Team Building\"'} != {'name': 'Team Building'}\nE {'time': '\"10:00\"'} != {'time': '10:00'}\nE {'location': '\"Main Conference Room\"'} != {'location': 'Main Conference Room'}...\nE \nE ...Full output truncated (21 lines hidden), use '-vv' to show\n\ntests/verifications/openai_api/test_chat_completion.py:450: AssertionError" }, "teardown": { - "duration": 0.0005542081780731678, + "duration": 0.0003328891471028328, "outcome": "passed" } }, { "nodeid": 
"tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-compare_monthly_expense_tool]", - "lineno": 359, + "lineno": 380, "outcome": "passed", "keywords": [ "test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-compare_monthly_expense_tool]", @@ -2834,21 +2851,21 @@ "case_id": "compare_monthly_expense_tool" }, "setup": { - "duration": 0.013607957866042852, + "duration": 0.07363704219460487, "outcome": "passed" }, "call": { - "duration": 3.0105869588442147, + "duration": 4.031332626007497, "outcome": "passed" }, "teardown": { - "duration": 0.0004793750122189522, + "duration": 0.0002817586064338684, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-text_then_weather_tool]", - "lineno": 450, + "lineno": 471, "outcome": "failed", "keywords": [ "test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-text_then_weather_tool]", @@ -2867,34 +2884,34 @@ "case_id": "text_then_weather_tool" }, "setup": { - "duration": 0.01806124998256564, + "duration": 0.07673048228025436, "outcome": "passed" }, "call": { - "duration": 0.3295827910769731, + "duration": 0.3994998000562191, "outcome": "failed", "crash": { - "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 500, - "message": "AssertionError: Expected 0 tool calls, but got 1\nassert 1 == 0\n + where 1 = len(([{'function': {'arguments': '{\"location\":\"San Francisco, CA\"}', 'name': 'get_weather'}, 'id': 'call_l066e8oey2i8exeodczlv1mh', 'type': 'function'}]))" + "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", + "lineno": 521, + "message": "AssertionError: Expected 0 tool calls, but got 1\nassert 1 == 0\n + where 1 = len(([{'function': {'arguments': '{\"location\":\"San Francisco, CA\"}', 'name': 'get_weather'}, 'id': 'call_dqcu28a6iyxlobv36c23k0qp', 'type': 'function'}]))" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 500, + "lineno": 521, "message": "AssertionError" } ], - "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-3.3-70B-Instruct-Turbo', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'text_then_weather_tool', 'expected': [{'answer': ['sol'], 'num_tool_calls': 0}, {'num_tool_calls': 1, 'to...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n 
expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n \n # --- Construct Assistant Message for History ---\n assistant_message_dict = {\"role\": \"assistant\"}\n if accumulated_content:\n assistant_message_dict[\"content\"] = accumulated_content\n if accumulated_tool_calls:\n assistant_message_dict[\"tool_calls\"] = accumulated_tool_calls\n \n messages.append(assistant_message_dict)\n \n # --- Assertions ---\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n> assert len(accumulated_tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(accumulated_tool_calls or [])}\"\n )\nE AssertionError: Expected 0 tool calls, but got 1\nE assert 1 == 0\nE + where 1 = len(([{'function': {'arguments': '{\"location\":\"San Francisco, CA\"}', 'name': 'get_weather'}, 'id': 'call_l066e8oey2i8exeodczlv1mh', 'type': 'function'}]))\n\ntests/verifications/openai_api/test_chat_completion.py:500: AssertionError" + "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-3.3-70B-Instruct-Turbo', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'text_then_weather_tool', 'expected': [{'answer': ['sol'], 'num_tool_calls': 0}, {'num_tool_calls': 1, 'to...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = 
openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n \n # --- Construct Assistant Message for History ---\n assistant_message_dict = {\"role\": \"assistant\"}\n if accumulated_content:\n assistant_message_dict[\"content\"] = accumulated_content\n if accumulated_tool_calls:\n assistant_message_dict[\"tool_calls\"] = accumulated_tool_calls\n \n messages.append(assistant_message_dict)\n \n # --- Assertions ---\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n> assert len(accumulated_tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(accumulated_tool_calls or [])}\"\n )\nE AssertionError: Expected 0 tool calls, but got 1\nE assert 1 == 0\nE + where 1 = len(([{'function': {'arguments': '{\"location\":\"San Francisco, CA\"}', 'name': 'get_weather'}, 'id': 'call_dqcu28a6iyxlobv36c23k0qp', 'type': 'function'}]))\n\ntests/verifications/openai_api/test_chat_completion.py:521: AssertionError" }, "teardown": { - "duration": 0.0002942080609500408, + "duration": 0.0003687366843223572, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-weather_tool_then_text]", - "lineno": 450, + "lineno": 471, "outcome": "failed", "keywords": [ "test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-weather_tool_then_text]", @@ -2913,34 +2930,34 @@ "case_id": "weather_tool_then_text" }, "setup": { - "duration": 0.007637625094503164, + "duration": 0.07477510999888182, "outcome": "passed" }, "call": { - "duration": 2.021851292112842, + "duration": 0.918418399989605, "outcome": "failed", "crash": { - "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 526, + "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", + "lineno": 547, "message": "AssertionError: Expected content, but none received.\nassert ('' is not None and '' != '')" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 526, + "lineno": 547, "message": "AssertionError" } ], - "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-3.3-70B-Instruct-Turbo', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'weather_tool_then_text', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'location': 'San Francisco...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on 
provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n \n # --- Construct Assistant Message for History ---\n assistant_message_dict = {\"role\": \"assistant\"}\n if accumulated_content:\n assistant_message_dict[\"content\"] = accumulated_content\n if accumulated_tool_calls:\n assistant_message_dict[\"tool_calls\"] = accumulated_tool_calls\n \n messages.append(assistant_message_dict)\n \n # --- Assertions ---\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n assert len(accumulated_tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(accumulated_tool_calls or [])}\"\n )\n \n if num_tool_calls > 0:\n # Use the first accumulated tool call for assertion\n tool_call = accumulated_tool_calls[0]\n assert tool_call[\"function\"][\"name\"] == expected[\"tool_name\"], (\n f\"Expected tool '{expected['tool_name']}', got '{tool_call['function']['name']}'\"\n )\n # Parse the accumulated arguments string for comparison\n actual_arguments = json.loads(tool_call[\"function\"][\"arguments\"])\n assert actual_arguments == expected[\"tool_arguments\"], (\n f\"Expected arguments '{expected['tool_arguments']}', got '{actual_arguments}'\"\n )\n \n # Prepare and append the tool response for the next turn\n tool_response = tool_responses.pop(0)\n messages.append(\n {\n \"role\": \"tool\",\n \"tool_call_id\": tool_call[\"id\"],\n \"content\": tool_response[\"response\"],\n }\n )\n else:\n> assert accumulated_content is not None and accumulated_content != \"\", \"Expected content, but none received.\"\nE AssertionError: Expected content, but none received.\nE assert ('' is not None and '' != '')\n\ntests/verifications/openai_api/test_chat_completion.py:526: AssertionError" + "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-3.3-70B-Instruct-Turbo', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'weather_tool_then_text', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'location': 'San Francisco...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n 
test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n \n # --- Construct Assistant Message for History ---\n assistant_message_dict = {\"role\": \"assistant\"}\n if accumulated_content:\n assistant_message_dict[\"content\"] = accumulated_content\n if accumulated_tool_calls:\n assistant_message_dict[\"tool_calls\"] = accumulated_tool_calls\n \n messages.append(assistant_message_dict)\n \n # --- Assertions ---\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n assert len(accumulated_tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(accumulated_tool_calls or [])}\"\n )\n \n if num_tool_calls > 0:\n # Use the first accumulated tool call for assertion\n tool_call = accumulated_tool_calls[0]\n assert tool_call[\"function\"][\"name\"] == expected[\"tool_name\"], (\n f\"Expected tool '{expected['tool_name']}', got '{tool_call['function']['name']}'\"\n )\n # Parse the accumulated arguments string for comparison\n actual_arguments = json.loads(tool_call[\"function\"][\"arguments\"])\n assert actual_arguments == expected[\"tool_arguments\"], (\n f\"Expected arguments '{expected['tool_arguments']}', got '{actual_arguments}'\"\n )\n \n # Prepare and append the tool response for the next turn\n tool_response = tool_responses.pop(0)\n messages.append(\n {\n \"role\": \"tool\",\n \"tool_call_id\": tool_call[\"id\"],\n \"content\": tool_response[\"response\"],\n }\n )\n else:\n> assert accumulated_content is not None and accumulated_content != \"\", \"Expected content, but none received.\"\nE AssertionError: Expected content, but none received.\nE assert ('' is not None and '' != '')\n\ntests/verifications/openai_api/test_chat_completion.py:547: AssertionError" }, "teardown": { - "duration": 0.00036791712045669556, + "duration": 0.00036141276359558105, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-add_product_tool]", - "lineno": 450, + "lineno": 471, "outcome": "passed", "keywords": [ "test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-add_product_tool]", @@ -2959,21 +2976,21 @@ "case_id": "add_product_tool" }, "setup": { - "duration": 0.013031583046540618, + "duration": 0.07217607088387012, "outcome": "passed" }, "call": { - "duration": 0.8596610419917852, + "duration": 1.2676455974578857, "outcome": "passed" }, "teardown": { - "duration": 0.00042829103767871857, 
+ "duration": 0.00024215038865804672, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-get_then_create_event_tool]", - "lineno": 450, + "lineno": 471, "outcome": "failed", "keywords": [ "test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-get_then_create_event_tool]", @@ -2992,34 +3009,34 @@ "case_id": "get_then_create_event_tool" }, "setup": { - "duration": 0.015244666952639818, + "duration": 0.0713065592572093, "outcome": "passed" }, "call": { - "duration": 1.0227877080906183, + "duration": 1.0453352769836783, "outcome": "failed", "crash": { - "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 526, + "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", + "lineno": 547, "message": "AssertionError: Expected content, but none received.\nassert ('' is not None and '' != '')" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 526, + "lineno": 547, "message": "AssertionError" } ], - "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-3.3-70B-Instruct-Turbo', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'get_then_create_event_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'date': '2025-03-03', ...ents found for 2025-03-03 at 10:00'}\"}, {'response': \"{'response': 'Successfully created new event with id: e_123'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n \n # --- Construct Assistant Message for History ---\n assistant_message_dict = {\"role\": \"assistant\"}\n if accumulated_content:\n assistant_message_dict[\"content\"] = accumulated_content\n if accumulated_tool_calls:\n assistant_message_dict[\"tool_calls\"] = accumulated_tool_calls\n \n 
messages.append(assistant_message_dict)\n \n # --- Assertions ---\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n assert len(accumulated_tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(accumulated_tool_calls or [])}\"\n )\n \n if num_tool_calls > 0:\n # Use the first accumulated tool call for assertion\n tool_call = accumulated_tool_calls[0]\n assert tool_call[\"function\"][\"name\"] == expected[\"tool_name\"], (\n f\"Expected tool '{expected['tool_name']}', got '{tool_call['function']['name']}'\"\n )\n # Parse the accumulated arguments string for comparison\n actual_arguments = json.loads(tool_call[\"function\"][\"arguments\"])\n assert actual_arguments == expected[\"tool_arguments\"], (\n f\"Expected arguments '{expected['tool_arguments']}', got '{actual_arguments}'\"\n )\n \n # Prepare and append the tool response for the next turn\n tool_response = tool_responses.pop(0)\n messages.append(\n {\n \"role\": \"tool\",\n \"tool_call_id\": tool_call[\"id\"],\n \"content\": tool_response[\"response\"],\n }\n )\n else:\n> assert accumulated_content is not None and accumulated_content != \"\", \"Expected content, but none received.\"\nE AssertionError: Expected content, but none received.\nE assert ('' is not None and '' != '')\n\ntests/verifications/openai_api/test_chat_completion.py:526: AssertionError" + "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-3.3-70B-Instruct-Turbo', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'get_then_create_event_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'date': '2025-03-03', ...ents found for 2025-03-03 at 10:00'}\"}, {'response': \"{'response': 'Successfully created new event with id: e_123'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n \n # --- Construct Assistant Message for History ---\n assistant_message_dict = {\"role\": \"assistant\"}\n 
if accumulated_content:\n assistant_message_dict[\"content\"] = accumulated_content\n if accumulated_tool_calls:\n assistant_message_dict[\"tool_calls\"] = accumulated_tool_calls\n \n messages.append(assistant_message_dict)\n \n # --- Assertions ---\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n assert len(accumulated_tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(accumulated_tool_calls or [])}\"\n )\n \n if num_tool_calls > 0:\n # Use the first accumulated tool call for assertion\n tool_call = accumulated_tool_calls[0]\n assert tool_call[\"function\"][\"name\"] == expected[\"tool_name\"], (\n f\"Expected tool '{expected['tool_name']}', got '{tool_call['function']['name']}'\"\n )\n # Parse the accumulated arguments string for comparison\n actual_arguments = json.loads(tool_call[\"function\"][\"arguments\"])\n assert actual_arguments == expected[\"tool_arguments\"], (\n f\"Expected arguments '{expected['tool_arguments']}', got '{actual_arguments}'\"\n )\n \n # Prepare and append the tool response for the next turn\n tool_response = tool_responses.pop(0)\n messages.append(\n {\n \"role\": \"tool\",\n \"tool_call_id\": tool_call[\"id\"],\n \"content\": tool_response[\"response\"],\n }\n )\n else:\n> assert accumulated_content is not None and accumulated_content != \"\", \"Expected content, but none received.\"\nE AssertionError: Expected content, but none received.\nE assert ('' is not None and '' != '')\n\ntests/verifications/openai_api/test_chat_completion.py:547: AssertionError" }, "teardown": { - "duration": 0.00024933391250669956, + "duration": 0.00030668359249830246, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-compare_monthly_expense_tool]", - "lineno": 450, + "lineno": 471, "outcome": "failed", "keywords": [ "test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-compare_monthly_expense_tool]", @@ -3038,34 +3055,34 @@ "case_id": "compare_monthly_expense_tool" }, "setup": { - "duration": 0.008626125054433942, + "duration": 0.07108221855014563, "outcome": "passed" }, "call": { - "duration": 0.3212552920449525, + "duration": 1.034472893923521, "outcome": "failed", "crash": { - "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 512, - "message": "AssertionError: Expected arguments '{'month': 1, 'year': 2025}', got '{'month': '1', 'year': '2025'}'\nassert {'month': '1', 'year': '2025'} == {'month': 1, 'year': 2025}\n \n Differing items:\n {'month': '1'} != {'month': 1}\n {'year': '2025'} != {'year': 2025}\n \n Full diff:\n {...\n \n ...Full output truncated (7 lines hidden), use '-vv' to show" + "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", + "lineno": 547, + "message": "AssertionError: Expected content, but none received.\nassert ('' is not None and '' != '')" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 512, + "lineno": 547, "message": "AssertionError" } ], - "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-3.3-70B-Instruct-Turbo', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 
'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'compare_monthly_expense_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'month': 1, 'year': ... 'Total expenses for January 2025: $1000'}\"}, {'response': \"{'response': 'Total expenses for February 2024: $2000'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n \n # --- Construct Assistant Message for History ---\n assistant_message_dict = {\"role\": \"assistant\"}\n if accumulated_content:\n assistant_message_dict[\"content\"] = accumulated_content\n if accumulated_tool_calls:\n assistant_message_dict[\"tool_calls\"] = accumulated_tool_calls\n \n messages.append(assistant_message_dict)\n \n # --- Assertions ---\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n assert len(accumulated_tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(accumulated_tool_calls or [])}\"\n )\n \n if num_tool_calls > 0:\n # Use the first accumulated tool call for assertion\n tool_call = accumulated_tool_calls[0]\n assert tool_call[\"function\"][\"name\"] == expected[\"tool_name\"], (\n f\"Expected tool '{expected['tool_name']}', got '{tool_call['function']['name']}'\"\n )\n # Parse the accumulated arguments string for comparison\n actual_arguments = json.loads(tool_call[\"function\"][\"arguments\"])\n> assert actual_arguments == expected[\"tool_arguments\"], (\n f\"Expected arguments '{expected['tool_arguments']}', got '{actual_arguments}'\"\n )\nE AssertionError: Expected arguments '{'month': 1, 'year': 2025}', got '{'month': '1', 'year': '2025'}'\nE assert {'month': '1', 'year': '2025'} == {'month': 1, 'year': 2025}\nE \nE Differing items:\nE {'month': '1'} != {'month': 1}\nE {'year': '2025'} != {'year': 2025}\nE \nE Full diff:\nE {...\nE \nE ...Full output truncated (7 lines hidden), use '-vv' to show\n\ntests/verifications/openai_api/test_chat_completion.py:512: AssertionError" + "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-3.3-70B-Instruct-Turbo', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 
'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'compare_monthly_expense_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'month': 1, 'year': ... 'Total expenses for January 2025: $1000'}\"}, {'response': \"{'response': 'Total expenses for February 2024: $2000'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n \n # --- Construct Assistant Message for History ---\n assistant_message_dict = {\"role\": \"assistant\"}\n if accumulated_content:\n assistant_message_dict[\"content\"] = accumulated_content\n if accumulated_tool_calls:\n assistant_message_dict[\"tool_calls\"] = accumulated_tool_calls\n \n messages.append(assistant_message_dict)\n \n # --- Assertions ---\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n assert len(accumulated_tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(accumulated_tool_calls or [])}\"\n )\n \n if num_tool_calls > 0:\n # Use the first accumulated tool call for assertion\n tool_call = accumulated_tool_calls[0]\n assert tool_call[\"function\"][\"name\"] == expected[\"tool_name\"], (\n f\"Expected tool '{expected['tool_name']}', got '{tool_call['function']['name']}'\"\n )\n # Parse the accumulated arguments string for comparison\n actual_arguments = json.loads(tool_call[\"function\"][\"arguments\"])\n assert actual_arguments == expected[\"tool_arguments\"], (\n f\"Expected arguments '{expected['tool_arguments']}', got '{actual_arguments}'\"\n )\n \n # Prepare and append the tool response for the next turn\n tool_response = tool_responses.pop(0)\n messages.append(\n {\n \"role\": \"tool\",\n \"tool_call_id\": tool_call[\"id\"],\n \"content\": tool_response[\"response\"],\n }\n )\n else:\n> assert accumulated_content is not None and accumulated_content != \"\", \"Expected content, but none received.\"\nE AssertionError: Expected content, but none received.\nE assert ('' is not None and '' != '')\n\ntests/verifications/openai_api/test_chat_completion.py:547: AssertionError" }, "teardown": { - "duration": 
0.00020562508143484592, + "duration": 0.00035398639738559723, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-text_then_weather_tool]", - "lineno": 450, + "lineno": 471, "outcome": "failed", "keywords": [ "test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-text_then_weather_tool]", @@ -3084,39 +3101,39 @@ "case_id": "text_then_weather_tool" }, "setup": { - "duration": 0.007338125025853515, + "duration": 0.07186305243521929, "outcome": "passed" }, "call": { - "duration": 0.4175920831039548, + "duration": 1.8766405330970883, "outcome": "failed", "crash": { - "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 587, + "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", + "lineno": 688, "message": "IndexError: list index out of range" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 485, + "lineno": 506, "message": "" }, { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 587, + "lineno": 688, "message": "IndexError" } ], - "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'text_then_weather_tool', 'expected': [{'answer': ['sol'], 'num_tool_calls': 0}, {'num_tool_calls': 1, 'to...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n> accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n\ntests/verifications/openai_api/test_chat_completion.py:485: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\nstream = \n\n def _accumulate_streaming_tool_calls(stream):\n \"\"\"Accumulates tool 
calls and content from a streaming ChatCompletion response.\"\"\"\n tool_calls_buffer = {}\n current_id = None\n full_content = \"\" # Initialize content accumulator\n # Process streaming chunks\n for chunk in stream:\n> choice = chunk.choices[0]\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:587: IndexError" + "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'text_then_weather_tool', 'expected': [{'answer': ['sol'], 'num_tool_calls': 0}, {'num_tool_calls': 1, 'to...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n> accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n\ntests/verifications/openai_api/test_chat_completion.py:506: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\nstream = \n\n def _accumulate_streaming_tool_calls(stream):\n \"\"\"Accumulates tool calls and content from a streaming ChatCompletion response.\"\"\"\n tool_calls_buffer = {}\n current_id = None\n full_content = \"\" # Initialize content accumulator\n # Process streaming chunks\n for chunk in stream:\n> choice = chunk.choices[0]\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:688: IndexError" }, "teardown": { - "duration": 0.00023462506942451, + "duration": 0.0003088880330324173, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-weather_tool_then_text]", - "lineno": 450, + "lineno": 471, "outcome": "failed", "keywords": [ "test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-weather_tool_then_text]", @@ -3135,39 +3152,39 @@ "case_id": "weather_tool_then_text" }, "setup": { - 
"duration": 0.007788832997903228, + "duration": 0.0846314700320363, "outcome": "passed" }, "call": { - "duration": 0.45610866602510214, + "duration": 0.40889575984328985, "outcome": "failed", "crash": { - "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 587, + "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", + "lineno": 688, "message": "IndexError: list index out of range" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 485, + "lineno": 506, "message": "" }, { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 587, + "lineno": 688, "message": "IndexError" } ], - "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'weather_tool_then_text', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'location': 'San Francisco...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n> accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n\ntests/verifications/openai_api/test_chat_completion.py:485: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\nstream = \n\n def _accumulate_streaming_tool_calls(stream):\n \"\"\"Accumulates tool calls and content from a streaming ChatCompletion response.\"\"\"\n tool_calls_buffer = {}\n current_id = None\n full_content = \"\" # Initialize content accumulator\n # Process streaming chunks\n for chunk in stream:\n> choice = chunk.choices[0]\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:587: IndexError" + "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': 
{'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'weather_tool_then_text', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'location': 'San Francisco...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n> accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n\ntests/verifications/openai_api/test_chat_completion.py:506: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\nstream = \n\n def _accumulate_streaming_tool_calls(stream):\n \"\"\"Accumulates tool calls and content from a streaming ChatCompletion response.\"\"\"\n tool_calls_buffer = {}\n current_id = None\n full_content = \"\" # Initialize content accumulator\n # Process streaming chunks\n for chunk in stream:\n> choice = chunk.choices[0]\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:688: IndexError" }, "teardown": { - "duration": 0.00021450011990964413, + "duration": 0.0003652172163128853, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-add_product_tool]", - "lineno": 450, + "lineno": 471, "outcome": "failed", "keywords": [ "test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-add_product_tool]", @@ -3186,39 +3203,39 @@ "case_id": "add_product_tool" }, "setup": { - "duration": 0.006751166889443994, + "duration": 0.07273881137371063, "outcome": "passed" }, "call": { - "duration": 0.7053082089405507, + "duration": 2.251293654553592, "outcome": "failed", "crash": { - "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 587, + "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", + "lineno": 688, "message": "IndexError: list index out of range" }, "traceback": [ { "path": 
"tests/verifications/openai_api/test_chat_completion.py", - "lineno": 485, + "lineno": 506, "message": "" }, { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 587, + "lineno": 688, "message": "IndexError" } ], - "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'add_product_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'inStock': True, 'name': 'Widget...}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': 'Successfully added product with id: 123'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n> accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n\ntests/verifications/openai_api/test_chat_completion.py:485: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\nstream = \n\n def _accumulate_streaming_tool_calls(stream):\n \"\"\"Accumulates tool calls and content from a streaming ChatCompletion response.\"\"\"\n tool_calls_buffer = {}\n current_id = None\n full_content = \"\" # Initialize content accumulator\n # Process streaming chunks\n for chunk in stream:\n> choice = chunk.choices[0]\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:587: IndexError" + "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'add_product_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'inStock': True, 'name': 'Widget...}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': 'Successfully added product with id: 123'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n 
chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n> accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n\ntests/verifications/openai_api/test_chat_completion.py:506: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\nstream = \n\n def _accumulate_streaming_tool_calls(stream):\n \"\"\"Accumulates tool calls and content from a streaming ChatCompletion response.\"\"\"\n tool_calls_buffer = {}\n current_id = None\n full_content = \"\" # Initialize content accumulator\n # Process streaming chunks\n for chunk in stream:\n> choice = chunk.choices[0]\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:688: IndexError" }, "teardown": { - "duration": 0.00021783309057354927, + "duration": 0.00030664633959531784, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-get_then_create_event_tool]", - "lineno": 450, + "lineno": 471, "outcome": "failed", "keywords": [ "test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-get_then_create_event_tool]", @@ -3237,39 +3254,39 @@ "case_id": "get_then_create_event_tool" }, "setup": { - "duration": 0.008729791967198253, + "duration": 0.071181770414114, "outcome": "passed" }, "call": { - "duration": 0.5665898330044001, + "duration": 0.5708655547350645, "outcome": "failed", "crash": { - "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 587, + "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", + "lineno": 688, "message": "IndexError: list index out of range" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 485, + "lineno": 506, "message": "" }, { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 587, + "lineno": 688, "message": "IndexError" } ], - "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 
'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'get_then_create_event_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'date': '2025-03-03', ...ents found for 2025-03-03 at 10:00'}\"}, {'response': \"{'response': 'Successfully created new event with id: e_123'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n> accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n\ntests/verifications/openai_api/test_chat_completion.py:485: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\nstream = \n\n def _accumulate_streaming_tool_calls(stream):\n \"\"\"Accumulates tool calls and content from a streaming ChatCompletion response.\"\"\"\n tool_calls_buffer = {}\n current_id = None\n full_content = \"\" # Initialize content accumulator\n # Process streaming chunks\n for chunk in stream:\n> choice = chunk.choices[0]\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:587: IndexError" + "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'get_then_create_event_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'date': '2025-03-03', ...ents found for 2025-03-03 at 10:00'}\"}, {'response': \"{'response': 'Successfully created new event with id: e_123'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n 
messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n> accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n\ntests/verifications/openai_api/test_chat_completion.py:506: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\nstream = \n\n def _accumulate_streaming_tool_calls(stream):\n \"\"\"Accumulates tool calls and content from a streaming ChatCompletion response.\"\"\"\n tool_calls_buffer = {}\n current_id = None\n full_content = \"\" # Initialize content accumulator\n # Process streaming chunks\n for chunk in stream:\n> choice = chunk.choices[0]\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:688: IndexError" }, "teardown": { - "duration": 0.0002288338728249073, + "duration": 0.00036500580608844757, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-compare_monthly_expense_tool]", - "lineno": 450, + "lineno": 471, "outcome": "failed", "keywords": [ "test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-compare_monthly_expense_tool]", @@ -3288,39 +3305,39 @@ "case_id": "compare_monthly_expense_tool" }, "setup": { - "duration": 0.009526000125333667, + "duration": 0.06934114638715982, "outcome": "passed" }, "call": { - "duration": 1.1714977910742164, + "duration": 0.5055103581398726, "outcome": "failed", "crash": { - "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 587, + "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", + "lineno": 688, "message": "IndexError: list index out of range" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 485, + "lineno": 506, "message": "" }, { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 587, + "lineno": 688, "message": "IndexError" } ], - "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'compare_monthly_expense_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'month': 1, 'year': ... 
'Total expenses for January 2025: $1000'}\"}, {'response': \"{'response': 'Total expenses for February 2024: $2000'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n> accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n\ntests/verifications/openai_api/test_chat_completion.py:485: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\nstream = \n\n def _accumulate_streaming_tool_calls(stream):\n \"\"\"Accumulates tool calls and content from a streaming ChatCompletion response.\"\"\"\n tool_calls_buffer = {}\n current_id = None\n full_content = \"\" # Initialize content accumulator\n # Process streaming chunks\n for chunk in stream:\n> choice = chunk.choices[0]\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:587: IndexError" + "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'compare_monthly_expense_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'month': 1, 'year': ... 
'Total expenses for January 2025: $1000'}\"}, {'response': \"{'response': 'Total expenses for February 2024: $2000'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n> accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n\ntests/verifications/openai_api/test_chat_completion.py:506: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\nstream = \n\n def _accumulate_streaming_tool_calls(stream):\n \"\"\"Accumulates tool calls and content from a streaming ChatCompletion response.\"\"\"\n tool_calls_buffer = {}\n current_id = None\n full_content = \"\" # Initialize content accumulator\n # Process streaming chunks\n for chunk in stream:\n> choice = chunk.choices[0]\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:688: IndexError" }, "teardown": { - "duration": 0.00032483390532433987, + "duration": 0.00035354867577552795, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-text_then_weather_tool]", - "lineno": 450, + "lineno": 471, "outcome": "failed", "keywords": [ "test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-text_then_weather_tool]", @@ -3339,39 +3356,39 @@ "case_id": "text_then_weather_tool" }, "setup": { - "duration": 0.010107750073075294, + "duration": 0.07129869516938925, "outcome": "passed" }, "call": { - "duration": 0.26202141703106463, + "duration": 1.5799349313601851, "outcome": "failed", "crash": { - "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 587, + "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", + "lineno": 688, "message": "IndexError: list index out of range" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 485, + "lineno": 506, "message": "" }, { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 587, + "lineno": 688, "message": "IndexError" } ], - "longrepr": "request = >\nopenai_client = \nmodel = 
'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'text_then_weather_tool', 'expected': [{'answer': ['sol'], 'num_tool_calls': 0}, {'num_tool_calls': 1, 'to...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n> accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n\ntests/verifications/openai_api/test_chat_completion.py:485: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\nstream = \n\n def _accumulate_streaming_tool_calls(stream):\n \"\"\"Accumulates tool calls and content from a streaming ChatCompletion response.\"\"\"\n tool_calls_buffer = {}\n current_id = None\n full_content = \"\" # Initialize content accumulator\n # Process streaming chunks\n for chunk in stream:\n> choice = chunk.choices[0]\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:587: IndexError" + "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'text_then_weather_tool', 'expected': [{'answer': ['sol'], 'num_tool_calls': 0}, {'num_tool_calls': 1, 'to...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" 
\"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n> accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n\ntests/verifications/openai_api/test_chat_completion.py:506: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\nstream = \n\n def _accumulate_streaming_tool_calls(stream):\n \"\"\"Accumulates tool calls and content from a streaming ChatCompletion response.\"\"\"\n tool_calls_buffer = {}\n current_id = None\n full_content = \"\" # Initialize content accumulator\n # Process streaming chunks\n for chunk in stream:\n> choice = chunk.choices[0]\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:688: IndexError" }, "teardown": { - "duration": 0.00022558285854756832, + "duration": 0.00033699069172143936, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-weather_tool_then_text]", - "lineno": 450, + "lineno": 471, "outcome": "failed", "keywords": [ "test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-weather_tool_then_text]", @@ -3390,39 +3407,39 @@ "case_id": "weather_tool_then_text" }, "setup": { - "duration": 0.008256082888692617, + "duration": 0.07074506860226393, "outcome": "passed" }, "call": { - "duration": 0.3466235001105815, + "duration": 0.5245106862857938, "outcome": "failed", "crash": { - "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 587, + "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", + "lineno": 688, "message": "IndexError: list index out of range" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 485, + "lineno": 506, "message": "" }, { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 587, + "lineno": 688, "message": "IndexError" } ], - "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'weather_tool_then_text', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'location': 'San Francisco...], 'type': 'object'}}, 
'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n> accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n\ntests/verifications/openai_api/test_chat_completion.py:485: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\nstream = \n\n def _accumulate_streaming_tool_calls(stream):\n \"\"\"Accumulates tool calls and content from a streaming ChatCompletion response.\"\"\"\n tool_calls_buffer = {}\n current_id = None\n full_content = \"\" # Initialize content accumulator\n # Process streaming chunks\n for chunk in stream:\n> choice = chunk.choices[0]\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:587: IndexError" + "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'weather_tool_then_text', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'location': 'San Francisco...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while 
len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n> accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n\ntests/verifications/openai_api/test_chat_completion.py:506: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\nstream = \n\n def _accumulate_streaming_tool_calls(stream):\n \"\"\"Accumulates tool calls and content from a streaming ChatCompletion response.\"\"\"\n tool_calls_buffer = {}\n current_id = None\n full_content = \"\" # Initialize content accumulator\n # Process streaming chunks\n for chunk in stream:\n> choice = chunk.choices[0]\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:688: IndexError" }, "teardown": { - "duration": 0.000535458093509078, + "duration": 0.00042015407234430313, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-add_product_tool]", - "lineno": 450, + "lineno": 471, "outcome": "failed", "keywords": [ "test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-add_product_tool]", @@ -3441,39 +3458,39 @@ "case_id": "add_product_tool" }, "setup": { - "duration": 0.0180504999589175, + "duration": 0.07020766660571098, "outcome": "passed" }, "call": { - "duration": 1.8803812500555068, + "duration": 0.6389470677822828, "outcome": "failed", "crash": { - "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 587, + "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", + "lineno": 688, "message": "IndexError: list index out of range" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 485, + "lineno": 506, "message": "" }, { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 587, + "lineno": 688, "message": "IndexError" } ], - "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'add_product_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'inStock': True, 'name': 'Widget...}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': 'Successfully added product with id: 123'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, 
provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n> accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n\ntests/verifications/openai_api/test_chat_completion.py:485: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\nstream = \n\n def _accumulate_streaming_tool_calls(stream):\n \"\"\"Accumulates tool calls and content from a streaming ChatCompletion response.\"\"\"\n tool_calls_buffer = {}\n current_id = None\n full_content = \"\" # Initialize content accumulator\n # Process streaming chunks\n for chunk in stream:\n> choice = chunk.choices[0]\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:587: IndexError" + "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'add_product_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'inStock': True, 'name': 'Widget...}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': 'Successfully added product with id: 123'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n> accumulated_content, 
accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n\ntests/verifications/openai_api/test_chat_completion.py:506: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\nstream = \n\n def _accumulate_streaming_tool_calls(stream):\n \"\"\"Accumulates tool calls and content from a streaming ChatCompletion response.\"\"\"\n tool_calls_buffer = {}\n current_id = None\n full_content = \"\" # Initialize content accumulator\n # Process streaming chunks\n for chunk in stream:\n> choice = chunk.choices[0]\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:688: IndexError" }, "teardown": { - "duration": 0.00025062495842576027, + "duration": 0.00035757478326559067, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-get_then_create_event_tool]", - "lineno": 450, + "lineno": 471, "outcome": "failed", "keywords": [ "test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-get_then_create_event_tool]", @@ -3492,39 +3509,39 @@ "case_id": "get_then_create_event_tool" }, "setup": { - "duration": 0.00993091706186533, + "duration": 0.07121358439326286, "outcome": "passed" }, "call": { - "duration": 0.5258524999953806, + "duration": 0.5222592242062092, "outcome": "failed", "crash": { - "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 587, + "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", + "lineno": 688, "message": "IndexError: list index out of range" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 485, + "lineno": 506, "message": "" }, { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 587, + "lineno": 688, "message": "IndexError" } ], - "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'get_then_create_event_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'date': '2025-03-03', ...ents found for 2025-03-03 at 10:00'}\"}, {'response': \"{'response': 'Successfully created new event with id: e_123'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n 
if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n> accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n\ntests/verifications/openai_api/test_chat_completion.py:485: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\nstream = \n\n def _accumulate_streaming_tool_calls(stream):\n \"\"\"Accumulates tool calls and content from a streaming ChatCompletion response.\"\"\"\n tool_calls_buffer = {}\n current_id = None\n full_content = \"\" # Initialize content accumulator\n # Process streaming chunks\n for chunk in stream:\n> choice = chunk.choices[0]\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:587: IndexError" + "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'get_then_create_event_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'date': '2025-03-03', ...ents found for 2025-03-03 at 10:00'}\"}, {'response': \"{'response': 'Successfully created new event with id: e_123'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n> accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n\ntests/verifications/openai_api/test_chat_completion.py:506: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\nstream = \n\n def _accumulate_streaming_tool_calls(stream):\n \"\"\"Accumulates tool calls and content from a streaming ChatCompletion response.\"\"\"\n tool_calls_buffer = {}\n current_id = None\n full_content = \"\" # Initialize content accumulator\n # Process streaming 
chunks\n for chunk in stream:\n> choice = chunk.choices[0]\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:688: IndexError" }, "teardown": { - "duration": 0.0002823749091476202, + "duration": 0.0003436664119362831, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-compare_monthly_expense_tool]", - "lineno": 450, + "lineno": 471, "outcome": "failed", "keywords": [ "test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-compare_monthly_expense_tool]", @@ -3543,36 +3560,262 @@ "case_id": "compare_monthly_expense_tool" }, "setup": { - "duration": 0.047535917023196816, + "duration": 0.07017400953918695, "outcome": "passed" }, "call": { - "duration": 0.4426498331595212, + "duration": 1.7245550760999322, "outcome": "failed", "crash": { - "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 587, + "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", + "lineno": 688, "message": "IndexError: list index out of range" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 485, + "lineno": 506, "message": "" }, { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 587, + "lineno": 688, "message": "IndexError" } ], - "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'compare_monthly_expense_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'month': 1, 'year': ... 
'Total expenses for January 2025: $1000'}\"}, {'response': \"{'response': 'Total expenses for February 2024: $2000'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n> accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n\ntests/verifications/openai_api/test_chat_completion.py:485: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\nstream = \n\n def _accumulate_streaming_tool_calls(stream):\n \"\"\"Accumulates tool calls and content from a streaming ChatCompletion response.\"\"\"\n tool_calls_buffer = {}\n current_id = None\n full_content = \"\" # Initialize content accumulator\n # Process streaming chunks\n for chunk in stream:\n> choice = chunk.choices[0]\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:587: IndexError" + "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'compare_monthly_expense_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'month': 1, 'year': ... 
'Total expenses for January 2025: $1000'}\"}, {'response': \"{'response': 'Total expenses for February 2024: $2000'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n> accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n\ntests/verifications/openai_api/test_chat_completion.py:506: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\nstream = \n\n def _accumulate_streaming_tool_calls(stream):\n \"\"\"Accumulates tool calls and content from a streaming ChatCompletion response.\"\"\"\n tool_calls_buffer = {}\n current_id = None\n full_content = \"\" # Initialize content accumulator\n # Process streaming chunks\n for chunk in stream:\n> choice = chunk.choices[0]\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:688: IndexError" }, "teardown": { - "duration": 0.0010368749499320984, + "duration": 0.0003162780776619911, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_multi_turn_multiple_images[meta-llama/Llama-3.3-70B-Instruct-Turbo-stream=False]", + "lineno": 554, + "outcome": "skipped", + "keywords": [ + "test_chat_multi_turn_multiple_images[meta-llama/Llama-3.3-70B-Instruct-Turbo-stream=False]", + "parametrize", + "pytestmark", + "meta-llama/Llama-3.3-70B-Instruct-Turbo-stream=False", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo", + "case_id": "stream=False" + }, + "setup": { + "duration": 0.07253758516162634, + "outcome": "passed" + }, + "call": { + "duration": 0.00021537486463785172, + "outcome": "skipped", + "longrepr": "('/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py', 561, 'Skipped: Skipping test_chat_multi_turn_multiple_images for model meta-llama/Llama-3.3-70B-Instruct-Turbo on provider together based on config.')" + }, + "teardown": { + "duration": 0.0004162406548857689, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_multi_turn_multiple_images[meta-llama/Llama-3.3-70B-Instruct-Turbo-stream=True]", + "lineno": 554, + "outcome": "skipped", + 
"keywords": [ + "test_chat_multi_turn_multiple_images[meta-llama/Llama-3.3-70B-Instruct-Turbo-stream=True]", + "parametrize", + "pytestmark", + "meta-llama/Llama-3.3-70B-Instruct-Turbo-stream=True", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo", + "case_id": "stream=True" + }, + "setup": { + "duration": 0.07268107868731022, + "outcome": "passed" + }, + "call": { + "duration": 0.0002132616937160492, + "outcome": "skipped", + "longrepr": "('/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py', 561, 'Skipped: Skipping test_chat_multi_turn_multiple_images for model meta-llama/Llama-3.3-70B-Instruct-Turbo on provider together based on config.')" + }, + "teardown": { + "duration": 0.00021094270050525665, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_multi_turn_multiple_images[meta-llama/Llama-4-Scout-17B-16E-Instruct-stream=False]", + "lineno": 554, + "outcome": "passed", + "keywords": [ + "test_chat_multi_turn_multiple_images[meta-llama/Llama-4-Scout-17B-16E-Instruct-stream=False]", + "parametrize", + "pytestmark", + "meta-llama/Llama-4-Scout-17B-16E-Instruct-stream=False", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "case_id": "stream=False" + }, + "setup": { + "duration": 0.07398672867566347, + "outcome": "passed" + }, + "call": { + "duration": 4.383559702895582, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0002781357616186142, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_multi_turn_multiple_images[meta-llama/Llama-4-Scout-17B-16E-Instruct-stream=True]", + "lineno": 554, + "outcome": "failed", + "keywords": [ + "test_chat_multi_turn_multiple_images[meta-llama/Llama-4-Scout-17B-16E-Instruct-stream=True]", + "parametrize", + "pytestmark", + "meta-llama/Llama-4-Scout-17B-16E-Instruct-stream=True", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "case_id": "stream=True" + }, + "setup": { + "duration": 0.08006586041301489, + "outcome": "passed" + }, + "call": { + "duration": 2.16784877050668, + "outcome": "failed", + "crash": { + "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", + "lineno": 596, + "message": "IndexError: list index out of range" + }, + "traceback": [ + { + "path": "tests/verifications/openai_api/test_chat_completion.py", + "lineno": 596, + "message": "IndexError" + } + ], + "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\nmulti_image_data = ['data:image/jpeg;base64,/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAIBAQEBAQIBAQECAgICAgQDAgICAgUEBAMEBgUGBgYFBgYGBwkIBgcJBwYGC...6pH9jaTzNv7vfRRXzubfxj9f8Pv8AkTz/AMX/ALbEz5Ly38lfMk/5Z/u64PxhqEZh+z/6rzvn2UUV5EvgPuzy/wAc6p5dt5ccibJpNkkdFFFec27mZ//Z']\nstream = True\n\n @pytest.mark.parametrize(\"stream\", [False, True], 
ids=[\"stream=False\", \"stream=True\"])\n def test_chat_multi_turn_multiple_images(\n request, openai_client, model, provider, verification_config, multi_image_data, stream\n ):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages_turn1 = [\n {\n \"role\": \"user\",\n \"content\": [\n {\n \"type\": \"image_url\",\n \"image_url\": {\n \"url\": multi_image_data[0],\n },\n },\n {\n \"type\": \"image_url\",\n \"image_url\": {\n \"url\": multi_image_data[1],\n },\n },\n {\n \"type\": \"text\",\n \"text\": \"What furniture is in the first image that is not in the second image?\",\n },\n ],\n },\n ]\n \n # First API call\n response1 = openai_client.chat.completions.create(\n model=model,\n messages=messages_turn1,\n stream=stream,\n )\n if stream:\n message_content1 = \"\"\n for chunk in response1:\n> message_content1 += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:596: IndexError" + }, + "teardown": { + "duration": 0.0003619194030761719, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_multi_turn_multiple_images[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-stream=False]", + "lineno": 554, + "outcome": "passed", + "keywords": [ + "test_chat_multi_turn_multiple_images[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-stream=False]", + "parametrize", + "pytestmark", + "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-stream=False", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", + "case_id": "stream=False" + }, + "setup": { + "duration": 0.0709412069991231, + "outcome": "passed" + }, + "call": { + "duration": 6.110534753650427, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0002450142055749893, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_multi_turn_multiple_images[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-stream=True]", + "lineno": 554, + "outcome": "failed", + "keywords": [ + "test_chat_multi_turn_multiple_images[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-stream=True]", + "parametrize", + "pytestmark", + "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-stream=True", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", + "case_id": "stream=True" + }, + "setup": { + "duration": 0.0725309094414115, + "outcome": "passed" + }, + "call": { + "duration": 2.291131243109703, + "outcome": "failed", + "crash": { + "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", + "lineno": 596, + "message": "IndexError: list index out of range" + }, + "traceback": [ + { + "path": "tests/verifications/openai_api/test_chat_completion.py", + "lineno": 596, + "message": "IndexError" + } + ], + "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 
'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\nmulti_image_data = ['data:image/jpeg;base64,/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAIBAQEBAQIBAQECAgICAgQDAgICAgUEBAMEBgUGBgYFBgYGBwkIBgcJBwYGC...6pH9jaTzNv7vfRRXzubfxj9f8Pv8AkTz/AMX/ALbEz5Ly38lfMk/5Z/u64PxhqEZh+z/6rzvn2UUV5EvgPuzy/wAc6p5dt5ccibJpNkkdFFFec27mZ//Z']\nstream = True\n\n @pytest.mark.parametrize(\"stream\", [False, True], ids=[\"stream=False\", \"stream=True\"])\n def test_chat_multi_turn_multiple_images(\n request, openai_client, model, provider, verification_config, multi_image_data, stream\n ):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages_turn1 = [\n {\n \"role\": \"user\",\n \"content\": [\n {\n \"type\": \"image_url\",\n \"image_url\": {\n \"url\": multi_image_data[0],\n },\n },\n {\n \"type\": \"image_url\",\n \"image_url\": {\n \"url\": multi_image_data[1],\n },\n },\n {\n \"type\": \"text\",\n \"text\": \"What furniture is in the first image that is not in the second image?\",\n },\n ],\n },\n ]\n \n # First API call\n response1 = openai_client.chat.completions.create(\n model=model,\n messages=messages_turn1,\n stream=stream,\n )\n if stream:\n message_content1 = \"\"\n for chunk in response1:\n> message_content1 += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:596: IndexError" + }, + "teardown": { + "duration": 0.0018906639888882637, "outcome": "passed" } } ], - "run_timestamp": 1744679294 + "run_timestamp": 1744918065 } diff --git a/uv.lock b/uv.lock index 97dc37693..78e6c56f4 100644 --- a/uv.lock +++ b/uv.lock @@ -1,4 +1,5 @@ version = 1 +revision = 1 requires-python = ">=3.10" resolution-markers = [ "(python_full_version < '3.11' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version < '3.11' and sys_platform != 'darwin' and sys_platform != 'linux')", @@ -956,11 +957,11 @@ wheels = [ [[package]] name = "h11" -version = "0.14.0" +version = "0.16.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/f5/38/3af3d3633a34a3316095b39c8e8fb4853a28a536e55d347bd8d8e9a14b03/h11-0.14.0.tar.gz", hash = "sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d", size = 100418 } +sdist = { url = "https://files.pythonhosted.org/packages/01/ee/02a2c011bdab74c6fb3c75474d40b3052059d95df7e73351460c8588d963/h11-0.16.0.tar.gz", hash = "sha256:4e35b956cf45792e4caa5885e69fba00bdbc6ffafbfa020300e549b208ee5ff1", size = 101250 } wheels = [ - { url = "https://files.pythonhosted.org/packages/95/04/ff642e65ad6b90db43e668d70ffb6736436c7ce41fcc549f4e9472234127/h11-0.14.0-py3-none-any.whl", hash = "sha256:e3fe4ac4b851c468cc8363d500db52c2ead036020723024a109d37346efaa761", size = 58259 }, + { url = "https://files.pythonhosted.org/packages/04/4b/29cac41a4d98d144bf5f6d33995617b185d14b22401f75ca86f384e87ff1/h11-0.16.0-py3-none-any.whl", hash = "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86", size = 37515 }, ] [[package]] @@ -987,15 +988,15 @@ wheels = [ [[package]] name = "httpcore" -version = "1.0.7" +version = "1.0.9" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "certifi" }, { name = "h11" }, ] -sdist = { url = 
"https://files.pythonhosted.org/packages/6a/41/d7d0a89eb493922c37d343b607bc1b5da7f5be7e383740b4753ad8943e90/httpcore-1.0.7.tar.gz", hash = "sha256:8551cb62a169ec7162ac7be8d4817d561f60e08eaa485234898414bb5a8a0b4c", size = 85196 } +sdist = { url = "https://files.pythonhosted.org/packages/06/94/82699a10bca87a5556c9c59b5963f2d039dbd239f25bc2a63907a05a14cb/httpcore-1.0.9.tar.gz", hash = "sha256:6e34463af53fd2ab5d807f399a9b45ea31c3dfa2276f15a2c3f00afff6e176e8", size = 85484 } wheels = [ - { url = "https://files.pythonhosted.org/packages/87/f5/72347bc88306acb359581ac4d52f23c0ef445b57157adedb9aee0cd689d2/httpcore-1.0.7-py3-none-any.whl", hash = "sha256:a3fff8f43dc260d5bd363d9f9cf1830fa3a458b332856f34282de498ed420edd", size = 78551 }, + { url = "https://files.pythonhosted.org/packages/7e/f5/f66802a942d491edb555dd61e3a9961140fd64c90bce1eafd741609d334d/httpcore-1.0.9-py3-none-any.whl", hash = "sha256:2d400746a40668fc9dec9810239072b40b4484b640a8c38fd654a024c7a1bf55", size = 78784 }, ] [[package]] @@ -1378,6 +1379,7 @@ source = { editable = "." } dependencies = [ { name = "blobfile" }, { name = "fire" }, + { name = "h11" }, { name = "httpx" }, { name = "huggingface-hub" }, { name = "jinja2" }, @@ -1410,6 +1412,7 @@ dev = [ { name = "pytest-asyncio" }, { name = "pytest-cov" }, { name = "pytest-html" }, + { name = "pytest-json-report" }, { name = "ruamel-yaml" }, { name = "ruff" }, { name = "types-requests" }, @@ -1456,6 +1459,7 @@ unit = [ { name = "aiosqlite" }, { name = "chardet" }, { name = "openai" }, + { name = "opentelemetry-exporter-otlp-proto-http" }, { name = "pypdf" }, { name = "qdrant-client" }, { name = "sqlite-vec" }, @@ -1475,6 +1479,7 @@ requires-dist = [ { name = "datasets", marker = "extra == 'test'" }, { name = "fastapi", marker = "extra == 'dev'" }, { name = "fire" }, + { name = "h11", specifier = ">=0.16.0" }, { name = "httpx" }, { name = "huggingface-hub" }, { name = "jinja2", specifier = ">=3.1.6" }, @@ -1489,6 +1494,7 @@ requires-dist = [ { name = "openai", marker = "extra == 'test'" }, { name = "openai", marker = "extra == 'unit'" }, { name = "opentelemetry-exporter-otlp-proto-http", marker = "extra == 'test'" }, + { name = "opentelemetry-exporter-otlp-proto-http", marker = "extra == 'unit'" }, { name = "opentelemetry-sdk", marker = "extra == 'test'" }, { name = "pandas", marker = "extra == 'ui'" }, { name = "pillow" }, @@ -1502,6 +1508,7 @@ requires-dist = [ { name = "pytest-asyncio", marker = "extra == 'dev'" }, { name = "pytest-cov", marker = "extra == 'dev'" }, { name = "pytest-html", marker = "extra == 'dev'" }, + { name = "pytest-json-report", marker = "extra == 'dev'" }, { name = "python-dotenv" }, { name = "qdrant-client", marker = "extra == 'unit'" }, { name = "requests" }, @@ -1531,6 +1538,7 @@ requires-dist = [ { name = "types-setuptools", marker = "extra == 'dev'" }, { name = "uvicorn", marker = "extra == 'dev'" }, ] +provides-extras = ["dev", "unit", "test", "docs", "codegen", "ui"] [[package]] name = "llama-stack-client" @@ -2740,6 +2748,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c8/c7/c160021cbecd956cc1a6f79e5fe155f7868b2e5b848f1320dad0b3e3122f/pytest_html-4.1.1-py3-none-any.whl", hash = "sha256:c8152cea03bd4e9bee6d525573b67bbc6622967b72b9628dda0ea3e2a0b5dd71", size = 23491 }, ] +[[package]] +name = "pytest-json-report" +version = "1.5.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pytest" }, + { name = "pytest-metadata" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/4f/d3/765dae9712fcd68d820338908c1337e077d5fdadccd5cacf95b9b0bea278/pytest-json-report-1.5.0.tar.gz", hash = "sha256:2dde3c647851a19b5f3700729e8310a6e66efb2077d674f27ddea3d34dc615de", size = 21241 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/81/35/d07400c715bf8a88aa0c1ee9c9eb6050ca7fe5b39981f0eea773feeb0681/pytest_json_report-1.5.0-py3-none-any.whl", hash = "sha256:9897b68c910b12a2e48dd849f9a284b2c79a732a8a9cb398452ddd23d3c8c325", size = 13222 }, +] + [[package]] name = "pytest-metadata" version = "3.1.1"