From 7d9adf22adc14a5ce5a639f0b6cbd3ba269e7ca1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Han?= Date: Wed, 9 Apr 2025 03:54:00 +0200 Subject: [PATCH 01/21] refactor: move missing tests to test directory (#1892) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move the test_context.py under the main tests directory, and fix the code. The problem was that the function captures the initial values of the context variables and then restores those same initial values before each iteration. This means that any modifications made to the context variables during iteration are lost when the next iteration starts. Error was: ``` ====================================================== FAILURES ======================================================= ______________________________________ test_preserve_contexts_across_event_loops ______________________________________ @pytest.mark.asyncio async def test_preserve_contexts_across_event_loops(): """ Test that context variables are preserved across event loop boundaries with nested generators. This simulates the real-world scenario where: 1. A new event loop is created for each streaming request 2. The async generator runs inside that loop 3. There are multiple levels of nested generators 4. Context needs to be preserved across these boundaries """ # Create context variables request_id = ContextVar("request_id", default=None) user_id = ContextVar("user_id", default=None) # Set initial values # Results container to verify values across thread boundaries results = [] # Inner-most generator (level 2) async def inner_generator(): # Should have the context from the outer scope yield (1, request_id.get(), user_id.get()) # Modify one context variable user_id.set("user-modified") # Should reflect the modification yield (2, request_id.get(), user_id.get()) # Middle generator (level 1) async def middle_generator(): inner_gen = inner_generator() # Forward the first yield from inner item = await inner_gen.__anext__() yield item # Forward the second yield from inner item = await inner_gen.__anext__() yield item request_id.set("req-modified") # Add our own yield with both modified variables yield (3, request_id.get(), user_id.get()) # Function to run in a separate thread with a new event loop def run_in_new_loop(): # Create a new event loop for this thread loop = asyncio.new_event_loop() asyncio.set_event_loop(loop) try: # Outer generator (runs in the new loop) async def outer_generator(): request_id.set("req-12345") user_id.set("user-6789") # Wrap the middle generator wrapped_gen = preserve_contexts_async_generator(middle_generator(), [request_id, user_id]) # Process all items from the middle generator async for item in wrapped_gen: # Store results for verification results.append(item) # Run the outer generator in the new loop loop.run_until_complete(outer_generator()) finally: loop.close() # Run the generator chain in a separate thread with a new event loop with ThreadPoolExecutor(max_workers=1) as executor: future = executor.submit(run_in_new_loop) future.result() # Wait for completion # Verify the results assert len(results) == 3 # First yield should have original values assert results[0] == (1, "req-12345", "user-6789") # Second yield should have modified user_id assert results[1] == (2, "req-12345", "user-modified") # Third yield should have both modified values > assert results[2] == (3, "req-modified", "user-modified") E AssertionError: assert (3, 'req-modified', 'user-6789') == (3, 'req-modified', 
'user-modified') E E At index 2 diff: 'user-6789' != 'user-modified' E E Full diff: E ( E 3, E 'req-modified', E - 'user-modified', E + 'user-6789', E ) tests/unit/distribution/test_context.py:155: AssertionError -------------------------------------------------- Captured log call -------------------------------------------------- ERROR asyncio:base_events.py:1758 Task was destroyed but it is pending! task: ()>> ================================================== warnings summary =================================================== .venv/lib/python3.10/site-packages/pydantic/fields.py:1042 /Users/leseb/Documents/AI/llama-stack/.venv/lib/python3.10/site-packages/pydantic/fields.py:1042: PydanticDeprecatedSince20: Using extra keyword arguments on `Field` is deprecated and will be removed. Use `json_schema_extra` instead. (Extra keys: 'contentEncoding'). Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.10/migration/ warn( -- Docs: https://docs.pytest.org/en/stable/how-to/capture-warnings.html =============================================== short test summary info =============================================== FAILED tests/unit/distribution/test_context.py::test_preserve_contexts_across_event_loops - AssertionError: assert (3, 'req-modified', 'user-6789') == (3, 'req-modified', 'user-modified') At index 2 diff: 'user-6789' != 'user-modified' Full diff: ( 3, 'req-modified', - 'user-modified', + 'user-6789', ) ``` [//]: # (## Documentation) Signed-off-by: Sébastien Han --- llama_stack/distribution/utils/context.py | 5 +++++ .../utils/tests => tests/unit/distribution}/test_context.py | 0 2 files changed, 5 insertions(+) rename {llama_stack/distribution/utils/tests => tests/unit/distribution}/test_context.py (100%) diff --git a/llama_stack/distribution/utils/context.py b/llama_stack/distribution/utils/context.py index fcc72161d..c34079ac6 100644 --- a/llama_stack/distribution/utils/context.py +++ b/llama_stack/distribution/utils/context.py @@ -29,6 +29,11 @@ def preserve_contexts_async_generator( context_var.set(initial_context_values[context_var.name]) item = await gen.__anext__() + + # Update our tracked values with any changes made during this iteration + for context_var in context_vars: + initial_context_values[context_var.name] = context_var.get() + yield item except StopAsyncIteration: diff --git a/llama_stack/distribution/utils/tests/test_context.py b/tests/unit/distribution/test_context.py similarity index 100% rename from llama_stack/distribution/utils/tests/test_context.py rename to tests/unit/distribution/test_context.py From bcbc56baa2368f18823cd507c7d68c6fec5a768a Mon Sep 17 00:00:00 2001 From: ehhuang Date: Tue, 8 Apr 2025 21:21:38 -0700 Subject: [PATCH 02/21] feat: adds test suite to verify provider's OAI compat endpoints (#1901) # What does this PR do? 
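Adds a `tests/verifications` suite that runs the same chat-completion checks against the OpenAI-compatible endpoints of several providers (Fireworks, Together, Groq, Cerebras, and OpenAI itself), plus a report generator that summarizes the results in `REPORT.md`.

As a rough illustration of the pattern each test follows (this sketch is not part of the patch; the base URL, environment variable, and model mapping shown here mirror the Together entries added in `fixtures.py`), a test points the standard OpenAI client at the provider's endpoint and asserts on the response:

```python
import os

from openai import OpenAI

# Together's OpenAI-compatible endpoint, per provider_metadata in fixtures.py
client = OpenAI(
    base_url="https://api.together.xyz/v1",
    api_key=os.getenv("TOGETHER_API_KEY"),
)

# Provider-specific name mapped from the generic "Llama-3.3-70B-Instruct" test model
response = client.chat.completions.create(
    model="meta-llama/Llama-3.3-70B-Instruct-Turbo",
    messages=[{"role": "user", "content": "Which planet do humans live on?"}],
    stream=False,
)

# Same assertion style as test_chat_non_streaming_basic
assert response.choices[0].message.role == "assistant"
assert "earth" in response.choices[0].message.content.lower()
```

The full matrix of cases (streaming, image input, structured output, tool calling) is driven by `openai/fixtures/test_cases/chat_completion.yaml`.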
## Test Plan pytest verifications/openai/test_chat_completion.py --provider together --- tests/verifications/README.md | 65 + tests/verifications/REPORT.md | 88 + tests/verifications/__init__.py | 5 + tests/verifications/conftest.py | 28 + tests/verifications/generate_report.py | 485 +++ tests/verifications/openai/__init__.py | 5 + .../verifications/openai/fixtures/__init__.py | 5 + .../verifications/openai/fixtures/fixtures.py | 97 + tests/verifications/openai/fixtures/load.py | 16 + .../fixtures/test_cases/chat_completion.yaml | 162 + .../openai/test_chat_completion.py | 202 ++ .../test_results/fireworks_1744154308.json | 2744 ++++++++++++++++ .../test_results/openai_1744154522.json | 2672 ++++++++++++++++ .../test_results/together_1744154399.json | 2830 +++++++++++++++++ 14 files changed, 9404 insertions(+) create mode 100644 tests/verifications/README.md create mode 100644 tests/verifications/REPORT.md create mode 100644 tests/verifications/__init__.py create mode 100644 tests/verifications/conftest.py create mode 100755 tests/verifications/generate_report.py create mode 100644 tests/verifications/openai/__init__.py create mode 100644 tests/verifications/openai/fixtures/__init__.py create mode 100644 tests/verifications/openai/fixtures/fixtures.py create mode 100644 tests/verifications/openai/fixtures/load.py create mode 100644 tests/verifications/openai/fixtures/test_cases/chat_completion.yaml create mode 100644 tests/verifications/openai/test_chat_completion.py create mode 100644 tests/verifications/test_results/fireworks_1744154308.json create mode 100644 tests/verifications/test_results/openai_1744154522.json create mode 100644 tests/verifications/test_results/together_1744154399.json diff --git a/tests/verifications/README.md b/tests/verifications/README.md new file mode 100644 index 000000000..986ff1087 --- /dev/null +++ b/tests/verifications/README.md @@ -0,0 +1,65 @@ +# Llama Stack Verifications + +Llama Stack Verifications provide standardized test suites to ensure API compatibility and behavior consistency across different LLM providers. These tests help verify that different models and providers implement the expected interfaces and behaviors correctly. + +## Overview + +This framework allows you to run the same set of verification tests against different LLM providers' OpenAI-compatible endpoints (Fireworks, Together, Groq, Cerebras, etc., and OpenAI itself) to ensure they meet the expected behavior and interface standards. + +## Features + +The verification suite currently tests: + +- Basic chat completions (streaming and non-streaming) +- Image input capabilities +- Structured JSON output formatting +- Tool calling functionality + +## Running Tests + +To run the verification tests, use pytest with the following parameters: + +```bash +cd llama-stack +pytest tests/verifications/openai --provider= +``` + +Example: +```bash +# Run all tests +pytest tests/verifications/openai --provider=together + +# Only run tests with Llama 4 models +pytest tests/verifications/openai --provider=together -k 'Llama-4' +``` + +### Parameters + +- `--provider`: The provider name (openai, fireworks, together, groq, cerebras, etc.) 
+- `--base-url`: The base URL for the provider's API (optional - defaults to the standard URL for the specified provider) +- `--api-key`: Your API key for the provider (optional - defaults to the standard API_KEY name for the specified provider) + +## Supported Providers + +The verification suite currently supports: +- OpenAI +- Fireworks +- Together +- Groq +- Cerebras + +## Adding New Test Cases + +To add new test cases, create appropriate JSON files in the `openai/fixtures/test_cases/` directory following the existing patterns. + + +## Structure + +- `__init__.py` - Marks the directory as a Python package +- `conftest.py` - Global pytest configuration and fixtures +- `openai/` - Tests specific to OpenAI-compatible APIs + - `fixtures/` - Test fixtures and utilities + - `fixtures.py` - Provider-specific fixtures + - `load.py` - Utilities for loading test cases + - `test_cases/` - JSON test case definitions + - `test_chat_completion.py` - Tests for chat completion APIs diff --git a/tests/verifications/REPORT.md b/tests/verifications/REPORT.md new file mode 100644 index 000000000..d5715ae21 --- /dev/null +++ b/tests/verifications/REPORT.md @@ -0,0 +1,88 @@ +# Test Results Report + +*Generated on: 2025-04-08 21:14:02* + +*This report was generated by running `python tests/verifications/generate_report.py`* + +## Legend + +- ✅ - Test passed +- ❌ - Test failed +- ⚪ - Test not applicable or not run for this model + + +## Summary + +| Provider | Pass Rate | Tests Passed | Total Tests | +| --- | --- | --- | --- | +| Together | 67.7% | 21 | 31 | +| Fireworks | 90.3% | 28 | 31 | +| Openai | 100.0% | 22 | 22 | + + + +## Together + +*Tests run on: 2025-04-08 16:19:59* + +```bash +pytest tests/verifications/openai/test_chat_completion.py --provider=together -v +``` + +| Test | Llama-3.3-70B-Instruct | Llama-4-Maverick-17B-128E-Instruct | Llama-4-Scout-17B-16E-Instruct | +| --- | --- | --- | --- | +| test_chat_non_streaming_basic (case 0) | ✅ | ✅ | ✅ | +| test_chat_non_streaming_basic (case 1) | ✅ | ✅ | ✅ | +| test_chat_non_streaming_image (case 0) | ⚪ | ✅ | ✅ | +| test_chat_non_streaming_structured_output (case 0) | ✅ | ✅ | ✅ | +| test_chat_non_streaming_structured_output (case 1) | ✅ | ✅ | ✅ | +| test_chat_non_streaming_tool_calling (case 0) | ✅ | ✅ | ✅ | +| test_chat_streaming_basic (case 0) | ✅ | ❌ | ❌ | +| test_chat_streaming_basic (case 1) | ✅ | ❌ | ❌ | +| test_chat_streaming_image (case 0) | ⚪ | ❌ | ❌ | +| test_chat_streaming_structured_output (case 0) | ✅ | ❌ | ❌ | +| test_chat_streaming_structured_output (case 1) | ✅ | ❌ | ❌ | + +## Fireworks + +*Tests run on: 2025-04-08 16:18:28* + +```bash +pytest tests/verifications/openai/test_chat_completion.py --provider=fireworks -v +``` + +| Test | Llama-3.3-70B-Instruct | Llama-4-Maverick-17B-128E-Instruct | Llama-4-Scout-17B-16E-Instruct | +| --- | --- | --- | --- | +| test_chat_non_streaming_basic (case 0) | ✅ | ✅ | ✅ | +| test_chat_non_streaming_basic (case 1) | ✅ | ✅ | ✅ | +| test_chat_non_streaming_image (case 0) | ⚪ | ✅ | ✅ | +| test_chat_non_streaming_structured_output (case 0) | ✅ | ✅ | ✅ | +| test_chat_non_streaming_structured_output (case 1) | ✅ | ✅ | ✅ | +| test_chat_non_streaming_tool_calling (case 0) | ✅ | ❌ | ❌ | +| test_chat_streaming_basic (case 0) | ✅ | ✅ | ✅ | +| test_chat_streaming_basic (case 1) | ✅ | ✅ | ✅ | +| test_chat_streaming_image (case 0) | ⚪ | ✅ | ✅ | +| test_chat_streaming_structured_output (case 0) | ✅ | ✅ | ✅ | +| test_chat_streaming_structured_output (case 1) | ❌ | ✅ | ✅ | + +## Openai + +*Tests run on: 2025-04-08 
16:22:02* + +```bash +pytest tests/verifications/openai/test_chat_completion.py --provider=openai -v +``` + +| Test | gpt-4o | gpt-4o-mini | +| --- | --- | --- | +| test_chat_non_streaming_basic (case 0) | ✅ | ✅ | +| test_chat_non_streaming_basic (case 1) | ✅ | ✅ | +| test_chat_non_streaming_image (case 0) | ✅ | ✅ | +| test_chat_non_streaming_structured_output (case 0) | ✅ | ✅ | +| test_chat_non_streaming_structured_output (case 1) | ✅ | ✅ | +| test_chat_non_streaming_tool_calling (case 0) | ✅ | ✅ | +| test_chat_streaming_basic (case 0) | ✅ | ✅ | +| test_chat_streaming_basic (case 1) | ✅ | ✅ | +| test_chat_streaming_image (case 0) | ✅ | ✅ | +| test_chat_streaming_structured_output (case 0) | ✅ | ✅ | +| test_chat_streaming_structured_output (case 1) | ✅ | ✅ | diff --git a/tests/verifications/__init__.py b/tests/verifications/__init__.py new file mode 100644 index 000000000..756f351d8 --- /dev/null +++ b/tests/verifications/__init__.py @@ -0,0 +1,5 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. diff --git a/tests/verifications/conftest.py b/tests/verifications/conftest.py new file mode 100644 index 000000000..08967e834 --- /dev/null +++ b/tests/verifications/conftest.py @@ -0,0 +1,28 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + + +def pytest_addoption(parser): + parser.addoption( + "--base-url", + action="store", + help="Base URL for OpenAI compatible API", + ) + parser.addoption( + "--api-key", + action="store", + help="API key", + ) + parser.addoption( + "--provider", + action="store", + help="Provider to use for testing", + ) + + +pytest_plugins = [ + "tests.verifications.openai.fixtures.fixtures", +] diff --git a/tests/verifications/generate_report.py b/tests/verifications/generate_report.py new file mode 100755 index 000000000..98a5930da --- /dev/null +++ b/tests/verifications/generate_report.py @@ -0,0 +1,485 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
+ +""" +Test Report Generator + +Requirements: + pip install pytest-json-report + +Usage: + # Generate a report using existing test results + python tests/verifications/generate_report.py + + # Run tests and generate a report + python tests/verifications/generate_report.py --run-tests + + # Run tests for specific providers + python tests/verifications/generate_report.py --run-tests --providers fireworks openai + + # Save the report to a custom location + python tests/verifications/generate_report.py --output custom_report.md + + # Clean up old test result files + python tests/verifications/generate_report.py --cleanup +""" + +import argparse +import json +import os +import re +import subprocess +import time +from collections import defaultdict +from pathlib import Path + +# Define the root directory for test results +RESULTS_DIR = Path(__file__).parent / "test_results" +RESULTS_DIR.mkdir(exist_ok=True) + +# Maximum number of test result files to keep per provider +MAX_RESULTS_PER_PROVIDER = 1 + +# Custom order of providers +PROVIDER_ORDER = ["together", "fireworks", "groq", "cerebras", "openai"] + +# Dictionary to store providers and their models (will be populated dynamically) +PROVIDERS = defaultdict(set) + +# Tests will be dynamically extracted from results +ALL_TESTS = set() + + +def run_tests(provider): + """Run pytest for a specific provider and save results""" + print(f"Running tests for provider: {provider}") + + timestamp = int(time.time()) + result_file = RESULTS_DIR / f"{provider}_{timestamp}.json" + temp_json_file = RESULTS_DIR / f"temp_{provider}_{timestamp}.json" + + # Run pytest with JSON output + cmd = [ + "python", + "-m", + "pytest", + "tests/verifications/openai/test_chat_completion.py", + f"--provider={provider}", + "-v", + "--json-report", + f"--json-report-file={temp_json_file}", + ] + + try: + result = subprocess.run(cmd, capture_output=True, text=True) + print(f"Pytest exit code: {result.returncode}") + + # Check if the JSON file was created + if temp_json_file.exists(): + # Read the JSON file and save it to our results format + with open(temp_json_file, "r") as f: + test_results = json.load(f) + + # Save results to our own format with a trailing newline + with open(result_file, "w") as f: + json.dump(test_results, f, indent=2) + f.write("\n") # Add a trailing newline for precommit + + # Clean up temp file + temp_json_file.unlink() + + print(f"Test results saved to {result_file}") + return result_file + else: + print(f"Error: JSON report file not created for {provider}") + print(f"Command stdout: {result.stdout}") + print(f"Command stderr: {result.stderr}") + return None + except Exception as e: + print(f"Error running tests for {provider}: {e}") + return None + + +def parse_results(result_file): + """Parse the test results file and extract pass/fail by model and test""" + if not os.path.exists(result_file): + print(f"Results file does not exist: {result_file}") + return {} + + with open(result_file, "r") as f: + results = json.load(f) + + # Initialize results dictionary + parsed_results = defaultdict(lambda: defaultdict(dict)) + provider = os.path.basename(result_file).split("_")[0] + + # Debug: Print summary of test results + print(f"Test results summary for {provider}:") + print(f"Total tests: {results.get('summary', {}).get('total', 0)}") + print(f"Passed: {results.get('summary', {}).get('passed', 0)}") + print(f"Failed: {results.get('summary', {}).get('failed', 0)}") + print(f"Error: {results.get('summary', {}).get('error', 0)}") + print(f"Skipped: 
{results.get('summary', {}).get('skipped', 0)}") + + # Extract test results + if "tests" not in results or not results["tests"]: + print(f"No test results found in {result_file}") + return parsed_results + + # Map for normalizing model names + model_name_map = { + "Llama-3.3-8B-Instruct": "Llama-3.3-8B-Instruct", + "Llama-3.3-70B-Instruct": "Llama-3.3-70B-Instruct", + "Llama-3.2-11B-Vision-Instruct": "Llama-3.2-11B-Vision-Instruct", + "Llama-4-Scout-17B-16E": "Llama-4-Scout-17B-16E-Instruct", + "Llama-4-Scout-17B-16E-Instruct": "Llama-4-Scout-17B-16E-Instruct", + "Llama-4-Maverick-17B-128E": "Llama-4-Maverick-17B-128E-Instruct", + "Llama-4-Maverick-17B-128E-Instruct": "Llama-4-Maverick-17B-128E-Instruct", + "gpt-4o": "gpt-4o", + "gpt-4o-mini": "gpt-4o-mini", + } + + # Keep track of all models found for this provider + provider_models = set() + + # Track all unique test cases for each base test + test_case_counts = defaultdict(int) + + # First pass: count the number of cases for each test + for test in results["tests"]: + test_id = test.get("nodeid", "") + + if "call" in test: + test_name = test_id.split("::")[1].split("[")[0] + input_output_match = re.search(r"\[input_output(\d+)-", test_id) + if input_output_match: + test_case_counts[test_name] += 1 + + # Second pass: process the tests with case numbers only for tests with multiple cases + for test in results["tests"]: + test_id = test.get("nodeid", "") + outcome = test.get("outcome", "") + + # Only process tests that have been executed (not setup errors) + if "call" in test: + # Regular test that actually ran + test_name = test_id.split("::")[1].split("[")[0] + + # Extract input_output parameter to differentiate between test cases + input_output_match = re.search(r"\[input_output(\d+)-", test_id) + input_output_index = input_output_match.group(1) if input_output_match else "" + + # Create a more detailed test name with case number only if there are multiple cases + detailed_test_name = test_name + if input_output_index and test_case_counts[test_name] > 1: + detailed_test_name = f"{test_name} (case {input_output_index})" + + # Track all unique test names + ALL_TESTS.add(detailed_test_name) + + # Extract model name from test_id using a more robust pattern + model_match = re.search(r"\[input_output\d+-([^\]]+)\]", test_id) + if model_match: + raw_model = model_match.group(1) + model = model_name_map.get(raw_model, raw_model) + + # Add to set of known models for this provider + provider_models.add(model) + + # Also update the global PROVIDERS dictionary + PROVIDERS[provider].add(model) + + # Store the result + if outcome == "passed": + parsed_results[provider][model][detailed_test_name] = True + else: + parsed_results[provider][model][detailed_test_name] = False + + print(f"Parsed test result: {detailed_test_name} for model {model}: {outcome}") + elif outcome == "error" and "setup" in test and test.get("setup", {}).get("outcome") == "failed": + # This is a setup failure, which likely means a configuration issue + # Extract the base test name and model name + parts = test_id.split("::") + if len(parts) > 1: + test_name = parts[1].split("[")[0] + + # Extract input_output parameter to differentiate between test cases + input_output_match = re.search(r"\[input_output(\d+)-", test_id) + input_output_index = input_output_match.group(1) if input_output_match else "" + + # Create a more detailed test name with case number only if there are multiple cases + detailed_test_name = test_name + if input_output_index and test_case_counts[test_name] > 1: + 
detailed_test_name = f"{test_name} (case {input_output_index})" + + if detailed_test_name in ALL_TESTS: + # Use a more robust pattern for model extraction + model_match = re.search(r"\[input_output\d+-([^\]]+)\]", test_id) + if model_match: + raw_model = model_match.group(1) + model = model_name_map.get(raw_model, raw_model) + + # Add to set of known models for this provider + provider_models.add(model) + + # Also update the global PROVIDERS dictionary + PROVIDERS[provider].add(model) + + # Mark setup failures as false (failed) + parsed_results[provider][model][detailed_test_name] = False + print(f"Parsed setup failure: {detailed_test_name} for model {model}") + + # Debug: Print parsed results + if not parsed_results[provider]: + print(f"Warning: No test results parsed for provider {provider}") + else: + for model, tests in parsed_results[provider].items(): + print(f"Model {model}: {len(tests)} test results") + + return parsed_results + + +def cleanup_old_results(): + """Clean up old test result files, keeping only the newest N per provider""" + for provider in PROVIDERS.keys(): + # Get all result files for this provider + provider_files = list(RESULTS_DIR.glob(f"{provider}_*.json")) + + # Sort by timestamp (newest first) + provider_files.sort(key=lambda x: int(x.stem.split("_")[1]), reverse=True) + + # Remove old files beyond the max to keep + if len(provider_files) > MAX_RESULTS_PER_PROVIDER: + for old_file in provider_files[MAX_RESULTS_PER_PROVIDER:]: + try: + old_file.unlink() + print(f"Removed old result file: {old_file}") + except Exception as e: + print(f"Error removing file {old_file}: {e}") + + +def get_latest_results_by_provider(): + """Get the latest test result file for each provider""" + provider_results = {} + + # Get all result files + result_files = list(RESULTS_DIR.glob("*.json")) + + # Extract all provider names from filenames + all_providers = set() + for file in result_files: + # File format is provider_timestamp.json + parts = file.stem.split("_") + if len(parts) >= 2: + all_providers.add(parts[0]) + + # Group by provider + for provider in all_providers: + provider_files = [f for f in result_files if f.name.startswith(f"{provider}_")] + + # Sort by timestamp (newest first) + provider_files.sort(key=lambda x: int(x.stem.split("_")[1]), reverse=True) + + if provider_files: + provider_results[provider] = provider_files[0] + + return provider_results + + +def generate_report(results_dict, output_file=None): + """Generate the markdown report""" + if output_file is None: + # Default to creating the report in the same directory as this script + output_file = Path(__file__).parent / "REPORT.md" + else: + output_file = Path(output_file) + + # Get the timestamp from result files + provider_timestamps = {} + provider_results = get_latest_results_by_provider() + for provider, result_file in provider_results.items(): + # Extract timestamp from filename (format: provider_timestamp.json) + try: + timestamp_str = result_file.stem.split("_")[1] + timestamp = int(timestamp_str) + formatted_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(timestamp)) + provider_timestamps[provider] = formatted_time + except (IndexError, ValueError): + provider_timestamps[provider] = "Unknown" + + # Convert provider model sets to sorted lists + for provider in PROVIDERS: + PROVIDERS[provider] = sorted(PROVIDERS[provider]) + + # Sort tests alphabetically + sorted_tests = sorted(ALL_TESTS) + + report = ["# Test Results Report\n"] + report.append(f"*Generated on: {time.strftime('%Y-%m-%d 
%H:%M:%S')}*\n") + report.append("*This report was generated by running `python tests/verifications/generate_report.py`*\n") + + # Icons for pass/fail + pass_icon = "✅" + fail_icon = "❌" + na_icon = "⚪" + + # Add emoji legend + report.append("## Legend\n") + report.append(f"- {pass_icon} - Test passed") + report.append(f"- {fail_icon} - Test failed") + report.append(f"- {na_icon} - Test not applicable or not run for this model") + report.append("\n") + + # Add a summary section + report.append("## Summary\n") + + # Count total tests and passes + total_tests = 0 + passed_tests = 0 + provider_totals = {} + + # Prepare summary data + for provider in PROVIDERS.keys(): + provider_passed = 0 + provider_total = 0 + + if provider in results_dict: + provider_models = PROVIDERS[provider] + for model in provider_models: + if model in results_dict[provider]: + model_results = results_dict[provider][model] + for test in sorted_tests: + if test in model_results: + provider_total += 1 + total_tests += 1 + if model_results[test]: + provider_passed += 1 + passed_tests += 1 + + provider_totals[provider] = (provider_passed, provider_total) + + # Add summary table + report.append("| Provider | Pass Rate | Tests Passed | Total Tests |") + report.append("| --- | --- | --- | --- |") + + # Use the custom order for summary table + for provider in [p for p in PROVIDER_ORDER if p in PROVIDERS]: + passed, total = provider_totals.get(provider, (0, 0)) + pass_rate = f"{(passed / total * 100):.1f}%" if total > 0 else "N/A" + report.append(f"| {provider.capitalize()} | {pass_rate} | {passed} | {total} |") + + # Add providers not in the custom order + for provider in [p for p in PROVIDERS if p not in PROVIDER_ORDER]: + passed, total = provider_totals.get(provider, (0, 0)) + pass_rate = f"{(passed / total * 100):.1f}%" if total > 0 else "N/A" + report.append(f"| {provider.capitalize()} | {pass_rate} | {passed} | {total} |") + + report.append("\n") + + # Process each provider in the custom order, then any additional providers + for provider in sorted( + PROVIDERS.keys(), key=lambda p: (PROVIDER_ORDER.index(p) if p in PROVIDER_ORDER else float("inf"), p) + ): + if not PROVIDERS[provider]: + # Skip providers with no models + continue + + report.append(f"\n## {provider.capitalize()}\n") + + # Add timestamp when test was run + if provider in provider_timestamps: + report.append(f"*Tests run on: {provider_timestamps[provider]}*\n") + + # Add test command for reproducing results + test_cmd = f"pytest tests/verifications/openai/test_chat_completion.py --provider={provider} -v" + report.append(f"```bash\n{test_cmd}\n```\n") + + # Get the relevant models for this provider + provider_models = PROVIDERS[provider] + + # Create table header with models as columns + header = "| Test | " + " | ".join(provider_models) + " |" + separator = "| --- | " + " | ".join(["---"] * len(provider_models)) + " |" + + report.append(header) + report.append(separator) + + # Get results for this provider + provider_results = results_dict.get(provider, {}) + + # Add rows for each test + for test in sorted_tests: + row = f"| {test} |" + + # Add results for each model in this test + for model in provider_models: + if model in provider_results and test in provider_results[model]: + result = pass_icon if provider_results[model][test] else fail_icon + else: + result = na_icon + row += f" {result} |" + + report.append(row) + + # Write to file + with open(output_file, "w") as f: + f.write("\n".join(report)) + f.write("\n") + + print(f"Report generated: 
{output_file}") + + +def main(): + parser = argparse.ArgumentParser(description="Generate test report") + parser.add_argument("--run-tests", action="store_true", help="Run tests before generating report") + parser.add_argument( + "--providers", + type=str, + nargs="+", + help="Specify providers to test (comma-separated or space-separated, default: all)", + ) + parser.add_argument("--output", type=str, help="Output file location (default: tests/verifications/REPORT.md)") + args = parser.parse_args() + + all_results = {} + + if args.run_tests: + # Get list of available providers from command line or use detected providers + if args.providers: + # Handle both comma-separated and space-separated lists + test_providers = [] + for provider_arg in args.providers: + # Split by comma if commas are present + if "," in provider_arg: + test_providers.extend(provider_arg.split(",")) + else: + test_providers.append(provider_arg) + else: + # Default providers to test + test_providers = PROVIDER_ORDER + + for provider in test_providers: + provider = provider.strip() # Remove any whitespace + result_file = run_tests(provider) + if result_file: + provider_results = parse_results(result_file) + all_results.update(provider_results) + else: + # Use existing results + provider_result_files = get_latest_results_by_provider() + + for result_file in provider_result_files.values(): + provider_results = parse_results(result_file) + all_results.update(provider_results) + + # Generate the report + generate_report(all_results, args.output) + + cleanup_old_results() + + +if __name__ == "__main__": + main() diff --git a/tests/verifications/openai/__init__.py b/tests/verifications/openai/__init__.py new file mode 100644 index 000000000..756f351d8 --- /dev/null +++ b/tests/verifications/openai/__init__.py @@ -0,0 +1,5 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. diff --git a/tests/verifications/openai/fixtures/__init__.py b/tests/verifications/openai/fixtures/__init__.py new file mode 100644 index 000000000..756f351d8 --- /dev/null +++ b/tests/verifications/openai/fixtures/__init__.py @@ -0,0 +1,5 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. diff --git a/tests/verifications/openai/fixtures/fixtures.py b/tests/verifications/openai/fixtures/fixtures.py new file mode 100644 index 000000000..b86de3662 --- /dev/null +++ b/tests/verifications/openai/fixtures/fixtures.py @@ -0,0 +1,97 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +import os + +import pytest +from openai import OpenAI + + +@pytest.fixture +def providers_model_mapping(): + """ + Mapping from model names used in test cases to provider's model names. 
+ """ + return { + "fireworks": { + "Llama-3.3-70B-Instruct": "accounts/fireworks/models/llama-v3p1-70b-instruct", + "Llama-3.2-11B-Vision-Instruct": "accounts/fireworks/models/llama-v3p2-11b-vision-instruct", + "Llama-4-Scout-17B-16E-Instruct": "accounts/fireworks/models/llama4-scout-instruct-basic", + "Llama-4-Maverick-17B-128E-Instruct": "accounts/fireworks/models/llama4-maverick-instruct-basic", + }, + "together": { + "Llama-3.3-70B-Instruct": "meta-llama/Llama-3.3-70B-Instruct-Turbo", + "Llama-3.2-11B-Vision-Instruct": "meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo", + "Llama-4-Scout-17B-16E-Instruct": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "Llama-4-Maverick-17B-128E-Instruct": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", + }, + "groq": { + "Llama-3.3-70B-Instruct": "llama-3.3-70b-versatile", + "Llama-3.2-11B-Vision-Instruct": "llama-3.2-11b-vision-preview", + "Llama-4-Scout-17B-16E-Instruct": "llama-4-scout-17b-16e-instruct", + "Llama-4-Maverick-17B-128E-Instruct": "llama-4-maverick-17b-128e-instruct", + }, + "cerebras": { + "Llama-3.3-70B-Instruct": "llama-3.3-70b", + }, + "openai": { + "gpt-4o": "gpt-4o", + "gpt-4o-mini": "gpt-4o-mini", + }, + } + + +@pytest.fixture +def provider_metadata(): + return { + "fireworks": ("https://api.fireworks.ai/inference/v1", "FIREWORKS_API_KEY"), + "together": ("https://api.together.xyz/v1", "TOGETHER_API_KEY"), + "groq": ("https://api.groq.com/openai/v1", "GROQ_API_KEY"), + "cerebras": ("https://api.cerebras.ai/v1", "CEREBRAS_API_KEY"), + "openai": ("https://api.openai.com/v1", "OPENAI_API_KEY"), + } + + +@pytest.fixture +def provider(request, provider_metadata): + provider = request.config.getoption("--provider") + base_url = request.config.getoption("--base-url") + + if provider and base_url and provider_metadata[provider][0] != base_url: + raise ValueError(f"Provider {provider} is not supported for base URL {base_url}") + + if not provider: + if not base_url: + raise ValueError("Provider and base URL are not provided") + for provider, metadata in provider_metadata.items(): + if metadata[0] == base_url: + provider = provider + break + + return provider + + +@pytest.fixture +def base_url(request, provider, provider_metadata): + return request.config.getoption("--base-url") or provider_metadata[provider][0] + + +@pytest.fixture +def api_key(request, provider, provider_metadata): + return request.config.getoption("--api-key") or os.getenv(provider_metadata[provider][1]) + + +@pytest.fixture +def model_mapping(provider, providers_model_mapping): + return providers_model_mapping[provider] + + +@pytest.fixture +def openai_client(base_url, api_key): + return OpenAI( + base_url=base_url, + api_key=api_key, + ) diff --git a/tests/verifications/openai/fixtures/load.py b/tests/verifications/openai/fixtures/load.py new file mode 100644 index 000000000..98580b2a1 --- /dev/null +++ b/tests/verifications/openai/fixtures/load.py @@ -0,0 +1,16 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
+ +from pathlib import Path + +import yaml + + +def load_test_cases(name: str): + fixture_dir = Path(__file__).parent / "test_cases" + yaml_path = fixture_dir / f"{name}.yaml" + with open(yaml_path, "r") as f: + return yaml.safe_load(f) diff --git a/tests/verifications/openai/fixtures/test_cases/chat_completion.yaml b/tests/verifications/openai/fixtures/test_cases/chat_completion.yaml new file mode 100644 index 000000000..2c302a704 --- /dev/null +++ b/tests/verifications/openai/fixtures/test_cases/chat_completion.yaml @@ -0,0 +1,162 @@ +test_chat_basic: + test_name: test_chat_basic + test_params: + input_output: + - input: + messages: + - content: Which planet do humans live on? + role: user + output: Earth + - input: + messages: + - content: Which planet has rings around it with a name starting with letter + S? + role: user + output: Saturn + model: + - Llama-3.3-8B-Instruct + - Llama-3.3-70B-Instruct + - Llama-4-Scout-17B-16E + - Llama-4-Scout-17B-16E-Instruct + - Llama-4-Maverick-17B-128E + - Llama-4-Maverick-17B-128E-Instruct + - gpt-4o + - gpt-4o-mini +test_chat_image: + test_name: test_chat_image + test_params: + input_output: + - input: + messages: + - content: + - text: What is in this image? + type: text + - image_url: + url: https://upload.wikimedia.org/wikipedia/commons/f/f7/Llamas%2C_Vernagt-Stausee%2C_Italy.jpg + type: image_url + role: user + output: llama + model: + - Llama-4-Scout-17B-16E + - Llama-4-Scout-17B-16E-Instruct + - Llama-4-Maverick-17B-128E + - Llama-4-Maverick-17B-128E-Instruct + - gpt-4o + - gpt-4o-mini +test_chat_structured_output: + test_name: test_chat_structured_output + test_params: + input_output: + - input: + messages: + - content: Extract the event information. + role: system + - content: Alice and Bob are going to a science fair on Friday. + role: user + response_format: + json_schema: + name: calendar_event + schema: + properties: + date: + title: Date + type: string + name: + title: Name + type: string + participants: + items: + type: string + title: Participants + type: array + required: + - name + - date + - participants + title: CalendarEvent + type: object + type: json_schema + output: valid_calendar_event + - input: + messages: + - content: You are a helpful math tutor. Guide the user through the solution + step by step. + role: system + - content: how can I solve 8x + 7 = -23 + role: user + response_format: + json_schema: + name: math_reasoning + schema: + $defs: + Step: + properties: + explanation: + title: Explanation + type: string + output: + title: Output + type: string + required: + - explanation + - output + title: Step + type: object + properties: + final_answer: + title: Final Answer + type: string + steps: + items: + $ref: '#/$defs/Step' + title: Steps + type: array + required: + - steps + - final_answer + title: MathReasoning + type: object + type: json_schema + output: valid_math_reasoning + model: + - Llama-3.3-8B-Instruct + - Llama-3.3-70B-Instruct + - Llama-4-Scout-17B-16E + - Llama-4-Scout-17B-16E-Instruct + - Llama-4-Maverick-17B-128E + - Llama-4-Maverick-17B-128E-Instruct + - gpt-4o + - gpt-4o-mini +test_tool_calling: + test_name: test_tool_calling + test_params: + input_output: + - input: + messages: + - content: You are a helpful assistant that can use tools to get information. + role: system + - content: What's the weather like in San Francisco? + role: user + tools: + - function: + description: Get current temperature for a given location. 
+ name: get_weather + parameters: + additionalProperties: false + properties: + location: + description: "City and country e.g. Bogot\xE1, Colombia" + type: string + required: + - location + type: object + type: function + output: get_weather_tool_call + model: + - Llama-3.3-70B-Instruct + - Llama-4-Scout-17B-16E + - Llama-4-Scout-17B-16E-Instruct + - Llama-4-Maverick-17B-128E + - Llama-4-Maverick-17B-128E-Instruct + - gpt-4o + - gpt-4o-mini diff --git a/tests/verifications/openai/test_chat_completion.py b/tests/verifications/openai/test_chat_completion.py new file mode 100644 index 000000000..c6a10de7b --- /dev/null +++ b/tests/verifications/openai/test_chat_completion.py @@ -0,0 +1,202 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from typing import Any + +import pytest +from pydantic import BaseModel + +from tests.verifications.openai.fixtures.load import load_test_cases + +chat_completion_test_cases = load_test_cases("chat_completion") + + +@pytest.fixture +def correct_model_name(model, provider, providers_model_mapping): + """Return the provider-specific model name based on the generic model name.""" + mapping = providers_model_mapping[provider] + if model not in mapping: + pytest.skip(f"Provider {provider} does not support model {model}") + return mapping[model] + + +@pytest.mark.parametrize("model", chat_completion_test_cases["test_chat_basic"]["test_params"]["model"]) +@pytest.mark.parametrize( + "input_output", + chat_completion_test_cases["test_chat_basic"]["test_params"]["input_output"], +) +def test_chat_non_streaming_basic(openai_client, input_output, correct_model_name): + response = openai_client.chat.completions.create( + model=correct_model_name, + messages=input_output["input"]["messages"], + stream=False, + ) + assert response.choices[0].message.role == "assistant" + assert input_output["output"].lower() in response.choices[0].message.content.lower() + + +@pytest.mark.parametrize("model", chat_completion_test_cases["test_chat_basic"]["test_params"]["model"]) +@pytest.mark.parametrize( + "input_output", + chat_completion_test_cases["test_chat_basic"]["test_params"]["input_output"], +) +def test_chat_streaming_basic(openai_client, input_output, correct_model_name): + response = openai_client.chat.completions.create( + model=correct_model_name, + messages=input_output["input"]["messages"], + stream=True, + ) + content = "" + for chunk in response: + content += chunk.choices[0].delta.content or "" + + # TODO: add detailed type validation + + assert input_output["output"].lower() in content.lower() + + +@pytest.mark.parametrize("model", chat_completion_test_cases["test_chat_image"]["test_params"]["model"]) +@pytest.mark.parametrize( + "input_output", + chat_completion_test_cases["test_chat_image"]["test_params"]["input_output"], +) +def test_chat_non_streaming_image(openai_client, input_output, correct_model_name): + response = openai_client.chat.completions.create( + model=correct_model_name, + messages=input_output["input"]["messages"], + stream=False, + ) + assert response.choices[0].message.role == "assistant" + assert input_output["output"].lower() in response.choices[0].message.content.lower() + + +@pytest.mark.parametrize("model", chat_completion_test_cases["test_chat_image"]["test_params"]["model"]) +@pytest.mark.parametrize( + "input_output", + 
chat_completion_test_cases["test_chat_image"]["test_params"]["input_output"], +) +def test_chat_streaming_image(openai_client, input_output, correct_model_name): + response = openai_client.chat.completions.create( + model=correct_model_name, + messages=input_output["input"]["messages"], + stream=True, + ) + content = "" + for chunk in response: + content += chunk.choices[0].delta.content or "" + + # TODO: add detailed type validation + + assert input_output["output"].lower() in content.lower() + + +@pytest.mark.parametrize( + "model", + chat_completion_test_cases["test_chat_structured_output"]["test_params"]["model"], +) +@pytest.mark.parametrize( + "input_output", + chat_completion_test_cases["test_chat_structured_output"]["test_params"]["input_output"], +) +def test_chat_non_streaming_structured_output(openai_client, input_output, correct_model_name): + response = openai_client.chat.completions.create( + model=correct_model_name, + messages=input_output["input"]["messages"], + response_format=input_output["input"]["response_format"], + stream=False, + ) + + assert response.choices[0].message.role == "assistant" + maybe_json_content = response.choices[0].message.content + + validate_structured_output(maybe_json_content, input_output["output"]) + + +@pytest.mark.parametrize( + "model", + chat_completion_test_cases["test_chat_structured_output"]["test_params"]["model"], +) +@pytest.mark.parametrize( + "input_output", + chat_completion_test_cases["test_chat_structured_output"]["test_params"]["input_output"], +) +def test_chat_streaming_structured_output(openai_client, input_output, correct_model_name): + response = openai_client.chat.completions.create( + model=correct_model_name, + messages=input_output["input"]["messages"], + response_format=input_output["input"]["response_format"], + stream=True, + ) + maybe_json_content = "" + for chunk in response: + maybe_json_content += chunk.choices[0].delta.content or "" + validate_structured_output(maybe_json_content, input_output["output"]) + + +@pytest.mark.parametrize( + "model", + chat_completion_test_cases["test_tool_calling"]["test_params"]["model"], +) +@pytest.mark.parametrize( + "input_output", + chat_completion_test_cases["test_tool_calling"]["test_params"]["input_output"], +) +def test_chat_non_streaming_tool_calling(openai_client, input_output, correct_model_name): + response = openai_client.chat.completions.create( + model=correct_model_name, + messages=input_output["input"]["messages"], + tools=input_output["input"]["tools"], + stream=False, + ) + + assert response.choices[0].message.role == "assistant" + assert len(response.choices[0].message.tool_calls) > 0 + assert input_output["output"] == "get_weather_tool_call" + assert response.choices[0].message.tool_calls[0].function.name == "get_weather" + # TODO: add detailed type validation + + +def get_structured_output(maybe_json_content: str, schema_name: str) -> Any | None: + if schema_name == "valid_calendar_event": + + class CalendarEvent(BaseModel): + name: str + date: str + participants: list[str] + + try: + calendar_event = CalendarEvent.model_validate_json(maybe_json_content) + return calendar_event + except Exception: + return None + elif schema_name == "valid_math_reasoning": + + class Step(BaseModel): + explanation: str + output: str + + class MathReasoning(BaseModel): + steps: list[Step] + final_answer: str + + try: + math_reasoning = MathReasoning.model_validate_json(maybe_json_content) + return math_reasoning + except Exception: + return None + + return None + + +def 
validate_structured_output(maybe_json_content: str, schema_name: str) -> None: + structured_output = get_structured_output(maybe_json_content, schema_name) + assert structured_output is not None + if schema_name == "valid_calendar_event": + assert structured_output.name is not None + assert structured_output.date is not None + assert len(structured_output.participants) == 2 + elif schema_name == "valid_math_reasoning": + assert len(structured_output.final_answer) > 0 diff --git a/tests/verifications/test_results/fireworks_1744154308.json b/tests/verifications/test_results/fireworks_1744154308.json new file mode 100644 index 000000000..691f6e474 --- /dev/null +++ b/tests/verifications/test_results/fireworks_1744154308.json @@ -0,0 +1,2744 @@ +{ + "created": 1744154399.039055, + "duration": 87.73799800872803, + "exitcode": 1, + "root": "/Users/erichuang/projects/llama-stack", + "environment": {}, + "summary": { + "skipped": 52, + "passed": 28, + "failed": 3, + "total": 83, + "collected": 83 + }, + "collectors": [ + { + "nodeid": "", + "outcome": "passed", + "result": [ + { + "nodeid": "tests/verifications/openai/test_chat_completion.py", + "type": "Module" + } + ] + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py", + "outcome": "passed", + "result": [ + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-3.3-8B-Instruct]", + "type": "Function", + "lineno": 25 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-3.3-70B-Instruct]", + "type": "Function", + "lineno": 25 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-4-Scout-17B-16E]", + "type": "Function", + "lineno": 25 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "type": "Function", + "lineno": 25 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-4-Maverick-17B-128E]", + "type": "Function", + "lineno": 25 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "type": "Function", + "lineno": 25 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-gpt-4o]", + "type": "Function", + "lineno": 25 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-gpt-4o-mini]", + "type": "Function", + "lineno": 25 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-3.3-8B-Instruct]", + "type": "Function", + "lineno": 25 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-3.3-70B-Instruct]", + "type": "Function", + "lineno": 25 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-4-Scout-17B-16E]", + "type": "Function", + "lineno": 25 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-4-Scout-17B-16E-Instruct]", + "type": "Function", + "lineno": 25 + }, + { + "nodeid": 
"tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-4-Maverick-17B-128E]", + "type": "Function", + "lineno": 25 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-4-Maverick-17B-128E-Instruct]", + "type": "Function", + "lineno": 25 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-gpt-4o]", + "type": "Function", + "lineno": 25 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-gpt-4o-mini]", + "type": "Function", + "lineno": 25 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-3.3-8B-Instruct]", + "type": "Function", + "lineno": 40 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-3.3-70B-Instruct]", + "type": "Function", + "lineno": 40 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-4-Scout-17B-16E]", + "type": "Function", + "lineno": 40 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "type": "Function", + "lineno": 40 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-4-Maverick-17B-128E]", + "type": "Function", + "lineno": 40 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "type": "Function", + "lineno": 40 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-gpt-4o]", + "type": "Function", + "lineno": 40 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-gpt-4o-mini]", + "type": "Function", + "lineno": 40 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-3.3-8B-Instruct]", + "type": "Function", + "lineno": 40 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-3.3-70B-Instruct]", + "type": "Function", + "lineno": 40 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-4-Scout-17B-16E]", + "type": "Function", + "lineno": 40 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-4-Scout-17B-16E-Instruct]", + "type": "Function", + "lineno": 40 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-4-Maverick-17B-128E]", + "type": "Function", + "lineno": 40 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-4-Maverick-17B-128E-Instruct]", + "type": "Function", + "lineno": 40 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-gpt-4o]", + "type": "Function", + "lineno": 40 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-gpt-4o-mini]", + "type": "Function", + "lineno": 40 + }, + { + "nodeid": 
"tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-Llama-4-Scout-17B-16E]", + "type": "Function", + "lineno": 60 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "type": "Function", + "lineno": 60 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-Llama-4-Maverick-17B-128E]", + "type": "Function", + "lineno": 60 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "type": "Function", + "lineno": 60 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-gpt-4o]", + "type": "Function", + "lineno": 60 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-gpt-4o-mini]", + "type": "Function", + "lineno": 60 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-Llama-4-Scout-17B-16E]", + "type": "Function", + "lineno": 75 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "type": "Function", + "lineno": 75 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-Llama-4-Maverick-17B-128E]", + "type": "Function", + "lineno": 75 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "type": "Function", + "lineno": 75 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-gpt-4o]", + "type": "Function", + "lineno": 75 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-gpt-4o-mini]", + "type": "Function", + "lineno": 75 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-3.3-8B-Instruct]", + "type": "Function", + "lineno": 95 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-3.3-70B-Instruct]", + "type": "Function", + "lineno": 95 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E]", + "type": "Function", + "lineno": 95 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "type": "Function", + "lineno": 95 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E]", + "type": "Function", + "lineno": 95 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "type": "Function", + "lineno": 95 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-gpt-4o]", + "type": "Function", + "lineno": 95 + }, + { + "nodeid": 
"tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-gpt-4o-mini]", + "type": "Function", + "lineno": 95 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-3.3-8B-Instruct]", + "type": "Function", + "lineno": 95 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-3.3-70B-Instruct]", + "type": "Function", + "lineno": 95 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E]", + "type": "Function", + "lineno": 95 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E-Instruct]", + "type": "Function", + "lineno": 95 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E]", + "type": "Function", + "lineno": 95 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E-Instruct]", + "type": "Function", + "lineno": 95 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-gpt-4o]", + "type": "Function", + "lineno": 95 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-gpt-4o-mini]", + "type": "Function", + "lineno": 95 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-3.3-8B-Instruct]", + "type": "Function", + "lineno": 117 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-3.3-70B-Instruct]", + "type": "Function", + "lineno": 117 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E]", + "type": "Function", + "lineno": 117 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "type": "Function", + "lineno": 117 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E]", + "type": "Function", + "lineno": 117 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "type": "Function", + "lineno": 117 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-gpt-4o]", + "type": "Function", + "lineno": 117 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-gpt-4o-mini]", + "type": "Function", + "lineno": 117 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-3.3-8B-Instruct]", + "type": "Function", + "lineno": 117 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-3.3-70B-Instruct]", 
+ "type": "Function", + "lineno": 117 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E]", + "type": "Function", + "lineno": 117 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E-Instruct]", + "type": "Function", + "lineno": 117 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E]", + "type": "Function", + "lineno": 117 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E-Instruct]", + "type": "Function", + "lineno": 117 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-gpt-4o]", + "type": "Function", + "lineno": 117 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-gpt-4o-mini]", + "type": "Function", + "lineno": 117 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-Llama-3.3-70B-Instruct]", + "type": "Function", + "lineno": 138 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-Llama-4-Scout-17B-16E]", + "type": "Function", + "lineno": 138 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "type": "Function", + "lineno": 138 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-Llama-4-Maverick-17B-128E]", + "type": "Function", + "lineno": 138 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "type": "Function", + "lineno": 138 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-gpt-4o]", + "type": "Function", + "lineno": 138 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-gpt-4o-mini]", + "type": "Function", + "lineno": 138 + } + ] + } + ], + "tests": [ + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-3.3-8B-Instruct]", + "lineno": 25, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_basic[input_output0-Llama-3.3-8B-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-3.3-8B-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.17320987500716, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider fireworks does not support model Llama-3.3-8B-Instruct')" + }, + "teardown": { + "duration": 0.000177707988768816, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-3.3-70B-Instruct]", + "lineno": 25, + "outcome": "passed", + "keywords": [ + 
"test_chat_non_streaming_basic[input_output0-Llama-3.3-70B-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-3.3-70B-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.009193749981932342, + "outcome": "passed" + }, + "call": { + "duration": 1.1473859580000862, + "outcome": "passed" + }, + "teardown": { + "duration": 0.00043337501119822264, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-4-Scout-17B-16E]", + "lineno": 25, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_basic[input_output0-Llama-4-Scout-17B-16E]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Scout-17B-16E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.01645291701424867, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider fireworks does not support model Llama-4-Scout-17B-16E')" + }, + "teardown": { + "duration": 0.0002898749662563205, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "lineno": 25, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_basic[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Scout-17B-16E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.01562033302616328, + "outcome": "passed" + }, + "call": { + "duration": 0.8782661251025274, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0002795408945530653, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-4-Maverick-17B-128E]", + "lineno": 25, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_basic[input_output0-Llama-4-Maverick-17B-128E]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Maverick-17B-128E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.008571124984882772, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider fireworks does not support model Llama-4-Maverick-17B-128E')" + }, + "teardown": { + "duration": 0.0003043749602511525, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "lineno": 25, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_basic[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Maverick-17B-128E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.00842841702979058, + "outcome": "passed" + }, + "call": { + "duration": 1.3863223339430988, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0009970410028472543, + "outcome": "passed" + } + }, + { + "nodeid": 
"tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-gpt-4o]", + "lineno": 25, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_basic[input_output0-gpt-4o]", + "parametrize", + "pytestmark", + "input_output0-gpt-4o", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.007089875056408346, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider fireworks does not support model gpt-4o')" + }, + "teardown": { + "duration": 0.00017958390526473522, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-gpt-4o-mini]", + "lineno": 25, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_basic[input_output0-gpt-4o-mini]", + "parametrize", + "pytestmark", + "input_output0-gpt-4o-mini", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.005809499998576939, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider fireworks does not support model gpt-4o-mini')" + }, + "teardown": { + "duration": 0.00016495899762958288, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-3.3-8B-Instruct]", + "lineno": 25, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_basic[input_output1-Llama-3.3-8B-Instruct]", + "parametrize", + "pytestmark", + "input_output1-Llama-3.3-8B-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.0119722920935601, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider fireworks does not support model Llama-3.3-8B-Instruct')" + }, + "teardown": { + "duration": 0.00016962504014372826, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-3.3-70B-Instruct]", + "lineno": 25, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_basic[input_output1-Llama-3.3-70B-Instruct]", + "parametrize", + "pytestmark", + "input_output1-Llama-3.3-70B-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.005716291954740882, + "outcome": "passed" + }, + "call": { + "duration": 0.6822018750244752, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0005292498972266912, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-4-Scout-17B-16E]", + "lineno": 25, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_basic[input_output1-Llama-4-Scout-17B-16E]", + "parametrize", + "pytestmark", + "input_output1-Llama-4-Scout-17B-16E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.025827708072029054, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 
'Skipped: Provider fireworks does not support model Llama-4-Scout-17B-16E')" + }, + "teardown": { + "duration": 0.000295999925583601, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-4-Scout-17B-16E-Instruct]", + "lineno": 25, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_basic[input_output1-Llama-4-Scout-17B-16E-Instruct]", + "parametrize", + "pytestmark", + "input_output1-Llama-4-Scout-17B-16E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.010980832972563803, + "outcome": "passed" + }, + "call": { + "duration": 0.7537062909686938, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0008091670460999012, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-4-Maverick-17B-128E]", + "lineno": 25, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_basic[input_output1-Llama-4-Maverick-17B-128E]", + "parametrize", + "pytestmark", + "input_output1-Llama-4-Maverick-17B-128E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.006567832897417247, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider fireworks does not support model Llama-4-Maverick-17B-128E')" + }, + "teardown": { + "duration": 0.0001545000122860074, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-4-Maverick-17B-128E-Instruct]", + "lineno": 25, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_basic[input_output1-Llama-4-Maverick-17B-128E-Instruct]", + "parametrize", + "pytestmark", + "input_output1-Llama-4-Maverick-17B-128E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.005985083989799023, + "outcome": "passed" + }, + "call": { + "duration": 0.7263387079583481, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0006324589485302567, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-gpt-4o]", + "lineno": 25, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_basic[input_output1-gpt-4o]", + "parametrize", + "pytestmark", + "input_output1-gpt-4o", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.0171962499152869, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider fireworks does not support model gpt-4o')" + }, + "teardown": { + "duration": 0.000780042028054595, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-gpt-4o-mini]", + "lineno": 25, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_basic[input_output1-gpt-4o-mini]", + "parametrize", + "pytestmark", + "input_output1-gpt-4o-mini", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 
0.01365620899014175, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider fireworks does not support model gpt-4o-mini')" + }, + "teardown": { + "duration": 0.00016758404672145844, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-3.3-8B-Instruct]", + "lineno": 40, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_basic[input_output0-Llama-3.3-8B-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-3.3-8B-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.0064070840599015355, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider fireworks does not support model Llama-3.3-8B-Instruct')" + }, + "teardown": { + "duration": 0.0002031669719144702, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-3.3-70B-Instruct]", + "lineno": 40, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_basic[input_output0-Llama-3.3-70B-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-3.3-70B-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.010951624950394034, + "outcome": "passed" + }, + "call": { + "duration": 0.5433399169705808, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0013178749941289425, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-4-Scout-17B-16E]", + "lineno": 40, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_basic[input_output0-Llama-4-Scout-17B-16E]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Scout-17B-16E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.022056750021874905, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider fireworks does not support model Llama-4-Scout-17B-16E')" + }, + "teardown": { + "duration": 0.0006570409750565886, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "lineno": 40, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_basic[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Scout-17B-16E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.008314333041198552, + "outcome": "passed" + }, + "call": { + "duration": 0.7779882500180975, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0006799160037189722, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-4-Maverick-17B-128E]", + "lineno": 40, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_basic[input_output0-Llama-4-Maverick-17B-128E]", + "parametrize", + "pytestmark", + 
"input_output0-Llama-4-Maverick-17B-128E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.03601404093205929, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider fireworks does not support model Llama-4-Maverick-17B-128E')" + }, + "teardown": { + "duration": 0.000610582996159792, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "lineno": 40, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_basic[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Maverick-17B-128E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.014321292052045465, + "outcome": "passed" + }, + "call": { + "duration": 1.0243758750148118, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0010485410457476974, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-gpt-4o]", + "lineno": 40, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_basic[input_output0-gpt-4o]", + "parametrize", + "pytestmark", + "input_output0-gpt-4o", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.021133000031113625, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider fireworks does not support model gpt-4o')" + }, + "teardown": { + "duration": 0.0005400830414146185, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-gpt-4o-mini]", + "lineno": 40, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_basic[input_output0-gpt-4o-mini]", + "parametrize", + "pytestmark", + "input_output0-gpt-4o-mini", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.007212458993308246, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider fireworks does not support model gpt-4o-mini')" + }, + "teardown": { + "duration": 0.00026770797558128834, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-3.3-8B-Instruct]", + "lineno": 40, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_basic[input_output1-Llama-3.3-8B-Instruct]", + "parametrize", + "pytestmark", + "input_output1-Llama-3.3-8B-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.012334750033915043, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider fireworks does not support model Llama-3.3-8B-Instruct')" + }, + "teardown": { + "duration": 0.00042683398351073265, + "outcome": "passed" + } + }, + { + "nodeid": 
"tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-3.3-70B-Instruct]", + "lineno": 40, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_basic[input_output1-Llama-3.3-70B-Instruct]", + "parametrize", + "pytestmark", + "input_output1-Llama-3.3-70B-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.011477917083539069, + "outcome": "passed" + }, + "call": { + "duration": 1.670572166913189, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0005759169580414891, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-4-Scout-17B-16E]", + "lineno": 40, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_basic[input_output1-Llama-4-Scout-17B-16E]", + "parametrize", + "pytestmark", + "input_output1-Llama-4-Scout-17B-16E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.024620208074338734, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider fireworks does not support model Llama-4-Scout-17B-16E')" + }, + "teardown": { + "duration": 0.0005166250048205256, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-4-Scout-17B-16E-Instruct]", + "lineno": 40, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_basic[input_output1-Llama-4-Scout-17B-16E-Instruct]", + "parametrize", + "pytestmark", + "input_output1-Llama-4-Scout-17B-16E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.008708957931958139, + "outcome": "passed" + }, + "call": { + "duration": 0.6654335829662159, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0002927089808508754, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-4-Maverick-17B-128E]", + "lineno": 40, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_basic[input_output1-Llama-4-Maverick-17B-128E]", + "parametrize", + "pytestmark", + "input_output1-Llama-4-Maverick-17B-128E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.018128167022950947, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider fireworks does not support model Llama-4-Maverick-17B-128E')" + }, + "teardown": { + "duration": 0.0001929170684888959, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-4-Maverick-17B-128E-Instruct]", + "lineno": 40, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_basic[input_output1-Llama-4-Maverick-17B-128E-Instruct]", + "parametrize", + "pytestmark", + "input_output1-Llama-4-Maverick-17B-128E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.0063874589977785945, + "outcome": "passed" + }, + "call": { + "duration": 0.8047525839647278, + "outcome": "passed" + }, + "teardown": { + 
"duration": 0.00039245898369699717, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-gpt-4o]", + "lineno": 40, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_basic[input_output1-gpt-4o]", + "parametrize", + "pytestmark", + "input_output1-gpt-4o", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.01366533397231251, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider fireworks does not support model gpt-4o')" + }, + "teardown": { + "duration": 0.00028241705149412155, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-gpt-4o-mini]", + "lineno": 40, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_basic[input_output1-gpt-4o-mini]", + "parametrize", + "pytestmark", + "input_output1-gpt-4o-mini", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.010844790958799422, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider fireworks does not support model gpt-4o-mini')" + }, + "teardown": { + "duration": 0.000258082989603281, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-Llama-4-Scout-17B-16E]", + "lineno": 60, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_image[input_output0-Llama-4-Scout-17B-16E]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Scout-17B-16E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.00936354196164757, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 61, 'Skipped: Provider fireworks does not support model Llama-4-Scout-17B-16E')" + }, + "teardown": { + "duration": 0.00020533299539238214, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "lineno": 60, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_image[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Scout-17B-16E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.008578249951824546, + "outcome": "passed" + }, + "call": { + "duration": 2.6288582499837503, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0006052498938515782, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-Llama-4-Maverick-17B-128E]", + "lineno": 60, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_image[input_output0-Llama-4-Maverick-17B-128E]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Maverick-17B-128E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.02061279199551791, + "outcome": "skipped", + "longrepr": 
"('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 61, 'Skipped: Provider fireworks does not support model Llama-4-Maverick-17B-128E')" + }, + "teardown": { + "duration": 0.00029320805333554745, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "lineno": 60, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_image[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Maverick-17B-128E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.00995812495239079, + "outcome": "passed" + }, + "call": { + "duration": 3.0904540000483394, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0003214169992133975, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-gpt-4o]", + "lineno": 60, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_image[input_output0-gpt-4o]", + "parametrize", + "pytestmark", + "input_output0-gpt-4o", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.0261635419446975, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 61, 'Skipped: Provider fireworks does not support model gpt-4o')" + }, + "teardown": { + "duration": 0.00032716698478907347, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-gpt-4o-mini]", + "lineno": 60, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_image[input_output0-gpt-4o-mini]", + "parametrize", + "pytestmark", + "input_output0-gpt-4o-mini", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.027220541960559785, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 61, 'Skipped: Provider fireworks does not support model gpt-4o-mini')" + }, + "teardown": { + "duration": 0.0003192499279975891, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-Llama-4-Scout-17B-16E]", + "lineno": 75, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_image[input_output0-Llama-4-Scout-17B-16E]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Scout-17B-16E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.010883458075113595, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 76, 'Skipped: Provider fireworks does not support model Llama-4-Scout-17B-16E')" + }, + "teardown": { + "duration": 0.0002687909873202443, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "lineno": 75, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_image[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "parametrize", + "pytestmark", + 
"input_output0-Llama-4-Scout-17B-16E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.0171177500160411, + "outcome": "passed" + }, + "call": { + "duration": 1.6752691670553759, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0004877089522778988, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-Llama-4-Maverick-17B-128E]", + "lineno": 75, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_image[input_output0-Llama-4-Maverick-17B-128E]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Maverick-17B-128E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.011608208995312452, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 76, 'Skipped: Provider fireworks does not support model Llama-4-Maverick-17B-128E')" + }, + "teardown": { + "duration": 0.00017137499526143074, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "lineno": 75, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_image[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Maverick-17B-128E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.009284624946303666, + "outcome": "passed" + }, + "call": { + "duration": 3.537356249988079, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0005068340105935931, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-gpt-4o]", + "lineno": 75, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_image[input_output0-gpt-4o]", + "parametrize", + "pytestmark", + "input_output0-gpt-4o", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.016660499968566, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 76, 'Skipped: Provider fireworks does not support model gpt-4o')" + }, + "teardown": { + "duration": 0.00029341597110033035, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-gpt-4o-mini]", + "lineno": 75, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_image[input_output0-gpt-4o-mini]", + "parametrize", + "pytestmark", + "input_output0-gpt-4o-mini", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.01374066702555865, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 76, 'Skipped: Provider fireworks does not support model gpt-4o-mini')" + }, + "teardown": { + "duration": 0.0002625000197440386, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-3.3-8B-Instruct]", + "lineno": 95, + "outcome": "skipped", + "keywords": [ + 
"test_chat_non_streaming_structured_output[input_output0-Llama-3.3-8B-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-3.3-8B-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.013120374991558492, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider fireworks does not support model Llama-3.3-8B-Instruct')" + }, + "teardown": { + "duration": 0.00021954195108264685, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-3.3-70B-Instruct]", + "lineno": 95, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_structured_output[input_output0-Llama-3.3-70B-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-3.3-70B-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.015080374898388982, + "outcome": "passed" + }, + "call": { + "duration": 1.157175041968003, + "outcome": "passed" + }, + "teardown": { + "duration": 0.000495875021442771, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E]", + "lineno": 95, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Scout-17B-16E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.013946042046882212, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider fireworks does not support model Llama-4-Scout-17B-16E')" + }, + "teardown": { + "duration": 0.0002954580122604966, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "lineno": 95, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Scout-17B-16E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.011617792071774602, + "outcome": "passed" + }, + "call": { + "duration": 0.9537639999762177, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0004819999448955059, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E]", + "lineno": 95, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Maverick-17B-128E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.027436082949861884, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider fireworks does not support model 
Llama-4-Maverick-17B-128E')" + }, + "teardown": { + "duration": 0.00030274991877377033, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "lineno": 95, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Maverick-17B-128E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.016110333963297307, + "outcome": "passed" + }, + "call": { + "duration": 0.8493227910948917, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0004883749643340707, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-gpt-4o]", + "lineno": 95, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_structured_output[input_output0-gpt-4o]", + "parametrize", + "pytestmark", + "input_output0-gpt-4o", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.017850833013653755, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider fireworks does not support model gpt-4o')" + }, + "teardown": { + "duration": 0.0003287500003352761, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-gpt-4o-mini]", + "lineno": 95, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_structured_output[input_output0-gpt-4o-mini]", + "parametrize", + "pytestmark", + "input_output0-gpt-4o-mini", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.012523208046332002, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider fireworks does not support model gpt-4o-mini')" + }, + "teardown": { + "duration": 0.00023500004317611456, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-3.3-8B-Instruct]", + "lineno": 95, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_structured_output[input_output1-Llama-3.3-8B-Instruct]", + "parametrize", + "pytestmark", + "input_output1-Llama-3.3-8B-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.007516667013987899, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider fireworks does not support model Llama-3.3-8B-Instruct')" + }, + "teardown": { + "duration": 0.00018912507221102715, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-3.3-70B-Instruct]", + "lineno": 95, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_structured_output[input_output1-Llama-3.3-70B-Instruct]", + "parametrize", + "pytestmark", + 
"input_output1-Llama-3.3-70B-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.007337165996432304, + "outcome": "passed" + }, + "call": { + "duration": 3.124099582899362, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0006703329272568226, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E]", + "lineno": 95, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E]", + "parametrize", + "pytestmark", + "input_output1-Llama-4-Scout-17B-16E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.014259999967180192, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider fireworks does not support model Llama-4-Scout-17B-16E')" + }, + "teardown": { + "duration": 0.00030262500513345003, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E-Instruct]", + "lineno": 95, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E-Instruct]", + "parametrize", + "pytestmark", + "input_output1-Llama-4-Scout-17B-16E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.010863124975003302, + "outcome": "passed" + }, + "call": { + "duration": 1.3330956250429153, + "outcome": "passed" + }, + "teardown": { + "duration": 0.00018679199274629354, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E]", + "lineno": 95, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E]", + "parametrize", + "pytestmark", + "input_output1-Llama-4-Maverick-17B-128E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.005797958001494408, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider fireworks does not support model Llama-4-Maverick-17B-128E')" + }, + "teardown": { + "duration": 0.00017529097385704517, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E-Instruct]", + "lineno": 95, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E-Instruct]", + "parametrize", + "pytestmark", + "input_output1-Llama-4-Maverick-17B-128E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.005647709011100233, + "outcome": "passed" + }, + "call": { + "duration": 3.2295467499643564, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0005654999986290932, + "outcome": "passed" + } + }, + { + "nodeid": 
"tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-gpt-4o]", + "lineno": 95, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_structured_output[input_output1-gpt-4o]", + "parametrize", + "pytestmark", + "input_output1-gpt-4o", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.007151791942305863, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider fireworks does not support model gpt-4o')" + }, + "teardown": { + "duration": 0.00015316694043576717, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-gpt-4o-mini]", + "lineno": 95, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_structured_output[input_output1-gpt-4o-mini]", + "parametrize", + "pytestmark", + "input_output1-gpt-4o-mini", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.006435790914110839, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider fireworks does not support model gpt-4o-mini')" + }, + "teardown": { + "duration": 0.00015954102855175734, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-3.3-8B-Instruct]", + "lineno": 117, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_structured_output[input_output0-Llama-3.3-8B-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-3.3-8B-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.006164791993796825, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider fireworks does not support model Llama-3.3-8B-Instruct')" + }, + "teardown": { + "duration": 0.00014074996579438448, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-3.3-70B-Instruct]", + "lineno": 117, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_structured_output[input_output0-Llama-3.3-70B-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-3.3-70B-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.010064583038911223, + "outcome": "passed" + }, + "call": { + "duration": 1.1676458748988807, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0002513329964131117, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E]", + "lineno": 117, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Scout-17B-16E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.011011417023837566, + "outcome": "skipped", + "longrepr": 
"('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider fireworks does not support model Llama-4-Scout-17B-16E')" + }, + "teardown": { + "duration": 0.00020608294289559126, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "lineno": 117, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Scout-17B-16E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.011654542060568929, + "outcome": "passed" + }, + "call": { + "duration": 0.7950789160095155, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0002690000692382455, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E]", + "lineno": 117, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Maverick-17B-128E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.0066834589233621955, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider fireworks does not support model Llama-4-Maverick-17B-128E')" + }, + "teardown": { + "duration": 0.00017270795069634914, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "lineno": 117, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Maverick-17B-128E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.011390416999347508, + "outcome": "passed" + }, + "call": { + "duration": 0.7844940840732306, + "outcome": "passed" + }, + "teardown": { + "duration": 0.000511458027176559, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-gpt-4o]", + "lineno": 117, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_structured_output[input_output0-gpt-4o]", + "parametrize", + "pytestmark", + "input_output0-gpt-4o", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.005813500029034913, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider fireworks does not support model gpt-4o')" + }, + "teardown": { + "duration": 0.00015495799016207457, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-gpt-4o-mini]", + "lineno": 117, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_structured_output[input_output0-gpt-4o-mini]", + 
"parametrize", + "pytestmark", + "input_output0-gpt-4o-mini", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.0075639160349965096, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider fireworks does not support model gpt-4o-mini')" + }, + "teardown": { + "duration": 0.00014358304906636477, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-3.3-8B-Instruct]", + "lineno": 117, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_structured_output[input_output1-Llama-3.3-8B-Instruct]", + "parametrize", + "pytestmark", + "input_output1-Llama-3.3-8B-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.008526541059836745, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider fireworks does not support model Llama-3.3-8B-Instruct')" + }, + "teardown": { + "duration": 0.00015841599088162184, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-3.3-70B-Instruct]", + "lineno": 117, + "outcome": "failed", + "keywords": [ + "test_chat_streaming_structured_output[input_output1-Llama-3.3-70B-Instruct]", + "parametrize", + "pytestmark", + "input_output1-Llama-3.3-70B-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.007805416011251509, + "outcome": "passed" + }, + "call": { + "duration": 13.25898533302825, + "outcome": "failed", + "crash": { + "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py", + "lineno": 196, + "message": "assert None is not None" + }, + "traceback": [ + { + "path": "tests/verifications/openai/test_chat_completion.py", + "lineno": 136, + "message": "" + }, + { + "path": "tests/verifications/openai/test_chat_completion.py", + "lineno": 196, + "message": "AssertionError" + } + ], + "longrepr": "openai_client = \ninput_output = {'input': {'messages': [{'content': 'You are a helpful math tutor. Guide the user through the solution step by step.',... 
['steps', 'final_answer'], 'title': 'MathReasoning', ...}}, 'type': 'json_schema'}}, 'output': 'valid_math_reasoning'}\ncorrect_model_name = 'accounts/fireworks/models/llama-v3p1-70b-instruct'\n\n @pytest.mark.parametrize(\n \"model\",\n chat_completion_test_cases[\"test_chat_structured_output\"][\"test_params\"][\"model\"],\n )\n @pytest.mark.parametrize(\n \"input_output\",\n chat_completion_test_cases[\"test_chat_structured_output\"][\"test_params\"][\"input_output\"],\n )\n def test_chat_streaming_structured_output(openai_client, input_output, correct_model_name):\n response = openai_client.chat.completions.create(\n model=correct_model_name,\n messages=input_output[\"input\"][\"messages\"],\n response_format=input_output[\"input\"][\"response_format\"],\n stream=True,\n )\n maybe_json_content = \"\"\n for chunk in response:\n maybe_json_content += chunk.choices[0].delta.content or \"\"\n> validate_structured_output(maybe_json_content, input_output[\"output\"])\n\ntests/verifications/openai/test_chat_completion.py:136: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\nmaybe_json_content = '{ \"final_answer\": \"}To solve the equation 8x + 7 = -23, we need to isolate the variable x. We can do this by followin...tassistantassistantassistantassistantassistantassistantassistantassistantassistantassistantassistantassistantassistant'\nschema_name = 'valid_math_reasoning'\n\n def validate_structured_output(maybe_json_content: str, schema_name: str) -> None:\n structured_output = get_structured_output(maybe_json_content, schema_name)\n> assert structured_output is not None\nE assert None is not None\n\ntests/verifications/openai/test_chat_completion.py:196: AssertionError" + }, + "teardown": { + "duration": 0.00022583396639674902, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E]", + "lineno": 117, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E]", + "parametrize", + "pytestmark", + "input_output1-Llama-4-Scout-17B-16E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.006412541959434748, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider fireworks does not support model Llama-4-Scout-17B-16E')" + }, + "teardown": { + "duration": 0.0001449589617550373, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E-Instruct]", + "lineno": 117, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E-Instruct]", + "parametrize", + "pytestmark", + "input_output1-Llama-4-Scout-17B-16E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.010353000019676983, + "outcome": "passed" + }, + "call": { + "duration": 4.559281209018081, + "outcome": "passed" + }, + "teardown": { + "duration": 0.00021179206669330597, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E]", + "lineno": 117, + "outcome": "skipped", + "keywords": [ + 
"test_chat_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E]", + "parametrize", + "pytestmark", + "input_output1-Llama-4-Maverick-17B-128E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.011320417048409581, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider fireworks does not support model Llama-4-Maverick-17B-128E')" + }, + "teardown": { + "duration": 0.0001623749267309904, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E-Instruct]", + "lineno": 117, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E-Instruct]", + "parametrize", + "pytestmark", + "input_output1-Llama-4-Maverick-17B-128E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.005637791007757187, + "outcome": "passed" + }, + "call": { + "duration": 2.9282109580235556, + "outcome": "passed" + }, + "teardown": { + "duration": 0.00019149994477629662, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-gpt-4o]", + "lineno": 117, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_structured_output[input_output1-gpt-4o]", + "parametrize", + "pytestmark", + "input_output1-gpt-4o", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.021475916961207986, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider fireworks does not support model gpt-4o')" + }, + "teardown": { + "duration": 0.0002605828922241926, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-gpt-4o-mini]", + "lineno": 117, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_structured_output[input_output1-gpt-4o-mini]", + "parametrize", + "pytestmark", + "input_output1-gpt-4o-mini", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.012046082993037999, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider fireworks does not support model gpt-4o-mini')" + }, + "teardown": { + "duration": 0.00016966694965958595, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-Llama-3.3-70B-Instruct]", + "lineno": 138, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_tool_calling[input_output0-Llama-3.3-70B-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-3.3-70B-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.00782629195600748, + "outcome": "passed" + }, + "call": { + "duration": 0.9290615000063553, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0004110001027584076, + "outcome": "passed" + } + }, + { + 
"nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-Llama-4-Scout-17B-16E]", + "lineno": 138, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_tool_calling[input_output0-Llama-4-Scout-17B-16E]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Scout-17B-16E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.00842183397617191, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 139, 'Skipped: Provider fireworks does not support model Llama-4-Scout-17B-16E')" + }, + "teardown": { + "duration": 0.00023745803628116846, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "lineno": 138, + "outcome": "failed", + "keywords": [ + "test_chat_non_streaming_tool_calling[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Scout-17B-16E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.010762874968349934, + "outcome": "passed" + }, + "call": { + "duration": 23.62101216695737, + "outcome": "failed", + "crash": { + "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py", + "lineno": 156, + "message": "TypeError: object of type 'NoneType' has no len()" + }, + "traceback": [ + { + "path": "tests/verifications/openai/test_chat_completion.py", + "lineno": 156, + "message": "TypeError" + } + ], + "longrepr": "openai_client = \ninput_output = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\ncorrect_model_name = 'accounts/fireworks/models/llama4-scout-instruct-basic'\n\n @pytest.mark.parametrize(\n \"model\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"model\"],\n )\n @pytest.mark.parametrize(\n \"input_output\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"input_output\"],\n )\n def test_chat_non_streaming_tool_calling(openai_client, input_output, correct_model_name):\n response = openai_client.chat.completions.create(\n model=correct_model_name,\n messages=input_output[\"input\"][\"messages\"],\n tools=input_output[\"input\"][\"tools\"],\n stream=False,\n )\n \n assert response.choices[0].message.role == \"assistant\"\n> assert len(response.choices[0].message.tool_calls) > 0\nE TypeError: object of type 'NoneType' has no len()\n\ntests/verifications/openai/test_chat_completion.py:156: TypeError" + }, + "teardown": { + "duration": 0.0004520840011537075, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-Llama-4-Maverick-17B-128E]", + "lineno": 138, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_tool_calling[input_output0-Llama-4-Maverick-17B-128E]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Maverick-17B-128E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.00953104195650667, + "outcome": "skipped", + 
"longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 139, 'Skipped: Provider fireworks does not support model Llama-4-Maverick-17B-128E')" + }, + "teardown": { + "duration": 0.00017912499606609344, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "lineno": 138, + "outcome": "failed", + "keywords": [ + "test_chat_non_streaming_tool_calling[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Maverick-17B-128E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.010302042006514966, + "outcome": "passed" + }, + "call": { + "duration": 5.55651158397086, + "outcome": "failed", + "crash": { + "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py", + "lineno": 156, + "message": "TypeError: object of type 'NoneType' has no len()" + }, + "traceback": [ + { + "path": "tests/verifications/openai/test_chat_completion.py", + "lineno": 156, + "message": "TypeError" + } + ], + "longrepr": "openai_client = \ninput_output = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\ncorrect_model_name = 'accounts/fireworks/models/llama4-maverick-instruct-basic'\n\n @pytest.mark.parametrize(\n \"model\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"model\"],\n )\n @pytest.mark.parametrize(\n \"input_output\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"input_output\"],\n )\n def test_chat_non_streaming_tool_calling(openai_client, input_output, correct_model_name):\n response = openai_client.chat.completions.create(\n model=correct_model_name,\n messages=input_output[\"input\"][\"messages\"],\n tools=input_output[\"input\"][\"tools\"],\n stream=False,\n )\n \n assert response.choices[0].message.role == \"assistant\"\n> assert len(response.choices[0].message.tool_calls) > 0\nE TypeError: object of type 'NoneType' has no len()\n\ntests/verifications/openai/test_chat_completion.py:156: TypeError" + }, + "teardown": { + "duration": 0.0003929579397663474, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-gpt-4o]", + "lineno": 138, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_tool_calling[input_output0-gpt-4o]", + "parametrize", + "pytestmark", + "input_output0-gpt-4o", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.01593891705852002, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 139, 'Skipped: Provider fireworks does not support model gpt-4o')" + }, + "teardown": { + "duration": 0.0003579579060897231, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-gpt-4o-mini]", + "lineno": 138, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_tool_calling[input_output0-gpt-4o-mini]", + "parametrize", + "pytestmark", + 
"input_output0-gpt-4o-mini", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.01874550001230091, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 139, 'Skipped: Provider fireworks does not support model gpt-4o-mini')" + }, + "teardown": { + "duration": 0.00031995808240026236, + "outcome": "passed" + } + } + ] +} diff --git a/tests/verifications/test_results/openai_1744154522.json b/tests/verifications/test_results/openai_1744154522.json new file mode 100644 index 000000000..310f3500d --- /dev/null +++ b/tests/verifications/test_results/openai_1744154522.json @@ -0,0 +1,2672 @@ +{ + "created": 1744154576.251519, + "duration": 51.50739002227783, + "exitcode": 0, + "root": "/Users/erichuang/projects/llama-stack", + "environment": {}, + "summary": { + "skipped": 61, + "passed": 22, + "total": 83, + "collected": 83 + }, + "collectors": [ + { + "nodeid": "", + "outcome": "passed", + "result": [ + { + "nodeid": "tests/verifications/openai/test_chat_completion.py", + "type": "Module" + } + ] + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py", + "outcome": "passed", + "result": [ + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-3.3-8B-Instruct]", + "type": "Function", + "lineno": 25 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-3.3-70B-Instruct]", + "type": "Function", + "lineno": 25 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-4-Scout-17B-16E]", + "type": "Function", + "lineno": 25 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "type": "Function", + "lineno": 25 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-4-Maverick-17B-128E]", + "type": "Function", + "lineno": 25 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "type": "Function", + "lineno": 25 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-gpt-4o]", + "type": "Function", + "lineno": 25 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-gpt-4o-mini]", + "type": "Function", + "lineno": 25 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-3.3-8B-Instruct]", + "type": "Function", + "lineno": 25 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-3.3-70B-Instruct]", + "type": "Function", + "lineno": 25 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-4-Scout-17B-16E]", + "type": "Function", + "lineno": 25 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-4-Scout-17B-16E-Instruct]", + "type": "Function", + "lineno": 25 + }, + { + "nodeid": 
"tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-4-Maverick-17B-128E]", + "type": "Function", + "lineno": 25 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-4-Maverick-17B-128E-Instruct]", + "type": "Function", + "lineno": 25 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-gpt-4o]", + "type": "Function", + "lineno": 25 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-gpt-4o-mini]", + "type": "Function", + "lineno": 25 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-3.3-8B-Instruct]", + "type": "Function", + "lineno": 40 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-3.3-70B-Instruct]", + "type": "Function", + "lineno": 40 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-4-Scout-17B-16E]", + "type": "Function", + "lineno": 40 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "type": "Function", + "lineno": 40 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-4-Maverick-17B-128E]", + "type": "Function", + "lineno": 40 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "type": "Function", + "lineno": 40 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-gpt-4o]", + "type": "Function", + "lineno": 40 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-gpt-4o-mini]", + "type": "Function", + "lineno": 40 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-3.3-8B-Instruct]", + "type": "Function", + "lineno": 40 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-3.3-70B-Instruct]", + "type": "Function", + "lineno": 40 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-4-Scout-17B-16E]", + "type": "Function", + "lineno": 40 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-4-Scout-17B-16E-Instruct]", + "type": "Function", + "lineno": 40 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-4-Maverick-17B-128E]", + "type": "Function", + "lineno": 40 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-4-Maverick-17B-128E-Instruct]", + "type": "Function", + "lineno": 40 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-gpt-4o]", + "type": "Function", + "lineno": 40 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-gpt-4o-mini]", + "type": "Function", + "lineno": 40 + }, + { + "nodeid": 
"tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-Llama-4-Scout-17B-16E]", + "type": "Function", + "lineno": 60 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "type": "Function", + "lineno": 60 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-Llama-4-Maverick-17B-128E]", + "type": "Function", + "lineno": 60 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "type": "Function", + "lineno": 60 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-gpt-4o]", + "type": "Function", + "lineno": 60 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-gpt-4o-mini]", + "type": "Function", + "lineno": 60 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-Llama-4-Scout-17B-16E]", + "type": "Function", + "lineno": 75 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "type": "Function", + "lineno": 75 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-Llama-4-Maverick-17B-128E]", + "type": "Function", + "lineno": 75 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "type": "Function", + "lineno": 75 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-gpt-4o]", + "type": "Function", + "lineno": 75 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-gpt-4o-mini]", + "type": "Function", + "lineno": 75 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-3.3-8B-Instruct]", + "type": "Function", + "lineno": 95 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-3.3-70B-Instruct]", + "type": "Function", + "lineno": 95 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E]", + "type": "Function", + "lineno": 95 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "type": "Function", + "lineno": 95 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E]", + "type": "Function", + "lineno": 95 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "type": "Function", + "lineno": 95 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-gpt-4o]", + "type": "Function", + "lineno": 95 + }, + { + "nodeid": 
"tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-gpt-4o-mini]", + "type": "Function", + "lineno": 95 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-3.3-8B-Instruct]", + "type": "Function", + "lineno": 95 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-3.3-70B-Instruct]", + "type": "Function", + "lineno": 95 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E]", + "type": "Function", + "lineno": 95 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E-Instruct]", + "type": "Function", + "lineno": 95 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E]", + "type": "Function", + "lineno": 95 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E-Instruct]", + "type": "Function", + "lineno": 95 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-gpt-4o]", + "type": "Function", + "lineno": 95 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-gpt-4o-mini]", + "type": "Function", + "lineno": 95 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-3.3-8B-Instruct]", + "type": "Function", + "lineno": 117 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-3.3-70B-Instruct]", + "type": "Function", + "lineno": 117 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E]", + "type": "Function", + "lineno": 117 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "type": "Function", + "lineno": 117 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E]", + "type": "Function", + "lineno": 117 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "type": "Function", + "lineno": 117 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-gpt-4o]", + "type": "Function", + "lineno": 117 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-gpt-4o-mini]", + "type": "Function", + "lineno": 117 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-3.3-8B-Instruct]", + "type": "Function", + "lineno": 117 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-3.3-70B-Instruct]", 
+ "type": "Function", + "lineno": 117 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E]", + "type": "Function", + "lineno": 117 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E-Instruct]", + "type": "Function", + "lineno": 117 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E]", + "type": "Function", + "lineno": 117 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E-Instruct]", + "type": "Function", + "lineno": 117 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-gpt-4o]", + "type": "Function", + "lineno": 117 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-gpt-4o-mini]", + "type": "Function", + "lineno": 117 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-Llama-3.3-70B-Instruct]", + "type": "Function", + "lineno": 138 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-Llama-4-Scout-17B-16E]", + "type": "Function", + "lineno": 138 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "type": "Function", + "lineno": 138 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-Llama-4-Maverick-17B-128E]", + "type": "Function", + "lineno": 138 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "type": "Function", + "lineno": 138 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-gpt-4o]", + "type": "Function", + "lineno": 138 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-gpt-4o-mini]", + "type": "Function", + "lineno": 138 + } + ] + } + ], + "tests": [ + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-3.3-8B-Instruct]", + "lineno": 25, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_basic[input_output0-Llama-3.3-8B-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-3.3-8B-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.0531630830373615, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider openai does not support model Llama-3.3-8B-Instruct')" + }, + "teardown": { + "duration": 0.0001657919492572546, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-3.3-70B-Instruct]", + "lineno": 25, + "outcome": "skipped", + "keywords": [ + 
"test_chat_non_streaming_basic[input_output0-Llama-3.3-70B-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-3.3-70B-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.006063499953597784, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider openai does not support model Llama-3.3-70B-Instruct')" + }, + "teardown": { + "duration": 0.00014004099648445845, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-4-Scout-17B-16E]", + "lineno": 25, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_basic[input_output0-Llama-4-Scout-17B-16E]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Scout-17B-16E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.005356832989491522, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider openai does not support model Llama-4-Scout-17B-16E')" + }, + "teardown": { + "duration": 0.00016508297994732857, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "lineno": 25, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_basic[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Scout-17B-16E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.006139832898043096, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider openai does not support model Llama-4-Scout-17B-16E-Instruct')" + }, + "teardown": { + "duration": 0.00014450005255639553, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-4-Maverick-17B-128E]", + "lineno": 25, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_basic[input_output0-Llama-4-Maverick-17B-128E]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Maverick-17B-128E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.00542324990965426, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider openai does not support model Llama-4-Maverick-17B-128E')" + }, + "teardown": { + "duration": 0.00014112505596131086, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "lineno": 25, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_basic[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Maverick-17B-128E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.004965625004842877, + "outcome": 
"skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider openai does not support model Llama-4-Maverick-17B-128E-Instruct')" + }, + "teardown": { + "duration": 0.00013720791321247816, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-gpt-4o]", + "lineno": 25, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_basic[input_output0-gpt-4o]", + "parametrize", + "pytestmark", + "input_output0-gpt-4o", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.005054084002040327, + "outcome": "passed" + }, + "call": { + "duration": 0.6271341659594327, + "outcome": "passed" + }, + "teardown": { + "duration": 0.00043925002682954073, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-gpt-4o-mini]", + "lineno": 25, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_basic[input_output0-gpt-4o-mini]", + "parametrize", + "pytestmark", + "input_output0-gpt-4o-mini", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.0159178749890998, + "outcome": "passed" + }, + "call": { + "duration": 0.44088316697161645, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0006467089988291264, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-3.3-8B-Instruct]", + "lineno": 25, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_basic[input_output1-Llama-3.3-8B-Instruct]", + "parametrize", + "pytestmark", + "input_output1-Llama-3.3-8B-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.016705541987903416, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider openai does not support model Llama-3.3-8B-Instruct')" + }, + "teardown": { + "duration": 0.0005769169656559825, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-3.3-70B-Instruct]", + "lineno": 25, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_basic[input_output1-Llama-3.3-70B-Instruct]", + "parametrize", + "pytestmark", + "input_output1-Llama-3.3-70B-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.012067249976098537, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider openai does not support model Llama-3.3-70B-Instruct')" + }, + "teardown": { + "duration": 0.00016683305148035288, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-4-Scout-17B-16E]", + "lineno": 25, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_basic[input_output1-Llama-4-Scout-17B-16E]", + "parametrize", + "pytestmark", + "input_output1-Llama-4-Scout-17B-16E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + 
"llama-stack", + "" + ], + "setup": { + "duration": 0.009295083000324667, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider openai does not support model Llama-4-Scout-17B-16E')" + }, + "teardown": { + "duration": 0.00017204193864017725, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-4-Scout-17B-16E-Instruct]", + "lineno": 25, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_basic[input_output1-Llama-4-Scout-17B-16E-Instruct]", + "parametrize", + "pytestmark", + "input_output1-Llama-4-Scout-17B-16E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.009534333017654717, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider openai does not support model Llama-4-Scout-17B-16E-Instruct')" + }, + "teardown": { + "duration": 0.00020175008103251457, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-4-Maverick-17B-128E]", + "lineno": 25, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_basic[input_output1-Llama-4-Maverick-17B-128E]", + "parametrize", + "pytestmark", + "input_output1-Llama-4-Maverick-17B-128E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.006628665956668556, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider openai does not support model Llama-4-Maverick-17B-128E')" + }, + "teardown": { + "duration": 0.0003687090938910842, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-4-Maverick-17B-128E-Instruct]", + "lineno": 25, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_basic[input_output1-Llama-4-Maverick-17B-128E-Instruct]", + "parametrize", + "pytestmark", + "input_output1-Llama-4-Maverick-17B-128E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.0061322919791564345, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider openai does not support model Llama-4-Maverick-17B-128E-Instruct')" + }, + "teardown": { + "duration": 0.0003664169926196337, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-gpt-4o]", + "lineno": 25, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_basic[input_output1-gpt-4o]", + "parametrize", + "pytestmark", + "input_output1-gpt-4o", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.00623433303553611, + "outcome": "passed" + }, + "call": { + "duration": 0.7898445830214769, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0006602079374715686, + "outcome": "passed" + } + }, + { + "nodeid": 
"tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-gpt-4o-mini]", + "lineno": 25, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_basic[input_output1-gpt-4o-mini]", + "parametrize", + "pytestmark", + "input_output1-gpt-4o-mini", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.014758958015590906, + "outcome": "passed" + }, + "call": { + "duration": 1.1555478329537436, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0011781250359490514, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-3.3-8B-Instruct]", + "lineno": 40, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_basic[input_output0-Llama-3.3-8B-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-3.3-8B-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.03454475000035018, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider openai does not support model Llama-3.3-8B-Instruct')" + }, + "teardown": { + "duration": 0.000967124942690134, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-3.3-70B-Instruct]", + "lineno": 40, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_basic[input_output0-Llama-3.3-70B-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-3.3-70B-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.025206666090525687, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider openai does not support model Llama-3.3-70B-Instruct')" + }, + "teardown": { + "duration": 0.000189624959602952, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-4-Scout-17B-16E]", + "lineno": 40, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_basic[input_output0-Llama-4-Scout-17B-16E]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Scout-17B-16E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.014331333106383681, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider openai does not support model Llama-4-Scout-17B-16E')" + }, + "teardown": { + "duration": 0.00023133307695388794, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "lineno": 40, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_basic[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Scout-17B-16E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.009339665994048119, + "outcome": "skipped", + "longrepr": 
"('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider openai does not support model Llama-4-Scout-17B-16E-Instruct')" + }, + "teardown": { + "duration": 0.00020329200197011232, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-4-Maverick-17B-128E]", + "lineno": 40, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_basic[input_output0-Llama-4-Maverick-17B-128E]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Maverick-17B-128E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.010387042071670294, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider openai does not support model Llama-4-Maverick-17B-128E')" + }, + "teardown": { + "duration": 0.00018254201859235764, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "lineno": 40, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_basic[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Maverick-17B-128E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.012297999928705394, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider openai does not support model Llama-4-Maverick-17B-128E-Instruct')" + }, + "teardown": { + "duration": 0.00018662505317479372, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-gpt-4o]", + "lineno": 40, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_basic[input_output0-gpt-4o]", + "parametrize", + "pytestmark", + "input_output0-gpt-4o", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.006984042003750801, + "outcome": "passed" + }, + "call": { + "duration": 0.32529433304443955, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0033042499562725425, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-gpt-4o-mini]", + "lineno": 40, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_basic[input_output0-gpt-4o-mini]", + "parametrize", + "pytestmark", + "input_output0-gpt-4o-mini", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.01832079200539738, + "outcome": "passed" + }, + "call": { + "duration": 0.48440287495031953, + "outcome": "passed" + }, + "teardown": { + "duration": 0.00047233293298631907, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-3.3-8B-Instruct]", + "lineno": 40, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_basic[input_output1-Llama-3.3-8B-Instruct]", + "parametrize", + "pytestmark", + "input_output1-Llama-3.3-8B-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + 
"llama-stack", + "" + ], + "setup": { + "duration": 0.02893691696226597, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider openai does not support model Llama-3.3-8B-Instruct')" + }, + "teardown": { + "duration": 0.0001747499918565154, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-3.3-70B-Instruct]", + "lineno": 40, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_basic[input_output1-Llama-3.3-70B-Instruct]", + "parametrize", + "pytestmark", + "input_output1-Llama-3.3-70B-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.006553041050210595, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider openai does not support model Llama-3.3-70B-Instruct')" + }, + "teardown": { + "duration": 0.00016829196829348803, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-4-Scout-17B-16E]", + "lineno": 40, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_basic[input_output1-Llama-4-Scout-17B-16E]", + "parametrize", + "pytestmark", + "input_output1-Llama-4-Scout-17B-16E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.013746666954830289, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider openai does not support model Llama-4-Scout-17B-16E')" + }, + "teardown": { + "duration": 0.00019237503875046968, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-4-Scout-17B-16E-Instruct]", + "lineno": 40, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_basic[input_output1-Llama-4-Scout-17B-16E-Instruct]", + "parametrize", + "pytestmark", + "input_output1-Llama-4-Scout-17B-16E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.007175332983024418, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider openai does not support model Llama-4-Scout-17B-16E-Instruct')" + }, + "teardown": { + "duration": 0.0001873329747468233, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-4-Maverick-17B-128E]", + "lineno": 40, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_basic[input_output1-Llama-4-Maverick-17B-128E]", + "parametrize", + "pytestmark", + "input_output1-Llama-4-Maverick-17B-128E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.006127291941083968, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider openai does not support model Llama-4-Maverick-17B-128E')" + }, + "teardown": { + "duration": 0.00019004102796316147, + "outcome": "passed" + } + }, + { + 
"nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-4-Maverick-17B-128E-Instruct]", + "lineno": 40, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_basic[input_output1-Llama-4-Maverick-17B-128E-Instruct]", + "parametrize", + "pytestmark", + "input_output1-Llama-4-Maverick-17B-128E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.006421791040338576, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider openai does not support model Llama-4-Maverick-17B-128E-Instruct')" + }, + "teardown": { + "duration": 0.0001611249754205346, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-gpt-4o]", + "lineno": 40, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_basic[input_output1-gpt-4o]", + "parametrize", + "pytestmark", + "input_output1-gpt-4o", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.009806249989196658, + "outcome": "passed" + }, + "call": { + "duration": 0.9556747920578346, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0004937920020893216, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-gpt-4o-mini]", + "lineno": 40, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_basic[input_output1-gpt-4o-mini]", + "parametrize", + "pytestmark", + "input_output1-gpt-4o-mini", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.03146500000730157, + "outcome": "passed" + }, + "call": { + "duration": 1.082494750036858, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0006242080125957727, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-Llama-4-Scout-17B-16E]", + "lineno": 60, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_image[input_output0-Llama-4-Scout-17B-16E]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Scout-17B-16E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.021534667001105845, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 61, 'Skipped: Provider openai does not support model Llama-4-Scout-17B-16E')" + }, + "teardown": { + "duration": 0.0003469999646767974, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "lineno": 60, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_image[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Scout-17B-16E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.025929750059731305, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 61, 'Skipped: Provider openai does not support 
model Llama-4-Scout-17B-16E-Instruct')" + }, + "teardown": { + "duration": 0.0008774169255048037, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-Llama-4-Maverick-17B-128E]", + "lineno": 60, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_image[input_output0-Llama-4-Maverick-17B-128E]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Maverick-17B-128E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.012507125036790967, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 61, 'Skipped: Provider openai does not support model Llama-4-Maverick-17B-128E')" + }, + "teardown": { + "duration": 0.00022008304949849844, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "lineno": 60, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_image[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Maverick-17B-128E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.008156375028192997, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 61, 'Skipped: Provider openai does not support model Llama-4-Maverick-17B-128E-Instruct')" + }, + "teardown": { + "duration": 0.0002079169498756528, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-gpt-4o]", + "lineno": 60, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_image[input_output0-gpt-4o]", + "parametrize", + "pytestmark", + "input_output0-gpt-4o", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.012587749981321394, + "outcome": "passed" + }, + "call": { + "duration": 2.7379885419504717, + "outcome": "passed" + }, + "teardown": { + "duration": 0.00044579198583960533, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-gpt-4o-mini]", + "lineno": 60, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_image[input_output0-gpt-4o-mini]", + "parametrize", + "pytestmark", + "input_output0-gpt-4o-mini", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.017111250082962215, + "outcome": "passed" + }, + "call": { + "duration": 2.599374584038742, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0009177909232676029, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-Llama-4-Scout-17B-16E]", + "lineno": 75, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_image[input_output0-Llama-4-Scout-17B-16E]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Scout-17B-16E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.02198700001463294, + "outcome": "skipped", + "longrepr": 
"('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 76, 'Skipped: Provider openai does not support model Llama-4-Scout-17B-16E')" + }, + "teardown": { + "duration": 0.00042749999556690454, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "lineno": 75, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_image[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Scout-17B-16E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.015032917028293014, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 76, 'Skipped: Provider openai does not support model Llama-4-Scout-17B-16E-Instruct')" + }, + "teardown": { + "duration": 0.00041016703471541405, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-Llama-4-Maverick-17B-128E]", + "lineno": 75, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_image[input_output0-Llama-4-Maverick-17B-128E]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Maverick-17B-128E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.013976250076666474, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 76, 'Skipped: Provider openai does not support model Llama-4-Maverick-17B-128E')" + }, + "teardown": { + "duration": 0.00027600000612437725, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "lineno": 75, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_image[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Maverick-17B-128E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.00799729092977941, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 76, 'Skipped: Provider openai does not support model Llama-4-Maverick-17B-128E-Instruct')" + }, + "teardown": { + "duration": 0.00020320899784564972, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-gpt-4o]", + "lineno": 75, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_image[input_output0-gpt-4o]", + "parametrize", + "pytestmark", + "input_output0-gpt-4o", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.010483540943823755, + "outcome": "passed" + }, + "call": { + "duration": 4.249965250026435, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0008596250554546714, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-gpt-4o-mini]", + "lineno": 75, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_image[input_output0-gpt-4o-mini]", 
+ "parametrize", + "pytestmark", + "input_output0-gpt-4o-mini", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.018141582957468927, + "outcome": "passed" + }, + "call": { + "duration": 2.297856790944934, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0005075830267742276, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-3.3-8B-Instruct]", + "lineno": 95, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_structured_output[input_output0-Llama-3.3-8B-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-3.3-8B-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.017144332989118993, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider openai does not support model Llama-3.3-8B-Instruct')" + }, + "teardown": { + "duration": 0.0006829580524936318, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-3.3-70B-Instruct]", + "lineno": 95, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_structured_output[input_output0-Llama-3.3-70B-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-3.3-70B-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.009827250032685697, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider openai does not support model Llama-3.3-70B-Instruct')" + }, + "teardown": { + "duration": 0.00024204188957810402, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E]", + "lineno": 95, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Scout-17B-16E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.006737958989106119, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider openai does not support model Llama-4-Scout-17B-16E')" + }, + "teardown": { + "duration": 0.00022729102056473494, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "lineno": 95, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Scout-17B-16E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.006030917051248252, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider openai does not support model 
Llama-4-Scout-17B-16E-Instruct')" + }, + "teardown": { + "duration": 0.00022229203023016453, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E]", + "lineno": 95, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Maverick-17B-128E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.009183833957649767, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider openai does not support model Llama-4-Maverick-17B-128E')" + }, + "teardown": { + "duration": 0.00022629194427281618, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "lineno": 95, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Maverick-17B-128E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.007097500027157366, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider openai does not support model Llama-4-Maverick-17B-128E-Instruct')" + }, + "teardown": { + "duration": 0.00826825003605336, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-gpt-4o]", + "lineno": 95, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_structured_output[input_output0-gpt-4o]", + "parametrize", + "pytestmark", + "input_output0-gpt-4o", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.006604874972254038, + "outcome": "passed" + }, + "call": { + "duration": 1.4057738750707358, + "outcome": "passed" + }, + "teardown": { + "duration": 0.000506040989421308, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-gpt-4o-mini]", + "lineno": 95, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_structured_output[input_output0-gpt-4o-mini]", + "parametrize", + "pytestmark", + "input_output0-gpt-4o-mini", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.015966624952852726, + "outcome": "passed" + }, + "call": { + "duration": 0.540478374925442, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0009536249563097954, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-3.3-8B-Instruct]", + "lineno": 95, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_structured_output[input_output1-Llama-3.3-8B-Instruct]", + "parametrize", + "pytestmark", + "input_output1-Llama-3.3-8B-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + 
"tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.020631707971915603, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider openai does not support model Llama-3.3-8B-Instruct')" + }, + "teardown": { + "duration": 0.0004928340204060078, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-3.3-70B-Instruct]", + "lineno": 95, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_structured_output[input_output1-Llama-3.3-70B-Instruct]", + "parametrize", + "pytestmark", + "input_output1-Llama-3.3-70B-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.016745459055528045, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider openai does not support model Llama-3.3-70B-Instruct')" + }, + "teardown": { + "duration": 0.0003412909572944045, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E]", + "lineno": 95, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E]", + "parametrize", + "pytestmark", + "input_output1-Llama-4-Scout-17B-16E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.012252667103894055, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider openai does not support model Llama-4-Scout-17B-16E')" + }, + "teardown": { + "duration": 0.00028650008607655764, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E-Instruct]", + "lineno": 95, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E-Instruct]", + "parametrize", + "pytestmark", + "input_output1-Llama-4-Scout-17B-16E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.01128904102370143, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider openai does not support model Llama-4-Scout-17B-16E-Instruct')" + }, + "teardown": { + "duration": 0.00027041707653552294, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E]", + "lineno": 95, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E]", + "parametrize", + "pytestmark", + "input_output1-Llama-4-Maverick-17B-128E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.009191332967020571, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider openai does not 
support model Llama-4-Maverick-17B-128E')" + }, + "teardown": { + "duration": 0.0002074999501928687, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E-Instruct]", + "lineno": 95, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E-Instruct]", + "parametrize", + "pytestmark", + "input_output1-Llama-4-Maverick-17B-128E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.007687666919082403, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider openai does not support model Llama-4-Maverick-17B-128E-Instruct')" + }, + "teardown": { + "duration": 0.0002027079463005066, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-gpt-4o]", + "lineno": 95, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_structured_output[input_output1-gpt-4o]", + "parametrize", + "pytestmark", + "input_output1-gpt-4o", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.007542708073742688, + "outcome": "passed" + }, + "call": { + "duration": 4.244797708000988, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0012778330128639936, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-gpt-4o-mini]", + "lineno": 95, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_structured_output[input_output1-gpt-4o-mini]", + "parametrize", + "pytestmark", + "input_output1-gpt-4o-mini", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.026919999974779785, + "outcome": "passed" + }, + "call": { + "duration": 9.006108874920756, + "outcome": "passed" + }, + "teardown": { + "duration": 0.00046324997674673796, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-3.3-8B-Instruct]", + "lineno": 117, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_structured_output[input_output0-Llama-3.3-8B-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-3.3-8B-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.01554666692391038, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider openai does not support model Llama-3.3-8B-Instruct')" + }, + "teardown": { + "duration": 0.0004023330984637141, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-3.3-70B-Instruct]", + "lineno": 117, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_structured_output[input_output0-Llama-3.3-70B-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-3.3-70B-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + 
"llama-stack", + "" + ], + "setup": { + "duration": 0.007354958914220333, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider openai does not support model Llama-3.3-70B-Instruct')" + }, + "teardown": { + "duration": 0.0002900830004364252, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E]", + "lineno": 117, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Scout-17B-16E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.017274250043556094, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider openai does not support model Llama-4-Scout-17B-16E')" + }, + "teardown": { + "duration": 0.0002668329980224371, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "lineno": 117, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Scout-17B-16E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.006813667016103864, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider openai does not support model Llama-4-Scout-17B-16E-Instruct')" + }, + "teardown": { + "duration": 0.00024500000290572643, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E]", + "lineno": 117, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Maverick-17B-128E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.007385291974060237, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider openai does not support model Llama-4-Maverick-17B-128E')" + }, + "teardown": { + "duration": 0.00017024995759129524, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "lineno": 117, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Maverick-17B-128E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.00857366609852761, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider openai does not 
support model Llama-4-Maverick-17B-128E-Instruct')" + }, + "teardown": { + "duration": 0.00016850000247359276, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-gpt-4o]", + "lineno": 117, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_structured_output[input_output0-gpt-4o]", + "parametrize", + "pytestmark", + "input_output0-gpt-4o", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.005570041947066784, + "outcome": "passed" + }, + "call": { + "duration": 0.8564215000951663, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0004029169213026762, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-gpt-4o-mini]", + "lineno": 117, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_structured_output[input_output0-gpt-4o-mini]", + "parametrize", + "pytestmark", + "input_output0-gpt-4o-mini", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.00786762498319149, + "outcome": "passed" + }, + "call": { + "duration": 0.6419672920601442, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0005102079594507813, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-3.3-8B-Instruct]", + "lineno": 117, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_structured_output[input_output1-Llama-3.3-8B-Instruct]", + "parametrize", + "pytestmark", + "input_output1-Llama-3.3-8B-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.017147499951533973, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider openai does not support model Llama-3.3-8B-Instruct')" + }, + "teardown": { + "duration": 0.00032350001856684685, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-3.3-70B-Instruct]", + "lineno": 117, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_structured_output[input_output1-Llama-3.3-70B-Instruct]", + "parametrize", + "pytestmark", + "input_output1-Llama-3.3-70B-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.01194737502373755, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider openai does not support model Llama-3.3-70B-Instruct')" + }, + "teardown": { + "duration": 0.0005004579434171319, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E]", + "lineno": 117, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E]", + "parametrize", + "pytestmark", + "input_output1-Llama-4-Scout-17B-16E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 
0.010250666993670166, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider openai does not support model Llama-4-Scout-17B-16E')" + }, + "teardown": { + "duration": 0.00022554199676960707, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E-Instruct]", + "lineno": 117, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E-Instruct]", + "parametrize", + "pytestmark", + "input_output1-Llama-4-Scout-17B-16E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.007847042055800557, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider openai does not support model Llama-4-Scout-17B-16E-Instruct')" + }, + "teardown": { + "duration": 0.000283458037301898, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E]", + "lineno": 117, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E]", + "parametrize", + "pytestmark", + "input_output1-Llama-4-Maverick-17B-128E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.008078000042587519, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider openai does not support model Llama-4-Maverick-17B-128E')" + }, + "teardown": { + "duration": 0.0001794169656932354, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E-Instruct]", + "lineno": 117, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E-Instruct]", + "parametrize", + "pytestmark", + "input_output1-Llama-4-Maverick-17B-128E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.007204750087112188, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider openai does not support model Llama-4-Maverick-17B-128E-Instruct')" + }, + "teardown": { + "duration": 0.00017725001089274883, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-gpt-4o]", + "lineno": 117, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_structured_output[input_output1-gpt-4o]", + "parametrize", + "pytestmark", + "input_output1-gpt-4o", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.006797667010687292, + "outcome": "passed" + }, + "call": { + "duration": 5.411579457926564, + "outcome": "passed" + }, + "teardown": { + "duration": 0.001134666963480413, + "outcome": "passed" + } + }, + { + "nodeid": 
"tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-gpt-4o-mini]", + "lineno": 117, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_structured_output[input_output1-gpt-4o-mini]", + "parametrize", + "pytestmark", + "input_output1-gpt-4o-mini", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.025059624924324453, + "outcome": "passed" + }, + "call": { + "duration": 9.112342999898829, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0009202499641105533, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-Llama-3.3-70B-Instruct]", + "lineno": 138, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_tool_calling[input_output0-Llama-3.3-70B-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-3.3-70B-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.024287916952744126, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 139, 'Skipped: Provider openai does not support model Llama-3.3-70B-Instruct')" + }, + "teardown": { + "duration": 0.00015587499365210533, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-Llama-4-Scout-17B-16E]", + "lineno": 138, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_tool_calling[input_output0-Llama-4-Scout-17B-16E]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Scout-17B-16E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.006531457998789847, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 139, 'Skipped: Provider openai does not support model Llama-4-Scout-17B-16E')" + }, + "teardown": { + "duration": 0.00014670798555016518, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "lineno": 138, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_tool_calling[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Scout-17B-16E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.006190375075675547, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 139, 'Skipped: Provider openai does not support model Llama-4-Scout-17B-16E-Instruct')" + }, + "teardown": { + "duration": 0.0001603750279173255, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-Llama-4-Maverick-17B-128E]", + "lineno": 138, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_tool_calling[input_output0-Llama-4-Maverick-17B-128E]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Maverick-17B-128E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + 
"setup": { + "duration": 0.005670750048011541, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 139, 'Skipped: Provider openai does not support model Llama-4-Maverick-17B-128E')" + }, + "teardown": { + "duration": 0.0001479999627918005, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "lineno": 138, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_tool_calling[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Maverick-17B-128E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.005662833107635379, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 139, 'Skipped: Provider openai does not support model Llama-4-Maverick-17B-128E-Instruct')" + }, + "teardown": { + "duration": 0.0001480829669162631, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-gpt-4o]", + "lineno": 138, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_tool_calling[input_output0-gpt-4o]", + "parametrize", + "pytestmark", + "input_output0-gpt-4o", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.00573637499473989, + "outcome": "passed" + }, + "call": { + "duration": 0.6269576249178499, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0010142088867723942, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-gpt-4o-mini]", + "lineno": 138, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_tool_calling[input_output0-gpt-4o-mini]", + "parametrize", + "pytestmark", + "input_output0-gpt-4o-mini", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.01623620803002268, + "outcome": "passed" + }, + "call": { + "duration": 0.7144521250156686, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0011040839599445462, + "outcome": "passed" + } + } + ] +} diff --git a/tests/verifications/test_results/together_1744154399.json b/tests/verifications/test_results/together_1744154399.json new file mode 100644 index 000000000..ae801e83b --- /dev/null +++ b/tests/verifications/test_results/together_1744154399.json @@ -0,0 +1,2830 @@ +{ + "created": 1744154470.9868789, + "duration": 59.6187219619751, + "exitcode": 1, + "root": "/Users/erichuang/projects/llama-stack", + "environment": {}, + "summary": { + "skipped": 52, + "passed": 21, + "failed": 10, + "total": 83, + "collected": 83 + }, + "collectors": [ + { + "nodeid": "", + "outcome": "passed", + "result": [ + { + "nodeid": "tests/verifications/openai/test_chat_completion.py", + "type": "Module" + } + ] + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py", + "outcome": "passed", + "result": [ + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-3.3-8B-Instruct]", + "type": "Function", + "lineno": 25 + }, + { + "nodeid": 
"tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-3.3-70B-Instruct]", + "type": "Function", + "lineno": 25 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-4-Scout-17B-16E]", + "type": "Function", + "lineno": 25 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "type": "Function", + "lineno": 25 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-4-Maverick-17B-128E]", + "type": "Function", + "lineno": 25 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "type": "Function", + "lineno": 25 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-gpt-4o]", + "type": "Function", + "lineno": 25 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-gpt-4o-mini]", + "type": "Function", + "lineno": 25 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-3.3-8B-Instruct]", + "type": "Function", + "lineno": 25 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-3.3-70B-Instruct]", + "type": "Function", + "lineno": 25 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-4-Scout-17B-16E]", + "type": "Function", + "lineno": 25 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-4-Scout-17B-16E-Instruct]", + "type": "Function", + "lineno": 25 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-4-Maverick-17B-128E]", + "type": "Function", + "lineno": 25 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-4-Maverick-17B-128E-Instruct]", + "type": "Function", + "lineno": 25 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-gpt-4o]", + "type": "Function", + "lineno": 25 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-gpt-4o-mini]", + "type": "Function", + "lineno": 25 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-3.3-8B-Instruct]", + "type": "Function", + "lineno": 40 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-3.3-70B-Instruct]", + "type": "Function", + "lineno": 40 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-4-Scout-17B-16E]", + "type": "Function", + "lineno": 40 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "type": "Function", + "lineno": 40 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-4-Maverick-17B-128E]", + "type": 
"Function", + "lineno": 40 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "type": "Function", + "lineno": 40 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-gpt-4o]", + "type": "Function", + "lineno": 40 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-gpt-4o-mini]", + "type": "Function", + "lineno": 40 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-3.3-8B-Instruct]", + "type": "Function", + "lineno": 40 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-3.3-70B-Instruct]", + "type": "Function", + "lineno": 40 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-4-Scout-17B-16E]", + "type": "Function", + "lineno": 40 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-4-Scout-17B-16E-Instruct]", + "type": "Function", + "lineno": 40 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-4-Maverick-17B-128E]", + "type": "Function", + "lineno": 40 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-4-Maverick-17B-128E-Instruct]", + "type": "Function", + "lineno": 40 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-gpt-4o]", + "type": "Function", + "lineno": 40 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-gpt-4o-mini]", + "type": "Function", + "lineno": 40 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-Llama-4-Scout-17B-16E]", + "type": "Function", + "lineno": 60 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "type": "Function", + "lineno": 60 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-Llama-4-Maverick-17B-128E]", + "type": "Function", + "lineno": 60 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "type": "Function", + "lineno": 60 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-gpt-4o]", + "type": "Function", + "lineno": 60 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-gpt-4o-mini]", + "type": "Function", + "lineno": 60 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-Llama-4-Scout-17B-16E]", + "type": "Function", + "lineno": 75 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "type": "Function", + "lineno": 75 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-Llama-4-Maverick-17B-128E]", + "type": 
"Function", + "lineno": 75 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "type": "Function", + "lineno": 75 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-gpt-4o]", + "type": "Function", + "lineno": 75 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-gpt-4o-mini]", + "type": "Function", + "lineno": 75 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-3.3-8B-Instruct]", + "type": "Function", + "lineno": 95 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-3.3-70B-Instruct]", + "type": "Function", + "lineno": 95 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E]", + "type": "Function", + "lineno": 95 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "type": "Function", + "lineno": 95 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E]", + "type": "Function", + "lineno": 95 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "type": "Function", + "lineno": 95 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-gpt-4o]", + "type": "Function", + "lineno": 95 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-gpt-4o-mini]", + "type": "Function", + "lineno": 95 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-3.3-8B-Instruct]", + "type": "Function", + "lineno": 95 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-3.3-70B-Instruct]", + "type": "Function", + "lineno": 95 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E]", + "type": "Function", + "lineno": 95 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E-Instruct]", + "type": "Function", + "lineno": 95 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E]", + "type": "Function", + "lineno": 95 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E-Instruct]", + "type": "Function", + "lineno": 95 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-gpt-4o]", + "type": "Function", + "lineno": 95 + }, + { + "nodeid": 
"tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-gpt-4o-mini]", + "type": "Function", + "lineno": 95 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-3.3-8B-Instruct]", + "type": "Function", + "lineno": 117 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-3.3-70B-Instruct]", + "type": "Function", + "lineno": 117 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E]", + "type": "Function", + "lineno": 117 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "type": "Function", + "lineno": 117 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E]", + "type": "Function", + "lineno": 117 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "type": "Function", + "lineno": 117 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-gpt-4o]", + "type": "Function", + "lineno": 117 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-gpt-4o-mini]", + "type": "Function", + "lineno": 117 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-3.3-8B-Instruct]", + "type": "Function", + "lineno": 117 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-3.3-70B-Instruct]", + "type": "Function", + "lineno": 117 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E]", + "type": "Function", + "lineno": 117 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E-Instruct]", + "type": "Function", + "lineno": 117 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E]", + "type": "Function", + "lineno": 117 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E-Instruct]", + "type": "Function", + "lineno": 117 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-gpt-4o]", + "type": "Function", + "lineno": 117 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-gpt-4o-mini]", + "type": "Function", + "lineno": 117 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-Llama-3.3-70B-Instruct]", + "type": "Function", + "lineno": 138 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-Llama-4-Scout-17B-16E]", + "type": "Function", + 
"lineno": 138 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "type": "Function", + "lineno": 138 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-Llama-4-Maverick-17B-128E]", + "type": "Function", + "lineno": 138 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "type": "Function", + "lineno": 138 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-gpt-4o]", + "type": "Function", + "lineno": 138 + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-gpt-4o-mini]", + "type": "Function", + "lineno": 138 + } + ] + } + ], + "tests": [ + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-3.3-8B-Instruct]", + "lineno": 25, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_basic[input_output0-Llama-3.3-8B-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-3.3-8B-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.39231995795853436, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider together does not support model Llama-3.3-8B-Instruct')" + }, + "teardown": { + "duration": 0.0002014160854741931, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-3.3-70B-Instruct]", + "lineno": 25, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_basic[input_output0-Llama-3.3-70B-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-3.3-70B-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.0071710830088704824, + "outcome": "passed" + }, + "call": { + "duration": 0.7968309168936685, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0004362498875707388, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-4-Scout-17B-16E]", + "lineno": 25, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_basic[input_output0-Llama-4-Scout-17B-16E]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Scout-17B-16E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.012780916062183678, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider together does not support model Llama-4-Scout-17B-16E')" + }, + "teardown": { + "duration": 0.00029158301185816526, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "lineno": 25, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_basic[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "parametrize", + "pytestmark", + 
"input_output0-Llama-4-Scout-17B-16E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.013563874992541969, + "outcome": "passed" + }, + "call": { + "duration": 0.5071627920260653, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0005456249928101897, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-4-Maverick-17B-128E]", + "lineno": 25, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_basic[input_output0-Llama-4-Maverick-17B-128E]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Maverick-17B-128E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.020708917058072984, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider together does not support model Llama-4-Maverick-17B-128E')" + }, + "teardown": { + "duration": 0.00030325003899633884, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "lineno": 25, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_basic[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Maverick-17B-128E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.014170082984492183, + "outcome": "passed" + }, + "call": { + "duration": 1.2383921250002459, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0009597090538591146, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-gpt-4o]", + "lineno": 25, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_basic[input_output0-gpt-4o]", + "parametrize", + "pytestmark", + "input_output0-gpt-4o", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.013402250013314188, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider together does not support model gpt-4o')" + }, + "teardown": { + "duration": 0.00028245802968740463, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-gpt-4o-mini]", + "lineno": 25, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_basic[input_output0-gpt-4o-mini]", + "parametrize", + "pytestmark", + "input_output0-gpt-4o-mini", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.008693707990460098, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider together does not support model gpt-4o-mini')" + }, + "teardown": { + "duration": 0.00016249995678663254, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-3.3-8B-Instruct]", + "lineno": 25, + "outcome": "skipped", + 
"keywords": [ + "test_chat_non_streaming_basic[input_output1-Llama-3.3-8B-Instruct]", + "parametrize", + "pytestmark", + "input_output1-Llama-3.3-8B-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.005904874997213483, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider together does not support model Llama-3.3-8B-Instruct')" + }, + "teardown": { + "duration": 0.0001960420049726963, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-3.3-70B-Instruct]", + "lineno": 25, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_basic[input_output1-Llama-3.3-70B-Instruct]", + "parametrize", + "pytestmark", + "input_output1-Llama-3.3-70B-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.006532749976031482, + "outcome": "passed" + }, + "call": { + "duration": 0.5410778749501333, + "outcome": "passed" + }, + "teardown": { + "duration": 0.00019516597967594862, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-4-Scout-17B-16E]", + "lineno": 25, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_basic[input_output1-Llama-4-Scout-17B-16E]", + "parametrize", + "pytestmark", + "input_output1-Llama-4-Scout-17B-16E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.009374375105835497, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider together does not support model Llama-4-Scout-17B-16E')" + }, + "teardown": { + "duration": 0.00015524995978921652, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-4-Scout-17B-16E-Instruct]", + "lineno": 25, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_basic[input_output1-Llama-4-Scout-17B-16E-Instruct]", + "parametrize", + "pytestmark", + "input_output1-Llama-4-Scout-17B-16E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.007205875008367002, + "outcome": "passed" + }, + "call": { + "duration": 0.42584729101508856, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0009506250498816371, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-4-Maverick-17B-128E]", + "lineno": 25, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_basic[input_output1-Llama-4-Maverick-17B-128E]", + "parametrize", + "pytestmark", + "input_output1-Llama-4-Maverick-17B-128E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.029625958995893598, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider together does not support model Llama-4-Maverick-17B-128E')" + }, + "teardown": { + "duration": 0.0001860830234363675, + "outcome": 
"passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-4-Maverick-17B-128E-Instruct]", + "lineno": 25, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_basic[input_output1-Llama-4-Maverick-17B-128E-Instruct]", + "parametrize", + "pytestmark", + "input_output1-Llama-4-Maverick-17B-128E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.023576707928441465, + "outcome": "passed" + }, + "call": { + "duration": 1.2249365829629824, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0004278330598026514, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-gpt-4o]", + "lineno": 25, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_basic[input_output1-gpt-4o]", + "parametrize", + "pytestmark", + "input_output1-gpt-4o", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.014816291979514062, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider together does not support model gpt-4o')" + }, + "teardown": { + "duration": 0.00029558304231613874, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-gpt-4o-mini]", + "lineno": 25, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_basic[input_output1-gpt-4o-mini]", + "parametrize", + "pytestmark", + "input_output1-gpt-4o-mini", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.012769333901815116, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider together does not support model gpt-4o-mini')" + }, + "teardown": { + "duration": 0.00024329195730388165, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-3.3-8B-Instruct]", + "lineno": 40, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_basic[input_output0-Llama-3.3-8B-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-3.3-8B-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.009145625052042305, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider together does not support model Llama-3.3-8B-Instruct')" + }, + "teardown": { + "duration": 0.00021195888984948397, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-3.3-70B-Instruct]", + "lineno": 40, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_basic[input_output0-Llama-3.3-70B-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-3.3-70B-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.0133140409598127, + "outcome": "passed" + }, + "call": { + "duration": 0.7228892090497538, + "outcome": 
"passed" + }, + "teardown": { + "duration": 0.0004301250446587801, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-4-Scout-17B-16E]", + "lineno": 40, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_basic[input_output0-Llama-4-Scout-17B-16E]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Scout-17B-16E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.013998750015161932, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider together does not support model Llama-4-Scout-17B-16E')" + }, + "teardown": { + "duration": 0.0002961249556392431, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "lineno": 40, + "outcome": "failed", + "keywords": [ + "test_chat_streaming_basic[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Scout-17B-16E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.012570249964483082, + "outcome": "passed" + }, + "call": { + "duration": 0.7193170419195667, + "outcome": "failed", + "crash": { + "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py", + "lineno": 54, + "message": "IndexError: list index out of range" + }, + "traceback": [ + { + "path": "tests/verifications/openai/test_chat_completion.py", + "lineno": 54, + "message": "IndexError" + } + ], + "longrepr": "openai_client = \ninput_output = {'input': {'messages': [{'content': 'Which planet do humans live on?', 'role': 'user'}]}, 'output': 'Earth'}\ncorrect_model_name = 'meta-llama/Llama-4-Scout-17B-16E-Instruct'\n\n @pytest.mark.parametrize(\"model\", chat_completion_test_cases[\"test_chat_basic\"][\"test_params\"][\"model\"])\n @pytest.mark.parametrize(\n \"input_output\",\n chat_completion_test_cases[\"test_chat_basic\"][\"test_params\"][\"input_output\"],\n )\n def test_chat_streaming_basic(openai_client, input_output, correct_model_name):\n response = openai_client.chat.completions.create(\n model=correct_model_name,\n messages=input_output[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai/test_chat_completion.py:54: IndexError" + }, + "teardown": { + "duration": 0.00022504094522446394, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-4-Maverick-17B-128E]", + "lineno": 40, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_basic[input_output0-Llama-4-Maverick-17B-128E]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Maverick-17B-128E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.006660082959569991, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider together does not support model Llama-4-Maverick-17B-128E')" + }, + "teardown": { + "duration": 
0.0001445829402655363, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "lineno": 40, + "outcome": "failed", + "keywords": [ + "test_chat_streaming_basic[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Maverick-17B-128E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.021228999947197735, + "outcome": "passed" + }, + "call": { + "duration": 1.5670281670754775, + "outcome": "failed", + "crash": { + "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py", + "lineno": 54, + "message": "IndexError: list index out of range" + }, + "traceback": [ + { + "path": "tests/verifications/openai/test_chat_completion.py", + "lineno": 54, + "message": "IndexError" + } + ], + "longrepr": "openai_client = \ninput_output = {'input': {'messages': [{'content': 'Which planet do humans live on?', 'role': 'user'}]}, 'output': 'Earth'}\ncorrect_model_name = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\n\n @pytest.mark.parametrize(\"model\", chat_completion_test_cases[\"test_chat_basic\"][\"test_params\"][\"model\"])\n @pytest.mark.parametrize(\n \"input_output\",\n chat_completion_test_cases[\"test_chat_basic\"][\"test_params\"][\"input_output\"],\n )\n def test_chat_streaming_basic(openai_client, input_output, correct_model_name):\n response = openai_client.chat.completions.create(\n model=correct_model_name,\n messages=input_output[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai/test_chat_completion.py:54: IndexError" + }, + "teardown": { + "duration": 0.0004656669916585088, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-gpt-4o]", + "lineno": 40, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_basic[input_output0-gpt-4o]", + "parametrize", + "pytestmark", + "input_output0-gpt-4o", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.009595917072147131, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider together does not support model gpt-4o')" + }, + "teardown": { + "duration": 0.00025625003036111593, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-gpt-4o-mini]", + "lineno": 40, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_basic[input_output0-gpt-4o-mini]", + "parametrize", + "pytestmark", + "input_output0-gpt-4o-mini", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.009242708911187947, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider together does not support model gpt-4o-mini')" + }, + "teardown": { + "duration": 0.0002484159776940942, + "outcome": "passed" + } + }, + { + "nodeid": 
"tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-3.3-8B-Instruct]", + "lineno": 40, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_basic[input_output1-Llama-3.3-8B-Instruct]", + "parametrize", + "pytestmark", + "input_output1-Llama-3.3-8B-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.00905474997125566, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider together does not support model Llama-3.3-8B-Instruct')" + }, + "teardown": { + "duration": 0.00023312494158744812, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-3.3-70B-Instruct]", + "lineno": 40, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_basic[input_output1-Llama-3.3-70B-Instruct]", + "parametrize", + "pytestmark", + "input_output1-Llama-3.3-70B-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.007183165987953544, + "outcome": "passed" + }, + "call": { + "duration": 1.0667660840554163, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0005163750611245632, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-4-Scout-17B-16E]", + "lineno": 40, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_basic[input_output1-Llama-4-Scout-17B-16E]", + "parametrize", + "pytestmark", + "input_output1-Llama-4-Scout-17B-16E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.05233616603072733, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider together does not support model Llama-4-Scout-17B-16E')" + }, + "teardown": { + "duration": 0.0003471659729257226, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-4-Scout-17B-16E-Instruct]", + "lineno": 40, + "outcome": "failed", + "keywords": [ + "test_chat_streaming_basic[input_output1-Llama-4-Scout-17B-16E-Instruct]", + "parametrize", + "pytestmark", + "input_output1-Llama-4-Scout-17B-16E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.015932541922666132, + "outcome": "passed" + }, + "call": { + "duration": 0.41540695796720684, + "outcome": "failed", + "crash": { + "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py", + "lineno": 54, + "message": "IndexError: list index out of range" + }, + "traceback": [ + { + "path": "tests/verifications/openai/test_chat_completion.py", + "lineno": 54, + "message": "IndexError" + } + ], + "longrepr": "openai_client = \ninput_output = {'input': {'messages': [{'content': 'Which planet has rings around it with a name starting with letter S?', 'role': 'user'}]}, 'output': 'Saturn'}\ncorrect_model_name = 'meta-llama/Llama-4-Scout-17B-16E-Instruct'\n\n @pytest.mark.parametrize(\"model\", chat_completion_test_cases[\"test_chat_basic\"][\"test_params\"][\"model\"])\n @pytest.mark.parametrize(\n 
\"input_output\",\n chat_completion_test_cases[\"test_chat_basic\"][\"test_params\"][\"input_output\"],\n )\n def test_chat_streaming_basic(openai_client, input_output, correct_model_name):\n response = openai_client.chat.completions.create(\n model=correct_model_name,\n messages=input_output[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai/test_chat_completion.py:54: IndexError" + }, + "teardown": { + "duration": 0.0002845840062946081, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-4-Maverick-17B-128E]", + "lineno": 40, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_basic[input_output1-Llama-4-Maverick-17B-128E]", + "parametrize", + "pytestmark", + "input_output1-Llama-4-Maverick-17B-128E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.007243875064887106, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider together does not support model Llama-4-Maverick-17B-128E')" + }, + "teardown": { + "duration": 0.00016258296091109514, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-4-Maverick-17B-128E-Instruct]", + "lineno": 40, + "outcome": "failed", + "keywords": [ + "test_chat_streaming_basic[input_output1-Llama-4-Maverick-17B-128E-Instruct]", + "parametrize", + "pytestmark", + "input_output1-Llama-4-Maverick-17B-128E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.009275624994188547, + "outcome": "passed" + }, + "call": { + "duration": 1.43309554096777, + "outcome": "failed", + "crash": { + "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py", + "lineno": 54, + "message": "IndexError: list index out of range" + }, + "traceback": [ + { + "path": "tests/verifications/openai/test_chat_completion.py", + "lineno": 54, + "message": "IndexError" + } + ], + "longrepr": "openai_client = \ninput_output = {'input': {'messages': [{'content': 'Which planet has rings around it with a name starting with letter S?', 'role': 'user'}]}, 'output': 'Saturn'}\ncorrect_model_name = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\n\n @pytest.mark.parametrize(\"model\", chat_completion_test_cases[\"test_chat_basic\"][\"test_params\"][\"model\"])\n @pytest.mark.parametrize(\n \"input_output\",\n chat_completion_test_cases[\"test_chat_basic\"][\"test_params\"][\"input_output\"],\n )\n def test_chat_streaming_basic(openai_client, input_output, correct_model_name):\n response = openai_client.chat.completions.create(\n model=correct_model_name,\n messages=input_output[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai/test_chat_completion.py:54: IndexError" + }, + "teardown": { + "duration": 0.0003690000157803297, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-gpt-4o]", + "lineno": 40, + 
"outcome": "skipped", + "keywords": [ + "test_chat_streaming_basic[input_output1-gpt-4o]", + "parametrize", + "pytestmark", + "input_output1-gpt-4o", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.011570582981221378, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider together does not support model gpt-4o')" + }, + "teardown": { + "duration": 0.00024937500711530447, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-gpt-4o-mini]", + "lineno": 40, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_basic[input_output1-gpt-4o-mini]", + "parametrize", + "pytestmark", + "input_output1-gpt-4o-mini", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.010756584000773728, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider together does not support model gpt-4o-mini')" + }, + "teardown": { + "duration": 0.00026183295994997025, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-Llama-4-Scout-17B-16E]", + "lineno": 60, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_image[input_output0-Llama-4-Scout-17B-16E]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Scout-17B-16E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.008863041992299259, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 61, 'Skipped: Provider together does not support model Llama-4-Scout-17B-16E')" + }, + "teardown": { + "duration": 0.00023283297196030617, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "lineno": 60, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_image[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Scout-17B-16E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.007975792046636343, + "outcome": "passed" + }, + "call": { + "duration": 2.1585817909799516, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0005107080796733499, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-Llama-4-Maverick-17B-128E]", + "lineno": 60, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_image[input_output0-Llama-4-Maverick-17B-128E]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Maverick-17B-128E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.05228079203516245, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 61, 'Skipped: Provider together does not support model Llama-4-Maverick-17B-128E')" + }, + "teardown": { + 
"duration": 0.0017226670170202851, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "lineno": 60, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_image[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Maverick-17B-128E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.009964749915525317, + "outcome": "passed" + }, + "call": { + "duration": 4.6593364590080455, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0009852920193225145, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-gpt-4o]", + "lineno": 60, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_image[input_output0-gpt-4o]", + "parametrize", + "pytestmark", + "input_output0-gpt-4o", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.023214041953906417, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 61, 'Skipped: Provider together does not support model gpt-4o')" + }, + "teardown": { + "duration": 0.0003567079547792673, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-gpt-4o-mini]", + "lineno": 60, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_image[input_output0-gpt-4o-mini]", + "parametrize", + "pytestmark", + "input_output0-gpt-4o-mini", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.01705008395947516, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 61, 'Skipped: Provider together does not support model gpt-4o-mini')" + }, + "teardown": { + "duration": 0.0003085409989580512, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-Llama-4-Scout-17B-16E]", + "lineno": 75, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_image[input_output0-Llama-4-Scout-17B-16E]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Scout-17B-16E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.014711958006955683, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 76, 'Skipped: Provider together does not support model Llama-4-Scout-17B-16E')" + }, + "teardown": { + "duration": 0.0003121249610558152, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "lineno": 75, + "outcome": "failed", + "keywords": [ + "test_chat_streaming_image[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Scout-17B-16E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.01843333407305181, + "outcome": 
"passed" + }, + "call": { + "duration": 2.8683876669965684, + "outcome": "failed", + "crash": { + "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py", + "lineno": 89, + "message": "IndexError: list index out of range" + }, + "traceback": [ + { + "path": "tests/verifications/openai/test_chat_completion.py", + "lineno": 89, + "message": "IndexError" + } + ], + "longrepr": "openai_client = \ninput_output = {'input': {'messages': [{'content': [{'text': 'What is in this image?', 'type': 'text'}, {'image_url': {...}, 'type': 'image_url'}], 'role': 'user'}]}, 'output': 'llama'}\ncorrect_model_name = 'meta-llama/Llama-4-Scout-17B-16E-Instruct'\n\n @pytest.mark.parametrize(\"model\", chat_completion_test_cases[\"test_chat_image\"][\"test_params\"][\"model\"])\n @pytest.mark.parametrize(\n \"input_output\",\n chat_completion_test_cases[\"test_chat_image\"][\"test_params\"][\"input_output\"],\n )\n def test_chat_streaming_image(openai_client, input_output, correct_model_name):\n response = openai_client.chat.completions.create(\n model=correct_model_name,\n messages=input_output[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai/test_chat_completion.py:89: IndexError" + }, + "teardown": { + "duration": 0.00028662499971687794, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-Llama-4-Maverick-17B-128E]", + "lineno": 75, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_image[input_output0-Llama-4-Maverick-17B-128E]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Maverick-17B-128E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.00653208396397531, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 76, 'Skipped: Provider together does not support model Llama-4-Maverick-17B-128E')" + }, + "teardown": { + "duration": 0.00021291698794811964, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "lineno": 75, + "outcome": "failed", + "keywords": [ + "test_chat_streaming_image[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Maverick-17B-128E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.006028458010405302, + "outcome": "passed" + }, + "call": { + "duration": 4.981105040991679, + "outcome": "failed", + "crash": { + "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py", + "lineno": 89, + "message": "IndexError: list index out of range" + }, + "traceback": [ + { + "path": "tests/verifications/openai/test_chat_completion.py", + "lineno": 89, + "message": "IndexError" + } + ], + "longrepr": "openai_client = \ninput_output = {'input': {'messages': [{'content': [{'text': 'What is in this image?', 'type': 'text'}, {'image_url': {...}, 'type': 'image_url'}], 'role': 'user'}]}, 'output': 'llama'}\ncorrect_model_name = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\n\n @pytest.mark.parametrize(\"model\", 
chat_completion_test_cases[\"test_chat_image\"][\"test_params\"][\"model\"])\n @pytest.mark.parametrize(\n \"input_output\",\n chat_completion_test_cases[\"test_chat_image\"][\"test_params\"][\"input_output\"],\n )\n def test_chat_streaming_image(openai_client, input_output, correct_model_name):\n response = openai_client.chat.completions.create(\n model=correct_model_name,\n messages=input_output[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai/test_chat_completion.py:89: IndexError" + }, + "teardown": { + "duration": 0.0010110830189660192, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-gpt-4o]", + "lineno": 75, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_image[input_output0-gpt-4o]", + "parametrize", + "pytestmark", + "input_output0-gpt-4o", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.01591233303770423, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 76, 'Skipped: Provider together does not support model gpt-4o')" + }, + "teardown": { + "duration": 0.0003783750580623746, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-gpt-4o-mini]", + "lineno": 75, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_image[input_output0-gpt-4o-mini]", + "parametrize", + "pytestmark", + "input_output0-gpt-4o-mini", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.010691000032238662, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 76, 'Skipped: Provider together does not support model gpt-4o-mini')" + }, + "teardown": { + "duration": 0.00027445796877145767, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-3.3-8B-Instruct]", + "lineno": 95, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_structured_output[input_output0-Llama-3.3-8B-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-3.3-8B-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.01258529198821634, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider together does not support model Llama-3.3-8B-Instruct')" + }, + "teardown": { + "duration": 0.0002044580178335309, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-3.3-70B-Instruct]", + "lineno": 95, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_structured_output[input_output0-Llama-3.3-70B-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-3.3-70B-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.010904791066423059, + "outcome": "passed" + }, + 
"call": { + "duration": 0.8311828339938074, + "outcome": "passed" + }, + "teardown": { + "duration": 0.00048687495291233063, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E]", + "lineno": 95, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Scout-17B-16E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.029216791968792677, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider together does not support model Llama-4-Scout-17B-16E')" + }, + "teardown": { + "duration": 0.0002269580727443099, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "lineno": 95, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Scout-17B-16E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.013182583032175899, + "outcome": "passed" + }, + "call": { + "duration": 1.7446029160637408, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0008087089518085122, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E]", + "lineno": 95, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Maverick-17B-128E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.02009516698308289, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider together does not support model Llama-4-Maverick-17B-128E')" + }, + "teardown": { + "duration": 0.000320291961543262, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "lineno": 95, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Maverick-17B-128E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.015216833096928895, + "outcome": "passed" + }, + "call": { + "duration": 0.8049291669158265, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0005109170451760292, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-gpt-4o]", + "lineno": 95, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_structured_output[input_output0-gpt-4o]", + "parametrize", + "pytestmark", + 
"input_output0-gpt-4o", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.0171551660168916, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider together does not support model gpt-4o')" + }, + "teardown": { + "duration": 0.0005707499803975224, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-gpt-4o-mini]", + "lineno": 95, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_structured_output[input_output0-gpt-4o-mini]", + "parametrize", + "pytestmark", + "input_output0-gpt-4o-mini", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.01131124992389232, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider together does not support model gpt-4o-mini')" + }, + "teardown": { + "duration": 0.0003044159384444356, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-3.3-8B-Instruct]", + "lineno": 95, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_structured_output[input_output1-Llama-3.3-8B-Instruct]", + "parametrize", + "pytestmark", + "input_output1-Llama-3.3-8B-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.0054290409898385406, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider together does not support model Llama-3.3-8B-Instruct')" + }, + "teardown": { + "duration": 0.00014645792543888092, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-3.3-70B-Instruct]", + "lineno": 95, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_structured_output[input_output1-Llama-3.3-70B-Instruct]", + "parametrize", + "pytestmark", + "input_output1-Llama-3.3-70B-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.011368000064976513, + "outcome": "passed" + }, + "call": { + "duration": 4.363120499998331, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0003998749889433384, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E]", + "lineno": 95, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E]", + "parametrize", + "pytestmark", + "input_output1-Llama-4-Scout-17B-16E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.04945958300959319, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider together does not support model Llama-4-Scout-17B-16E')" + }, + "teardown": { + "duration": 0.0002401659730821848, + "outcome": "passed" + } + }, + { + 
"nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E-Instruct]", + "lineno": 95, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E-Instruct]", + "parametrize", + "pytestmark", + "input_output1-Llama-4-Scout-17B-16E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.011090958025306463, + "outcome": "passed" + }, + "call": { + "duration": 4.699277375009842, + "outcome": "passed" + }, + "teardown": { + "duration": 0.000689250067807734, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E]", + "lineno": 95, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E]", + "parametrize", + "pytestmark", + "input_output1-Llama-4-Maverick-17B-128E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.020744459005072713, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider together does not support model Llama-4-Maverick-17B-128E')" + }, + "teardown": { + "duration": 0.0001836250303313136, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E-Instruct]", + "lineno": 95, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E-Instruct]", + "parametrize", + "pytestmark", + "input_output1-Llama-4-Maverick-17B-128E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.005926624988205731, + "outcome": "passed" + }, + "call": { + "duration": 2.7814464160474017, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0009554170537739992, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-gpt-4o]", + "lineno": 95, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_structured_output[input_output1-gpt-4o]", + "parametrize", + "pytestmark", + "input_output1-gpt-4o", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.03027112502604723, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider together does not support model gpt-4o')" + }, + "teardown": { + "duration": 0.0003245410043746233, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-gpt-4o-mini]", + "lineno": 95, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_structured_output[input_output1-gpt-4o-mini]", + "parametrize", + "pytestmark", + "input_output1-gpt-4o-mini", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.009138708002865314, + "outcome": "skipped", + "longrepr": 
"('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider together does not support model gpt-4o-mini')" + }, + "teardown": { + "duration": 0.0001919999485835433, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-3.3-8B-Instruct]", + "lineno": 117, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_structured_output[input_output0-Llama-3.3-8B-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-3.3-8B-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.0064505410846322775, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider together does not support model Llama-3.3-8B-Instruct')" + }, + "teardown": { + "duration": 0.00015720794908702374, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-3.3-70B-Instruct]", + "lineno": 117, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_structured_output[input_output0-Llama-3.3-70B-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-3.3-70B-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.00582624995149672, + "outcome": "passed" + }, + "call": { + "duration": 0.8302567919017747, + "outcome": "passed" + }, + "teardown": { + "duration": 0.00020354206208139658, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E]", + "lineno": 117, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Scout-17B-16E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.014151416951790452, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider together does not support model Llama-4-Scout-17B-16E')" + }, + "teardown": { + "duration": 0.00034970801789313555, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "lineno": 117, + "outcome": "failed", + "keywords": [ + "test_chat_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Scout-17B-16E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.012150791939347982, + "outcome": "passed" + }, + "call": { + "duration": 0.7078855830477551, + "outcome": "failed", + "crash": { + "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py", + "lineno": 135, + "message": "IndexError: list index out of range" + }, + "traceback": [ + { + "path": "tests/verifications/openai/test_chat_completion.py", + "lineno": 135, + "message": "IndexError" + } + ], + "longrepr": "openai_client = \ninput_output 
= {'input': {'messages': [{'content': 'Extract the event information.', 'role': 'system'}, {'content': 'Alice and Bob ar...articipants'], 'title': 'CalendarEvent', 'type': 'object'}}, 'type': 'json_schema'}}, 'output': 'valid_calendar_event'}\ncorrect_model_name = 'meta-llama/Llama-4-Scout-17B-16E-Instruct'\n\n @pytest.mark.parametrize(\n \"model\",\n chat_completion_test_cases[\"test_chat_structured_output\"][\"test_params\"][\"model\"],\n )\n @pytest.mark.parametrize(\n \"input_output\",\n chat_completion_test_cases[\"test_chat_structured_output\"][\"test_params\"][\"input_output\"],\n )\n def test_chat_streaming_structured_output(openai_client, input_output, correct_model_name):\n response = openai_client.chat.completions.create(\n model=correct_model_name,\n messages=input_output[\"input\"][\"messages\"],\n response_format=input_output[\"input\"][\"response_format\"],\n stream=True,\n )\n maybe_json_content = \"\"\n for chunk in response:\n> maybe_json_content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai/test_chat_completion.py:135: IndexError" + }, + "teardown": { + "duration": 0.0008542909054085612, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E]", + "lineno": 117, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Maverick-17B-128E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.022667833953164518, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider together does not support model Llama-4-Maverick-17B-128E')" + }, + "teardown": { + "duration": 0.0006820419803261757, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "lineno": 117, + "outcome": "failed", + "keywords": [ + "test_chat_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Maverick-17B-128E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.01285991701297462, + "outcome": "passed" + }, + "call": { + "duration": 0.6888671671040356, + "outcome": "failed", + "crash": { + "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py", + "lineno": 135, + "message": "IndexError: list index out of range" + }, + "traceback": [ + { + "path": "tests/verifications/openai/test_chat_completion.py", + "lineno": 135, + "message": "IndexError" + } + ], + "longrepr": "openai_client = \ninput_output = {'input': {'messages': [{'content': 'Extract the event information.', 'role': 'system'}, {'content': 'Alice and Bob ar...articipants'], 'title': 'CalendarEvent', 'type': 'object'}}, 'type': 'json_schema'}}, 'output': 'valid_calendar_event'}\ncorrect_model_name = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\n\n @pytest.mark.parametrize(\n \"model\",\n chat_completion_test_cases[\"test_chat_structured_output\"][\"test_params\"][\"model\"],\n )\n @pytest.mark.parametrize(\n \"input_output\",\n 
chat_completion_test_cases[\"test_chat_structured_output\"][\"test_params\"][\"input_output\"],\n )\n def test_chat_streaming_structured_output(openai_client, input_output, correct_model_name):\n response = openai_client.chat.completions.create(\n model=correct_model_name,\n messages=input_output[\"input\"][\"messages\"],\n response_format=input_output[\"input\"][\"response_format\"],\n stream=True,\n )\n maybe_json_content = \"\"\n for chunk in response:\n> maybe_json_content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai/test_chat_completion.py:135: IndexError" + }, + "teardown": { + "duration": 0.0007953330641612411, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-gpt-4o]", + "lineno": 117, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_structured_output[input_output0-gpt-4o]", + "parametrize", + "pytestmark", + "input_output0-gpt-4o", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.015029000001959503, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider together does not support model gpt-4o')" + }, + "teardown": { + "duration": 0.00015666603576391935, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-gpt-4o-mini]", + "lineno": 117, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_structured_output[input_output0-gpt-4o-mini]", + "parametrize", + "pytestmark", + "input_output0-gpt-4o-mini", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.00622316705994308, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider together does not support model gpt-4o-mini')" + }, + "teardown": { + "duration": 0.0001533749746158719, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-3.3-8B-Instruct]", + "lineno": 117, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_structured_output[input_output1-Llama-3.3-8B-Instruct]", + "parametrize", + "pytestmark", + "input_output1-Llama-3.3-8B-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.005598834017291665, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider together does not support model Llama-3.3-8B-Instruct')" + }, + "teardown": { + "duration": 0.00013062497600913048, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-3.3-70B-Instruct]", + "lineno": 117, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_structured_output[input_output1-Llama-3.3-70B-Instruct]", + "parametrize", + "pytestmark", + "input_output1-Llama-3.3-70B-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.005876541952602565, + 
"outcome": "passed" + }, + "call": { + "duration": 7.561108374968171, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0004579999949783087, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E]", + "lineno": 117, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E]", + "parametrize", + "pytestmark", + "input_output1-Llama-4-Scout-17B-16E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.018791542039252818, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider together does not support model Llama-4-Scout-17B-16E')" + }, + "teardown": { + "duration": 0.0004900830099359155, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E-Instruct]", + "lineno": 117, + "outcome": "failed", + "keywords": [ + "test_chat_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E-Instruct]", + "parametrize", + "pytestmark", + "input_output1-Llama-4-Scout-17B-16E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.0065952910808846354, + "outcome": "passed" + }, + "call": { + "duration": 2.6826554159633815, + "outcome": "failed", + "crash": { + "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py", + "lineno": 135, + "message": "IndexError: list index out of range" + }, + "traceback": [ + { + "path": "tests/verifications/openai/test_chat_completion.py", + "lineno": 135, + "message": "IndexError" + } + ], + "longrepr": "openai_client = \ninput_output = {'input': {'messages': [{'content': 'You are a helpful math tutor. Guide the user through the solution step by step.',... 
['steps', 'final_answer'], 'title': 'MathReasoning', ...}}, 'type': 'json_schema'}}, 'output': 'valid_math_reasoning'}\ncorrect_model_name = 'meta-llama/Llama-4-Scout-17B-16E-Instruct'\n\n @pytest.mark.parametrize(\n \"model\",\n chat_completion_test_cases[\"test_chat_structured_output\"][\"test_params\"][\"model\"],\n )\n @pytest.mark.parametrize(\n \"input_output\",\n chat_completion_test_cases[\"test_chat_structured_output\"][\"test_params\"][\"input_output\"],\n )\n def test_chat_streaming_structured_output(openai_client, input_output, correct_model_name):\n response = openai_client.chat.completions.create(\n model=correct_model_name,\n messages=input_output[\"input\"][\"messages\"],\n response_format=input_output[\"input\"][\"response_format\"],\n stream=True,\n )\n maybe_json_content = \"\"\n for chunk in response:\n> maybe_json_content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai/test_chat_completion.py:135: IndexError" + }, + "teardown": { + "duration": 0.0009669580031186342, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E]", + "lineno": 117, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E]", + "parametrize", + "pytestmark", + "input_output1-Llama-4-Maverick-17B-128E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.019489208003506064, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider together does not support model Llama-4-Maverick-17B-128E')" + }, + "teardown": { + "duration": 0.0007419160101562738, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E-Instruct]", + "lineno": 117, + "outcome": "failed", + "keywords": [ + "test_chat_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E-Instruct]", + "parametrize", + "pytestmark", + "input_output1-Llama-4-Maverick-17B-128E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.012299792026169598, + "outcome": "passed" + }, + "call": { + "duration": 2.829678333015181, + "outcome": "failed", + "crash": { + "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py", + "lineno": 135, + "message": "IndexError: list index out of range" + }, + "traceback": [ + { + "path": "tests/verifications/openai/test_chat_completion.py", + "lineno": 135, + "message": "IndexError" + } + ], + "longrepr": "openai_client = \ninput_output = {'input': {'messages': [{'content': 'You are a helpful math tutor. Guide the user through the solution step by step.',... 
['steps', 'final_answer'], 'title': 'MathReasoning', ...}}, 'type': 'json_schema'}}, 'output': 'valid_math_reasoning'}\ncorrect_model_name = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\n\n @pytest.mark.parametrize(\n \"model\",\n chat_completion_test_cases[\"test_chat_structured_output\"][\"test_params\"][\"model\"],\n )\n @pytest.mark.parametrize(\n \"input_output\",\n chat_completion_test_cases[\"test_chat_structured_output\"][\"test_params\"][\"input_output\"],\n )\n def test_chat_streaming_structured_output(openai_client, input_output, correct_model_name):\n response = openai_client.chat.completions.create(\n model=correct_model_name,\n messages=input_output[\"input\"][\"messages\"],\n response_format=input_output[\"input\"][\"response_format\"],\n stream=True,\n )\n maybe_json_content = \"\"\n for chunk in response:\n> maybe_json_content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai/test_chat_completion.py:135: IndexError" + }, + "teardown": { + "duration": 0.0010418329620733857, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-gpt-4o]", + "lineno": 117, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_structured_output[input_output1-gpt-4o]", + "parametrize", + "pytestmark", + "input_output1-gpt-4o", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.016189916990697384, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider together does not support model gpt-4o')" + }, + "teardown": { + "duration": 0.00027966592460870743, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-gpt-4o-mini]", + "lineno": 117, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_structured_output[input_output1-gpt-4o-mini]", + "parametrize", + "pytestmark", + "input_output1-gpt-4o-mini", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.010247125057503581, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider together does not support model gpt-4o-mini')" + }, + "teardown": { + "duration": 0.00023291702382266521, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-Llama-3.3-70B-Instruct]", + "lineno": 138, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_tool_calling[input_output0-Llama-3.3-70B-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-3.3-70B-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.012632582918740809, + "outcome": "passed" + }, + "call": { + "duration": 0.40774812502786517, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0007319580763578415, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-Llama-4-Scout-17B-16E]", + "lineno": 138, + "outcome": "skipped", + "keywords": [ + 
"test_chat_non_streaming_tool_calling[input_output0-Llama-4-Scout-17B-16E]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Scout-17B-16E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.019890791969373822, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 139, 'Skipped: Provider together does not support model Llama-4-Scout-17B-16E')" + }, + "teardown": { + "duration": 0.0006391670322045684, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "lineno": 138, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_tool_calling[input_output0-Llama-4-Scout-17B-16E-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Scout-17B-16E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.0178165000397712, + "outcome": "passed" + }, + "call": { + "duration": 0.38229950005188584, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0010000420734286308, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-Llama-4-Maverick-17B-128E]", + "lineno": 138, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_tool_calling[input_output0-Llama-4-Maverick-17B-128E]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Maverick-17B-128E", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.024259291938506067, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 139, 'Skipped: Provider together does not support model Llama-4-Maverick-17B-128E')" + }, + "teardown": { + "duration": 0.0003602079814299941, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "lineno": 138, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_tool_calling[input_output0-Llama-4-Maverick-17B-128E-Instruct]", + "parametrize", + "pytestmark", + "input_output0-Llama-4-Maverick-17B-128E-Instruct", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.012425708002410829, + "outcome": "passed" + }, + "call": { + "duration": 0.7610744580160826, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0005935420049354434, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-gpt-4o]", + "lineno": 138, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_tool_calling[input_output0-gpt-4o]", + "parametrize", + "pytestmark", + "input_output0-gpt-4o", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.018717541941441596, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 139, 'Skipped: Provider together does not support model gpt-4o')" + }, + "teardown": { + "duration": 
0.000659791985526681, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-gpt-4o-mini]", + "lineno": 138, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_tool_calling[input_output0-gpt-4o-mini]", + "parametrize", + "pytestmark", + "input_output0-gpt-4o-mini", + "test_chat_completion.py", + "openai", + "verifications", + "tests", + "llama-stack", + "" + ], + "setup": { + "duration": 0.012784749967977405, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 139, 'Skipped: Provider together does not support model gpt-4o-mini')" + }, + "teardown": { + "duration": 0.0002145830076187849, + "outcome": "passed" + } + } + ] +} From 983f6feeb8eae327adb8273ecf0f309985fcf648 Mon Sep 17 00:00:00 2001 From: AlexHe99 Date: Wed, 9 Apr 2025 12:35:32 +0800 Subject: [PATCH 03/21] docs: Update remote-vllm.md with AMD GPU vLLM server supported. (#1858) Add content on using an AMD GPU as the vLLM server. Split the original section into two sub-chapters: 1. AMD vLLM server, 2. NVIDIA vLLM server (original). # What does this PR do? [Provide a short summary of what this PR does and why. Link to relevant issues if applicable.] [//]: # (If resolving an issue, uncomment and update the line below) [//]: # (Closes #[issue-number]) ## Test Plan [Describe the tests you ran to verify your changes with result summaries. *Provide clear instructions so the plan can be easily re-executed.*] [//]: # (## Documentation) --------- Signed-off-by: Alex He --- .../self_hosted_distro/remote-vllm.md | 74 +++++++++++++++++++ .../templates/remote-vllm/doc_template.md | 74 +++++++++++++++++++ 2 files changed, 148 insertions(+) diff --git a/docs/source/distributions/self_hosted_distro/remote-vllm.md b/docs/source/distributions/self_hosted_distro/remote-vllm.md index b6e8a8ad4..457d703b3 100644 --- a/docs/source/distributions/self_hosted_distro/remote-vllm.md +++ b/docs/source/distributions/self_hosted_distro/remote-vllm.md @@ -41,6 +41,80 @@ The following environment variables can be configured: ## Setting up vLLM server +Both AMD and NVIDIA GPUs can serve as accelerators for the vLLM server, which acts as both the LLM inference provider and the safety provider. + +### Setting up vLLM server on AMD GPU + +AMD provides two main vLLM container options: +- rocm/vllm: Production-ready container +- rocm/vllm-dev: Development container with the latest vLLM features + +Please check the [Blog about ROCm vLLM Usage](https://rocm.blogs.amd.com/software-tools-optimization/vllm-container/README.html) to get more details.
+ +Here is a sample script to start a ROCm vLLM server locally via Docker: + +```bash +export INFERENCE_PORT=8000 +export INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct +export CUDA_VISIBLE_DEVICES=0 +export VLLM_DIMG="rocm/vllm-dev:main" + +docker run \ + --pull always \ + --ipc=host \ + --privileged \ + --shm-size 16g \ + --device=/dev/kfd \ + --device=/dev/dri \ + --group-add video \ + --cap-add=SYS_PTRACE \ + --cap-add=CAP_SYS_ADMIN \ + --security-opt seccomp=unconfined \ + --security-opt apparmor=unconfined \ + --env "HUGGING_FACE_HUB_TOKEN=$HF_TOKEN" \ + --env "HIP_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES" \ + -p $INFERENCE_PORT:$INFERENCE_PORT \ + -v ~/.cache/huggingface:/root/.cache/huggingface \ + $VLLM_DIMG \ + python -m vllm.entrypoints.openai.api_server \ + --model $INFERENCE_MODEL \ + --port $INFERENCE_PORT +``` + +Note that you'll also need to set `--enable-auto-tool-choice` and `--tool-call-parser` to [enable tool calling in vLLM](https://docs.vllm.ai/en/latest/features/tool_calling.html). + +If you are using Llama Stack Safety / Shield APIs, then you will need to also run another instance of a vLLM with a corresponding safety model like `meta-llama/Llama-Guard-3-1B` using a script like: + +```bash +export SAFETY_PORT=8081 +export SAFETY_MODEL=meta-llama/Llama-Guard-3-1B +export CUDA_VISIBLE_DEVICES=1 +export VLLM_DIMG="rocm/vllm-dev:main" + +docker run \ + --pull always \ + --ipc=host \ + --privileged \ + --shm-size 16g \ + --device=/dev/kfd \ + --device=/dev/dri \ + --group-add video \ + --cap-add=SYS_PTRACE \ + --cap-add=CAP_SYS_ADMIN \ + --security-opt seccomp=unconfined \ + --security-opt apparmor=unconfined \ + --env "HUGGING_FACE_HUB_TOKEN=$HF_TOKEN" \ + --env "HIP_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES" \ + -p $SAFETY_PORT:$SAFETY_PORT \ + -v ~/.cache/huggingface:/root/.cache/huggingface \ + $VLLM_DIMG \ + python -m vllm.entrypoints.openai.api_server \ + --model $SAFETY_MODEL \ + --port $SAFETY_PORT +``` + +### Setting up vLLM server on NVIDIA GPU + Please check the [vLLM Documentation](https://docs.vllm.ai/en/v0.5.5/serving/deploying_with_docker.html) to get a vLLM endpoint. Here is a sample script to start a vLLM server locally via Docker: ```bash diff --git a/llama_stack/templates/remote-vllm/doc_template.md b/llama_stack/templates/remote-vllm/doc_template.md index 57c9f116c..7543e8239 100644 --- a/llama_stack/templates/remote-vllm/doc_template.md +++ b/llama_stack/templates/remote-vllm/doc_template.md @@ -28,6 +28,80 @@ The following environment variables can be configured: ## Setting up vLLM server +Both AMD and NVIDIA GPUs can serve as accelerators for the vLLM server, which acts as both the LLM inference provider and the safety provider. + +### Setting up vLLM server on AMD GPU + +AMD provides two main vLLM container options: +- rocm/vllm: Production-ready container +- rocm/vllm-dev: Development container with the latest vLLM features + +Please check the [Blog about ROCm vLLM Usage](https://rocm.blogs.amd.com/software-tools-optimization/vllm-container/README.html) to get more details. 
+ +Here is a sample script to start a ROCm vLLM server locally via Docker: + +```bash +export INFERENCE_PORT=8000 +export INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct +export CUDA_VISIBLE_DEVICES=0 +export VLLM_DIMG="rocm/vllm-dev:main" + +docker run \ + --pull always \ + --ipc=host \ + --privileged \ + --shm-size 16g \ + --device=/dev/kfd \ + --device=/dev/dri \ + --group-add video \ + --cap-add=SYS_PTRACE \ + --cap-add=CAP_SYS_ADMIN \ + --security-opt seccomp=unconfined \ + --security-opt apparmor=unconfined \ + --env "HUGGING_FACE_HUB_TOKEN=$HF_TOKEN" \ + --env "HIP_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES" \ + -p $INFERENCE_PORT:$INFERENCE_PORT \ + -v ~/.cache/huggingface:/root/.cache/huggingface \ + $VLLM_DIMG \ + python -m vllm.entrypoints.openai.api_server \ + --model $INFERENCE_MODEL \ + --port $INFERENCE_PORT +``` + +Note that you'll also need to set `--enable-auto-tool-choice` and `--tool-call-parser` to [enable tool calling in vLLM](https://docs.vllm.ai/en/latest/features/tool_calling.html). + +If you are using Llama Stack Safety / Shield APIs, then you will need to also run another instance of a vLLM with a corresponding safety model like `meta-llama/Llama-Guard-3-1B` using a script like: + +```bash +export SAFETY_PORT=8081 +export SAFETY_MODEL=meta-llama/Llama-Guard-3-1B +export CUDA_VISIBLE_DEVICES=1 +export VLLM_DIMG="rocm/vllm-dev:main" + +docker run \ + --pull always \ + --ipc=host \ + --privileged \ + --shm-size 16g \ + --device=/dev/kfd \ + --device=/dev/dri \ + --group-add video \ + --cap-add=SYS_PTRACE \ + --cap-add=CAP_SYS_ADMIN \ + --security-opt seccomp=unconfined \ + --security-opt apparmor=unconfined \ + --env "HUGGING_FACE_HUB_TOKEN=$HF_TOKEN" \ + --env "HIP_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES" \ + -p $SAFETY_PORT:$SAFETY_PORT \ + -v ~/.cache/huggingface:/root/.cache/huggingface \ + $VLLM_DIMG \ + python -m vllm.entrypoints.openai.api_server \ + --model $SAFETY_MODEL \ + --port $SAFETY_PORT +``` + +### Setting up vLLM server on NVIDIA GPU + Please check the [vLLM Documentation](https://docs.vllm.ai/en/v0.5.5/serving/deploying_with_docker.html) to get a vLLM endpoint. Here is a sample script to start a vLLM server locally via Docker: ```bash From 10882bf478bcb8d89babeb9416ab24fb39385d20 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Han?= Date: Wed, 9 Apr 2025 09:43:48 +0200 Subject: [PATCH 04/21] chore: remove unused tempdir in agent (#1896) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # What does this PR do? The usage of the tempdir was removed in 094eb6a5ae8dbac297fe59914db11c612250f92f. 
Signed-off-by: Sébastien Han --- .../providers/inline/agents/meta_reference/agent_instance.py | 2 -- llama_stack/providers/inline/agents/meta_reference/agents.py | 3 --- 2 files changed, 5 deletions(-) diff --git a/llama_stack/providers/inline/agents/meta_reference/agent_instance.py b/llama_stack/providers/inline/agents/meta_reference/agent_instance.py index 6840da89f..f441d6eb6 100644 --- a/llama_stack/providers/inline/agents/meta_reference/agent_instance.py +++ b/llama_stack/providers/inline/agents/meta_reference/agent_instance.py @@ -89,7 +89,6 @@ class ChatAgent(ShieldRunnerMixin): self, agent_id: str, agent_config: AgentConfig, - tempdir: str, inference_api: Inference, safety_api: Safety, tool_runtime_api: ToolRuntime, @@ -99,7 +98,6 @@ class ChatAgent(ShieldRunnerMixin): ): self.agent_id = agent_id self.agent_config = agent_config - self.tempdir = tempdir self.inference_api = inference_api self.safety_api = safety_api self.vector_io_api = vector_io_api diff --git a/llama_stack/providers/inline/agents/meta_reference/agents.py b/llama_stack/providers/inline/agents/meta_reference/agents.py index 5ca123595..656178773 100644 --- a/llama_stack/providers/inline/agents/meta_reference/agents.py +++ b/llama_stack/providers/inline/agents/meta_reference/agents.py @@ -7,7 +7,6 @@ import json import logging import shutil -import tempfile import uuid from typing import AsyncGenerator, List, Optional, Union @@ -64,7 +63,6 @@ class MetaReferenceAgentsImpl(Agents): self.tool_groups_api = tool_groups_api self.in_memory_store = InmemoryKVStoreImpl() - self.tempdir = tempfile.mkdtemp() async def initialize(self) -> None: self.persistence_store = await kvstore_impl(self.config.persistence_store) @@ -107,7 +105,6 @@ class MetaReferenceAgentsImpl(Agents): return ChatAgent( agent_id=agent_id, agent_config=agent_config, - tempdir=self.tempdir, inference_api=self.inference_api, safety_api=self.safety_api, vector_io_api=self.vector_io_api, From 8001c30a4fe12ca15c79ca7b7038c30d1d7b181f Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Wed, 9 Apr 2025 00:46:02 -0700 Subject: [PATCH 05/21] fix: meta reference + llama4 tokenizer fix --- llama_stack/models/llama/llama4/tokenizer.py | 4 +++- .../providers/inline/inference/meta_reference/generators.py | 4 ++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/llama_stack/models/llama/llama4/tokenizer.py b/llama_stack/models/llama/llama4/tokenizer.py index 4d271e5fd..8eabc3205 100644 --- a/llama_stack/models/llama/llama4/tokenizer.py +++ b/llama_stack/models/llama/llama4/tokenizer.py @@ -56,9 +56,11 @@ LLAMA4_TEXT_POST_TRAIN_SPECIAL_TOKENS = [ "<|text_post_train_reserved_special_token_3|>", "<|text_post_train_reserved_special_token_4|>", "<|text_post_train_reserved_special_token_5|>", + "<|text_post_train_reserved_special_token_6|>", + "<|text_post_train_reserved_special_token_7|>", "<|finetune_right_pad|>", ] + get_reserved_special_tokens( - "text_post_train", 61, 6 + "text_post_train", 61, 8 ) # <|text_post_train_reserved_special_token_6|>, ..., <|text_post_train_reserved_special_token_66|> # 200080, ..., 201133 diff --git a/llama_stack/providers/inline/inference/meta_reference/generators.py b/llama_stack/providers/inline/inference/meta_reference/generators.py index 65bed4d8c..34dd58a9a 100644 --- a/llama_stack/providers/inline/inference/meta_reference/generators.py +++ b/llama_stack/providers/inline/inference/meta_reference/generators.py @@ -259,7 +259,7 @@ class Llama3Generator: temperature, top_p = _infer_sampling_params(sampling_params) for result 
in self.inner_generator.generate( - llm_inputs=[self.formatter.encode_content(request.content)], + model_inputs=[self.formatter.encode_content(request.content)], max_gen_len=max_gen_len, temperature=temperature, top_p=top_p, @@ -284,7 +284,7 @@ class Llama3Generator: temperature, top_p = _infer_sampling_params(sampling_params) for result in self.inner_generator.generate( - llm_inputs=[self.formatter.encode_dialog_prompt(request.messages, _infer_tool_prompt_format(request))], + model_inputs=[self.formatter.encode_dialog_prompt(request.messages, _infer_tool_prompt_format(request))], max_gen_len=max_gen_len, temperature=temperature, top_p=top_p, From e3d22d8de733b1786087fba85920695c40e15777 Mon Sep 17 00:00:00 2001 From: Ihar Hrachyshka Date: Wed, 9 Apr 2025 04:10:07 -0400 Subject: [PATCH 06/21] chore: fix hash for thollander/actions-comment-pull-request (#1900) # What does this PR do? Fix hash for v3.0.1 tag for a github action. Signed-off-by: Ihar Hrachyshka --- .github/workflows/gha_workflow_llama_stack_tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/gha_workflow_llama_stack_tests.yml b/.github/workflows/gha_workflow_llama_stack_tests.yml index 91b9d2f3b..9eae291e9 100644 --- a/.github/workflows/gha_workflow_llama_stack_tests.yml +++ b/.github/workflows/gha_workflow_llama_stack_tests.yml @@ -320,7 +320,7 @@ jobs: - name: "PR - Update comment" id: pr_update_comment if: github.event_name == 'pull_request_target' - uses: thollander/actions-comment-pull-request@65f9e5c9a1f2cd378bd74b2e057c9736982a8e74 # v3.0.1 + uses: thollander/actions-comment-pull-request@24bffb9b452ba05a4f3f77933840a6a841d1b32b # v3.0.1 with: filePath: test-summary.md From 45e210fd0c43ee76a93f21275575f3cbd83a70f6 Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Wed, 9 Apr 2025 01:09:16 -0700 Subject: [PATCH 07/21] fix: llama3 bf16 model load --- llama_stack/models/llama/llama3/generation.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/llama_stack/models/llama/llama3/generation.py b/llama_stack/models/llama/llama3/generation.py index ee99a07ba..8c6aa242b 100644 --- a/llama_stack/models/llama/llama3/generation.py +++ b/llama_stack/models/llama/llama3/generation.py @@ -119,17 +119,16 @@ class Llama3: torch.set_default_device(device) else: print(f"Setting default device to {device}") - torch.set_default_device(device) if device.type == "cuda": if torch.cuda.is_bf16_supported(): - torch.set_default_dtype(torch.bfloat16) + torch.set_default_tensor_type(torch.cuda.BFloat16Tensor) else: - torch.set_default_dtype(torch.half) + torch.set_default_tensor_type(torch.cuda.Float16Tensor) elif device.type == "xpu": if torch.xpu.is_bf16_supported(): - torch.set_default_dtype(torch.bfloat16) + torch.set_default_tensor_type(torch.xpu.BFloat16Tensor) else: - torch.set_default_dtype(torch.half) + torch.set_default_tensor_type(torch.xpu.Float16Tensor) model = build_model() print("Loading state dict...") From 389767010b0333c49cf6cb86122308a5ec474621 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Han?= Date: Wed, 9 Apr 2025 10:30:41 +0200 Subject: [PATCH 08/21] feat: ability to execute external providers (#1672) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # What does this PR do? Providers that live outside of the llama-stack codebase are now supported. 
A new property `external_providers_dir` has been added to the main config and can be configured as follows: ``` external_providers_dir: /etc/llama-stack/providers.d/ ``` Where the expected structure is: ``` providers.d/ inference/ custom_ollama.yaml vllm.yaml vector_io/ qdrant.yaml ``` Where `custom_ollama.yaml` is: ``` adapter: adapter_type: custom_ollama pip_packages: ["ollama", "aiohttp"] config_class: llama_stack_ollama_provider.config.OllamaImplConfig module: llama_stack_ollama_provider api_dependencies: [] optional_api_dependencies: [] ``` Obviously the package must be installed on the system; here is the `llama_stack_ollama_provider` example: ``` $ uv pip show llama-stack-ollama-provider Using Python 3.10.16 environment at: /Users/leseb/Documents/AI/llama-stack/.venv Name: llama-stack-ollama-provider Version: 0.1.0 Location: /Users/leseb/Documents/AI/llama-stack/.venv/lib/python3.10/site-packages Editable project location: /private/var/folders/mq/rnm5w_7s2d3fxmtkx02knvhm0000gn/T/tmp.ZBHU5Ezxg4/ollama/llama-stack-ollama-provider Requires: Required-by: ``` Closes: https://github.com/meta-llama/llama-stack/issues/658 Signed-off-by: Sébastien Han --- .github/workflows/test-external-providers.yml | 93 +++++++ docs/source/providers/external.md | 234 ++++++++++++++++++ docs/source/providers/index.md | 5 + llama_stack/distribution/datatypes.py | 5 + llama_stack/distribution/distribution.py | 130 +++++++++- llama_stack/distribution/resolver.py | 1 + llama_stack/distribution/stack.py | 2 +- .../llama-stack-provider-ollama/README.md | 3 + .../custom_ollama.yaml | 7 + .../pyproject.toml | 44 ++++ .../llama-stack-provider-ollama/run.yaml | 135 ++++++++++ tests/unit/distribution/test_distribution.py | 223 +++++++++++++++++ 12 files changed, 875 insertions(+), 7 deletions(-) create mode 100644 .github/workflows/test-external-providers.yml create mode 100644 docs/source/providers/external.md create mode 100644 tests/external-provider/llama-stack-provider-ollama/README.md create mode 100644 tests/external-provider/llama-stack-provider-ollama/custom_ollama.yaml create mode 100644 tests/external-provider/llama-stack-provider-ollama/pyproject.toml create mode 100644 tests/external-provider/llama-stack-provider-ollama/run.yaml create mode 100644 tests/unit/distribution/test_distribution.py diff --git a/.github/workflows/test-external-providers.yml b/.github/workflows/test-external-providers.yml new file mode 100644 index 000000000..2ead8f845 --- /dev/null +++ b/.github/workflows/test-external-providers.yml @@ -0,0 +1,93 @@ +name: Test External Providers + +on: + push: + branches: [ main ] + pull_request: + branches: [ main ] + +jobs: + test-external-providers: + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Install uv + uses: astral-sh/setup-uv@v5 + with: + python-version: "3.10" + + - name: Install Ollama + run: | + curl -fsSL https://ollama.com/install.sh | sh + + - name: Pull Ollama image + run: | + ollama pull llama3.2:3b-instruct-fp16 + + - name: Start Ollama in background + run: | + nohup ollama run llama3.2:3b-instruct-fp16 --keepalive=30m > ollama.log 2>&1 & + + - name: Set Up Environment and Install Dependencies + run: | + uv sync --extra dev --extra test + uv pip install -e .
+ + - name: Install Ollama custom provider + run: | + mkdir -p tests/external-provider/llama-stack-provider-ollama/src/ + cp -a llama_stack/providers/remote/inference/ollama/ tests/external-provider/llama-stack-provider-ollama/src/llama_stack_provider_ollama + uv pip install tests/external-provider/llama-stack-provider-ollama + + - name: Create provider configuration + run: | + mkdir -p /tmp/providers.d/remote/inference + cp tests/external-provider/llama-stack-provider-ollama/custom_ollama.yaml /tmp/providers.d/remote/inference/custom_ollama.yaml + + - name: Wait for Ollama to start + run: | + echo "Waiting for Ollama..." + for i in {1..30}; do + if curl -s http://localhost:11434 | grep -q "Ollama is running"; then + echo "Ollama is running!" + exit 0 + fi + sleep 1 + done + echo "Ollama failed to start" + ollama ps + cat ollama.log + exit 1 + + - name: Start Llama Stack server in background + env: + INFERENCE_MODEL: "meta-llama/Llama-3.2-3B-Instruct" + run: | + source .venv/bin/activate + nohup uv run llama stack run tests/external-provider/llama-stack-provider-ollama/run.yaml --image-type venv > server.log 2>&1 & + + - name: Wait for Llama Stack server to be ready + run: | + echo "Waiting for Llama Stack server..." + for i in {1..30}; do + if curl -s http://localhost:8321/v1/health | grep -q "OK"; then + echo "Llama Stack server is up!" + if grep -q "remote::custom_ollama from /tmp/providers.d/remote/inference/custom_ollama.yaml" server.log; then + echo "Llama Stack server is using custom Ollama provider" + exit 0 + else + echo "Llama Stack server is not using custom Ollama provider" + exit 1 + fi + fi + sleep 1 + done + echo "Llama Stack server failed to start" + cat server.log + exit 1 + + - name: run inference tests + run: | + uv run pytest -v tests/integration/inference/test_text_inference.py --stack-config="http://localhost:8321" --text-model="meta-llama/Llama-3.2-3B-Instruct" --embedding-model=all-MiniLM-L6-v2 diff --git a/docs/source/providers/external.md b/docs/source/providers/external.md new file mode 100644 index 000000000..90fc77979 --- /dev/null +++ b/docs/source/providers/external.md @@ -0,0 +1,234 @@ +# External Providers + +Llama Stack supports external providers that live outside of the main codebase. This allows you to: +- Create and maintain your own providers independently +- Share providers with others without contributing to the main codebase +- Keep provider-specific code separate from the core Llama Stack code + +## Configuration + +To enable external providers, you need to configure the `external_providers_dir` in your Llama Stack configuration. This directory should contain your external provider specifications: + +```yaml +external_providers_dir: /etc/llama-stack/providers.d/ +``` + +## Directory Structure + +The external providers directory should follow this structure: + +``` +providers.d/ + remote/ + inference/ + custom_ollama.yaml + vllm.yaml + vector_io/ + qdrant.yaml + safety/ + llama-guard.yaml + inline/ + inference/ + custom_ollama.yaml + vllm.yaml + vector_io/ + qdrant.yaml + safety/ + llama-guard.yaml +``` + +Each YAML file in these directories defines a provider specification for that particular API. + +## Provider Types + +Llama Stack supports two types of external providers: + +1. **Remote Providers**: Providers that communicate with external services (e.g., cloud APIs) +2.
**Inline Providers**: Providers that run locally within the Llama Stack process + +## Known External Providers + +Here's a list of known external providers that you can use with Llama Stack: + +| Type | Name | Description | Repository | +|------|------|-------------|------------| +| Remote | KubeFlow Training | Train models with KubeFlow | [llama-stack-provider-kft](https://github.com/opendatahub-io/llama-stack-provider-kft) | + +### Remote Provider Specification + +Remote providers are used when you need to communicate with external services. Here's an example for a custom Ollama provider: + +```yaml +adapter: + adapter_type: custom_ollama + pip_packages: + - ollama + - aiohttp + config_class: llama_stack_ollama_provider.config.OllamaImplConfig + module: llama_stack_ollama_provider +api_dependencies: [] +optional_api_dependencies: [] +``` + +#### Adapter Configuration + +The `adapter` section defines how to load and configure the provider: + +- `adapter_type`: A unique identifier for this adapter +- `pip_packages`: List of Python packages required by the provider +- `config_class`: The full path to the configuration class +- `module`: The Python module containing the provider implementation + +### Inline Provider Specification + +Inline providers run locally within the Llama Stack process. Here's an example for a custom vector store provider: + +```yaml +module: llama_stack_vector_provider +config_class: llama_stack_vector_provider.config.VectorStoreConfig +pip_packages: + - faiss-cpu + - numpy +api_dependencies: + - inference +optional_api_dependencies: + - vector_io +provider_data_validator: llama_stack_vector_provider.validator.VectorStoreValidator +container_image: custom-vector-store:latest # optional +``` + +#### Inline Provider Fields + +- `module`: The Python module containing the provider implementation +- `config_class`: The full path to the configuration class +- `pip_packages`: List of Python packages required by the provider +- `api_dependencies`: List of Llama Stack APIs that this provider depends on +- `optional_api_dependencies`: List of optional Llama Stack APIs that this provider can use +- `provider_data_validator`: Optional validator for provider data +- `container_image`: Optional container image to use instead of pip packages + +## Required Implementation + +### Remote Providers + +Remote providers must expose a `get_adapter_impl()` function in their module that takes two arguments: +1. `config`: An instance of the provider's config class +2. `deps`: A dictionary of API dependencies + +This function must return an instance of the provider's adapter class that implements the required protocol for the API. + +Example: +```python +async def get_adapter_impl( + config: OllamaImplConfig, deps: Dict[Api, Any] +) -> OllamaInferenceAdapter: + return OllamaInferenceAdapter(config) +``` + +### Inline Providers + +Inline providers must expose a `get_provider_impl()` function in their module that takes two arguments: +1. `config`: An instance of the provider's config class +2. `deps`: A dictionary of API dependencies + +Example: +```python +async def get_provider_impl( + config: VectorStoreConfig, deps: Dict[Api, Any] +) -> VectorStoreImpl: + impl = VectorStoreImpl(config, deps[Api.inference]) + await impl.initialize() + return impl +``` + +## Dependencies + +The provider package must be installed on the system. 
For example: + +```bash +$ uv pip show llama-stack-ollama-provider +Name: llama-stack-ollama-provider +Version: 0.1.0 +Location: /path/to/venv/lib/python3.10/site-packages +``` + +## Example: Custom Ollama Provider + +Here's a complete example of creating and using a custom Ollama provider: + +1. First, create the provider package: + +```bash +mkdir -p llama-stack-provider-ollama +cd llama-stack-provider-ollama +git init +uv init +``` + +2. Edit `pyproject.toml`: + +```toml +[project] +name = "llama-stack-provider-ollama" +version = "0.1.0" +description = "Ollama provider for Llama Stack" +requires-python = ">=3.10" +dependencies = ["llama-stack", "pydantic", "ollama", "aiohttp"] +``` + +3. Create the provider specification: + +```yaml +# /etc/llama-stack/providers.d/remote/inference/custom_ollama.yaml +adapter: + adapter_type: custom_ollama + pip_packages: ["ollama", "aiohttp"] + config_class: llama_stack_provider_ollama.config.OllamaImplConfig + module: llama_stack_provider_ollama +api_dependencies: [] +optional_api_dependencies: [] +``` + +4. Install the provider: + +```bash +uv pip install -e . +``` + +5. Configure Llama Stack to use external providers: + +```yaml +external_providers_dir: /etc/llama-stack/providers.d/ +``` + +The provider will now be available in Llama Stack with the type `remote::custom_ollama`. + +## Best Practices + +1. **Package Naming**: Use the prefix `llama-stack-provider-` for your provider packages to make them easily identifiable. + +2. **Version Management**: Keep your provider package versioned and compatible with the Llama Stack version you're using. + +3. **Dependencies**: Only include the minimum required dependencies in your provider package. + +4. **Documentation**: Include clear documentation in your provider package about: + - Installation requirements + - Configuration options + - Usage examples + - Any limitations or known issues + +5. **Testing**: Include tests in your provider package to ensure it works correctly with Llama Stack. +You can refer to the [integration tests +guide](https://github.com/meta-llama/llama-stack/blob/main/tests/integration/README.md) for more +information. Execute the test for the Provider type you are developing. + +## Troubleshooting + +If your external provider isn't being loaded: + +1. Check that the `external_providers_dir` path is correct and accessible. +2. Verify that the YAML files are properly formatted. +3. Ensure all required Python packages are installed. +4. Check the Llama Stack server logs for any error messages - turn on debug logging to get more + information using `LLAMA_STACK_LOGGING=all=debug`. +5. Verify that the provider package is installed in your Python environment. diff --git a/docs/source/providers/index.md b/docs/source/providers/index.md index f8997a281..75faf7c00 100644 --- a/docs/source/providers/index.md +++ b/docs/source/providers/index.md @@ -11,6 +11,10 @@ Providers come in two flavors: Importantly, Llama Stack always strives to provide at least one fully inline provider for each API so you can iterate on a fully featured environment locally. +## External Providers + +Llama Stack supports external providers that live outside of the main codebase. This allows you to create and maintain your own providers independently. See the [External Providers Guide](external) for details. + ## Agents Run multi-step agentic workflows with LLMs with tool usage, memory (RAG), etc. 
@@ -50,6 +54,7 @@ The following providers (i.e., databases) are available for Vector IO: ```{toctree} :maxdepth: 1 +external vector_io/faiss vector_io/sqlite-vec vector_io/chromadb diff --git a/llama_stack/distribution/datatypes.py b/llama_stack/distribution/datatypes.py index 48f1925dd..b24b0ec50 100644 --- a/llama_stack/distribution/datatypes.py +++ b/llama_stack/distribution/datatypes.py @@ -312,6 +312,11 @@ a default SQLite store will be used.""", description="Configuration for the HTTP(S) server", ) + external_providers_dir: Optional[str] = Field( + default=None, + description="Path to directory containing external provider implementations. The providers code and dependencies must be installed on the system.", + ) + class BuildConfig(BaseModel): version: str = LLAMA_STACK_BUILD_CONFIG_VERSION diff --git a/llama_stack/distribution/distribution.py b/llama_stack/distribution/distribution.py index ddb727663..d4447139c 100644 --- a/llama_stack/distribution/distribution.py +++ b/llama_stack/distribution/distribution.py @@ -4,12 +4,25 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. +import glob import importlib -from typing import Dict, List +import os +from typing import Any, Dict, List +import yaml from pydantic import BaseModel -from llama_stack.providers.datatypes import Api, ProviderSpec +from llama_stack.distribution.datatypes import StackRunConfig +from llama_stack.log import get_logger +from llama_stack.providers.datatypes import ( + AdapterSpec, + Api, + InlineProviderSpec, + ProviderSpec, + remote_provider_spec, +) + +logger = get_logger(name=__name__, category="core") def stack_apis() -> List[Api]: @@ -59,11 +72,116 @@ def providable_apis() -> List[Api]: return [api for api in Api if api not in routing_table_apis and api != Api.inspect and api != Api.providers] -def get_provider_registry() -> Dict[Api, Dict[str, ProviderSpec]]: - ret = {} +def _load_remote_provider_spec(spec_data: Dict[str, Any], api: Api) -> ProviderSpec: + adapter = AdapterSpec(**spec_data["adapter"]) + spec = remote_provider_spec( + api=api, + adapter=adapter, + api_dependencies=[Api(dep) for dep in spec_data.get("api_dependencies", [])], + ) + return spec + + +def _load_inline_provider_spec(spec_data: Dict[str, Any], api: Api, provider_name: str) -> ProviderSpec: + spec = InlineProviderSpec( + api=api, + provider_type=f"inline::{provider_name}", + pip_packages=spec_data.get("pip_packages", []), + module=spec_data["module"], + config_class=spec_data["config_class"], + api_dependencies=[Api(dep) for dep in spec_data.get("api_dependencies", [])], + optional_api_dependencies=[Api(dep) for dep in spec_data.get("optional_api_dependencies", [])], + provider_data_validator=spec_data.get("provider_data_validator"), + container_image=spec_data.get("container_image"), + ) + return spec + + +def get_provider_registry(config: StackRunConfig | None = None) -> Dict[Api, Dict[str, ProviderSpec]]: + """Get the provider registry, optionally including external providers. + + This function loads both built-in providers and external providers from YAML files. 
+ External providers are loaded from a directory structure like: + + providers.d/ + remote/ + inference/ + custom_ollama.yaml + vllm.yaml + vector_io/ + qdrant.yaml + safety/ + llama-guard.yaml + inline/ + inference/ + custom_ollama.yaml + vllm.yaml + vector_io/ + qdrant.yaml + safety/ + llama-guard.yaml + + Args: + config: Optional StackRunConfig containing the external providers directory path + + Returns: + A dictionary mapping APIs to their available providers + + Raises: + FileNotFoundError: If the external providers directory doesn't exist + ValueError: If any provider spec is invalid + """ + + ret: Dict[Api, Dict[str, ProviderSpec]] = {} for api in providable_apis(): name = api.name.lower() - module = importlib.import_module(f"llama_stack.providers.registry.{name}") - ret[api] = {a.provider_type: a for a in module.available_providers()} + logger.debug(f"Importing module {name}") + try: + module = importlib.import_module(f"llama_stack.providers.registry.{name}") + ret[api] = {a.provider_type: a for a in module.available_providers()} + except ImportError as e: + logger.warning(f"Failed to import module {name}: {e}") + if config and config.external_providers_dir: + external_providers_dir = os.path.abspath(config.external_providers_dir) + if not os.path.exists(external_providers_dir): + raise FileNotFoundError(f"External providers directory not found: {external_providers_dir}") + logger.info(f"Loading external providers from {external_providers_dir}") + + for api in providable_apis(): + api_name = api.name.lower() + + # Process both remote and inline providers + for provider_type in ["remote", "inline"]: + api_dir = os.path.join(external_providers_dir, provider_type, api_name) + if not os.path.exists(api_dir): + logger.debug(f"No {provider_type} provider directory found for {api_name}") + continue + + # Look for provider spec files in the API directory + for spec_path in glob.glob(os.path.join(api_dir, "*.yaml")): + provider_name = os.path.splitext(os.path.basename(spec_path))[0] + logger.info(f"Loading {provider_type} provider spec from {spec_path}") + + try: + with open(spec_path) as f: + spec_data = yaml.safe_load(f) + + if provider_type == "remote": + spec = _load_remote_provider_spec(spec_data, api) + provider_type_key = f"remote::{provider_name}" + else: + spec = _load_inline_provider_spec(spec_data, api, provider_name) + provider_type_key = f"inline::{provider_name}" + + logger.info(f"Loaded {provider_type} provider spec for {provider_type_key} from {spec_path}") + if provider_type_key in ret[api]: + logger.warning(f"Overriding already registered provider {provider_type_key} for {api.name}") + ret[api][provider_type_key] = spec + except yaml.YAMLError as yaml_err: + logger.error(f"Failed to parse YAML file {spec_path}: {yaml_err}") + raise yaml_err + except Exception as e: + logger.error(f"Failed to load provider spec from {spec_path}: {e}") + raise e return ret diff --git a/llama_stack/distribution/resolver.py b/llama_stack/distribution/resolver.py index 25fe3f184..33ad343ec 100644 --- a/llama_stack/distribution/resolver.py +++ b/llama_stack/distribution/resolver.py @@ -351,6 +351,7 @@ async def instantiate_provider( if not hasattr(provider_spec, "module"): raise AttributeError(f"ProviderSpec of type {type(provider_spec)} does not have a 'module' attribute") + logger.debug(f"Instantiating provider {provider.provider_id} from {provider_spec.module}") module = importlib.import_module(provider_spec.module) args = [] if isinstance(provider_spec, RemoteProviderSpec): diff --git 
a/llama_stack/distribution/stack.py b/llama_stack/distribution/stack.py index 9c9289a77..d70878db4 100644 --- a/llama_stack/distribution/stack.py +++ b/llama_stack/distribution/stack.py @@ -218,7 +218,7 @@ async def construct_stack( run_config: StackRunConfig, provider_registry: Optional[ProviderRegistry] = None ) -> Dict[Api, Any]: dist_registry, _ = await create_dist_registry(run_config.metadata_store, run_config.image_name) - impls = await resolve_impls(run_config, provider_registry or get_provider_registry(), dist_registry) + impls = await resolve_impls(run_config, provider_registry or get_provider_registry(run_config), dist_registry) await register_resources(run_config, impls) return impls diff --git a/tests/external-provider/llama-stack-provider-ollama/README.md b/tests/external-provider/llama-stack-provider-ollama/README.md new file mode 100644 index 000000000..8bd2b6a87 --- /dev/null +++ b/tests/external-provider/llama-stack-provider-ollama/README.md @@ -0,0 +1,3 @@ +# Ollama external provider for Llama Stack + +Template code to create a new external provider for Llama Stack. diff --git a/tests/external-provider/llama-stack-provider-ollama/custom_ollama.yaml b/tests/external-provider/llama-stack-provider-ollama/custom_ollama.yaml new file mode 100644 index 000000000..f0960b4d8 --- /dev/null +++ b/tests/external-provider/llama-stack-provider-ollama/custom_ollama.yaml @@ -0,0 +1,7 @@ +adapter: + adapter_type: custom_ollama + pip_packages: ["ollama", "aiohttp"] + config_class: llama_stack_provider_ollama.config.OllamaImplConfig + module: llama_stack_provider_ollama +api_dependencies: [] +optional_api_dependencies: [] diff --git a/tests/external-provider/llama-stack-provider-ollama/pyproject.toml b/tests/external-provider/llama-stack-provider-ollama/pyproject.toml new file mode 100644 index 000000000..ddebc54b0 --- /dev/null +++ b/tests/external-provider/llama-stack-provider-ollama/pyproject.toml @@ -0,0 +1,44 @@ +[project] +dependencies = [ + "llama-stack", + "pydantic", + "ollama", + "aiohttp", + "aiosqlite", + "autoevals", + "blobfile", + "chardet", + "chromadb-client", + "datasets", + "faiss-cpu", + "fastapi", + "fire", + "httpx", + "matplotlib", + "mcp", + "nltk", + "numpy", + "openai", + "opentelemetry-exporter-otlp-proto-http", + "opentelemetry-sdk", + "pandas", + "pillow", + "psycopg2-binary", + "pymongo", + "pypdf", + "redis", + "requests", + "scikit-learn", + "scipy", + "sentencepiece", + "tqdm", + "transformers", + "tree_sitter", + "uvicorn", +] + +name = "llama-stack-provider-ollama" +version = "0.1.0" +description = "External provider for Ollama using the Llama Stack API" +readme = "README.md" +requires-python = ">=3.10" diff --git a/tests/external-provider/llama-stack-provider-ollama/run.yaml b/tests/external-provider/llama-stack-provider-ollama/run.yaml new file mode 100644 index 000000000..7a3636c4d --- /dev/null +++ b/tests/external-provider/llama-stack-provider-ollama/run.yaml @@ -0,0 +1,135 @@ +version: '2' +image_name: ollama +apis: +- agents +- datasetio +- eval +- inference +- safety +- scoring +- telemetry +- tool_runtime +- vector_io +providers: + inference: + - provider_id: custom_ollama + provider_type: remote::custom_ollama + config: + url: ${env.OLLAMA_URL:http://localhost:11434} + vector_io: + - provider_id: faiss + provider_type: inline::faiss + config: + kvstore: + type: sqlite + namespace: null + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/faiss_store.db + safety: + - provider_id: llama-guard + provider_type: inline::llama-guard + 
config: + excluded_categories: [] + agents: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + persistence_store: + type: sqlite + namespace: null + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/agents_store.db + telemetry: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + service_name: ${env.OTEL_SERVICE_NAME:llama-stack} + sinks: ${env.TELEMETRY_SINKS:console,sqlite} + sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/ollama/trace_store.db} + eval: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + kvstore: + type: sqlite + namespace: null + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/meta_reference_eval.db + datasetio: + - provider_id: huggingface + provider_type: remote::huggingface + config: + kvstore: + type: sqlite + namespace: null + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/huggingface_datasetio.db + - provider_id: localfs + provider_type: inline::localfs + config: + kvstore: + type: sqlite + namespace: null + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/localfs_datasetio.db + scoring: + - provider_id: basic + provider_type: inline::basic + config: {} + - provider_id: llm-as-judge + provider_type: inline::llm-as-judge + config: {} + - provider_id: braintrust + provider_type: inline::braintrust + config: + openai_api_key: ${env.OPENAI_API_KEY:} + tool_runtime: + - provider_id: brave-search + provider_type: remote::brave-search + config: + api_key: ${env.BRAVE_SEARCH_API_KEY:} + max_results: 3 + - provider_id: tavily-search + provider_type: remote::tavily-search + config: + api_key: ${env.TAVILY_SEARCH_API_KEY:} + max_results: 3 + - provider_id: code-interpreter + provider_type: inline::code-interpreter + config: {} + - provider_id: rag-runtime + provider_type: inline::rag-runtime + config: {} + - provider_id: model-context-protocol + provider_type: remote::model-context-protocol + config: {} + - provider_id: wolfram-alpha + provider_type: remote::wolfram-alpha + config: + api_key: ${env.WOLFRAM_ALPHA_API_KEY:} +metadata_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/registry.db +models: +- metadata: {} + model_id: ${env.INFERENCE_MODEL} + provider_id: custom_ollama + model_type: llm +- metadata: + embedding_dimension: 384 + model_id: all-MiniLM-L6-v2 + provider_id: custom_ollama + provider_model_id: all-minilm:latest + model_type: embedding +shields: [] +vector_dbs: [] +datasets: [] +scoring_fns: [] +benchmarks: [] +tool_groups: +- toolgroup_id: builtin::websearch + provider_id: tavily-search +- toolgroup_id: builtin::rag + provider_id: rag-runtime +- toolgroup_id: builtin::code_interpreter + provider_id: code-interpreter +- toolgroup_id: builtin::wolfram_alpha + provider_id: wolfram-alpha +server: + port: 8321 +external_providers_dir: /tmp/providers.d diff --git a/tests/unit/distribution/test_distribution.py b/tests/unit/distribution/test_distribution.py new file mode 100644 index 000000000..a4daffb82 --- /dev/null +++ b/tests/unit/distribution/test_distribution.py @@ -0,0 +1,223 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
+ +from typing import Any, Dict +from unittest.mock import patch + +import pytest +import yaml +from pydantic import BaseModel, Field, ValidationError + +from llama_stack.distribution.datatypes import Api, Provider, StackRunConfig +from llama_stack.distribution.distribution import get_provider_registry +from llama_stack.providers.datatypes import ProviderSpec + + +class SampleConfig(BaseModel): + foo: str = Field( + default="bar", + description="foo", + ) + + @classmethod + def sample_run_config(cls, **kwargs: Any) -> Dict[str, Any]: + return { + "foo": "baz", + } + + +@pytest.fixture +def mock_providers(): + """Mock the available_providers function to return test providers.""" + with patch("llama_stack.providers.registry.inference.available_providers") as mock: + mock.return_value = [ + ProviderSpec( + provider_type="test_provider", + api=Api.inference, + adapter_type="test_adapter", + config_class="test_provider.config.TestProviderConfig", + ) + ] + yield mock + + +@pytest.fixture +def base_config(tmp_path): + """Create a base StackRunConfig with common settings.""" + return StackRunConfig( + image_name="test_image", + providers={ + "inference": [ + Provider( + provider_id="sample_provider", + provider_type="sample", + config=SampleConfig.sample_run_config(), + ) + ] + }, + external_providers_dir=str(tmp_path), + ) + + +@pytest.fixture +def provider_spec_yaml(): + """Common provider spec YAML for testing.""" + return """ +adapter: + adapter_type: test_provider + config_class: test_provider.config.TestProviderConfig + module: test_provider +api_dependencies: + - safety +""" + + +@pytest.fixture +def inline_provider_spec_yaml(): + """Common inline provider spec YAML for testing.""" + return """ +module: test_provider +config_class: test_provider.config.TestProviderConfig +pip_packages: + - test-package +api_dependencies: + - safety +optional_api_dependencies: + - vector_io +provider_data_validator: test_provider.validator.TestValidator +container_image: test-image:latest +""" + + +@pytest.fixture +def api_directories(tmp_path): + """Create the API directory structure for testing.""" + # Create remote provider directory + remote_inference_dir = tmp_path / "remote" / "inference" + remote_inference_dir.mkdir(parents=True, exist_ok=True) + + # Create inline provider directory + inline_inference_dir = tmp_path / "inline" / "inference" + inline_inference_dir.mkdir(parents=True, exist_ok=True) + + return remote_inference_dir, inline_inference_dir + + +class TestProviderRegistry: + """Test suite for provider registry functionality.""" + + def test_builtin_providers(self, mock_providers): + """Test loading built-in providers.""" + registry = get_provider_registry(None) + + assert Api.inference in registry + assert "test_provider" in registry[Api.inference] + assert registry[Api.inference]["test_provider"].provider_type == "test_provider" + assert registry[Api.inference]["test_provider"].api == Api.inference + + def test_external_remote_providers(self, api_directories, mock_providers, base_config, provider_spec_yaml): + """Test loading external remote providers from YAML files.""" + remote_dir, _ = api_directories + with open(remote_dir / "test_provider.yaml", "w") as f: + f.write(provider_spec_yaml) + + registry = get_provider_registry(base_config) + assert len(registry[Api.inference]) == 2 + + assert Api.inference in registry + assert "remote::test_provider" in registry[Api.inference] + provider = registry[Api.inference]["remote::test_provider"] + assert provider.adapter.adapter_type == 
"test_provider" + assert provider.adapter.module == "test_provider" + assert provider.adapter.config_class == "test_provider.config.TestProviderConfig" + assert Api.safety in provider.api_dependencies + + def test_external_inline_providers(self, api_directories, mock_providers, base_config, inline_provider_spec_yaml): + """Test loading external inline providers from YAML files.""" + _, inline_dir = api_directories + with open(inline_dir / "test_provider.yaml", "w") as f: + f.write(inline_provider_spec_yaml) + + registry = get_provider_registry(base_config) + assert len(registry[Api.inference]) == 2 + + assert Api.inference in registry + assert "inline::test_provider" in registry[Api.inference] + provider = registry[Api.inference]["inline::test_provider"] + assert provider.provider_type == "inline::test_provider" + assert provider.module == "test_provider" + assert provider.config_class == "test_provider.config.TestProviderConfig" + assert provider.pip_packages == ["test-package"] + assert Api.safety in provider.api_dependencies + assert Api.vector_io in provider.optional_api_dependencies + assert provider.provider_data_validator == "test_provider.validator.TestValidator" + assert provider.container_image == "test-image:latest" + + def test_invalid_yaml(self, api_directories, mock_providers, base_config): + """Test handling of invalid YAML files.""" + remote_dir, inline_dir = api_directories + with open(remote_dir / "invalid.yaml", "w") as f: + f.write("invalid: yaml: content: -") + with open(inline_dir / "invalid.yaml", "w") as f: + f.write("invalid: yaml: content: -") + + with pytest.raises(yaml.YAMLError): + get_provider_registry(base_config) + + def test_missing_directory(self, mock_providers): + """Test handling of missing external providers directory.""" + config = StackRunConfig( + image_name="test_image", + providers={ + "inference": [ + Provider( + provider_id="sample_provider", + provider_type="sample", + config=SampleConfig.sample_run_config(), + ) + ] + }, + external_providers_dir="/nonexistent/dir", + ) + with pytest.raises(FileNotFoundError): + get_provider_registry(config) + + def test_empty_api_directory(self, api_directories, mock_providers, base_config): + """Test handling of empty API directory.""" + registry = get_provider_registry(base_config) + assert len(registry[Api.inference]) == 1 # Only built-in provider + + def test_malformed_remote_provider_spec(self, api_directories, mock_providers, base_config): + """Test handling of malformed remote provider spec (missing required fields).""" + remote_dir, _ = api_directories + malformed_spec = """ +adapter: + adapter_type: test_provider + # Missing required fields +api_dependencies: + - safety +""" + with open(remote_dir / "malformed.yaml", "w") as f: + f.write(malformed_spec) + + with pytest.raises(ValidationError): + get_provider_registry(base_config) + + def test_malformed_inline_provider_spec(self, api_directories, mock_providers, base_config): + """Test handling of malformed inline provider spec (missing required fields).""" + _, inline_dir = api_directories + malformed_spec = """ +module: test_provider +# Missing required config_class +pip_packages: + - test-package +""" + with open(inline_dir / "malformed.yaml", "w") as f: + f.write(malformed_spec) + + with pytest.raises(KeyError) as exc_info: + get_provider_registry(base_config) + assert "config_class" in str(exc_info.value) From 3a9be58523254b7f471e636822c55963be41801e Mon Sep 17 00:00:00 2001 From: Matthew Farrellee Date: Wed, 9 Apr 2025 04:34:26 -0400 Subject: 
[PATCH 09/21] fix: use ollama list to find models (#1854) # What does this PR do? closes #1853 ## Test Plan ``` uv run llama stack build --image-type conda --image-name ollama --config llama_stack/templates/ollama/build.yaml ollama pull llama3.2:3b LLAMA_STACK_CONFIG=http://localhost:8321 uv run pytest tests/integration/inference/test_text_inference.py -v --text-model=llama3.2:3b ``` --- llama_stack/providers/remote/inference/ollama/ollama.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/llama_stack/providers/remote/inference/ollama/ollama.py b/llama_stack/providers/remote/inference/ollama/ollama.py index 5a78c07cc..12902996b 100644 --- a/llama_stack/providers/remote/inference/ollama/ollama.py +++ b/llama_stack/providers/remote/inference/ollama/ollama.py @@ -307,9 +307,10 @@ class OllamaInferenceAdapter(Inference, ModelsProtocolPrivate): if model.model_type == ModelType.embedding: logger.info(f"Pulling embedding model `{model.provider_resource_id}` if necessary...") await self.client.pull(model.provider_resource_id) - response = await self.client.list() - else: - response = await self.client.ps() + # we use list() here instead of ps() - + # - ps() only lists running models, not available models + # - models not currently running are run by the ollama server as needed + response = await self.client.list() available_models = [m["model"] for m in response["models"]] if model.provider_resource_id not in available_models: raise ValueError( From a2cf2999066aa583f6e356a6580862184916a998 Mon Sep 17 00:00:00 2001 From: Matthew Farrellee Date: Wed, 9 Apr 2025 04:35:19 -0400 Subject: [PATCH 10/21] fix: update getting started guide to use `ollama pull` (#1855) # What does this PR do? download the getting started w/ ollama model instead of downloading and running it. directly running it was necessary before https://github.com/meta-llama/llama-stack/pull/1854 ## Test Plan run the code on the page --- docs/source/getting_started/index.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/source/getting_started/index.md b/docs/source/getting_started/index.md index ef258a9cf..e9ad51961 100644 --- a/docs/source/getting_started/index.md +++ b/docs/source/getting_started/index.md @@ -6,13 +6,13 @@ Llama Stack is a stateful service with REST APIs to support seamless transition In this guide, we'll walk through how to build a RAG agent locally using Llama Stack with [Ollama](https://ollama.com/) to run inference on a Llama Model. -### 1. Start Ollama +### 1. Download a Llama model with Ollama ```bash -ollama run llama3.2:3b --keepalive 60m +ollama pull llama3.2:3b-instruct-fp16 ``` -By default, Ollama keeps the model loaded in memory for 5 minutes which can be too short. We set the `--keepalive` flag to 60 minutes to ensure the model remains loaded for sometime. +This will instruct the Ollama service to download the Llama 3.2 3B Instruct model, which we'll use in the rest of this guide. ```{admonition} Note :class: tip From 22814299b00ecd4fbd996d8a631aef6645818e6e Mon Sep 17 00:00:00 2001 From: Paolo Dettori Date: Wed, 9 Apr 2025 04:56:07 -0400 Subject: [PATCH 11/21] fix: solve unregister_toolgroup error (#1608) # What does this PR do? 
Fixes issue #1537 that causes "500 Internal Server Error" when unregistering a toolgroup # (Closes #1537 ) ## Test Plan ```console $ pytest -s -v tests/integration/tool_runtime/test_registration.py --stack-config=ollama --env INFERENCE_MODEL="meta-llama/Llama-3.2-3B-Instruct" INFO 2025-03-14 21:15:03,999 tests.integration.conftest:41 tests: Setting DISABLE_CODE_SANDBOX=1 for macOS /opt/homebrew/lib/python3.10/site-packages/pytest_asyncio/plugin.py:207: PytestDeprecationWarning: The configuration option "asyncio_default_fixture_loop_scope" is unset. The event loop scope for asynchronous fixtures will default to the fixture caching scope. Future versions of pytest-asyncio will default the loop scope for asynchronous fixtures to function scope. Set the default fixture loop scope explicitly in order to avoid unexpected behavior in the future. Valid fixture loop scopes are: "function", "class", "module", "package", "session" warnings.warn(PytestDeprecationWarning(_DEFAULT_FIXTURE_LOOP_SCOPE_UNSET)) ===================================================== test session starts ===================================================== platform darwin -- Python 3.10.16, pytest-8.3.5, pluggy-1.5.0 -- /opt/homebrew/opt/python@3.10/bin/python3.10 cachedir: .pytest_cache rootdir: /Users/paolo/Projects/aiplatform/llama-stack configfile: pyproject.toml plugins: asyncio-0.25.3, anyio-4.8.0 asyncio: mode=strict, asyncio_default_fixture_loop_scope=None collected 1 item tests/integration/tool_runtime/test_registration.py::test_register_and_unregister_toolgroup[None-None-None-None-None] INFO 2025-03-14 21:15:04,478 llama_stack.providers.remote.inference.ollama.ollama:75 inference: checking connectivity to Ollama at `http://localhost:11434`... INFO 2025-03-14 21:15:05,350 llama_stack.providers.remote.inference.ollama.ollama:294 inference: Pulling embedding model `all-minilm:latest` if necessary... INFO: Started server process [78391] INFO: Waiting for application startup. INFO: Application startup complete. 
INFO: Uvicorn running on http://0.0.0.0:8000 (Press CTRL+C to quit) INFO: 127.0.0.1:57424 - "GET /sse HTTP/1.1" 200 OK INFO: 127.0.0.1:57434 - "GET /sse HTTP/1.1" 200 OK INFO 2025-03-14 21:15:16,129 mcp.client.sse:51 uncategorized: Connecting to SSE endpoint: http://localhost:8000/sse INFO: 127.0.0.1:57445 - "GET /sse HTTP/1.1" 200 OK INFO 2025-03-14 21:15:16,146 mcp.client.sse:71 uncategorized: Received endpoint URL: http://localhost:8000/messages/?session_id=c5b6fc01f8dc4b5e80e38eb1c1b22a9b INFO 2025-03-14 21:15:16,147 mcp.client.sse:140 uncategorized: Starting post writer with endpoint URL: http://localhost:8000/messages/?session_id=c5b6fc01f8dc4b5e80e38eb1c1b22a9b INFO: 127.0.0.1:57447 - "POST /messages/?session_id=c5b6fc01f8dc4b5e80e38eb1c1b22a9b HTTP/1.1" 202 Accepted INFO: 127.0.0.1:57447 - "POST /messages/?session_id=c5b6fc01f8dc4b5e80e38eb1c1b22a9b HTTP/1.1" 202 Accepted INFO: 127.0.0.1:57447 - "POST /messages/?session_id=c5b6fc01f8dc4b5e80e38eb1c1b22a9b HTTP/1.1" 202 Accepted INFO 2025-03-14 21:15:16,155 mcp.server.lowlevel.server:535 uncategorized: Processing request of type ListToolsRequest PASSED =============================================== 1 passed, 4 warnings in 12.17s ================================================ ``` --------- Signed-off-by: Paolo Dettori --- .../distribution/routers/routing_tables.py | 4 +- .../tool_runtime/test_registration.py | 124 ++++++++++++++++++ 2 files changed, 126 insertions(+), 2 deletions(-) create mode 100644 tests/integration/tool_runtime/test_registration.py diff --git a/llama_stack/distribution/routers/routing_tables.py b/llama_stack/distribution/routers/routing_tables.py index 557330df7..f6adae49d 100644 --- a/llama_stack/distribution/routers/routing_tables.py +++ b/llama_stack/distribution/routers/routing_tables.py @@ -608,8 +608,8 @@ class ToolGroupsRoutingTable(CommonRoutingTableImpl, ToolGroups): tool_group = await self.get_tool_group(toolgroup_id) if tool_group is None: raise ValueError(f"Tool group {toolgroup_id} not found") - tools = (await self.list_tools(toolgroup_id)).data - for tool in tools: + tools = await self.list_tools(toolgroup_id) + for tool in getattr(tools, "data", []): await self.unregister_object(tool) await self.unregister_object(tool_group) diff --git a/tests/integration/tool_runtime/test_registration.py b/tests/integration/tool_runtime/test_registration.py new file mode 100644 index 000000000..e04b56652 --- /dev/null +++ b/tests/integration/tool_runtime/test_registration.py @@ -0,0 +1,124 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
+ +import socket +import threading +import time + +import httpx +import mcp.types as types +import pytest +import uvicorn +from llama_stack_client.types.shared_params.url import URL +from mcp.server.fastmcp import Context, FastMCP +from mcp.server.sse import SseServerTransport +from starlette.applications import Starlette +from starlette.routing import Mount, Route + + +@pytest.fixture(scope="module") +def mcp_server(): + server = FastMCP("FastMCP Test Server") + + @server.tool() + async def fetch(url: str, ctx: Context) -> list[types.TextContent | types.ImageContent | types.EmbeddedResource]: + headers = {"User-Agent": "MCP Test Server (github.com/modelcontextprotocol/python-sdk)"} + async with httpx.AsyncClient(follow_redirects=True, headers=headers) as client: + response = await client.get(url) + response.raise_for_status() + return [types.TextContent(type="text", text=response.text)] + + sse = SseServerTransport("/messages/") + + async def handle_sse(request): + async with sse.connect_sse(request.scope, request.receive, request._send) as streams: + await server._mcp_server.run( + streams[0], + streams[1], + server._mcp_server.create_initialization_options(), + ) + + app = Starlette( + debug=True, + routes=[ + Route("/sse", endpoint=handle_sse), + Mount("/messages/", app=sse.handle_post_message), + ], + ) + + def get_open_port(): + with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock: + sock.bind(("", 0)) + return sock.getsockname()[1] + + port = get_open_port() + + def run_server(): + uvicorn.run(app, host="0.0.0.0", port=port) + + # Start the server in a new thread + server_thread = threading.Thread(target=run_server, daemon=True) + server_thread.start() + + # Polling until the server is ready + timeout = 10 + start_time = time.time() + + while time.time() - start_time < timeout: + try: + response = httpx.get(f"http://localhost:{port}/sse") + if response.status_code == 200: + break + except (httpx.RequestError, httpx.HTTPStatusError): + pass + time.sleep(0.1) + + yield port + + +def test_register_and_unregister_toolgroup(llama_stack_client, mcp_server): + """ + Integration test for registering and unregistering a toolgroup using the ToolGroups API. 
+ """ + port = mcp_server + test_toolgroup_id = "remote::web-fetch" + provider_id = "model-context-protocol" + + # Cleanup before running the test + toolgroups = llama_stack_client.toolgroups.list() + for toolgroup in toolgroups: + if toolgroup.identifier == test_toolgroup_id: + llama_stack_client.toolgroups.unregister(toolgroup_id=test_toolgroup_id) + + # Register the toolgroup + llama_stack_client.toolgroups.register( + toolgroup_id=test_toolgroup_id, + provider_id=provider_id, + mcp_endpoint=URL(uri=f"http://localhost:{port}/sse"), + ) + + # Verify registration + registered_toolgroup = llama_stack_client.toolgroups.get(toolgroup_id=test_toolgroup_id) + assert registered_toolgroup is not None + assert registered_toolgroup.identifier == test_toolgroup_id + assert registered_toolgroup.provider_id == provider_id + + # Verify tools listing + tools_list_response = llama_stack_client.tools.list(toolgroup_id=test_toolgroup_id) + assert isinstance(tools_list_response, list) + assert tools_list_response + + # Unregister the toolgroup + llama_stack_client.toolgroups.unregister(toolgroup_id=test_toolgroup_id) + + # Verify it is unregistered + with pytest.raises(ValueError, match=f"Tool group '{test_toolgroup_id}' not found"): + llama_stack_client.toolgroups.get(toolgroup_id=test_toolgroup_id) + + # Verify tools are also unregistered + unregister_tools_list_response = llama_stack_client.tools.list(toolgroup_id=test_toolgroup_id) + assert isinstance(unregister_tools_list_response, list) + assert not unregister_tools_list_response From 30b49d8dfa26fa0c07f39b6e7ce59d207fbcea82 Mon Sep 17 00:00:00 2001 From: Jaland Date: Wed, 9 Apr 2025 10:45:15 +0100 Subject: [PATCH 12/21] fix: Playground Container Issue (#1868) **What does this PR do?** This PR fixes a build issue with the Containerfile caused by missing requirement `llama-stack`. It updates the Containerfile to include the necessary requirements and upgrades the Python version to ensure successful builds. **Test Plan** The updated Containerfile has been tested, and the build now completes successfully with the required dependencies included. --- llama_stack/distribution/ui/Containerfile | 2 +- llama_stack/distribution/ui/requirements.txt | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/llama_stack/distribution/ui/Containerfile b/llama_stack/distribution/ui/Containerfile index a97f25753..0126d1867 100644 --- a/llama_stack/distribution/ui/Containerfile +++ b/llama_stack/distribution/ui/Containerfile @@ -1,7 +1,7 @@ # More info on playground configuration can be found here: # https://llama-stack.readthedocs.io/en/latest/playground -FROM python:3.9-slim +FROM python:3.12-slim WORKDIR /app COPY . /app/ RUN /usr/local/bin/python -m pip install --upgrade pip && \ diff --git a/llama_stack/distribution/ui/requirements.txt b/llama_stack/distribution/ui/requirements.txt index 39f2b3d27..1e0456267 100644 --- a/llama_stack/distribution/ui/requirements.txt +++ b/llama_stack/distribution/ui/requirements.txt @@ -2,3 +2,4 @@ streamlit pandas llama-stack-client>=0.0.55 streamlit-option-menu +llama-stack>=0.1.9 From 96571053049e016b3509187ad3b00ce4fa86dc72 Mon Sep 17 00:00:00 2001 From: Michael Clifford Date: Wed, 9 Apr 2025 09:26:52 -0400 Subject: [PATCH 13/21] feat: Add tools page to playground (#1904) # What does this PR do? This PR adds an additional page to the playground called "Tools". This page connects to a llama-stack server and lists all the available LLM models, builtin tools and MCP tools in the sidebar. 
Users can select whatever combination of model and tools they want from the sidebar for their agent. Once the selections are made, users can chat with their agent similarly to the RAG page and test out agent tool use. closes #1902 ## Test Plan Ran the following commands with a llama-stack server and the updated playground worked as expected. ``` export LLAMA_STACK_ENDPOINT="http://localhost:8321" streamlit run llama_stack/distribution/ui/app.py ``` [//]: # (## Documentation) Signed-off-by: Michael Clifford --- llama_stack/distribution/ui/app.py | 2 + .../distribution/ui/page/playground/tools.py | 116 ++++++++++++++++++ 2 files changed, 118 insertions(+) create mode 100644 llama_stack/distribution/ui/page/playground/tools.py diff --git a/llama_stack/distribution/ui/app.py b/llama_stack/distribution/ui/app.py index 045b07982..441f65d20 100644 --- a/llama_stack/distribution/ui/app.py +++ b/llama_stack/distribution/ui/app.py @@ -24,6 +24,7 @@ def main(): # Playground pages chat_page = st.Page("page/playground/chat.py", title="Chat", icon="💬", default=True) rag_page = st.Page("page/playground/rag.py", title="RAG", icon="💬", default=False) + tool_page = st.Page("page/playground/tools.py", title="Tools", icon="🛠", default=False) # Distribution pages resources_page = st.Page("page/distribution/resources.py", title="Resources", icon="🔍", default=False) @@ -39,6 +40,7 @@ def main(): "Playground": [ chat_page, rag_page, + tool_page, application_evaluation_page, native_evaluation_page, ], diff --git a/llama_stack/distribution/ui/page/playground/tools.py b/llama_stack/distribution/ui/page/playground/tools.py new file mode 100644 index 000000000..e987f617b --- /dev/null +++ b/llama_stack/distribution/ui/page/playground/tools.py @@ -0,0 +1,116 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
+ +import uuid + +import streamlit as st +from llama_stack_client import Agent + +from llama_stack.distribution.ui.modules.api import llama_stack_api + + +def tool_chat_page(): + st.title("🛠 Tools") + + client = llama_stack_api.client + models = client.models.list() + model_list = [model.identifier for model in models if model.api_model_type == "llm"] + + tool_groups = client.toolgroups.list() + tool_groups_list = [tool_group.identifier for tool_group in tool_groups] + mcp_tools_list = [tool for tool in tool_groups_list if tool.startswith("mcp::")] + builtin_tools_list = [tool for tool in tool_groups_list if not tool.startswith("mcp::")] + + def reset_agent(): + st.session_state.clear() + st.cache_resource.clear() + + with st.sidebar: + st.subheader("Model") + model = st.selectbox(label="models", options=model_list, on_change=reset_agent) + + st.subheader("Builtin Tools") + toolgroup_selection = st.pills( + label="Available ToolGroups", options=builtin_tools_list, selection_mode="multi", on_change=reset_agent + ) + + st.subheader("MCP Servers") + mcp_selection = st.pills( + label="Available MCP Servers", options=mcp_tools_list, selection_mode="multi", on_change=reset_agent + ) + + toolgroup_selection.extend(mcp_selection) + + active_tool_list = [] + for toolgroup_id in toolgroup_selection: + active_tool_list.extend( + [ + f"{''.join(toolgroup_id.split('::')[1:])}:{t.identifier}" + for t in client.tools.list(toolgroup_id=toolgroup_id) + ] + ) + + st.subheader(f"Active Tools: 🛠 {len(active_tool_list)}") + st.json(active_tool_list) + + @st.cache_resource + def create_agent(): + return Agent( + client, + model=model, + instructions="You are a helpful assistant. When you use a tool always respond with a summary of the result.", + tools=toolgroup_selection, + sampling_params={ + "strategy": {"type": "greedy"}, + }, + ) + + agent = create_agent() + + if "agent_session_id" not in st.session_state: + st.session_state["agent_session_id"] = agent.create_session(session_name=f"tool_demo_{uuid.uuid4()}") + + session_id = st.session_state["agent_session_id"] + + if "messages" not in st.session_state: + st.session_state["messages"] = [{"role": "assistant", "content": "How can I help you?"}] + + for msg in st.session_state.messages: + with st.chat_message(msg["role"]): + st.markdown(msg["content"]) + + if prompt := st.chat_input(placeholder=""): + with st.chat_message("user"): + st.markdown(prompt) + + st.session_state.messages.append({"role": "user", "content": prompt}) + + turn_response = agent.create_turn( + session_id=session_id, + messages=[{"role": "user", "content": prompt}], + stream=True, + ) + + def response_generator(turn_response): + for response in turn_response: + if hasattr(response.event, "payload"): + print(response.event.payload) + if response.event.payload.event_type == "step_progress": + if hasattr(response.event.payload.delta, "text"): + yield response.event.payload.delta.text + if response.event.payload.event_type == "step_complete": + if response.event.payload.step_details.step_type == "tool_execution": + yield " 🛠 " + else: + yield f"Error occurred in the Llama Stack Cluster: {response}" + + with st.chat_message("assistant"): + response = st.write_stream(response_generator(turn_response)) + + st.session_state.messages.append({"role": "assistant", "content": response}) + + +tool_chat_page() From 692f56068c24c42fe4a5543aec04a3f7f9bd3925 Mon Sep 17 00:00:00 2001 From: Yuan Tang Date: Wed, 9 Apr 2025 09:34:41 -0400 Subject: [PATCH 14/21] docs: Add recent release notes (#1899) # What 
does this PR do? These are missing and changelog doc automation is not working yet due to missing permissions for GitHub Actions: https://dev.to/suzuki0430/how-to-enable-the-allow-github-actions-to-create-and-approve-pull-requests-option-when-its-grayed-out-3e1i --------- Signed-off-by: Yuan Tang --- CHANGELOG.md | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 953d04def..5086094ad 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,42 @@ # Changelog +# v0.2.1 +Published on: 2025-04-05T23:13:00Z + + + +--- + +# v0.2.0 +Published on: 2025-04-05T19:04:29Z + +## Llama 4 Support + +Checkout more at https://www.llama.com + + + +--- + +# v0.1.9 +Published on: 2025-03-29T00:52:23Z + +### Build and Test Agents +* Agents: Entire document context with attachments +* RAG: Documentation with sqlite-vec faiss comparison +* Getting started: Fixes to getting started notebook. + +### Agent Evals and Model Customization +* (**New**) Post-training: Add nemo customizer + +### Better Engineering +* Moved sqlite-vec to non-blocking calls +* Don't return a payload on file delete + + + +--- + # v0.1.8 Published on: 2025-03-24T01:28:50Z From 5c010e234a13b064803884e5e9c1fd9ce47f3741 Mon Sep 17 00:00:00 2001 From: Michael Clifford Date: Wed, 9 Apr 2025 09:56:41 -0400 Subject: [PATCH 15/21] fix: add tavily_search option to playground api (#1909) # What does this PR do? This PR adds the "TAVILY_SEARCH_API_KEY" option to the playground to enable the use of the websearch tool. [//]: # (If resolving an issue, uncomment and update the line below) [//]: # (Closes #[issue-number]) ## Test Plan ``` export TAVILY_SEARCH_API_KEY=*** streamlit run llama_stack/distribution/ui/app.py ``` Without this change the builtin websearch tool will fail due to missing API key. [//]: # (## Documentation) Related to #1902 Signed-off-by: Michael Clifford --- llama_stack/distribution/ui/modules/api.py | 1 + 1 file changed, 1 insertion(+) diff --git a/llama_stack/distribution/ui/modules/api.py b/llama_stack/distribution/ui/modules/api.py index 40caccda0..d5395c5b9 100644 --- a/llama_stack/distribution/ui/modules/api.py +++ b/llama_stack/distribution/ui/modules/api.py @@ -19,6 +19,7 @@ class LlamaStackApi: "together_api_key": os.environ.get("TOGETHER_API_KEY", ""), "sambanova_api_key": os.environ.get("SAMBANOVA_API_KEY", ""), "openai_api_key": os.environ.get("OPENAI_API_KEY", ""), + "tavily_search_api_key": os.environ.get("TAVILY_SEARCH_API_KEY", ""), }, ) From b93318e40bf8a6ad399f4fa1322456fe0e8797ef Mon Sep 17 00:00:00 2001 From: Francisco Arceo Date: Wed, 9 Apr 2025 10:40:56 -0600 Subject: [PATCH 16/21] chore: Detect browser setting for dark/light mode and set default to light mode (#1913) # What does this PR do? 1. Adding some lightweight JS to detect the default browser setting for dark/light mode 3. Setting default screen setting to light mode as to not change default behavior. From the docs: https://github.com/MrDogeBro/sphinx_rtd_dark_mode >This lets you choose which theme the user sees when they load the docs for the first time ever. After the first time however, this setting has no effect as the users preference is stored in local storage within their browser. This option accepts a boolean for the value. If this option is true (the default option), users will start in dark mode when first visiting the site. If this option is false, users will start in light mode when they first visit the site. 
# Closes #1915 ## Test Plan Tested locally on my Mac on Safari and Chrome. --------- Signed-off-by: Francisco Javier Arceo --- docs/_static/js/detect_theme.js | 9 +++++++++ docs/source/conf.py | 3 +++ 2 files changed, 12 insertions(+) create mode 100644 docs/_static/js/detect_theme.js diff --git a/docs/_static/js/detect_theme.js b/docs/_static/js/detect_theme.js new file mode 100644 index 000000000..484b2bb8b --- /dev/null +++ b/docs/_static/js/detect_theme.js @@ -0,0 +1,9 @@ +document.addEventListener("DOMContentLoaded", function () { + const prefersDark = window.matchMedia("(prefers-color-scheme: dark)").matches; + const htmlElement = document.documentElement; + if (prefersDark) { + htmlElement.setAttribute("data-theme", "dark"); + } else { + htmlElement.setAttribute("data-theme", "light"); + } +}); diff --git a/docs/source/conf.py b/docs/source/conf.py index 33654fe67..55c6383b2 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -112,6 +112,8 @@ html_theme_options = { # "style_nav_header_background": "#c3c9d4", } +default_dark_mode = False + html_static_path = ["../_static"] # html_logo = "../_static/llama-stack-logo.png" # html_style = "../_static/css/my_theme.css" @@ -119,6 +121,7 @@ html_static_path = ["../_static"] def setup(app): app.add_css_file("css/my_theme.css") + app.add_js_file("js/detect_theme.js") def dockerhub_role(name, rawtext, text, lineno, inliner, options={}, content=[]): url = f"https://hub.docker.com/r/llamastack/{text}" From 770b38f8b5b6139dd4e684f78b39f1868635f05f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Han?= Date: Wed, 9 Apr 2025 20:22:29 +0200 Subject: [PATCH 17/21] chore: simplify running the demo UI (#1907) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # What does this PR do? * Manage UI deps in pyproject * Use a new "ui" dep group to pull the deps with "uv" * Simplify the run command * Bump versions in requirements.txt Signed-off-by: Sébastien Han --- docs/source/playground/index.md | 4 +- llama_stack/distribution/ui/README.md | 4 +- llama_stack/distribution/ui/requirements.txt | 4 +- pyproject.toml | 6 + uv.lock | 178 +++++++++++++++++++ 5 files changed, 188 insertions(+), 8 deletions(-) diff --git a/docs/source/playground/index.md b/docs/source/playground/index.md index 9691609ab..ded2b5772 100644 --- a/docs/source/playground/index.md +++ b/docs/source/playground/index.md @@ -103,7 +103,5 @@ llama stack run together 2. Start Streamlit UI ```bash -cd llama_stack/distribution/ui -pip install -r requirements.txt -streamlit run app.py +uv run --with ".[ui]" streamlit run llama_stack/distribution/ui/app.py ``` diff --git a/llama_stack/distribution/ui/README.md b/llama_stack/distribution/ui/README.md index fe660544f..51c2d2bc2 100644 --- a/llama_stack/distribution/ui/README.md +++ b/llama_stack/distribution/ui/README.md @@ -36,9 +36,7 @@ llama-stack-client benchmarks register \ 3. 
Start Streamlit UI ```bash -cd llama_stack/distribution/ui -pip install -r requirements.txt -streamlit run app.py +uv run --with ".[ui]" streamlit run llama_stack/distribution/ui/app.py ``` ## Environment Variables diff --git a/llama_stack/distribution/ui/requirements.txt b/llama_stack/distribution/ui/requirements.txt index 1e0456267..61d42768d 100644 --- a/llama_stack/distribution/ui/requirements.txt +++ b/llama_stack/distribution/ui/requirements.txt @@ -1,5 +1,5 @@ streamlit pandas -llama-stack-client>=0.0.55 +llama-stack-client>=0.2.1 streamlit-option-menu -llama-stack>=0.1.9 +llama-stack>=0.2.1 diff --git a/pyproject.toml b/pyproject.toml index 8ae7ddbb6..83260b681 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -89,6 +89,12 @@ docs = [ "tomli", ] codegen = ["rich", "pydantic", "jinja2>=3.1.6"] +ui = [ + "streamlit", + "pandas", + "llama-stack-client>=0.2.1", + "streamlit-option-menu", +] [project.urls] Homepage = "https://github.com/meta-llama/llama-stack" diff --git a/uv.lock b/uv.lock index 5d7ce4076..1f7adea82 100644 --- a/uv.lock +++ b/uv.lock @@ -1,4 +1,5 @@ version = 1 +revision = 1 requires-python = ">=3.10" resolution-markers = [ "(python_full_version < '3.11' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version < '3.11' and sys_platform != 'darwin' and sys_platform != 'linux')", @@ -139,6 +140,22 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/7e/b3/6b4067be973ae96ba0d615946e314c5ae35f9f993eca561b356540bb0c2b/alabaster-1.0.0-py3-none-any.whl", hash = "sha256:fc6786402dc3fcb2de3cabd5fe455a2db534b371124f1f21de8731783dec828b", size = 13929 }, ] +[[package]] +name = "altair" +version = "5.5.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "jinja2" }, + { name = "jsonschema" }, + { name = "narwhals" }, + { name = "packaging" }, + { name = "typing-extensions", marker = "python_full_version < '3.14'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/16/b1/f2969c7bdb8ad8bbdda031687defdce2c19afba2aa2c8e1d2a17f78376d8/altair-5.5.0.tar.gz", hash = "sha256:d960ebe6178c56de3855a68c47b516be38640b73fb3b5111c2a9ca90546dd73d", size = 705305 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/aa/f3/0b6ced594e51cc95d8c1fc1640d3623770d01e4969d29c0bd09945fafefa/altair-5.5.0-py3-none-any.whl", hash = "sha256:91a310b926508d560fe0148d02a194f38b824122641ef528113d029fcd129f8c", size = 731200 }, +] + [[package]] name = "annotated-types" version = "0.7.0" @@ -258,6 +275,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/09/71/54e999902aed72baf26bca0d50781b01838251a462612966e9fc4891eadd/black-25.1.0-py3-none-any.whl", hash = "sha256:95e8176dae143ba9097f351d174fdaf0ccd29efb414b362ae3fd72bf0f710717", size = 207646 }, ] +[[package]] +name = "blinker" +version = "1.9.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/21/28/9b3f50ce0e048515135495f198351908d99540d69bfdc8c1d15b73dc55ce/blinker-1.9.0.tar.gz", hash = "sha256:b4ce2265a7abece45e7cc896e98dbebe6cead56bcf805a3d23136d145f5445bf", size = 22460 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/10/cb/f2ad4230dc2eb1a74edf38f1a38b9b52277f75bef262d8908e60d957e13c/blinker-1.9.0-py3-none-any.whl", hash = "sha256:ba0efaa9080b619ff2f3459d1d500c57bddea4a6b424b60a91141db6fd2f08bc", size = 8458 }, +] + [[package]] name = "blobfile" version = "3.0.0" @@ -282,6 +308,15 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/b3/58/a255894436f3eca4a20611785a30a43b85bc75adf1b77f227e1e6d0cce0a/braintrust_core-0.0.58-py3-none-any.whl", hash = "sha256:fa272b70376d2c6692acf00ebd9fb9bae057b0c53b2b6a59a64850bf79757311", size = 4438 }, ] +[[package]] +name = "cachetools" +version = "5.5.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/6c/81/3747dad6b14fa2cf53fcf10548cf5aea6913e96fab41a3c198676f8948a5/cachetools-5.5.2.tar.gz", hash = "sha256:1a661caa9175d26759571b2e19580f9d6393969e5dfca11fdb1f947a23e640d4", size = 28380 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/72/76/20fa66124dbe6be5cafeb312ece67de6b61dd91a0247d1ea13db4ebb33c2/cachetools-5.5.2-py3-none-any.whl", hash = "sha256:d26a22bcc62eb95c3beabd9f1ee5e820d3d2704fe2967cbe350e20c8ffcd3f0a", size = 10080 }, +] + [[package]] name = "certifi" version = "2025.1.31" @@ -783,6 +818,30 @@ http = [ { name = "aiohttp" }, ] +[[package]] +name = "gitdb" +version = "4.0.12" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "smmap" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/72/94/63b0fc47eb32792c7ba1fe1b694daec9a63620db1e313033d18140c2320a/gitdb-4.0.12.tar.gz", hash = "sha256:5ef71f855d191a3326fcfbc0d5da835f26b13fbcba60c32c21091c349ffdb571", size = 394684 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a0/61/5c78b91c3143ed5c14207f463aecfc8f9dbb5092fb2869baf37c273b2705/gitdb-4.0.12-py3-none-any.whl", hash = "sha256:67073e15955400952c6565cc3e707c554a4eea2e428946f7a4c162fab9bd9bcf", size = 62794 }, +] + +[[package]] +name = "gitpython" +version = "3.1.44" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "gitdb" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c0/89/37df0b71473153574a5cdef8f242de422a0f5d26d7a9e231e6f169b4ad14/gitpython-3.1.44.tar.gz", hash = "sha256:c87e30b26253bf5418b01b0660f818967f3c503193838337fe5e573331249269", size = 214196 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1d/9a/4114a9057db2f1462d5c8f8390ab7383925fe1ac012eaa42402ad65c2963/GitPython-3.1.44-py3-none-any.whl", hash = "sha256:9e0e10cda9bed1ee64bc9a6de50e7e38a9c9943241cd7f585f6df3ed28011110", size = 207599 }, +] + [[package]] name = "googleapis-common-protos" version = "1.67.0" @@ -1386,6 +1445,12 @@ test = [ { name = "torchvision", version = "0.21.0", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or sys_platform == 'darwin'" }, { name = "torchvision", version = "0.21.0+cpu", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" }, ] +ui = [ + { name = "llama-stack-client" }, + { name = "pandas" }, + { name = "streamlit" }, + { name = "streamlit-option-menu" }, +] unit = [ { name = "aiohttp" }, { name = "aiosqlite" }, @@ -1416,6 +1481,7 @@ requires-dist = [ { name = "jinja2", marker = "extra == 'codegen'", specifier = ">=3.1.6" }, { name = "jsonschema" }, { name = "llama-stack-client", specifier = ">=0.2.1" }, + { name = "llama-stack-client", marker = "extra == 'ui'", specifier = ">=0.2.1" }, { name = "mcp", marker = "extra == 'test'" }, { name = "myst-parser", marker = "extra == 'docs'" }, { name = "nbval", marker = "extra == 'dev'" }, @@ -1423,6 +1489,7 @@ requires-dist = [ { name = "openai", marker = "extra == 'unit'" }, { 
name = "opentelemetry-exporter-otlp-proto-http", marker = "extra == 'test'" }, { name = "opentelemetry-sdk", marker = "extra == 'test'" }, + { name = "pandas", marker = "extra == 'ui'" }, { name = "pillow" }, { name = "pre-commit", marker = "extra == 'dev'" }, { name = "prompt-toolkit" }, @@ -1452,6 +1519,8 @@ requires-dist = [ { name = "sphinxcontrib-redoc", marker = "extra == 'docs'" }, { name = "sphinxcontrib-video", marker = "extra == 'docs'" }, { name = "sqlite-vec", marker = "extra == 'unit'" }, + { name = "streamlit", marker = "extra == 'ui'" }, + { name = "streamlit-option-menu", marker = "extra == 'ui'" }, { name = "termcolor" }, { name = "tiktoken" }, { name = "tomli", marker = "extra == 'docs'" }, @@ -1461,6 +1530,7 @@ requires-dist = [ { name = "types-setuptools", marker = "extra == 'dev'" }, { name = "uvicorn", marker = "extra == 'dev'" }, ] +provides-extras = ["dev", "unit", "test", "docs", "codegen", "ui"] [[package]] name = "llama-stack-client" @@ -1815,6 +1885,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/5f/df/76d0321c3797b54b60fef9ec3bd6f4cfd124b9e422182156a1dd418722cf/myst_parser-4.0.1-py3-none-any.whl", hash = "sha256:9134e88959ec3b5780aedf8a99680ea242869d012e8821db3126d427edc9c95d", size = 84579 }, ] +[[package]] +name = "narwhals" +version = "1.34.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ec/1d/a21496389436e96394a6e3fb1a644d5bc382250baff76e867f0368a94068/narwhals-1.34.0.tar.gz", hash = "sha256:bdd3fa60bea1f1e8b698e483be18dd43af13290da12dba69ea16dc1f3edbb8f7", size = 265432 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1e/6d/875d5a7f8e14fc044ede74b94e739d7312c3c8d1a3878f649601b15fdd68/narwhals-1.34.0-py3-none-any.whl", hash = "sha256:9502b9aa5dfe125c090a3a0bbca95becfa1fac2cd67f8b80d12b1dc2ed751865", size = 325346 }, +] + [[package]] name = "nbformat" version = "5.10.4" @@ -2571,6 +2650,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/0b/53/a64f03044927dc47aafe029c42a5b7aabc38dfb813475e0e1bf71c4a59d0/pydantic_settings-2.8.1-py3-none-any.whl", hash = "sha256:81942d5ac3d905f7f3ee1a70df5dfb62d5569c12f51a5a647defc1c3d9ee2e9c", size = 30839 }, ] +[[package]] +name = "pydeck" +version = "0.9.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "jinja2" }, + { name = "numpy" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a1/ca/40e14e196864a0f61a92abb14d09b3d3da98f94ccb03b49cf51688140dab/pydeck-0.9.1.tar.gz", hash = "sha256:f74475ae637951d63f2ee58326757f8d4f9cd9f2a457cf42950715003e2cb605", size = 3832240 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ab/4c/b888e6cf58bd9db9c93f40d1c6be8283ff49d88919231afe93a6bcf61626/pydeck-0.9.1-py2.py3-none-any.whl", hash = "sha256:b3f75ba0d273fc917094fa61224f3f6076ca8752b93d46faf3bcfd9f9d59b038", size = 6900403 }, +] + [[package]] name = "pygments" version = "2.19.1" @@ -3220,6 +3312,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b7/ce/149a00dd41f10bc29e5921b496af8b574d8413afcd5e30dfa0ed46c2cc5e/six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274", size = 11050 }, ] +[[package]] +name = "smmap" +version = "5.0.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/44/cd/a040c4b3119bbe532e5b0732286f805445375489fceaec1f48306068ee3b/smmap-5.0.2.tar.gz", hash = 
"sha256:26ea65a03958fa0c8a1c7e8c7a58fdc77221b8910f6be2131affade476898ad5", size = 22329 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/04/be/d09147ad1ec7934636ad912901c5fd7667e1c858e19d355237db0d0cd5e4/smmap-5.0.2-py3-none-any.whl", hash = "sha256:b30115f0def7d7531d22a0fb6502488d879e75b260a9db4d0819cfb25403af5e", size = 24303 }, +] + [[package]] name = "sniffio" version = "1.3.1" @@ -3502,6 +3603,47 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d9/61/f2b52e107b1fc8944b33ef56bf6ac4ebbe16d91b94d2b87ce013bf63fb84/starlette-0.45.3-py3-none-any.whl", hash = "sha256:dfb6d332576f136ec740296c7e8bb8c8a7125044e7c6da30744718880cdd059d", size = 71507 }, ] +[[package]] +name = "streamlit" +version = "1.44.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "altair" }, + { name = "blinker" }, + { name = "cachetools" }, + { name = "click" }, + { name = "gitpython" }, + { name = "numpy" }, + { name = "packaging" }, + { name = "pandas" }, + { name = "pillow" }, + { name = "protobuf" }, + { name = "pyarrow" }, + { name = "pydeck" }, + { name = "requests" }, + { name = "tenacity" }, + { name = "toml" }, + { name = "tornado" }, + { name = "typing-extensions" }, + { name = "watchdog", marker = "sys_platform != 'darwin'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/3e/c0/7286284567e5045f0c587c426d0c41aee5d10c0a2e360e627a83037e9f0c/streamlit-1.44.1.tar.gz", hash = "sha256:c6914ed6d5b76870b461510476806db370f36425ae0e6654d227c988288198d3", size = 9423685 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/eb/17/fc425e1d4d86e31b2aaf0812a2ef2163763a0670d671720c7c36e8679323/streamlit-1.44.1-py3-none-any.whl", hash = "sha256:9fe355f58b11f4eb71e74f115ce1f38c4c9eaff2733e6bcffb510ac1298a5990", size = 9812242 }, +] + +[[package]] +name = "streamlit-option-menu" +version = "0.4.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "streamlit" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/5e/27/72dc451cdaef1714fd0d75cc430e50a06c12c9046295fdf1f94af1b766eb/streamlit-option-menu-0.4.0.tar.gz", hash = "sha256:48ec69d59e547fa2fa4bfae001620df8af56a80de2f765ddbb9fcbfb84017129", size = 827290 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fd/52/2f525ad4262dc83d67297f69ec5afcee1438b9e9ae22aa318396725ddbed/streamlit_option_menu-0.4.0-py3-none-any.whl", hash = "sha256:a55fc7554047b6db371595af2182e435b8a2c715ee6124e8543685bd4670b07e", size = 829255 }, +] + [[package]] name = "sympy" version = "1.13.1" @@ -3514,6 +3656,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b2/fe/81695a1aa331a842b582453b605175f419fe8540355886031328089d840a/sympy-1.13.1-py3-none-any.whl", hash = "sha256:db36cdc64bf61b9b24578b6f7bab1ecdd2452cf008f34faa33776680c26d66f8", size = 6189177 }, ] +[[package]] +name = "tenacity" +version = "9.1.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/0a/d4/2b0cd0fe285e14b36db076e78c93766ff1d529d70408bd1d2a5a84f1d929/tenacity-9.1.2.tar.gz", hash = "sha256:1169d376c297e7de388d18b4481760d478b0e99a777cad3a9c86e556f4b697cb", size = 48036 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e5/30/643397144bfbfec6f6ef821f36f33e57d35946c44a2352d3c9f0ae847619/tenacity-9.1.2-py3-none-any.whl", hash = "sha256:f77bf36710d8b73a50b2dd155c97b870017ad21afe6ab300326b0371b3b05138", size = 28248 }, +] + [[package]] name = "termcolor" version = "2.5.0" @@ -3559,6 +3710,15 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/de/a8/8f499c179ec900783ffe133e9aab10044481679bb9aad78436d239eee716/tiktoken-0.9.0-cp313-cp313-win_amd64.whl", hash = "sha256:5ea0edb6f83dc56d794723286215918c1cde03712cbbafa0348b33448faf5b95", size = 894669 }, ] +[[package]] +name = "toml" +version = "0.10.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/be/ba/1f744cdc819428fc6b5084ec34d9b30660f6f9daaf70eead706e3203ec3c/toml-0.10.2.tar.gz", hash = "sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f", size = 22253 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/44/6f/7120676b6d73228c96e17f1f794d8ab046fc910d781c8d151120c3f1569e/toml-0.10.2-py2.py3-none-any.whl", hash = "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b", size = 16588 }, +] + [[package]] name = "tomli" version = "2.2.1" @@ -3836,6 +3996,24 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/93/fa/849483d56773ae29740ae70043ad88e068f98a6401aa819b5d6bee604683/virtualenv-20.29.2-py3-none-any.whl", hash = "sha256:febddfc3d1ea571bdb1dc0f98d7b45d24def7428214d4fb73cc486c9568cce6a", size = 4301478 }, ] +[[package]] +name = "watchdog" +version = "6.0.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/db/7d/7f3d619e951c88ed75c6037b246ddcf2d322812ee8ea189be89511721d54/watchdog-6.0.0.tar.gz", hash = "sha256:9ddf7c82fda3ae8e24decda1338ede66e1c99883db93711d8fb941eaa2d8c282", size = 131220 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a9/c7/ca4bf3e518cb57a686b2feb4f55a1892fd9a3dd13f470fca14e00f80ea36/watchdog-6.0.0-py3-none-manylinux2014_aarch64.whl", hash = "sha256:7607498efa04a3542ae3e05e64da8202e58159aa1fa4acddf7678d34a35d4f13", size = 79079 }, + { url = "https://files.pythonhosted.org/packages/5c/51/d46dc9332f9a647593c947b4b88e2381c8dfc0942d15b8edc0310fa4abb1/watchdog-6.0.0-py3-none-manylinux2014_armv7l.whl", hash = "sha256:9041567ee8953024c83343288ccc458fd0a2d811d6a0fd68c4c22609e3490379", size = 79078 }, + { url = "https://files.pythonhosted.org/packages/d4/57/04edbf5e169cd318d5f07b4766fee38e825d64b6913ca157ca32d1a42267/watchdog-6.0.0-py3-none-manylinux2014_i686.whl", hash = "sha256:82dc3e3143c7e38ec49d61af98d6558288c415eac98486a5c581726e0737c00e", size = 79076 }, + { url = "https://files.pythonhosted.org/packages/ab/cc/da8422b300e13cb187d2203f20b9253e91058aaf7db65b74142013478e66/watchdog-6.0.0-py3-none-manylinux2014_ppc64.whl", hash = "sha256:212ac9b8bf1161dc91bd09c048048a95ca3a4c4f5e5d4a7d1b1a7d5752a7f96f", size = 79077 }, + { url = "https://files.pythonhosted.org/packages/2c/3b/b8964e04ae1a025c44ba8e4291f86e97fac443bca31de8bd98d3263d2fcf/watchdog-6.0.0-py3-none-manylinux2014_ppc64le.whl", hash = "sha256:e3df4cbb9a450c6d49318f6d14f4bbc80d763fa587ba46ec86f99f9e6876bb26", size = 79078 }, + { url = "https://files.pythonhosted.org/packages/62/ae/a696eb424bedff7407801c257d4b1afda455fe40821a2be430e173660e81/watchdog-6.0.0-py3-none-manylinux2014_s390x.whl", hash = "sha256:2cce7cfc2008eb51feb6aab51251fd79b85d9894e98ba847408f662b3395ca3c", size = 79077 }, + { url = "https://files.pythonhosted.org/packages/b5/e8/dbf020b4d98251a9860752a094d09a65e1b436ad181faf929983f697048f/watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl", hash = "sha256:20ffe5b202af80ab4266dcd3e91aae72bf2da48c0d33bdb15c66658e685e94e2", size = 79078 }, + { url = 
"https://files.pythonhosted.org/packages/07/f6/d0e5b343768e8bcb4cda79f0f2f55051bf26177ecd5651f84c07567461cf/watchdog-6.0.0-py3-none-win32.whl", hash = "sha256:07df1fdd701c5d4c8e55ef6cf55b8f0120fe1aef7ef39a1c6fc6bc2e606d517a", size = 79065 }, + { url = "https://files.pythonhosted.org/packages/db/d9/c495884c6e548fce18a8f40568ff120bc3a4b7b99813081c8ac0c936fa64/watchdog-6.0.0-py3-none-win_amd64.whl", hash = "sha256:cbafb470cf848d93b5d013e2ecb245d4aa1c8fd0504e863ccefa32445359d680", size = 79070 }, + { url = "https://files.pythonhosted.org/packages/33/e8/e40370e6d74ddba47f002a32919d91310d6074130fe4e17dabcafc15cbf1/watchdog-6.0.0-py3-none-win_ia64.whl", hash = "sha256:a1914259fa9e1454315171103c6a30961236f508b9b623eae470268bbcc6a22f", size = 79067 }, +] + [[package]] name = "watchfiles" version = "1.0.4" From e2299291c42c4d1e29506bbdc366678c8ff4d987 Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Wed, 9 Apr 2025 11:28:45 -0700 Subject: [PATCH 18/21] fix: Mirror llama4 rope scaling fixes, small model simplify (#1917) See: - https://github.com/meta-llama/llama-models/pull/322 - https://github.com/meta-llama/llama-models/pull/320 --- llama_stack/models/llama/llama4/args.py | 13 ++++++ llama_stack/models/llama/llama4/model.py | 51 +++++++++++------------- 2 files changed, 36 insertions(+), 28 deletions(-) diff --git a/llama_stack/models/llama/llama4/args.py b/llama_stack/models/llama/llama4/args.py index 6d7c1d409..dd5f7cbde 100644 --- a/llama_stack/models/llama/llama4/args.py +++ b/llama_stack/models/llama/llama4/args.py @@ -70,6 +70,9 @@ class ModelArgs(BaseModel): attention_chunk_size: Optional[int] = None rope_theta: float = 500000 use_scaled_rope: bool = False + rope_scaling_factor: Optional[float] = None + rope_high_freq_factor: Optional[float] = None + nope_layer_interval: Optional[int] = None # No position encoding in every n layers use_qk_norm: bool = False # Set to True to enable inference-time temperature tuning (useful for very long context) @@ -92,4 +95,14 @@ class ModelArgs(BaseModel): f"n_heads ({self.n_heads}) must be divisible by n_kv_heads ({self.n_kv_heads})" ) assert self.dim % self.n_heads == 0, f"dim ({self.dim}) must be divisible by n_heads ({self.n_heads})" + + if self.use_scaled_rope: + # NOTE: ideally these values should have come from params.json. However, we have + # shipped the models everywhere. Only Llama-4-Scout uses scaled rope and needs these + # specific values. 
+ if self.rope_scaling_factor is None: + self.rope_scaling_factor = 16 + if self.rope_high_freq_factor is None: + self.rope_high_freq_factor = 1 + return self diff --git a/llama_stack/models/llama/llama4/model.py b/llama_stack/models/llama/llama4/model.py index 08fac7714..2272b868d 100644 --- a/llama_stack/models/llama/llama4/model.py +++ b/llama_stack/models/llama/llama4/model.py @@ -23,37 +23,25 @@ from .ffn import FeedForward from .moe import MoE +def rmsnorm(x, eps): + def _norm(y): + return y * torch.rsqrt(y.pow(2).mean(-1, keepdim=True) + eps) + + return _norm(x.float()).type_as(x) + + class RMSNorm(torch.nn.Module): def __init__(self, dim: int, eps: float = 1e-6): super().__init__() self.eps = eps self.weight = nn.Parameter(torch.ones(dim)) - def _norm(self, x): - return x * torch.rsqrt(x.pow(2).mean(-1, keepdim=True) + self.eps) - def forward(self, x): - output = self._norm(x.float()).type_as(x) - return output * self.weight + return rmsnorm(x, self.eps) * self.weight -class L2Norm(torch.nn.Module): - def __init__(self, dim: int, eps: float = 1e-6): - super().__init__() - self.eps = eps - - def _norm(self, x): - return x * torch.rsqrt(x.pow(2).mean(-1, keepdim=True) + self.eps) - - def forward(self, x): - return self._norm(x.float()).type_as(x) - - -def apply_scaling(freqs: torch.Tensor): - # Values obtained from grid search - scale_factor = 8 +def apply_scaling(freqs: torch.Tensor, scale_factor: float, high_freq_factor: float): low_freq_factor = 1 - high_freq_factor = 4 old_context_len = 8192 # original llama3 length low_freq_wavelen = old_context_len / low_freq_factor @@ -72,11 +60,18 @@ def apply_scaling(freqs: torch.Tensor): return torch.tensor(new_freqs, dtype=freqs.dtype, device=freqs.device) -def precompute_freqs_cis(dim: int, end: int, theta: float = 10000.0, use_scaled: bool = False): +def precompute_freqs_cis( + dim: int, + end: int, + theta: float, + use_scaled: bool, + scale_factor: float, + high_freq_factor: float, +): freqs = 1.0 / (theta ** (torch.arange(0, dim, 2)[: (dim // 2)].float() / dim)) t = torch.arange(end, device=freqs.device, dtype=torch.float32) if use_scaled: - freqs = apply_scaling(freqs) + freqs = apply_scaling(freqs, scale_factor, high_freq_factor) freqs = torch.outer(t, freqs) freqs_cis = torch.polar(torch.ones_like(freqs), freqs) # complex64 return freqs_cis @@ -174,9 +169,7 @@ class Attention(nn.Module): self.head_dim, ) ).cuda() - self.qk_norm = None - if self.use_qk_norm: - self.qk_norm = L2Norm(args.norm_eps) + self.norm_eps = args.norm_eps self._register_load_state_dict_pre_hook(self.load_hook) def load_hook( @@ -220,8 +213,8 @@ class Attention(nn.Module): xq, xk = apply_rotary_emb(xq, xk, freqs_cis=freqs_cis) if self.use_qk_norm: - xq = self.qk_norm(xq) - xk = self.qk_norm(xk) + xq = rmsnorm(xq, self.norm_eps) + xk = rmsnorm(xk, self.norm_eps) # We are applying temperature tuning (https://arxiv.org/abs/2501.19399) to NoPE layers, where # the inference-time temperature tuning function is customized to not affect short context @@ -362,6 +355,8 @@ class Transformer(nn.Module): args.max_seq_len * 2, args.rope_theta, args.use_scaled_rope, + args.rope_scaling_factor, + args.rope_high_freq_factor, ) vision_args = self.args.vision_args if vision_args: From 36a31fe5dd3947a163d94fce7a68484beb35ded1 Mon Sep 17 00:00:00 2001 From: Jiawen Liu Date: Wed, 9 Apr 2025 15:00:12 -0700 Subject: [PATCH 19/21] fix: on-the-fly int4 quantize parameter (#1920) Mirror to https://github.com/meta-llama/llama-models/pull/324 with some clean up ``` with-proxy pip install 
-e . export INFERENCE_MODEL=meta-llama/Llama-4-Scout-17B-16E-Instruct export INFERENCE_CHECKPOINT_DIR=../checkpoints/Llama-4-Scout-17B-16E-Instruct export QUANTIZATION_TYPE=int4_mixed with-proxy llama stack build --run --template meta-reference-gpu ``` # What does this PR do? [Provide a short summary of what this PR does and why. Link to relevant issues if applicable.] [//]: # (If resolving an issue, uncomment and update the line below) [//]: # (Closes #[issue-number]) ## Test Plan [Describe the tests you ran to verify your changes with result summaries. *Provide clear instructions so the plan can be easily re-executed.*] [//]: # (## Documentation) --- .../models/llama/llama4/quantization/loader.py | 2 +- llama_stack/models/llama/quantize_impls.py | 18 +----------------- 2 files changed, 2 insertions(+), 18 deletions(-) diff --git a/llama_stack/models/llama/llama4/quantization/loader.py b/llama_stack/models/llama/llama4/quantization/loader.py index b50432896..f11d83c60 100644 --- a/llama_stack/models/llama/llama4/quantization/loader.py +++ b/llama_stack/models/llama/llama4/quantization/loader.py @@ -91,7 +91,7 @@ def convert_to_quantized_model( log_status(f"Rank {rank}: Quantizing int4 weights from bf16") def apply_quantization(_, weight): - return quantize_int4(weight, fp8_activation_scale_ub, output_device=torch.device("cuda")) + return quantize_int4(weight, output_device=torch.device("cuda")) else: fp8_scales_path = os.path.join(checkpoint_dir, f"fp8_scales_{rank}.pt") diff --git a/llama_stack/models/llama/quantize_impls.py b/llama_stack/models/llama/quantize_impls.py index 6e1d15cf6..a5da01588 100644 --- a/llama_stack/models/llama/quantize_impls.py +++ b/llama_stack/models/llama/quantize_impls.py @@ -65,7 +65,7 @@ class Int4Weights( Int4ScaledWeights, collections.namedtuple( "Int4Weights", - ["weight", "scale", "zero_point", "shape", "activation_scale_ub"], + ["weight", "scale", "zero_point", "shape"], ), ): pass @@ -184,20 +184,13 @@ def quantize_fp8( @torch.inference_mode() def quantize_int4( w: Tensor, - fp8_activation_scale_ub: float, output_device: Optional[torch.device] = None, ) -> Int4Weights: """Quantize [n, k/2] weight tensor. Args: w (Tensor): [n, k/2] input high precision tensor to quantize. - fp8_activation_scale_ub (float): Upper bound for activation max. """ - activation_scale_ub = torch.tensor( - [fp8_activation_scale_ub], - dtype=torch.float, - device=output_device, - ) if w.ndim >= 3: wq, scale, zero_point = zip(*[int4_row_quantize(i) for i in w], strict=False) wq = torch.stack([pack_int4(i) for i in wq], dim=0) @@ -212,7 +205,6 @@ def quantize_int4( scale=scale.to(output_device), zero_point=zero_point.to(output_device), shape=wq.shape, - activation_scale_ub=activation_scale_ub, ) @@ -247,26 +239,18 @@ def load_int4( w: Tensor, scale: Tensor, zero_point: Tensor, - fp8_activation_scale_ub: float, output_device: Optional[torch.device] = None, ) -> Int4Weights: """Load INT4 [n, k/2] weight tensor. Args: w (Tensor): [n, k/2] input INT4. - fp8_activation_scale_ub (float): Upper bound for activation max. 
""" - activation_scale_ub = torch.tensor( - [fp8_activation_scale_ub], - dtype=torch.float, - device=output_device, - ) return Int4Weights( weight=w.to(torch.int8).to(device=output_device), scale=scale.to(device=output_device), zero_point=zero_point.to(device=output_device), shape=w.shape, - activation_scale_ub=activation_scale_ub, ) From 712c6758c68b228c3b0e8ecb4ce7d53db38ea3e4 Mon Sep 17 00:00:00 2001 From: Yuan Tang Date: Wed, 9 Apr 2025 18:43:43 -0400 Subject: [PATCH 20/21] docs: Avoid bash script syntax highlighting for dark mode (#1918) See https://github.com/meta-llama/llama-stack/pull/1913#issuecomment-2790153778 Signed-off-by: Yuan Tang --- .../distributions/kubernetes_deployment.md | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/docs/source/distributions/kubernetes_deployment.md b/docs/source/distributions/kubernetes_deployment.md index 8ff3f0408..2daf9d785 100644 --- a/docs/source/distributions/kubernetes_deployment.md +++ b/docs/source/distributions/kubernetes_deployment.md @@ -7,13 +7,13 @@ In this guide, we'll use a local [Kind](https://kind.sigs.k8s.io/) cluster and a First, create a local Kubernetes cluster via Kind: -```bash +``` kind create cluster --image kindest/node:v1.32.0 --name llama-stack-test ``` First, create a Kubernetes PVC and Secret for downloading and storing Hugging Face model: -```bash +``` cat </tmp/test-vllm-llama-stack/Containerfile.llama-stack-run-k8s < Date: Thu, 10 Apr 2025 04:04:17 -0400 Subject: [PATCH 21/21] docs: Redirect instructions for additional hardware accelerators for remote vLLM provider (#1923) # What does this PR do? vLLM website just added a [new index page for installing for different hardware accelerators](https://docs.vllm.ai/en/latest/getting_started/installation.html). This PR adds a link to that page with additional edits to make sure readers are aware that the use of GPUs on this page are for demonstration purposes only. This closes https://github.com/meta-llama/llama-stack/issues/1813. Signed-off-by: Yuan Tang --- .../source/distributions/self_hosted_distro/remote-vllm.md | 7 +++++-- llama_stack/templates/remote-vllm/doc_template.md | 7 +++++-- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/docs/source/distributions/self_hosted_distro/remote-vllm.md b/docs/source/distributions/self_hosted_distro/remote-vllm.md index 457d703b3..e18b5bf40 100644 --- a/docs/source/distributions/self_hosted_distro/remote-vllm.md +++ b/docs/source/distributions/self_hosted_distro/remote-vllm.md @@ -25,7 +25,7 @@ The `llamastack/distribution-remote-vllm` distribution consists of the following | vector_io | `inline::faiss`, `remote::chromadb`, `remote::pgvector` | -You can use this distribution if you have GPUs and want to run an independent vLLM server container for running inference. +You can use this distribution if you want to run an independent vLLM server for inference. ### Environment Variables @@ -41,7 +41,10 @@ The following environment variables can be configured: ## Setting up vLLM server -Both AMD and NVIDIA GPUs can serve as accelerators for the vLLM server, which acts as both the LLM inference provider and the safety provider. +In the following sections, we'll use either AMD and NVIDIA GPUs to serve as hardware accelerators for the vLLM +server, which acts as both the LLM inference provider and the safety provider. 
Note that vLLM also +[supports many other hardware accelerators](https://docs.vllm.ai/en/latest/getting_started/installation.html) and +that we only use GPUs here for demonstration purposes. ### Setting up vLLM server on AMD GPU diff --git a/llama_stack/templates/remote-vllm/doc_template.md b/llama_stack/templates/remote-vllm/doc_template.md index 7543e8239..efcdb62c6 100644 --- a/llama_stack/templates/remote-vllm/doc_template.md +++ b/llama_stack/templates/remote-vllm/doc_template.md @@ -13,7 +13,7 @@ The `llamastack/distribution-{{ name }}` distribution consists of the following {{ providers_table }} -You can use this distribution if you have GPUs and want to run an independent vLLM server container for running inference. +You can use this distribution if you want to run an independent vLLM server for inference. {% if run_config_env_vars %} ### Environment Variables @@ -28,7 +28,10 @@ The following environment variables can be configured: ## Setting up vLLM server -Both AMD and NVIDIA GPUs can serve as accelerators for the vLLM server, which acts as both the LLM inference provider and the safety provider. +In the following sections, we'll use either AMD and NVIDIA GPUs to serve as hardware accelerators for the vLLM +server, which acts as both the LLM inference provider and the safety provider. Note that vLLM also +[supports many other hardware accelerators](https://docs.vllm.ai/en/latest/getting_started/installation.html) and +that we only use GPUs here for demonstration purposes. ### Setting up vLLM server on AMD GPU