diff --git a/tests/verifications/REPORT.md b/tests/verifications/REPORT.md
index 449499382..2309c6404 100644
--- a/tests/verifications/REPORT.md
+++ b/tests/verifications/REPORT.md
@@ -1,6 +1,6 @@
 # Test Results Report

-*Generated on: 2025-04-09 22:52:19*
+*Generated on: 2025-04-10 16:48:18*

 *This report was generated by running `python tests/verifications/generate_report.py`*

@@ -15,15 +15,15 @@

 | Provider | Pass Rate | Tests Passed | Total Tests |
 | --- | --- | --- | --- |
-| Together | 67.7% | 21 | 31 |
-| Fireworks | 90.3% | 28 | 31 |
-| Openai | 100.0% | 22 | 22 |
+| Together | 64.7% | 22 | 34 |
+| Fireworks | 82.4% | 28 | 34 |
+| Openai | 100.0% | 24 | 24 |

 ## Together

-*Tests run on: 2025-04-09 22:50:58*
+*Tests run on: 2025-04-10 16:46:35*

 ```bash
 # Run all tests for this provider:
@@ -56,10 +56,11 @@ pytest tests/verifications/openai_api/test_chat_completion.py --provider=togethe
 | test_chat_streaming_image | ⚪ | ❌ | ❌ |
 | test_chat_streaming_structured_output (calendar) | ✅ | ❌ | ❌ |
 | test_chat_streaming_structured_output (math) | ✅ | ❌ | ❌ |
+| test_chat_streaming_tool_calling | ✅ | ❌ | ❌ |

 ## Fireworks

-*Tests run on: 2025-04-09 22:50:02*
+*Tests run on: 2025-04-10 16:44:44*

 ```bash
 # Run all tests for this provider:
@@ -92,10 +93,11 @@ pytest tests/verifications/openai_api/test_chat_completion.py --provider=firewor
 | test_chat_streaming_image | ⚪ | ✅ | ✅ |
 | test_chat_streaming_structured_output (calendar) | ✅ | ✅ | ✅ |
 | test_chat_streaming_structured_output (math) | ✅ | ✅ | ✅ |
+| test_chat_streaming_tool_calling | ❌ | ❌ | ❌ |

 ## Openai

-*Tests run on: 2025-04-09 22:51:44*
+*Tests run on: 2025-04-10 16:47:28*

 ```bash
 # Run all tests for this provider:
@@ -127,3 +129,4 @@ pytest tests/verifications/openai_api/test_chat_completion.py --provider=openai
 | test_chat_streaming_image | ✅ | ✅ |
 | test_chat_streaming_structured_output (calendar) | ✅ | ✅ |
 | test_chat_streaming_structured_output (math) | ✅ | ✅ |
+| test_chat_streaming_tool_calling | ✅ | ✅ |
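With this change, each provider's "Tests run on" line above is derived from a `run_timestamp` field embedded in `tests/verifications/test_results/{provider}.json` rather than from a timestamp in the filename (see the `generate_report.py` diff below). A minimal sketch of reading that label back, assuming the constant-filename layout this diff introduces:

```python
import json
import time
from pathlib import Path

RESULTS_DIR = Path("tests/verifications/test_results")

def last_run_label(provider: str) -> str:
    """Return the 'Tests run on' label for a provider's latest results.

    Assumes one {provider}.json per provider with an embedded
    "run_timestamp" (Unix seconds), as introduced in this diff.
    """
    results = json.loads((RESULTS_DIR / f"{provider}.json").read_text())
    ts = results.get("run_timestamp")
    if ts is None:
        return "Unknown"
    return time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(ts))

# e.g. last_run_label("fireworks") -> "2025-04-10 16:44:44" on the machine
# that produced this report (localtime-dependent).
```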
diff --git a/tests/verifications/generate_report.py b/tests/verifications/generate_report.py
index 1c760ca19..6a7c39ee2 100755
--- a/tests/verifications/generate_report.py
+++ b/tests/verifications/generate_report.py
@@ -77,8 +77,9 @@ def run_tests(provider, keyword=None):
     print(f"Running tests for provider: {provider}")

     timestamp = int(time.time())
-    result_file = RESULTS_DIR / f"{provider}_{timestamp}.json"
-    temp_json_file = RESULTS_DIR / f"temp_{provider}_{timestamp}.json"
+    # Use a constant filename for the final result and temp file
+    result_file = RESULTS_DIR / f"{provider}.json"
+    temp_json_file = RESULTS_DIR / f"temp_{provider}.json"

     # Determine project root directory relative to this script
     project_root = Path(__file__).parent.parent.parent
@@ -106,11 +107,12 @@ def run_tests(provider, keyword=None):

     # Check if the JSON file was created
     if temp_json_file.exists():
-        # Read the JSON file and save it to our results format
        with open(temp_json_file, "r") as f:
            test_results = json.load(f)

-        # Save results to our own format with a trailing newline
+        test_results["run_timestamp"] = timestamp
+
+        # Save results to the final (overwritten) file
        with open(result_file, "w") as f:
            json.dump(test_results, f, indent=2)
            f.write("\n")  # Add a trailing newline for precommit
@@ -132,7 +134,7 @@ def run_tests(provider, keyword=None):

 def parse_results(
     result_file,
-) -> Tuple[DefaultDict[str, DefaultDict[str, Dict[str, bool]]], DefaultDict[str, Set[str]], Set[str]]:
+) -> Tuple[DefaultDict[str, DefaultDict[str, Dict[str, bool]]], DefaultDict[str, Set[str]], Set[str], str]:
     """Parse a single test results file.

     Returns:
@@ -140,11 +142,12 @@ def parse_results(
         - parsed_results: DefaultDict[provider, DefaultDict[model, Dict[test_name, pass_status]]]
         - providers_in_file: DefaultDict[provider, Set[model]] found in this file.
         - tests_in_file: Set[test_name] found in this file.
+        - run_timestamp: Timestamp when the test was run
     """
     if not os.path.exists(result_file):
         print(f"Results file does not exist: {result_file}")
         # Return empty defaultdicts/set matching the type hint
-        return defaultdict(lambda: defaultdict(dict)), defaultdict(set), set()
+        return defaultdict(lambda: defaultdict(dict)), defaultdict(set), set(), ""

     with open(result_file, "r") as f:
         results = json.load(f)
@@ -153,7 +156,16 @@
     parsed_results: DefaultDict[str, DefaultDict[str, Dict[str, bool]]] = defaultdict(lambda: defaultdict(dict))
     providers_in_file: DefaultDict[str, Set[str]] = defaultdict(set)
     tests_in_file: Set[str] = set()
-    provider: str = os.path.basename(result_file).split("_")[0]
+    # Extract provider from filename (e.g., "openai.json" -> "openai")
+    provider: str = result_file.stem
+
+    # Extract run timestamp from the JSON data
+    run_timestamp_unix = results.get("run_timestamp")
+    run_timestamp_str = (
+        time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(run_timestamp_unix))
+        if run_timestamp_unix is not None
+        else "Unknown"
+    )

     # Debug: Print summary of test results
     print(f"Test results summary for {provider}:")
@@ -167,7 +179,7 @@
     if "tests" not in results or not results["tests"]:
         print(f"No test results found in {result_file}")
         # Return empty defaultdicts/set matching the type hint
-        return defaultdict(lambda: defaultdict(dict)), defaultdict(set), set()
+        return defaultdict(lambda: defaultdict(dict)), defaultdict(set), set(), ""

     # Process the tests
     for test in results["tests"]:
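The stem-based provider lookup above changes what a result filename must look like. A small illustrative sketch of the before/after mapping (the timestamped path is the old file renamed later in this diff):

```python
from pathlib import Path

# Old layout: provider was the first "_"-separated token of the stem,
# e.g. fireworks_1744264202.json -> "fireworks".
old = Path("tests/verifications/test_results/fireworks_1744264202.json")
assert old.stem.split("_")[0] == "fireworks"

# New layout: one file per provider, so the stem *is* the provider name.
new = Path("tests/verifications/test_results/fireworks.json")
assert new.stem == "fireworks"
```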
@@ -225,59 +237,29 @@
     if not parsed_results.get(provider):
         print(f"Warning: No valid test results parsed for provider {provider} from file {result_file}")

-    return parsed_results, providers_in_file, tests_in_file
+    return parsed_results, providers_in_file, tests_in_file, run_timestamp_str


-def cleanup_old_results(providers_to_clean: Dict[str, Set[str]]):
-    """Clean up old test result files, keeping only the newest N per provider."""
-    # Use the passed-in providers dictionary
-    for provider in providers_to_clean.keys():
-        # Get all result files for this provider
-        provider_files = list(RESULTS_DIR.glob(f"{provider}_*.json"))
-
-        # Sort by timestamp (newest first)
-        provider_files.sort(key=lambda x: int(x.stem.split("_")[1]), reverse=True)
-
-        # Remove old files beyond the max to keep
-        if len(provider_files) > MAX_RESULTS_PER_PROVIDER:
-            for old_file in provider_files[MAX_RESULTS_PER_PROVIDER:]:
-                try:
-                    old_file.unlink()
-                    print(f"Removed old result file: {old_file}")
-                except Exception as e:
-                    print(f"Error removing file {old_file}: {e}")
-
-
-def get_latest_results_by_provider():
-    """Get the latest test result file for each provider"""
+def get_all_result_files_by_provider():
+    """Get all test result files, keyed by provider."""
     provider_results = {}

-    # Get all result files
     result_files = list(RESULTS_DIR.glob("*.json"))

-    # Extract all provider names from filenames
-    all_providers = set()
     for file in result_files:
-        # File format is provider_timestamp.json
-        parts = file.stem.split("_")
-        if len(parts) >= 2:
-            all_providers.add(parts[0])
-
-    # Group by provider
-    for provider in all_providers:
-        provider_files = [f for f in result_files if f.name.startswith(f"{provider}_")]
-
-        # Sort by timestamp (newest first)
-        provider_files.sort(key=lambda x: int(x.stem.split("_")[1]), reverse=True)
-
-        if provider_files:
-            provider_results[provider] = provider_files[0]
+        provider = file.stem
+        if provider:
+            provider_results[provider] = file

     return provider_results


 def generate_report(
-    results_dict: Dict[str, Any], providers: Dict[str, Set[str]], all_tests: Set[str], output_file=None
+    results_dict: Dict[str, Any],
+    providers: Dict[str, Set[str]],
+    all_tests: Set[str],
+    provider_timestamps: Dict[str, str],
+    output_file=None,
 ):
     """Generate the markdown report.

@@ -285,6 +267,7 @@
         results_dict: Aggregated results [provider][model][test_name] -> status.
         providers: Dict of all providers and their models {provider: {models}}.
         all_tests: Set of all test names found.
+        provider_timestamps: Dict of provider to timestamp when tests were run
         output_file: Optional path to save the report.
     """
     if output_file is None:
@@ -293,19 +276,6 @@
     else:
         output_file = Path(output_file)

-    # Get the timestamp from result files
-    provider_timestamps = {}
-    provider_results_files = get_latest_results_by_provider()
-    for provider, result_file in provider_results_files.items():
-        # Extract timestamp from filename (format: provider_timestamp.json)
-        try:
-            timestamp_str = result_file.stem.split("_")[1]
-            timestamp = int(timestamp_str)
-            formatted_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(timestamp))
-            provider_timestamps[provider] = formatted_time
-        except (IndexError, ValueError):
-            provider_timestamps[provider] = "Unknown"
-
     # Convert provider model sets to sorted lists (use passed-in providers dict)
     providers_sorted = {prov: sorted(models) for prov, models in providers.items()}

@@ -416,7 +386,7 @@
             else:
                 example_base_test_name = first_test_name

-            base_name = base_test_name_map.get(test, test)  # Get base name
+            base_name = base_test_name_map.get(first_test_name, first_test_name)  # Get base name
             case_count = base_test_case_counts.get(base_name, 1)  # Get count
             filter_str = f"{example_base_test_name} and {example_case_id}" if case_count > 1 else example_base_test_name

@@ -491,6 +461,7 @@ def main():
     # Initialize collections to aggregate results in main
     aggregated_providers = defaultdict(set)
     aggregated_tests = set()
+    provider_timestamps = {}

     if args.run_tests:
         # Get list of available providers from command line or use detected providers
@@ -512,28 +483,28 @@
             result_file = run_tests(provider, keyword=args.k)
             if result_file:
                 # Parse and aggregate results
-                parsed_results, providers_in_file, tests_in_file = parse_results(result_file)
+                parsed_results, providers_in_file, tests_in_file, run_timestamp = parse_results(result_file)
                 all_results.update(parsed_results)
                 for prov, models in providers_in_file.items():
                     aggregated_providers[prov].update(models)
+                    if run_timestamp:
+                        provider_timestamps[prov] = run_timestamp
                 aggregated_tests.update(tests_in_file)
     else:
         # Use existing results
-        provider_result_files = get_latest_results_by_provider()
+        provider_result_files = get_all_result_files_by_provider()

         for result_file in provider_result_files.values():
             # Parse and aggregate results
-            parsed_results, providers_in_file, tests_in_file = parse_results(result_file)
+            parsed_results, providers_in_file, tests_in_file, run_timestamp = parse_results(result_file)
             all_results.update(parsed_results)
             for prov, models in providers_in_file.items():
                 aggregated_providers[prov].update(models)
+                if run_timestamp:
+                    provider_timestamps[prov] = run_timestamp
             aggregated_tests.update(tests_in_file)

-    # Generate the report, passing aggregated data
-    generate_report(all_results, aggregated_providers, aggregated_tests, args.output)
-
-    # Cleanup, passing aggregated providers
-    cleanup_old_results(aggregated_providers)
+    generate_report(all_results, aggregated_providers, aggregated_tests, provider_timestamps, args.output)


 if __name__ == "__main__":
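For orientation before the (large) results-file diff that follows: the summary table at the top of REPORT.md can be recomputed from each result file's `summary` block. A rough sketch, assuming the report's "Total Tests" excludes skipped tests, which is consistent with the Fireworks numbers below (28 passed, 2 skipped, 36 collected, reported as 28/34 = 82.4%):

```python
import json
from pathlib import Path

def summary_row(provider: str) -> str:
    """Recompute a provider's row in the report's summary table."""
    data = json.loads(Path(f"tests/verifications/test_results/{provider}.json").read_text())
    s = data["summary"]
    total = s["total"] - s.get("skipped", 0)  # skipped tests are not counted
    return f"{100 * s['passed'] / total:.1f}% | {s['passed']} | {total}"

# fireworks.json below: passed=28, skipped=2, total=36 -> "82.4% | 28 | 34"
```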
diff --git a/tests/verifications/test_results/fireworks_1744264202.json b/tests/verifications/test_results/fireworks.json
similarity index 68%
rename from tests/verifications/test_results/fireworks_1744264202.json
rename to tests/verifications/test_results/fireworks.json
index d14738be9..061e44c08 100644
--- a/tests/verifications/test_results/fireworks_1744264202.json
+++ b/tests/verifications/test_results/fireworks.json
@@ -1,15 +1,15 @@
 {
-  "created": 1744264258.730061,
-  "duration": 53.86071586608887,
+  "created": 1744328795.171092,
+  "duration": 107.57908606529236,
   "exitcode": 1,
   "root": "/Users/erichuang/projects/llama-stack",
   "environment": {},
   "summary": {
     "passed": 28,
     "skipped": 2,
-    "failed": 3,
-    "total": 33,
-    "collected": 33
+    "failed": 6,
+    "total": 36,
+    "collected": 36
   },
   "collectors": [
     {
@@ -29,167 +29,182 @@
       {
         "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[accounts/fireworks/models/llama-v3p3-70b-instruct-earth]",
         "type": "Function",
-        "lineno": 72
+        "lineno": 73
       },
       {
         "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[accounts/fireworks/models/llama-v3p3-70b-instruct-saturn]",
         "type": "Function",
-        "lineno": 72
+        "lineno": 73
       },
      {
         "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[accounts/fireworks/models/llama4-scout-instruct-basic-earth]",
         "type": "Function",
-        "lineno": 72
+        "lineno": 73
       },
       {
         "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[accounts/fireworks/models/llama4-scout-instruct-basic-saturn]",
         "type": "Function",
-        "lineno": 72
+        "lineno": 73
       },
       {
         "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[accounts/fireworks/models/llama4-maverick-instruct-basic-earth]",
         "type": "Function",
-        "lineno": 72
+        "lineno": 73
       },
       {
         "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[accounts/fireworks/models/llama4-maverick-instruct-basic-saturn]",
         "type": "Function",
-        "lineno": 72
+        "lineno": 73
       },
       {
         "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[accounts/fireworks/models/llama-v3p3-70b-instruct-earth]",
         "type": "Function",
-        "lineno": 91
+        "lineno": 92
       },
       {
         "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[accounts/fireworks/models/llama-v3p3-70b-instruct-saturn]",
         "type": "Function",
-        "lineno": 91
+        "lineno": 92
       },
       {
         "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[accounts/fireworks/models/llama4-scout-instruct-basic-earth]",
         "type": "Function",
-        "lineno": 91
+        "lineno": 92
       },
       {
         "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[accounts/fireworks/models/llama4-scout-instruct-basic-saturn]",
         "type": "Function",
-        "lineno": 91
+        "lineno": 92
       },
       {
         "nodeid":
"tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[accounts/fireworks/models/llama4-maverick-instruct-basic-earth]", "type": "Function", - "lineno": 91 + "lineno": 92 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[accounts/fireworks/models/llama4-maverick-instruct-basic-saturn]", "type": "Function", - "lineno": 91 + "lineno": 92 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]", "type": "Function", - "lineno": 115 + "lineno": 116 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[accounts/fireworks/models/llama4-scout-instruct-basic-case0]", "type": "Function", - "lineno": 115 + "lineno": 116 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]", "type": "Function", - "lineno": 115 + "lineno": 116 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]", "type": "Function", - "lineno": 134 + "lineno": 135 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[accounts/fireworks/models/llama4-scout-instruct-basic-case0]", "type": "Function", - "lineno": 134 + "lineno": 135 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]", "type": "Function", - "lineno": 134 + "lineno": 135 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[accounts/fireworks/models/llama-v3p3-70b-instruct-calendar]", "type": "Function", - "lineno": 158 + "lineno": 159 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[accounts/fireworks/models/llama-v3p3-70b-instruct-math]", "type": "Function", - "lineno": 158 + "lineno": 159 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[accounts/fireworks/models/llama4-scout-instruct-basic-calendar]", "type": "Function", - "lineno": 158 + "lineno": 159 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[accounts/fireworks/models/llama4-scout-instruct-basic-math]", "type": "Function", - "lineno": 158 + "lineno": 159 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[accounts/fireworks/models/llama4-maverick-instruct-basic-calendar]", "type": "Function", - "lineno": 158 + "lineno": 159 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[accounts/fireworks/models/llama4-maverick-instruct-basic-math]", "type": "Function", - "lineno": 158 + "lineno": 159 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[accounts/fireworks/models/llama-v3p3-70b-instruct-calendar]", "type": "Function", - "lineno": 181 + "lineno": 182 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[accounts/fireworks/models/llama-v3p3-70b-instruct-math]", "type": "Function", - "lineno": 181 + "lineno": 182 }, { "nodeid": 
"tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[accounts/fireworks/models/llama4-scout-instruct-basic-calendar]", "type": "Function", - "lineno": 181 + "lineno": 182 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[accounts/fireworks/models/llama4-scout-instruct-basic-math]", "type": "Function", - "lineno": 181 + "lineno": 182 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[accounts/fireworks/models/llama4-maverick-instruct-basic-calendar]", "type": "Function", - "lineno": 181 + "lineno": 182 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[accounts/fireworks/models/llama4-maverick-instruct-basic-math]", "type": "Function", - "lineno": 181 + "lineno": 182 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]", "type": "Function", - "lineno": 203 + "lineno": 204 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-case0]", "type": "Function", - "lineno": 203 + "lineno": 204 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]", "type": "Function", - "lineno": 203 + "lineno": 204 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]", + "type": "Function", + "lineno": 228 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-case0]", + "type": "Function", + "lineno": 228 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]", + "type": "Function", + "lineno": 228 } ] } @@ -197,7 +212,7 @@ "tests": [ { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[accounts/fireworks/models/llama-v3p3-70b-instruct-earth]", - "lineno": 72, + "lineno": 73, "outcome": "passed", "keywords": [ "test_chat_non_streaming_basic[accounts/fireworks/models/llama-v3p3-70b-instruct-earth]", @@ -216,21 +231,21 @@ "case_id": "earth" }, "setup": { - "duration": 0.05236550001427531, + "duration": 0.2175025000469759, "outcome": "passed" }, "call": { - "duration": 0.5364967910572886, + "duration": 0.7433859170414507, "outcome": "passed" }, "teardown": { - "duration": 0.00015075004193931818, + "duration": 0.0001592918997630477, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[accounts/fireworks/models/llama-v3p3-70b-instruct-saturn]", - "lineno": 72, + "lineno": 73, "outcome": "passed", "keywords": [ "test_chat_non_streaming_basic[accounts/fireworks/models/llama-v3p3-70b-instruct-saturn]", @@ -249,21 +264,21 @@ "case_id": "saturn" }, "setup": { - "duration": 0.00699599995277822, + "duration": 0.007383499993011355, "outcome": "passed" }, "call": { - "duration": 0.5843954589217901, + "duration": 0.5949292909353971, "outcome": "passed" }, "teardown": { - "duration": 0.0003858329728245735, + "duration": 
0.00015891704242676497, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[accounts/fireworks/models/llama4-scout-instruct-basic-earth]", - "lineno": 72, + "lineno": 73, "outcome": "passed", "keywords": [ "test_chat_non_streaming_basic[accounts/fireworks/models/llama4-scout-instruct-basic-earth]", @@ -282,21 +297,21 @@ "case_id": "earth" }, "setup": { - "duration": 0.009176500025205314, + "duration": 0.010730999987572432, "outcome": "passed" }, "call": { - "duration": 0.9258683329680935, + "duration": 0.8945954169612378, "outcome": "passed" }, "teardown": { - "duration": 0.00015787500888109207, + "duration": 0.0003751249751076102, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[accounts/fireworks/models/llama4-scout-instruct-basic-saturn]", - "lineno": 72, + "lineno": 73, "outcome": "passed", "keywords": [ "test_chat_non_streaming_basic[accounts/fireworks/models/llama4-scout-instruct-basic-saturn]", @@ -315,21 +330,21 @@ "case_id": "saturn" }, "setup": { - "duration": 0.011275375029072165, + "duration": 0.01665666699409485, "outcome": "passed" }, "call": { - "duration": 0.6890578339807689, + "duration": 0.907927209045738, "outcome": "passed" }, "teardown": { - "duration": 0.0004926669644191861, + "duration": 0.00024874997325241566, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[accounts/fireworks/models/llama4-maverick-instruct-basic-earth]", - "lineno": 72, + "lineno": 73, "outcome": "passed", "keywords": [ "test_chat_non_streaming_basic[accounts/fireworks/models/llama4-maverick-instruct-basic-earth]", @@ -348,21 +363,21 @@ "case_id": "earth" }, "setup": { - "duration": 0.007520624902099371, + "duration": 0.01039199996739626, "outcome": "passed" }, "call": { - "duration": 0.6675686669768766, + "duration": 0.5971567500382662, "outcome": "passed" }, "teardown": { - "duration": 0.00016137503553181887, + "duration": 0.0003488330403342843, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[accounts/fireworks/models/llama4-maverick-instruct-basic-saturn]", - "lineno": 72, + "lineno": 73, "outcome": "passed", "keywords": [ "test_chat_non_streaming_basic[accounts/fireworks/models/llama4-maverick-instruct-basic-saturn]", @@ -381,21 +396,21 @@ "case_id": "saturn" }, "setup": { - "duration": 0.0076431670458987355, + "duration": 0.018627874902449548, "outcome": "passed" }, "call": { - "duration": 1.6813415409997106, + "duration": 2.0586736251134425, "outcome": "passed" }, "teardown": { - "duration": 0.0004928340204060078, + "duration": 0.00046974990982562304, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[accounts/fireworks/models/llama-v3p3-70b-instruct-earth]", - "lineno": 91, + "lineno": 92, "outcome": "passed", "keywords": [ "test_chat_streaming_basic[accounts/fireworks/models/llama-v3p3-70b-instruct-earth]", @@ -414,21 +429,21 @@ "case_id": "earth" }, "setup": { - "duration": 0.01302404107991606, + "duration": 0.01706262503284961, "outcome": "passed" }, "call": { - "duration": 1.3206909999717027, + "duration": 0.6679969580145553, "outcome": "passed" }, "teardown": { - "duration": 0.0002220839960500598, + "duration": 0.0004670419730246067, "outcome": "passed" } }, { "nodeid": 
"tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[accounts/fireworks/models/llama-v3p3-70b-instruct-saturn]", - "lineno": 91, + "lineno": 92, "outcome": "passed", "keywords": [ "test_chat_streaming_basic[accounts/fireworks/models/llama-v3p3-70b-instruct-saturn]", @@ -447,21 +462,21 @@ "case_id": "saturn" }, "setup": { - "duration": 0.0071772499941289425, + "duration": 0.025956374942325056, "outcome": "passed" }, "call": { - "duration": 0.4109888339880854, + "duration": 2.052679874934256, "outcome": "passed" }, "teardown": { - "duration": 0.0005431669997051358, + "duration": 0.00026958296075463295, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[accounts/fireworks/models/llama4-scout-instruct-basic-earth]", - "lineno": 91, + "lineno": 92, "outcome": "passed", "keywords": [ "test_chat_streaming_basic[accounts/fireworks/models/llama4-scout-instruct-basic-earth]", @@ -480,21 +495,21 @@ "case_id": "earth" }, "setup": { - "duration": 0.012043708004057407, + "duration": 0.015856957994401455, "outcome": "passed" }, "call": { - "duration": 0.4509220840409398, + "duration": 0.3096678329166025, "outcome": "passed" }, "teardown": { - "duration": 0.00016408402007073164, + "duration": 0.0007620420074090362, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[accounts/fireworks/models/llama4-scout-instruct-basic-saturn]", - "lineno": 91, + "lineno": 92, "outcome": "passed", "keywords": [ "test_chat_streaming_basic[accounts/fireworks/models/llama4-scout-instruct-basic-saturn]", @@ -513,21 +528,21 @@ "case_id": "saturn" }, "setup": { - "duration": 0.007165874936617911, + "duration": 0.013509334065020084, "outcome": "passed" }, "call": { - "duration": 0.6527335830032825, + "duration": 0.5914681670255959, "outcome": "passed" }, "teardown": { - "duration": 0.0006419579731300473, + "duration": 0.0002906669396907091, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[accounts/fireworks/models/llama4-maverick-instruct-basic-earth]", - "lineno": 91, + "lineno": 92, "outcome": "passed", "keywords": [ "test_chat_streaming_basic[accounts/fireworks/models/llama4-maverick-instruct-basic-earth]", @@ -546,21 +561,21 @@ "case_id": "earth" }, "setup": { - "duration": 0.007546542095951736, + "duration": 0.013216375024057925, "outcome": "passed" }, "call": { - "duration": 0.9360042089829221, + "duration": 1.8804527079919353, "outcome": "passed" }, "teardown": { - "duration": 0.00020483299158513546, + "duration": 0.0002026669681072235, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[accounts/fireworks/models/llama4-maverick-instruct-basic-saturn]", - "lineno": 91, + "lineno": 92, "outcome": "passed", "keywords": [ "test_chat_streaming_basic[accounts/fireworks/models/llama4-maverick-instruct-basic-saturn]", @@ -579,21 +594,21 @@ "case_id": "saturn" }, "setup": { - "duration": 0.046697250101715326, + "duration": 0.00827441702131182, "outcome": "passed" }, "call": { - "duration": 0.668349124956876, + "duration": 0.7407040420221165, "outcome": "passed" }, "teardown": { - "duration": 0.0005031249020248652, + "duration": 0.0005084159784018993, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]", 
- "lineno": 115, + "lineno": 116, "outcome": "skipped", "keywords": [ "test_chat_non_streaming_image[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]", @@ -612,22 +627,22 @@ "case_id": "case0" }, "setup": { - "duration": 0.012287458986975253, + "duration": 0.012424499960616231, "outcome": "passed" }, "call": { - "duration": 0.00015287497080862522, + "duration": 0.00032762496266514063, "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py', 124, 'Skipped: Skipping test_chat_non_streaming_image for model accounts/fireworks/models/llama-v3p3-70b-instruct on provider fireworks based on config.')" + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py', 125, 'Skipped: Skipping test_chat_non_streaming_image for model accounts/fireworks/models/llama-v3p3-70b-instruct on provider fireworks based on config.')" }, "teardown": { - "duration": 0.00012162502389401197, + "duration": 0.00032416603062301874, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[accounts/fireworks/models/llama4-scout-instruct-basic-case0]", - "lineno": 115, + "lineno": 116, "outcome": "passed", "keywords": [ "test_chat_non_streaming_image[accounts/fireworks/models/llama4-scout-instruct-basic-case0]", @@ -646,21 +661,21 @@ "case_id": "case0" }, "setup": { - "duration": 0.007204124936833978, + "duration": 0.02253958396613598, "outcome": "passed" }, "call": { - "duration": 1.8676417920505628, + "duration": 2.64042466704268, "outcome": "passed" }, "teardown": { - "duration": 0.0001557499635964632, + "duration": 0.0003636250039562583, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]", - "lineno": 115, + "lineno": 116, "outcome": "passed", "keywords": [ "test_chat_non_streaming_image[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]", @@ -679,21 +694,21 @@ "case_id": "case0" }, "setup": { - "duration": 0.008226625039242208, + "duration": 0.014634749968536198, "outcome": "passed" }, "call": { - "duration": 3.2724285409785807, + "duration": 5.126485540997237, "outcome": "passed" }, "teardown": { - "duration": 0.0002898330567404628, + "duration": 0.0002988330088555813, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]", - "lineno": 134, + "lineno": 135, "outcome": "skipped", "keywords": [ "test_chat_streaming_image[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]", @@ -712,22 +727,22 @@ "case_id": "case0" }, "setup": { - "duration": 0.011927249957807362, + "duration": 0.015854416065849364, "outcome": "passed" }, "call": { - "duration": 0.00017358292825520039, + "duration": 0.00038058299105614424, "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py', 143, 'Skipped: Skipping test_chat_streaming_image for model accounts/fireworks/models/llama-v3p3-70b-instruct on provider fireworks based on config.')" + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py', 144, 'Skipped: Skipping test_chat_streaming_image for model accounts/fireworks/models/llama-v3p3-70b-instruct on provider fireworks based on config.')" }, 
"teardown": { - "duration": 0.00014037499204277992, + "duration": 0.0002689170651137829, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[accounts/fireworks/models/llama4-scout-instruct-basic-case0]", - "lineno": 134, + "lineno": 135, "outcome": "passed", "keywords": [ "test_chat_streaming_image[accounts/fireworks/models/llama4-scout-instruct-basic-case0]", @@ -746,21 +761,21 @@ "case_id": "case0" }, "setup": { - "duration": 0.008731417008675635, + "duration": 0.011205915943719447, "outcome": "passed" }, "call": { - "duration": 2.8333610829431564, + "duration": 3.2596546669956297, "outcome": "passed" }, "teardown": { - "duration": 0.0005132080987095833, + "duration": 0.0006222500232979655, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]", - "lineno": 134, + "lineno": 135, "outcome": "passed", "keywords": [ "test_chat_streaming_image[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]", @@ -779,21 +794,21 @@ "case_id": "case0" }, "setup": { - "duration": 0.016569208004511893, + "duration": 0.016557667055167258, "outcome": "passed" }, "call": { - "duration": 2.302010750048794, + "duration": 4.930164708988741, "outcome": "passed" }, "teardown": { - "duration": 0.00016108399722725153, + "duration": 0.00048687495291233063, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[accounts/fireworks/models/llama-v3p3-70b-instruct-calendar]", - "lineno": 158, + "lineno": 159, "outcome": "passed", "keywords": [ "test_chat_non_streaming_structured_output[accounts/fireworks/models/llama-v3p3-70b-instruct-calendar]", @@ -812,21 +827,21 @@ "case_id": "calendar" }, "setup": { - "duration": 0.039960999973118305, + "duration": 0.00886166701093316, "outcome": "passed" }, "call": { - "duration": 7.661373125039972, + "duration": 0.8833738330285996, "outcome": "passed" }, "teardown": { - "duration": 0.00015833403449505568, + "duration": 0.00025583396200090647, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[accounts/fireworks/models/llama-v3p3-70b-instruct-math]", - "lineno": 158, + "lineno": 159, "outcome": "passed", "keywords": [ "test_chat_non_streaming_structured_output[accounts/fireworks/models/llama-v3p3-70b-instruct-math]", @@ -845,21 +860,21 @@ "case_id": "math" }, "setup": { - "duration": 0.006928625050932169, + "duration": 0.01297520799562335, "outcome": "passed" }, "call": { - "duration": 2.762534625013359, + "duration": 1.9960687910206616, "outcome": "passed" }, "teardown": { - "duration": 0.0006561250193044543, + "duration": 0.0005048330640420318, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[accounts/fireworks/models/llama4-scout-instruct-basic-calendar]", - "lineno": 158, + "lineno": 159, "outcome": "passed", "keywords": [ "test_chat_non_streaming_structured_output[accounts/fireworks/models/llama4-scout-instruct-basic-calendar]", @@ -878,21 +893,21 @@ "case_id": "calendar" }, "setup": { - "duration": 0.008602249901741743, + "duration": 0.007275875075720251, "outcome": "passed" }, "call": { - "duration": 0.8311484589939937, + "duration": 0.9094266659813002, "outcome": "passed" }, "teardown": { - "duration": 
0.0005021670367568731, + "duration": 0.00028041598852723837, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[accounts/fireworks/models/llama4-scout-instruct-basic-math]", - "lineno": 158, + "lineno": 159, "outcome": "passed", "keywords": [ "test_chat_non_streaming_structured_output[accounts/fireworks/models/llama4-scout-instruct-basic-math]", @@ -911,21 +926,21 @@ "case_id": "math" }, "setup": { - "duration": 0.015500334091484547, + "duration": 0.008899332955479622, "outcome": "passed" }, "call": { - "duration": 2.505719291046262, + "duration": 3.117967874975875, "outcome": "passed" }, "teardown": { - "duration": 0.0002619170118123293, + "duration": 0.00017600005958229303, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[accounts/fireworks/models/llama4-maverick-instruct-basic-calendar]", - "lineno": 158, + "lineno": 159, "outcome": "passed", "keywords": [ "test_chat_non_streaming_structured_output[accounts/fireworks/models/llama4-maverick-instruct-basic-calendar]", @@ -944,21 +959,21 @@ "case_id": "calendar" }, "setup": { - "duration": 0.01948041608557105, + "duration": 0.0073364999843761325, "outcome": "passed" }, "call": { - "duration": 0.6336237500654534, + "duration": 2.2714374579954892, "outcome": "passed" }, "teardown": { - "duration": 0.00016637507360428572, + "duration": 0.0001814159331843257, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[accounts/fireworks/models/llama4-maverick-instruct-basic-math]", - "lineno": 158, + "lineno": 159, "outcome": "passed", "keywords": [ "test_chat_non_streaming_structured_output[accounts/fireworks/models/llama4-maverick-instruct-basic-math]", @@ -977,21 +992,21 @@ "case_id": "math" }, "setup": { - "duration": 0.006810749997384846, + "duration": 0.010546459001488984, "outcome": "passed" }, "call": { - "duration": 1.9086956249084324, + "duration": 3.9954450000077486, "outcome": "passed" }, "teardown": { - "duration": 0.00018824997823685408, + "duration": 0.0002719159238040447, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[accounts/fireworks/models/llama-v3p3-70b-instruct-calendar]", - "lineno": 181, + "lineno": 182, "outcome": "passed", "keywords": [ "test_chat_streaming_structured_output[accounts/fireworks/models/llama-v3p3-70b-instruct-calendar]", @@ -1010,21 +1025,21 @@ "case_id": "calendar" }, "setup": { - "duration": 0.007881582947447896, + "duration": 0.012508000014349818, "outcome": "passed" }, "call": { - "duration": 0.7142562499502674, + "duration": 9.095425167004578, "outcome": "passed" }, "teardown": { - "duration": 0.0007035828894004226, + "duration": 0.00029200001154094934, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[accounts/fireworks/models/llama-v3p3-70b-instruct-math]", - "lineno": 181, + "lineno": 182, "outcome": "passed", "keywords": [ "test_chat_streaming_structured_output[accounts/fireworks/models/llama-v3p3-70b-instruct-math]", @@ -1043,21 +1058,21 @@ "case_id": "math" }, "setup": { - "duration": 0.00848070892971009, + "duration": 0.014769250061362982, "outcome": "passed" }, "call": { - "duration": 1.5210869159782305, + "duration": 1.9875252910424024, "outcome": "passed" }, "teardown": { - 
"duration": 0.00021216599270701408, + "duration": 0.0006288329605013132, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[accounts/fireworks/models/llama4-scout-instruct-basic-calendar]", - "lineno": 181, + "lineno": 182, "outcome": "passed", "keywords": [ "test_chat_streaming_structured_output[accounts/fireworks/models/llama4-scout-instruct-basic-calendar]", @@ -1076,21 +1091,21 @@ "case_id": "calendar" }, "setup": { - "duration": 0.009669666993431747, + "duration": 0.014440709026530385, "outcome": "passed" }, "call": { - "duration": 1.3105999580584466, + "duration": 1.2613736250204965, "outcome": "passed" }, "teardown": { - "duration": 0.000588166993111372, + "duration": 0.0001937919296324253, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[accounts/fireworks/models/llama4-scout-instruct-basic-math]", - "lineno": 181, + "lineno": 182, "outcome": "passed", "keywords": [ "test_chat_streaming_structured_output[accounts/fireworks/models/llama4-scout-instruct-basic-math]", @@ -1109,21 +1124,21 @@ "case_id": "math" }, "setup": { - "duration": 0.007745541981421411, + "duration": 0.0071510839043185115, "outcome": "passed" }, "call": { - "duration": 3.250162083073519, + "duration": 2.2953888749470934, "outcome": "passed" }, "teardown": { - "duration": 0.0001455000601708889, + "duration": 0.00016245793085545301, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[accounts/fireworks/models/llama4-maverick-instruct-basic-calendar]", - "lineno": 181, + "lineno": 182, "outcome": "passed", "keywords": [ "test_chat_streaming_structured_output[accounts/fireworks/models/llama4-maverick-instruct-basic-calendar]", @@ -1142,21 +1157,21 @@ "case_id": "calendar" }, "setup": { - "duration": 0.009726207936182618, + "duration": 0.007294666953384876, "outcome": "passed" }, "call": { - "duration": 0.5564592910232022, + "duration": 2.194703874993138, "outcome": "passed" }, "teardown": { - "duration": 0.00019470800179988146, + "duration": 0.00017604196909815073, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[accounts/fireworks/models/llama4-maverick-instruct-basic-math]", - "lineno": 181, + "lineno": 182, "outcome": "passed", "keywords": [ "test_chat_streaming_structured_output[accounts/fireworks/models/llama4-maverick-instruct-basic-math]", @@ -1175,21 +1190,21 @@ "case_id": "math" }, "setup": { - "duration": 0.018431040924042463, + "duration": 0.019950625021010637, "outcome": "passed" }, "call": { - "duration": 3.8501765420660377, + "duration": 8.4994609169662, "outcome": "passed" }, "teardown": { - "duration": 0.00015279196668416262, + "duration": 0.00026404205709695816, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]", - "lineno": 203, + "lineno": 204, "outcome": "failed", "keywords": [ "test_chat_non_streaming_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]", @@ -1208,34 +1223,34 @@ "case_id": "case0" }, "setup": { - "duration": 0.007509749964810908, + "duration": 0.011928000021725893, "outcome": "passed" }, "call": { - "duration": 0.4906975000631064, + "duration": 0.5664792089955881, "outcome": "failed", "crash": { "path": 
"/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 222, + "lineno": 223, "message": "TypeError: object of type 'NoneType' has no len()" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 222, + "lineno": 223, "message": "TypeError" } ], - "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama-v3p3-70b-instruct'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_non_streaming_tool_calling(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n stream=False,\n )\n \n assert response.choices[0].message.role == \"assistant\"\n> assert len(response.choices[0].message.tool_calls) > 0\nE TypeError: object of type 'NoneType' has no len()\n\ntests/verifications/openai_api/test_chat_completion.py:222: TypeError" + "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama-v3p3-70b-instruct'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_non_streaming_tool_calling(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n stream=False,\n )\n \n assert response.choices[0].message.role == \"assistant\"\n> assert len(response.choices[0].message.tool_calls) > 0\nE TypeError: object of type 'NoneType' has no len()\n\ntests/verifications/openai_api/test_chat_completion.py:223: TypeError" }, "teardown": { - "duration": 0.00023995805531740189, + "duration": 0.00023799994960427284, "outcome": "passed" } 
}, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-case0]", - "lineno": 203, + "lineno": 204, "outcome": "failed", "keywords": [ "test_chat_non_streaming_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-case0]", @@ -1254,34 +1269,34 @@ "case_id": "case0" }, "setup": { - "duration": 0.007144959061406553, + "duration": 0.006813624990172684, "outcome": "passed" }, "call": { - "duration": 3.818257624981925, + "duration": 3.170418416033499, "outcome": "failed", "crash": { "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 222, + "lineno": 223, "message": "TypeError: object of type 'NoneType' has no len()" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 222, + "lineno": 223, "message": "TypeError" } ], - "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-scout-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_non_streaming_tool_calling(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n stream=False,\n )\n \n assert response.choices[0].message.role == \"assistant\"\n> assert len(response.choices[0].message.tool_calls) > 0\nE TypeError: object of type 'NoneType' has no len()\n\ntests/verifications/openai_api/test_chat_completion.py:222: TypeError" + "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-scout-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_non_streaming_tool_calling(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n 
pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n stream=False,\n )\n \n assert response.choices[0].message.role == \"assistant\"\n> assert len(response.choices[0].message.tool_calls) > 0\nE TypeError: object of type 'NoneType' has no len()\n\ntests/verifications/openai_api/test_chat_completion.py:223: TypeError" }, "teardown": { - "duration": 0.0002668750239536166, + "duration": 0.0004129580920562148, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]", - "lineno": 203, + "lineno": 204, "outcome": "failed", "keywords": [ "test_chat_non_streaming_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]", @@ -1300,30 +1315,169 @@ "case_id": "case0" }, "setup": { - "duration": 0.015290249953977764, + "duration": 0.01656208303757012, "outcome": "passed" }, "call": { - "duration": 1.5883799999719486, + "duration": 22.76337137504015, "outcome": "failed", "crash": { "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 222, + "lineno": 223, "message": "TypeError: object of type 'NoneType' has no len()" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 222, + "lineno": 223, "message": "TypeError" } ], - "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-maverick-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_non_streaming_tool_calling(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n stream=False,\n )\n \n assert response.choices[0].message.role == \"assistant\"\n> assert len(response.choices[0].message.tool_calls) > 0\nE TypeError: object of type 'NoneType' has no len()\n\ntests/verifications/openai_api/test_chat_completion.py:222: TypeError" + "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-maverick-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = 
{'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_non_streaming_tool_calling(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n stream=False,\n )\n \n assert response.choices[0].message.role == \"assistant\"\n> assert len(response.choices[0].message.tool_calls) > 0\nE TypeError: object of type 'NoneType' has no len()\n\ntests/verifications/openai_api/test_chat_completion.py:223: TypeError" }, "teardown": { - "duration": 0.0008049579337239265, + "duration": 0.00038704206235706806, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]", + "lineno": 228, + "outcome": "failed", + "keywords": [ + "test_chat_streaming_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]", + "parametrize", + "pytestmark", + "accounts/fireworks/models/llama-v3p3-70b-instruct-case0", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "accounts/fireworks/models/llama-v3p3-70b-instruct", + "case_id": "case0" + }, + "setup": { + "duration": 0.015727541991509497, + "outcome": "passed" + }, + "call": { + "duration": 0.5719050420448184, + "outcome": "failed", + "crash": { + "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", + "lineno": 274, + "message": "assert 0 == 1\n + where 0 = len({})" + }, + "traceback": [ + { + "path": "tests/verifications/openai_api/test_chat_completion.py", + "lineno": 274, + "message": "AssertionError" + } + ], + "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama-v3p3-70b-instruct'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_tool_calling(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n stream = openai_client.chat.completions.create(\n 
model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n stream=True,\n )\n \n # Accumulate partial tool_calls here\n tool_calls_buffer = {}\n current_id = None\n # Process streaming chunks\n for chunk in stream:\n choice = chunk.choices[0]\n delta = choice.delta\n \n if delta.tool_calls is None:\n continue\n \n for tool_call_delta in delta.tool_calls:\n if tool_call_delta.id:\n current_id = tool_call_delta.id\n call_id = current_id\n func_delta = tool_call_delta.function\n \n if call_id not in tool_calls_buffer:\n tool_calls_buffer[call_id] = {\n \"id\": call_id,\n \"type\": tool_call_delta.type,\n \"name\": func_delta.name,\n \"arguments\": \"\",\n }\n \n if func_delta.arguments:\n tool_calls_buffer[call_id][\"arguments\"] += func_delta.arguments\n \n> assert len(tool_calls_buffer) == 1\nE assert 0 == 1\nE + where 0 = len({})\n\ntests/verifications/openai_api/test_chat_completion.py:274: AssertionError" + }, + "teardown": { + "duration": 0.0003532909322530031, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-case0]", + "lineno": 228, + "outcome": "failed", + "keywords": [ + "test_chat_streaming_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-case0]", + "parametrize", + "pytestmark", + "accounts/fireworks/models/llama4-scout-instruct-basic-case0", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "accounts/fireworks/models/llama4-scout-instruct-basic", + "case_id": "case0" + }, + "setup": { + "duration": 0.011914041941054165, + "outcome": "passed" + }, + "call": { + "duration": 5.403063916950487, + "outcome": "failed", + "crash": { + "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", + "lineno": 274, + "message": "assert 0 == 1\n + where 0 = len({})" + }, + "traceback": [ + { + "path": "tests/verifications/openai_api/test_chat_completion.py", + "lineno": 274, + "message": "AssertionError" + } + ], + "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-scout-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_tool_calling(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n stream = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n stream=True,\n )\n \n # Accumulate partial tool_calls here\n tool_calls_buffer = {}\n current_id = None\n # 
Process streaming chunks\n for chunk in stream:\n choice = chunk.choices[0]\n delta = choice.delta\n \n if delta.tool_calls is None:\n continue\n \n for tool_call_delta in delta.tool_calls:\n if tool_call_delta.id:\n current_id = tool_call_delta.id\n call_id = current_id\n func_delta = tool_call_delta.function\n \n if call_id not in tool_calls_buffer:\n tool_calls_buffer[call_id] = {\n \"id\": call_id,\n \"type\": tool_call_delta.type,\n \"name\": func_delta.name,\n \"arguments\": \"\",\n }\n \n if func_delta.arguments:\n tool_calls_buffer[call_id][\"arguments\"] += func_delta.arguments\n \n> assert len(tool_calls_buffer) == 1\nE assert 0 == 1\nE + where 0 = len({})\n\ntests/verifications/openai_api/test_chat_completion.py:274: AssertionError" + }, + "teardown": { + "duration": 0.0005193749675527215, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]", + "lineno": 228, + "outcome": "failed", + "keywords": [ + "test_chat_streaming_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]", + "parametrize", + "pytestmark", + "accounts/fireworks/models/llama4-maverick-instruct-basic-case0", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "accounts/fireworks/models/llama4-maverick-instruct-basic", + "case_id": "case0" + }, + "setup": { + "duration": 0.012608832912519574, + "outcome": "passed" + }, + "call": { + "duration": 7.587262416025624, + "outcome": "failed", + "crash": { + "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", + "lineno": 274, + "message": "assert 0 == 1\n + where 0 = len({})" + }, + "traceback": [ + { + "path": "tests/verifications/openai_api/test_chat_completion.py", + "lineno": 274, + "message": "AssertionError" + } + ], + "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-maverick-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_tool_calling(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n stream = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n stream=True,\n )\n \n # Accumulate partial tool_calls here\n tool_calls_buffer = {}\n current_id = None\n # Process streaming chunks\n for chunk in stream:\n choice = chunk.choices[0]\n delta = choice.delta\n \n if delta.tool_calls is None:\n continue\n \n for tool_call_delta in delta.tool_calls:\n 
if tool_call_delta.id:\n current_id = tool_call_delta.id\n call_id = current_id\n func_delta = tool_call_delta.function\n \n if call_id not in tool_calls_buffer:\n tool_calls_buffer[call_id] = {\n \"id\": call_id,\n \"type\": tool_call_delta.type,\n \"name\": func_delta.name,\n \"arguments\": \"\",\n }\n \n if func_delta.arguments:\n tool_calls_buffer[call_id][\"arguments\"] += func_delta.arguments\n \n> assert len(tool_calls_buffer) == 1\nE assert 0 == 1\nE + where 0 = len({})\n\ntests/verifications/openai_api/test_chat_completion.py:274: AssertionError" + }, + "teardown": { + "duration": 0.0008685829816386104, "outcome": "passed" } } - ] + ], + "run_timestamp": 1744328684 } diff --git a/tests/verifications/test_results/openai_1744264304.json b/tests/verifications/test_results/openai.json similarity index 77% rename from tests/verifications/test_results/openai_1744264304.json rename to tests/verifications/test_results/openai.json index fe9c2fcac..0c1892f7e 100644 --- a/tests/verifications/test_results/openai_1744264304.json +++ b/tests/verifications/test_results/openai.json @@ -1,13 +1,13 @@ { - "created": 1744264338.9923031, - "duration": 32.825536012649536, + "created": 1744328898.0248861, + "duration": 47.561042070388794, "exitcode": 0, "root": "/Users/erichuang/projects/llama-stack", "environment": {}, "summary": { - "passed": 22, - "total": 22, - "collected": 22 + "passed": 24, + "total": 24, + "collected": 24 }, "collectors": [ { @@ -27,112 +27,122 @@ { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[gpt-4o-earth]", "type": "Function", - "lineno": 72 + "lineno": 73 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[gpt-4o-saturn]", "type": "Function", - "lineno": 72 + "lineno": 73 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[gpt-4o-mini-earth]", "type": "Function", - "lineno": 72 + "lineno": 73 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[gpt-4o-mini-saturn]", "type": "Function", - "lineno": 72 + "lineno": 73 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[gpt-4o-earth]", "type": "Function", - "lineno": 91 + "lineno": 92 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[gpt-4o-saturn]", "type": "Function", - "lineno": 91 + "lineno": 92 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[gpt-4o-mini-earth]", "type": "Function", - "lineno": 91 + "lineno": 92 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[gpt-4o-mini-saturn]", "type": "Function", - "lineno": 91 + "lineno": 92 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[gpt-4o-case0]", "type": "Function", - "lineno": 115 + "lineno": 116 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[gpt-4o-mini-case0]", "type": "Function", - "lineno": 115 + "lineno": 116 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[gpt-4o-case0]", "type": "Function", - "lineno": 134 + "lineno": 135 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[gpt-4o-mini-case0]", "type": "Function", - "lineno": 134 + "lineno": 135 }, { "nodeid": 
"tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[gpt-4o-calendar]", "type": "Function", - "lineno": 158 + "lineno": 159 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[gpt-4o-math]", "type": "Function", - "lineno": 158 + "lineno": 159 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[gpt-4o-mini-calendar]", "type": "Function", - "lineno": 158 + "lineno": 159 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[gpt-4o-mini-math]", "type": "Function", - "lineno": 158 + "lineno": 159 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[gpt-4o-calendar]", "type": "Function", - "lineno": 181 + "lineno": 182 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[gpt-4o-math]", "type": "Function", - "lineno": 181 + "lineno": 182 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[gpt-4o-mini-calendar]", "type": "Function", - "lineno": 181 + "lineno": 182 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[gpt-4o-mini-math]", "type": "Function", - "lineno": 181 + "lineno": 182 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[gpt-4o-case0]", "type": "Function", - "lineno": 203 + "lineno": 204 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[gpt-4o-mini-case0]", "type": "Function", - "lineno": 203 + "lineno": 204 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[gpt-4o-case0]", + "type": "Function", + "lineno": 228 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[gpt-4o-mini-case0]", + "type": "Function", + "lineno": 228 } ] } @@ -140,7 +150,7 @@ "tests": [ { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[gpt-4o-earth]", - "lineno": 72, + "lineno": 73, "outcome": "passed", "keywords": [ "test_chat_non_streaming_basic[gpt-4o-earth]", @@ -159,21 +169,21 @@ "case_id": "earth" }, "setup": { - "duration": 0.05381445901002735, + "duration": 0.0694252080284059, "outcome": "passed" }, "call": { - "duration": 0.49848275003023446, + "duration": 0.5709165419684723, "outcome": "passed" }, "teardown": { - "duration": 0.00018287496641278267, + "duration": 0.0007626248989254236, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[gpt-4o-saturn]", - "lineno": 72, + "lineno": 73, "outcome": "passed", "keywords": [ "test_chat_non_streaming_basic[gpt-4o-saturn]", @@ -192,21 +202,21 @@ "case_id": "saturn" }, "setup": { - "duration": 0.007965500000864267, + "duration": 0.010281750001013279, "outcome": "passed" }, "call": { - "duration": 0.9293275829404593, + "duration": 0.6309260830748826, "outcome": "passed" }, "teardown": { - "duration": 0.00018229195848107338, + "duration": 0.0001824579667299986, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[gpt-4o-mini-earth]", - "lineno": 72, + "lineno": 73, "outcome": "passed", 
"keywords": [ "test_chat_non_streaming_basic[gpt-4o-mini-earth]", @@ -225,21 +235,21 @@ "case_id": "earth" }, "setup": { - "duration": 0.00875679193995893, + "duration": 0.007922374992631376, "outcome": "passed" }, "call": { - "duration": 0.5793640419142321, + "duration": 0.31756504194345325, "outcome": "passed" }, "teardown": { - "duration": 0.0005307920509949327, + "duration": 0.0005268750246614218, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[gpt-4o-mini-saturn]", - "lineno": 72, + "lineno": 73, "outcome": "passed", "keywords": [ "test_chat_non_streaming_basic[gpt-4o-mini-saturn]", @@ -258,21 +268,21 @@ "case_id": "saturn" }, "setup": { - "duration": 0.01076845801435411, + "duration": 0.01643404201604426, "outcome": "passed" }, "call": { - "duration": 0.8752291660057381, + "duration": 0.7479908330133185, "outcome": "passed" }, "teardown": { - "duration": 0.0004834589781239629, + "duration": 0.0004037501057609916, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[gpt-4o-earth]", - "lineno": 91, + "lineno": 92, "outcome": "passed", "keywords": [ "test_chat_streaming_basic[gpt-4o-earth]", @@ -291,21 +301,21 @@ "case_id": "earth" }, "setup": { - "duration": 0.01662245800253004, + "duration": 0.021671707974746823, "outcome": "passed" }, "call": { - "duration": 0.8336971249664202, + "duration": 0.6701172919711098, "outcome": "passed" }, "teardown": { - "duration": 0.0024086670018732548, + "duration": 0.0005569590721279383, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[gpt-4o-saturn]", - "lineno": 91, + "lineno": 92, "outcome": "passed", "keywords": [ "test_chat_streaming_basic[gpt-4o-saturn]", @@ -324,21 +334,21 @@ "case_id": "saturn" }, "setup": { - "duration": 0.009416291955858469, + "duration": 0.015847125090658665, "outcome": "passed" }, "call": { - "duration": 0.43594495789147913, + "duration": 0.636536999954842, "outcome": "passed" }, "teardown": { - "duration": 0.0009131249971687794, + "duration": 0.00029395800083875656, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[gpt-4o-mini-earth]", - "lineno": 91, + "lineno": 92, "outcome": "passed", "keywords": [ "test_chat_streaming_basic[gpt-4o-mini-earth]", @@ -357,21 +367,21 @@ "case_id": "earth" }, "setup": { - "duration": 0.013155042077414691, + "duration": 0.011792832985520363, "outcome": "passed" }, "call": { - "duration": 0.6119836670113727, + "duration": 0.5610962919890881, "outcome": "passed" }, "teardown": { - "duration": 0.00023804197553545237, + "duration": 0.0003578749019652605, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[gpt-4o-mini-saturn]", - "lineno": 91, + "lineno": 92, "outcome": "passed", "keywords": [ "test_chat_streaming_basic[gpt-4o-mini-saturn]", @@ -390,21 +400,21 @@ "case_id": "saturn" }, "setup": { - "duration": 0.009004916995763779, + "duration": 0.016500207944773138, "outcome": "passed" }, "call": { - "duration": 0.8327413749648258, + "duration": 0.8060244580265135, "outcome": "passed" }, "teardown": { - "duration": 0.00046841695439070463, + "duration": 0.0005296670133247972, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[gpt-4o-case0]", - "lineno": 115, + "lineno": 116, 
"outcome": "passed", "keywords": [ "test_chat_non_streaming_image[gpt-4o-case0]", @@ -423,21 +433,21 @@ "case_id": "case0" }, "setup": { - "duration": 0.009574208059348166, + "duration": 0.008338792016729712, "outcome": "passed" }, "call": { - "duration": 2.221839000005275, + "duration": 7.009252917021513, "outcome": "passed" }, "teardown": { - "duration": 0.00015945907216519117, + "duration": 0.0003042910248041153, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[gpt-4o-mini-case0]", - "lineno": 115, + "lineno": 116, "outcome": "passed", "keywords": [ "test_chat_non_streaming_image[gpt-4o-mini-case0]", @@ -456,21 +466,21 @@ "case_id": "case0" }, "setup": { - "duration": 0.0084402080392465, + "duration": 0.007238540914840996, "outcome": "passed" }, "call": { - "duration": 2.298736457945779, + "duration": 3.134693874977529, "outcome": "passed" }, "teardown": { - "duration": 0.0002423750702291727, + "duration": 0.0003104590578004718, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[gpt-4o-case0]", - "lineno": 134, + "lineno": 135, "outcome": "passed", "keywords": [ "test_chat_streaming_image[gpt-4o-case0]", @@ -489,21 +499,21 @@ "case_id": "case0" }, "setup": { - "duration": 0.007330416003242135, + "duration": 0.0161851670127362, "outcome": "passed" }, "call": { - "duration": 4.062959833070636, + "duration": 3.0745719589758664, "outcome": "passed" }, "teardown": { - "duration": 0.00015470804646611214, + "duration": 0.00022620800882577896, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[gpt-4o-mini-case0]", - "lineno": 134, + "lineno": 135, "outcome": "passed", "keywords": [ "test_chat_streaming_image[gpt-4o-mini-case0]", @@ -522,21 +532,21 @@ "case_id": "case0" }, "setup": { - "duration": 0.019998832955025136, + "duration": 0.013220708002336323, "outcome": "passed" }, "call": { - "duration": 2.609432084020227, + "duration": 3.624867417034693, "outcome": "passed" }, "teardown": { - "duration": 0.005618917057290673, + "duration": 0.00020633300300687551, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[gpt-4o-calendar]", - "lineno": 158, + "lineno": 159, "outcome": "passed", "keywords": [ "test_chat_non_streaming_structured_output[gpt-4o-calendar]", @@ -555,21 +565,21 @@ "case_id": "calendar" }, "setup": { - "duration": 0.00867662497330457, + "duration": 0.017596833989955485, "outcome": "passed" }, "call": { - "duration": 0.6856697499752045, + "duration": 1.248568250099197, "outcome": "passed" }, "teardown": { - "duration": 0.00018445902969688177, + "duration": 0.0004248750628903508, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[gpt-4o-math]", - "lineno": 158, + "lineno": 159, "outcome": "passed", "keywords": [ "test_chat_non_streaming_structured_output[gpt-4o-math]", @@ -588,21 +598,21 @@ "case_id": "math" }, "setup": { - "duration": 0.01139050000347197, + "duration": 0.01512012502644211, "outcome": "passed" }, "call": { - "duration": 2.764390083961189, + "duration": 8.170285542029887, "outcome": "passed" }, "teardown": { - "duration": 0.0003164170775562525, + "duration": 0.00043537491001188755, "outcome": "passed" } }, { "nodeid": 
"tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[gpt-4o-mini-calendar]", - "lineno": 158, + "lineno": 159, "outcome": "passed", "keywords": [ "test_chat_non_streaming_structured_output[gpt-4o-mini-calendar]", @@ -621,21 +631,21 @@ "case_id": "calendar" }, "setup": { - "duration": 0.01321374997496605, + "duration": 0.010376665974035859, "outcome": "passed" }, "call": { - "duration": 0.8284227909753099, + "duration": 0.756480542011559, "outcome": "passed" }, "teardown": { - "duration": 0.00030170800164341927, + "duration": 0.00025695806834846735, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[gpt-4o-mini-math]", - "lineno": 158, + "lineno": 159, "outcome": "passed", "keywords": [ "test_chat_non_streaming_structured_output[gpt-4o-mini-math]", @@ -654,21 +664,21 @@ "case_id": "math" }, "setup": { - "duration": 0.013477458036504686, + "duration": 0.006846625008620322, "outcome": "passed" }, "call": { - "duration": 2.4146235829684883, + "duration": 2.6833953330060467, "outcome": "passed" }, "teardown": { - "duration": 0.00025754200760275126, + "duration": 0.00022558309137821198, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[gpt-4o-calendar]", - "lineno": 181, + "lineno": 182, "outcome": "passed", "keywords": [ "test_chat_streaming_structured_output[gpt-4o-calendar]", @@ -687,21 +697,21 @@ "case_id": "calendar" }, "setup": { - "duration": 0.006940583931282163, + "duration": 0.009646040969528258, "outcome": "passed" }, "call": { - "duration": 0.5102092920569703, + "duration": 0.6117532079806551, "outcome": "passed" }, "teardown": { - "duration": 0.00023379107005894184, + "duration": 0.00015258300118148327, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[gpt-4o-math]", - "lineno": 181, + "lineno": 182, "outcome": "passed", "keywords": [ "test_chat_streaming_structured_output[gpt-4o-math]", @@ -720,21 +730,21 @@ "case_id": "math" }, "setup": { - "duration": 0.007166999974288046, + "duration": 0.012024458032101393, "outcome": "passed" }, "call": { - "duration": 3.5751801669830456, + "duration": 4.522625041077845, "outcome": "passed" }, "teardown": { - "duration": 0.00015041697770357132, + "duration": 0.0004230838967487216, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[gpt-4o-mini-calendar]", - "lineno": 181, + "lineno": 182, "outcome": "passed", "keywords": [ "test_chat_streaming_structured_output[gpt-4o-mini-calendar]", @@ -753,21 +763,21 @@ "case_id": "calendar" }, "setup": { - "duration": 0.010652625001966953, + "duration": 0.009566582972183824, "outcome": "passed" }, "call": { - "duration": 0.6648182499920949, + "duration": 2.5591942919418216, "outcome": "passed" }, "teardown": { - "duration": 0.0008647920330986381, + "duration": 0.0007555419579148293, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[gpt-4o-mini-math]", - "lineno": 181, + "lineno": 182, "outcome": "passed", "keywords": [ "test_chat_streaming_structured_output[gpt-4o-mini-math]", @@ -786,21 +796,21 @@ "case_id": "math" }, "setup": { - "duration": 0.007372208056040108, + "duration": 0.010828875005245209, "outcome": "passed" }, "call": { - "duration": 
2.80747462506406, + "duration": 2.495122667052783, "outcome": "passed" }, "teardown": { - "duration": 0.00028124998789280653, + "duration": 0.0002802090020850301, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[gpt-4o-case0]", - "lineno": 203, + "lineno": 204, "outcome": "passed", "keywords": [ "test_chat_non_streaming_tool_calling[gpt-4o-case0]", @@ -819,21 +829,21 @@ "case_id": "case0" }, "setup": { - "duration": 0.01625587500166148, + "duration": 0.012762792059220374, "outcome": "passed" }, "call": { - "duration": 0.6878769160248339, + "duration": 0.5655921660363674, "outcome": "passed" }, "teardown": { - "duration": 0.0002637499710544944, + "duration": 0.00022304197773337364, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[gpt-4o-mini-case0]", - "lineno": 203, + "lineno": 204, "outcome": "passed", "keywords": [ "test_chat_non_streaming_tool_calling[gpt-4o-mini-case0]", @@ -852,17 +862,84 @@ "case_id": "case0" }, "setup": { - "duration": 0.008817250025458634, + "duration": 0.03188708401285112, "outcome": "passed" }, "call": { - "duration": 0.7181202919455245, + "duration": 0.6159415419679135, "outcome": "passed" }, "teardown": { - "duration": 0.0017147079342976213, + "duration": 0.0005549580091610551, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[gpt-4o-case0]", + "lineno": 228, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_tool_calling[gpt-4o-case0]", + "parametrize", + "pytestmark", + "gpt-4o-case0", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "gpt-4o", + "case_id": "case0" + }, + "setup": { + "duration": 0.014768208027817309, + "outcome": "passed" + }, + "call": { + "duration": 0.47373537498060614, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0005811670562252402, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[gpt-4o-mini-case0]", + "lineno": 228, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_tool_calling[gpt-4o-mini-case0]", + "parametrize", + "pytestmark", + "gpt-4o-mini-case0", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "gpt-4o-mini", + "case_id": "case0" + }, + "setup": { + "duration": 0.010271625011228025, + "outcome": "passed" + }, + "call": { + "duration": 0.5656027499353513, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0025699170073494315, "outcome": "passed" } } - ] + ], + "run_timestamp": 1744328848 } diff --git a/tests/verifications/test_results/together_1744264258.json b/tests/verifications/test_results/together.json similarity index 77% rename from tests/verifications/test_results/together_1744264258.json rename to tests/verifications/test_results/together.json index c38dd52b5..2b23089e8 100644 --- a/tests/verifications/test_results/together_1744264258.json +++ b/tests/verifications/test_results/together.json @@ -1,15 +1,15 @@ { - "created": 1744264304.064288, - "duration": 42.470197916030884, + "created": 1744328847.853437, + "duration": 49.9419469833374, "exitcode": 1, "root": "/Users/erichuang/projects/llama-stack", "environment": {}, "summary": { - "passed": 21, - "failed": 10, + "passed": 22, + 
"failed": 12, "skipped": 2, - "total": 33, - "collected": 33 + "total": 36, + "collected": 36 }, "collectors": [ { @@ -29,167 +29,182 @@ { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-earth]", "type": "Function", - "lineno": 72 + "lineno": 73 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-saturn]", "type": "Function", - "lineno": 72 + "lineno": 73 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-earth]", "type": "Function", - "lineno": 72 + "lineno": 73 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-saturn]", "type": "Function", - "lineno": 72 + "lineno": 73 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-earth]", "type": "Function", - "lineno": 72 + "lineno": 73 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-saturn]", "type": "Function", - "lineno": 72 + "lineno": 73 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-earth]", "type": "Function", - "lineno": 91 + "lineno": 92 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-saturn]", "type": "Function", - "lineno": 91 + "lineno": 92 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-earth]", "type": "Function", - "lineno": 91 + "lineno": 92 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-saturn]", "type": "Function", - "lineno": 91 + "lineno": 92 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-earth]", "type": "Function", - "lineno": 91 + "lineno": 92 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-saturn]", "type": "Function", - "lineno": 91 + "lineno": 92 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", "type": "Function", - "lineno": 115 + "lineno": 116 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", "type": "Function", - "lineno": 115 + "lineno": 116 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", "type": "Function", - "lineno": 115 + "lineno": 116 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", "type": "Function", - "lineno": 134 + "lineno": 135 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", "type": 
"Function", - "lineno": 134 + "lineno": 135 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", "type": "Function", - "lineno": 134 + "lineno": 135 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-calendar]", "type": "Function", - "lineno": 158 + "lineno": 159 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-math]", "type": "Function", - "lineno": 158 + "lineno": 159 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-calendar]", "type": "Function", - "lineno": 158 + "lineno": 159 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-math]", "type": "Function", - "lineno": 158 + "lineno": 159 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-calendar]", "type": "Function", - "lineno": 158 + "lineno": 159 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-math]", "type": "Function", - "lineno": 158 + "lineno": 159 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-calendar]", "type": "Function", - "lineno": 181 + "lineno": 182 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-math]", "type": "Function", - "lineno": 181 + "lineno": 182 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-calendar]", "type": "Function", - "lineno": 181 + "lineno": 182 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-math]", "type": "Function", - "lineno": 181 + "lineno": 182 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-calendar]", "type": "Function", - "lineno": 181 + "lineno": 182 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-math]", "type": "Function", - "lineno": 181 + "lineno": 182 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", "type": "Function", - "lineno": 203 + "lineno": 204 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", "type": "Function", - "lineno": 203 + "lineno": 204 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", "type": "Function", - "lineno": 203 + "lineno": 204 + }, + { + "nodeid": 
"tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", + "type": "Function", + "lineno": 228 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", + "type": "Function", + "lineno": 228 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", + "type": "Function", + "lineno": 228 } ] } @@ -197,7 +212,7 @@ "tests": [ { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-earth]", - "lineno": 72, + "lineno": 73, "outcome": "passed", "keywords": [ "test_chat_non_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-earth]", @@ -216,21 +231,21 @@ "case_id": "earth" }, "setup": { - "duration": 0.06113254197407514, + "duration": 0.15774220903404057, "outcome": "passed" }, "call": { - "duration": 1.0720349580515176, + "duration": 0.5396400419995189, "outcome": "passed" }, "teardown": { - "duration": 0.00015966698992997408, + "duration": 0.0002977499971166253, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-saturn]", - "lineno": 72, + "lineno": 73, "outcome": "passed", "keywords": [ "test_chat_non_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-saturn]", @@ -249,21 +264,21 @@ "case_id": "saturn" }, "setup": { - "duration": 0.006908083101734519, + "duration": 0.015632833004929125, "outcome": "passed" }, "call": { - "duration": 0.5013210839824751, + "duration": 0.4675290420418605, "outcome": "passed" }, "teardown": { - "duration": 0.0005375830223783851, + "duration": 0.00029129208996891975, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-earth]", - "lineno": 72, + "lineno": 73, "outcome": "passed", "keywords": [ "test_chat_non_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-earth]", @@ -282,21 +297,21 @@ "case_id": "earth" }, "setup": { - "duration": 0.006910792086273432, + "duration": 0.01530187507160008, "outcome": "passed" }, "call": { - "duration": 0.5142245410243049, + "duration": 0.501894542016089, "outcome": "passed" }, "teardown": { - "duration": 0.0004069580463692546, + "duration": 0.0002060839906334877, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-saturn]", - "lineno": 72, + "lineno": 73, "outcome": "passed", "keywords": [ "test_chat_non_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-saturn]", @@ -315,21 +330,21 @@ "case_id": "saturn" }, "setup": { - "duration": 0.009730000048875809, + "duration": 0.014841833035461605, "outcome": "passed" }, "call": { - "duration": 0.40133179200347513, + "duration": 0.4202229160582647, "outcome": "passed" }, "teardown": { - "duration": 0.0004558749496936798, + "duration": 0.0005559159908443689, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-earth]", - "lineno": 72, + "lineno": 73, "outcome": "passed", "keywords": [ 
"test_chat_non_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-earth]", @@ -348,21 +363,21 @@ "case_id": "earth" }, "setup": { - "duration": 0.008247417048551142, + "duration": 0.008204624988138676, "outcome": "passed" }, "call": { - "duration": 0.7914331250358373, + "duration": 1.991508833016269, "outcome": "passed" }, "teardown": { - "duration": 0.00020262505859136581, + "duration": 0.000539042055606842, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-saturn]", - "lineno": 72, + "lineno": 73, "outcome": "passed", "keywords": [ "test_chat_non_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-saturn]", @@ -381,21 +396,21 @@ "case_id": "saturn" }, "setup": { - "duration": 0.00922900007572025, + "duration": 0.022528667002916336, "outcome": "passed" }, "call": { - "duration": 1.2742049579974264, + "duration": 0.37111237505450845, "outcome": "passed" }, "teardown": { - "duration": 0.000688415952026844, + "duration": 0.0005334159359335899, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-earth]", - "lineno": 91, + "lineno": 92, "outcome": "passed", "keywords": [ "test_chat_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-earth]", @@ -414,21 +429,21 @@ "case_id": "earth" }, "setup": { - "duration": 0.006949124974198639, + "duration": 0.00922920904122293, "outcome": "passed" }, "call": { - "duration": 0.4681705000111833, + "duration": 1.1684916669037193, "outcome": "passed" }, "teardown": { - "duration": 0.00017795804888010025, + "duration": 0.0002740409690886736, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-saturn]", - "lineno": 91, + "lineno": 92, "outcome": "passed", "keywords": [ "test_chat_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-saturn]", @@ -447,21 +462,21 @@ "case_id": "saturn" }, "setup": { - "duration": 0.008564374991692603, + "duration": 0.010883333045057952, "outcome": "passed" }, "call": { - "duration": 1.7430362500017509, + "duration": 0.4275277080014348, "outcome": "passed" }, "teardown": { - "duration": 0.00015312491450458765, + "duration": 0.00043112505227327347, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-earth]", - "lineno": 91, + "lineno": 92, "outcome": "failed", "keywords": [ "test_chat_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-earth]", @@ -480,34 +495,34 @@ "case_id": "earth" }, "setup": { - "duration": 0.007404124946333468, + "duration": 0.012945958063937724, "outcome": "passed" }, "call": { - "duration": 0.515926624997519, + "duration": 0.5551295839250088, "outcome": "failed", "crash": { "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 109, + "lineno": 110, "message": "IndexError: list index out of range" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 109, + "lineno": 110, "message": "IndexError" } ], - "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 
'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'case_id': 'earth', 'input': {'messages': [{'content': 'Which planet do humans live on?', 'role': 'user'}]}, 'output': 'Earth'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_basic\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_basic(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:109: IndexError" + "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'case_id': 'earth', 'input': {'messages': [{'content': 'Which planet do humans live on?', 'role': 'user'}]}, 'output': 'Earth'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_basic\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_basic(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:110: IndexError" }, "teardown": { - "duration": 0.0002389999572187662, + "duration": 0.0002744169905781746, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-saturn]", - "lineno": 91, + "lineno": 92, "outcome": "failed", "keywords": [ "test_chat_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-saturn]", @@ -526,34 +541,34 @@ "case_id": "saturn" }, "setup": { - "duration": 0.0071305419551208615, + "duration": 0.017372542060911655, "outcome": "passed" }, "call": { - "duration": 0.37054662499576807, + "duration": 0.3579877089941874, "outcome": "failed", "crash": { "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 109, + "lineno": 110, "message": "IndexError: list index out of range" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 109, + "lineno": 110, "message": "IndexError" } ], - "longrepr": "request = >\nopenai_client = \nmodel = 
'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'case_id': 'saturn', 'input': {'messages': [{'content': 'Which planet has rings around it with a name starting with letter S?', 'role': 'user'}]}, 'output': 'Saturn'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_basic\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_basic(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:109: IndexError" + "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'case_id': 'saturn', 'input': {'messages': [{'content': 'Which planet has rings around it with a name starting with letter S?', 'role': 'user'}]}, 'output': 'Saturn'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_basic\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_basic(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:110: IndexError" }, "teardown": { - "duration": 0.0006014580139890313, + "duration": 0.0005445419810712337, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-earth]", - "lineno": 91, + "lineno": 92, "outcome": "failed", "keywords": [ "test_chat_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-earth]", @@ -572,34 +587,34 @@ "case_id": "earth" }, "setup": { - "duration": 0.007489709067158401, + "duration": 0.014297832967713475, "outcome": "passed" }, "call": { - "duration": 0.7767745839664713, + "duration": 0.8067362919682637, "outcome": "failed", "crash": { "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 109, + "lineno": 110, "message": "IndexError: list 
index out of range" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 109, + "lineno": 110, "message": "IndexError" } ], - "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'case_id': 'earth', 'input': {'messages': [{'content': 'Which planet do humans live on?', 'role': 'user'}]}, 'output': 'Earth'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_basic\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_basic(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:109: IndexError" + "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'case_id': 'earth', 'input': {'messages': [{'content': 'Which planet do humans live on?', 'role': 'user'}]}, 'output': 'Earth'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_basic\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_basic(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:110: IndexError" }, "teardown": { - "duration": 0.00025491707492619753, + "duration": 0.0003220830112695694, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-saturn]", - "lineno": 91, + "lineno": 92, "outcome": "failed", "keywords": [ "test_chat_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-saturn]", @@ -618,34 +633,34 @@ "case_id": "saturn" }, "setup": { - "duration": 0.006736499955877662, + "duration": 0.008816750021651387, "outcome": "passed" }, "call": { - "duration": 0.43948554201051593, + "duration": 0.5383605000097305, "outcome": "failed", "crash": { 
"path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 109, + "lineno": 110, "message": "IndexError: list index out of range" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 109, + "lineno": 110, "message": "IndexError" } ], - "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'case_id': 'saturn', 'input': {'messages': [{'content': 'Which planet has rings around it with a name starting with letter S?', 'role': 'user'}]}, 'output': 'Saturn'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_basic\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_basic(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:109: IndexError" + "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'case_id': 'saturn', 'input': {'messages': [{'content': 'Which planet has rings around it with a name starting with letter S?', 'role': 'user'}]}, 'output': 'Saturn'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_basic\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_basic(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:110: IndexError" }, "teardown": { - "duration": 0.0002264160430058837, + "duration": 0.00018316600471735, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", - "lineno": 115, + "lineno": 116, "outcome": "skipped", "keywords": [ "test_chat_non_streaming_image[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", @@ -664,22 +679,22 @@ "case_id": 
"case0" }, "setup": { - "duration": 0.007171708042733371, + "duration": 0.0074389580404385924, "outcome": "passed" }, "call": { - "duration": 0.00013554200995713472, + "duration": 0.00014933396596461535, "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py', 124, 'Skipped: Skipping test_chat_non_streaming_image for model meta-llama/Llama-3.3-70B-Instruct-Turbo on provider together based on config.')" + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py', 125, 'Skipped: Skipping test_chat_non_streaming_image for model meta-llama/Llama-3.3-70B-Instruct-Turbo on provider together based on config.')" }, "teardown": { - "duration": 0.0001235839445143938, + "duration": 0.00012462493032217026, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", - "lineno": 115, + "lineno": 116, "outcome": "passed", "keywords": [ "test_chat_non_streaming_image[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", @@ -698,21 +713,21 @@ "case_id": "case0" }, "setup": { - "duration": 0.008639499894343317, + "duration": 0.013580625061877072, "outcome": "passed" }, "call": { - "duration": 1.4001279999502003, + "duration": 2.89831429196056, "outcome": "passed" }, "teardown": { - "duration": 0.00014812499284744263, + "duration": 0.000491458922624588, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", - "lineno": 115, + "lineno": 116, "outcome": "passed", "keywords": [ "test_chat_non_streaming_image[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", @@ -731,21 +746,21 @@ "case_id": "case0" }, "setup": { - "duration": 0.015450250008143485, + "duration": 0.008266666904091835, "outcome": "passed" }, "call": { - "duration": 3.3522649579681456, + "duration": 3.8873212080216035, "outcome": "passed" }, "teardown": { - "duration": 0.00041629199404269457, + "duration": 0.00016850000247359276, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", - "lineno": 134, + "lineno": 135, "outcome": "skipped", "keywords": [ "test_chat_streaming_image[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", @@ -764,22 +779,22 @@ "case_id": "case0" }, "setup": { - "duration": 0.007634000037796795, + "duration": 0.0080461660400033, "outcome": "passed" }, "call": { - "duration": 0.0001563339028507471, + "duration": 0.00014758307952433825, "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py', 143, 'Skipped: Skipping test_chat_streaming_image for model meta-llama/Llama-3.3-70B-Instruct-Turbo on provider together based on config.')" + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py', 144, 'Skipped: Skipping test_chat_streaming_image for model meta-llama/Llama-3.3-70B-Instruct-Turbo on provider together based on config.')" }, "teardown": { - "duration": 0.0001324999611824751, + "duration": 0.00012695800978690386, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", - "lineno": 134, + "lineno": 135, 
"outcome": "failed", "keywords": [ "test_chat_streaming_image[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", @@ -798,34 +813,34 @@ "case_id": "case0" }, "setup": { - "duration": 0.007050334010273218, + "duration": 0.00845700001809746, "outcome": "passed" }, "call": { - "duration": 1.7063317500287667, + "duration": 1.6604419159702957, "outcome": "failed", "crash": { "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 152, + "lineno": 153, "message": "IndexError: list index out of range" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 152, + "lineno": 153, "message": "IndexError" } ], - "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'input': {'messages': [{'content': [{'text': 'What is in this image?', 'type': 'text'}, {'image_url': {...}, 'type': 'image_url'}], 'role': 'user'}]}, 'output': 'llama'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_image\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_image(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:152: IndexError" + "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'input': {'messages': [{'content': [{'text': 'What is in this image?', 'type': 'text'}, {'image_url': {...}, 'type': 'image_url'}], 'role': 'user'}]}, 'output': 'llama'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_image\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_image(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:153: IndexError" }, "teardown": { - "duration": 0.0002109999768435955, + "duration": 
0.00033458403777331114, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", - "lineno": 134, + "lineno": 135, "outcome": "failed", "keywords": [ "test_chat_streaming_image[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", @@ -844,34 +859,34 @@ "case_id": "case0" }, "setup": { - "duration": 0.006729208980686963, + "duration": 0.012580333976075053, "outcome": "passed" }, "call": { - "duration": 3.829621708020568, + "duration": 4.728511792025529, "outcome": "failed", "crash": { "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 152, + "lineno": 153, "message": "IndexError: list index out of range" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 152, + "lineno": 153, "message": "IndexError" } ], - "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'input': {'messages': [{'content': [{'text': 'What is in this image?', 'type': 'text'}, {'image_url': {...}, 'type': 'image_url'}], 'role': 'user'}]}, 'output': 'llama'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_image\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_image(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:152: IndexError" + "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'input': {'messages': [{'content': [{'text': 'What is in this image?', 'type': 'text'}, {'image_url': {...}, 'type': 'image_url'}], 'role': 'user'}]}, 'output': 'llama'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_image\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_image(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in 
response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:153: IndexError" }, "teardown": { - "duration": 0.0002882500411942601, + "duration": 0.00023266696371138096, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-calendar]", - "lineno": 158, + "lineno": 159, "outcome": "passed", "keywords": [ "test_chat_non_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-calendar]", @@ -890,21 +905,21 @@ "case_id": "calendar" }, "setup": { - "duration": 0.007713916013017297, + "duration": 0.011554082971997559, "outcome": "passed" }, "call": { - "duration": 2.48285808309447, + "duration": 1.3857994999270886, "outcome": "passed" }, "teardown": { - "duration": 0.00020350003615021706, + "duration": 0.0003951250109821558, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-math]", - "lineno": 158, + "lineno": 159, "outcome": "passed", "keywords": [ "test_chat_non_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-math]", @@ -923,21 +938,21 @@ "case_id": "math" }, "setup": { - "duration": 0.010098082944750786, + "duration": 0.007673708954825997, "outcome": "passed" }, "call": { - "duration": 1.6994713749736547, + "duration": 3.082161583006382, "outcome": "passed" }, "teardown": { - "duration": 0.00014512497000396252, + "duration": 0.0002532500075176358, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-calendar]", - "lineno": 158, + "lineno": 159, "outcome": "passed", "keywords": [ "test_chat_non_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-calendar]", @@ -956,21 +971,21 @@ "case_id": "calendar" }, "setup": { - "duration": 0.006934792036190629, + "duration": 0.014791041961871088, "outcome": "passed" }, "call": { - "duration": 1.277176082949154, + "duration": 0.6918012499809265, "outcome": "passed" }, "teardown": { - "duration": 0.0004985419800505042, + "duration": 0.00027070799842476845, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-math]", - "lineno": 158, + "lineno": 159, "outcome": "passed", "keywords": [ "test_chat_non_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-math]", @@ -989,21 +1004,21 @@ "case_id": "math" }, "setup": { - "duration": 0.012558708898723125, + "duration": 0.014746625092811882, "outcome": "passed" }, "call": { - "duration": 2.442075416096486, + "duration": 3.5890139170223847, "outcome": "passed" }, "teardown": { - "duration": 0.0003505420172587037, + "duration": 0.00030137505382299423, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-calendar]", - "lineno": 158, + "lineno": 159, "outcome": "passed", "keywords": [ "test_chat_non_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-calendar]", @@ -1022,21 +1037,21 @@ "case_id": "calendar" }, "setup": { - "duration": 0.012642999994568527, + "duration": 0.036798374960199, "outcome": "passed" }, 
"call": { - "duration": 0.9305703329155222, + "duration": 0.6914895409718156, "outcome": "passed" }, "teardown": { - "duration": 0.00016004196368157864, + "duration": 0.00023716699797660112, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-math]", - "lineno": 158, + "lineno": 159, "outcome": "passed", "keywords": [ "test_chat_non_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-math]", @@ -1055,21 +1070,21 @@ "case_id": "math" }, "setup": { - "duration": 0.008792415959760547, + "duration": 0.05965254199691117, "outcome": "passed" }, "call": { - "duration": 2.194098167004995, + "duration": 2.609581291093491, "outcome": "passed" }, "teardown": { - "duration": 0.0003667499404400587, + "duration": 0.0002674580318853259, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-calendar]", - "lineno": 181, + "lineno": 182, "outcome": "passed", "keywords": [ "test_chat_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-calendar]", @@ -1088,21 +1103,21 @@ "case_id": "calendar" }, "setup": { - "duration": 0.01219504198525101, + "duration": 0.014533916022628546, "outcome": "passed" }, "call": { - "duration": 2.045097667025402, + "duration": 0.6227063750848174, "outcome": "passed" }, "teardown": { - "duration": 0.00029958400409668684, + "duration": 0.00019699998665601015, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-math]", - "lineno": 181, + "lineno": 182, "outcome": "passed", "keywords": [ "test_chat_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-math]", @@ -1121,21 +1136,21 @@ "case_id": "math" }, "setup": { - "duration": 0.014203459024429321, + "duration": 0.009818125050514936, "outcome": "passed" }, "call": { - "duration": 1.3079068749211729, + "duration": 5.144610875053331, "outcome": "passed" }, "teardown": { - "duration": 0.0001914579188451171, + "duration": 0.00045220903120934963, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-calendar]", - "lineno": 181, + "lineno": 182, "outcome": "failed", "keywords": [ "test_chat_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-calendar]", @@ -1154,34 +1169,34 @@ "case_id": "calendar" }, "setup": { - "duration": 0.04714570892974734, + "duration": 0.012392290984280407, "outcome": "passed" }, "call": { - "duration": 0.44743770791683346, + "duration": 0.777625665999949, "outcome": "failed", "crash": { "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 200, + "lineno": 201, "message": "IndexError: list index out of range" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 200, + "lineno": 201, "message": "IndexError" } ], - "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': 
['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'case_id': 'calendar', 'input': {'messages': [{'content': 'Extract the event information.', 'role': 'system'}, {'cont...articipants'], 'title': 'CalendarEvent', 'type': 'object'}}, 'type': 'json_schema'}}, 'output': 'valid_calendar_event'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_structured_output\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_structured_output(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n response_format=case[\"input\"][\"response_format\"],\n stream=True,\n )\n maybe_json_content = \"\"\n for chunk in response:\n> maybe_json_content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:200: IndexError" + "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'case_id': 'calendar', 'input': {'messages': [{'content': 'Extract the event information.', 'role': 'system'}, {'cont...articipants'], 'title': 'CalendarEvent', 'type': 'object'}}, 'type': 'json_schema'}}, 'output': 'valid_calendar_event'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_structured_output\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_structured_output(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n response_format=case[\"input\"][\"response_format\"],\n stream=True,\n )\n maybe_json_content = \"\"\n for chunk in response:\n> maybe_json_content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:201: IndexError" }, "teardown": { - "duration": 0.00022199994418770075, + "duration": 0.000559916952624917, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-math]", - "lineno": 181, + "lineno": 182, "outcome": "failed", "keywords": [ "test_chat_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-math]", @@ -1200,34 +1215,34 @@ "case_id": "math" }, "setup": { - "duration": 0.012237709015607834, + "duration": 0.010390624986030161, "outcome": "passed" }, "call": { - "duration": 3.180020791012794, + "duration": 2.680094916955568, "outcome": "failed", "crash": { "path": 
"/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 200, + "lineno": 201, "message": "IndexError: list index out of range" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 200, + "lineno": 201, "message": "IndexError" } ], - "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'case_id': 'math', 'input': {'messages': [{'content': 'You are a helpful math tutor. Guide the user through the solut... ['steps', 'final_answer'], 'title': 'MathReasoning', ...}}, 'type': 'json_schema'}}, 'output': 'valid_math_reasoning'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_structured_output\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_structured_output(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n response_format=case[\"input\"][\"response_format\"],\n stream=True,\n )\n maybe_json_content = \"\"\n for chunk in response:\n> maybe_json_content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:200: IndexError" + "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'case_id': 'math', 'input': {'messages': [{'content': 'You are a helpful math tutor. Guide the user through the solut... 
['steps', 'final_answer'], 'title': 'MathReasoning', ...}}, 'type': 'json_schema'}}, 'output': 'valid_math_reasoning'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_structured_output\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_structured_output(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n response_format=case[\"input\"][\"response_format\"],\n stream=True,\n )\n maybe_json_content = \"\"\n for chunk in response:\n> maybe_json_content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:201: IndexError" }, "teardown": { - "duration": 0.000273333047516644, + "duration": 0.00041987502481788397, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-calendar]", - "lineno": 181, + "lineno": 182, "outcome": "failed", "keywords": [ "test_chat_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-calendar]", @@ -1246,34 +1261,34 @@ "case_id": "calendar" }, "setup": { - "duration": 0.013312208000570536, + "duration": 0.01190529193263501, "outcome": "passed" }, "call": { - "duration": 0.4110311249969527, + "duration": 0.6690819580107927, "outcome": "failed", "crash": { "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 200, + "lineno": 201, "message": "IndexError: list index out of range" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 200, + "lineno": 201, "message": "IndexError" } ], - "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'case_id': 'calendar', 'input': {'messages': [{'content': 'Extract the event information.', 'role': 'system'}, {'cont...articipants'], 'title': 'CalendarEvent', 'type': 'object'}}, 'type': 'json_schema'}}, 'output': 'valid_calendar_event'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_structured_output\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_structured_output(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n response_format=case[\"input\"][\"response_format\"],\n stream=True,\n )\n maybe_json_content = \"\"\n for chunk in response:\n> maybe_json_content += chunk.choices[0].delta.content or \"\"\nE 
IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:200: IndexError" + "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'case_id': 'calendar', 'input': {'messages': [{'content': 'Extract the event information.', 'role': 'system'}, {'cont...articipants'], 'title': 'CalendarEvent', 'type': 'object'}}, 'type': 'json_schema'}}, 'output': 'valid_calendar_event'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_structured_output\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_structured_output(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n response_format=case[\"input\"][\"response_format\"],\n stream=True,\n )\n maybe_json_content = \"\"\n for chunk in response:\n> maybe_json_content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:201: IndexError" }, "teardown": { - "duration": 0.00022975006140768528, + "duration": 0.000247166957706213, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-math]", - "lineno": 181, + "lineno": 182, "outcome": "failed", "keywords": [ "test_chat_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-math]", @@ -1292,34 +1307,34 @@ "case_id": "math" }, "setup": { - "duration": 0.006676917080767453, + "duration": 0.009588208980858326, "outcome": "passed" }, "call": { - "duration": 2.316411833046004, + "duration": 2.4867218340514228, "outcome": "failed", "crash": { "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 200, + "lineno": 201, "message": "IndexError: list index out of range" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 200, + "lineno": 201, "message": "IndexError" } ], - "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'case_id': 'math', 'input': {'messages': [{'content': 'You are a helpful math tutor. Guide the user through the solut... 
['steps', 'final_answer'], 'title': 'MathReasoning', ...}}, 'type': 'json_schema'}}, 'output': 'valid_math_reasoning'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_structured_output\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_structured_output(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n response_format=case[\"input\"][\"response_format\"],\n stream=True,\n )\n maybe_json_content = \"\"\n for chunk in response:\n> maybe_json_content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:200: IndexError" + "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'case_id': 'math', 'input': {'messages': [{'content': 'You are a helpful math tutor. Guide the user through the solut... ['steps', 'final_answer'], 'title': 'MathReasoning', ...}}, 'type': 'json_schema'}}, 'output': 'valid_math_reasoning'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_structured_output\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_structured_output(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n response_format=case[\"input\"][\"response_format\"],\n stream=True,\n )\n maybe_json_content = \"\"\n for chunk in response:\n> maybe_json_content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:201: IndexError" }, "teardown": { - "duration": 0.000245374976657331, + "duration": 0.00022487505339086056, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", - "lineno": 203, + "lineno": 204, "outcome": "passed", "keywords": [ "test_chat_non_streaming_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", @@ -1338,21 +1353,21 @@ "case_id": "case0" }, "setup": { - "duration": 0.007064500008709729, + "duration": 0.008509417064487934, "outcome": "passed" }, "call": { - "duration": 0.606806542025879, + "duration": 0.45511841599363834, "outcome": "passed" }, "teardown": { - "duration": 0.00046320806723088026, + "duration": 0.00031033402774482965, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", - 
"lineno": 203, + "lineno": 204, "outcome": "passed", "keywords": [ "test_chat_non_streaming_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", @@ -1371,21 +1386,21 @@ "case_id": "case0" }, "setup": { - "duration": 0.009071375010535121, + "duration": 0.01352791697718203, "outcome": "passed" }, "call": { - "duration": 0.41908070899080485, + "duration": 0.7166531670372933, "outcome": "passed" }, "teardown": { - "duration": 0.00026074994821101427, + "duration": 0.00031470798421651125, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", - "lineno": 203, + "lineno": 204, "outcome": "passed", "keywords": [ "test_chat_non_streaming_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", @@ -1404,17 +1419,143 @@ "case_id": "case0" }, "setup": { - "duration": 0.0068333749659359455, + "duration": 0.01369225000962615, "outcome": "passed" }, "call": { - "duration": 0.8904451669659466, + "duration": 0.34134254103992134, "outcome": "passed" }, "teardown": { - "duration": 0.0005833340110257268, + "duration": 0.0002922919811680913, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", + "lineno": 228, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", + "parametrize", + "pytestmark", + "meta-llama/Llama-3.3-70B-Instruct-Turbo-case0", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo", + "case_id": "case0" + }, + "setup": { + "duration": 0.025748749962076545, + "outcome": "passed" + }, + "call": { + "duration": 0.7462511250050738, + "outcome": "passed" + }, + "teardown": { + "duration": 0.00030449999030679464, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", + "lineno": 228, + "outcome": "failed", + "keywords": [ + "test_chat_streaming_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", + "parametrize", + "pytestmark", + "meta-llama/Llama-4-Scout-17B-16E-Instruct-case0", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "case_id": "case0" + }, + "setup": { + "duration": 0.015131957945413888, + "outcome": "passed" + }, + "call": { + "duration": 0.4556894999695942, + "outcome": "failed", + "crash": { + "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", + "lineno": 251, + "message": "IndexError: list index out of range" + }, + "traceback": [ + { + "path": "tests/verifications/openai_api/test_chat_completion.py", + "lineno": 251, + "message": "IndexError" + } + ], + "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful 
assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_tool_calling(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n stream = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n stream=True,\n )\n \n # Accumulate partial tool_calls here\n tool_calls_buffer = {}\n current_id = None\n # Process streaming chunks\n for chunk in stream:\n> choice = chunk.choices[0]\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:251: IndexError" + }, + "teardown": { + "duration": 0.000539042055606842, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", + "lineno": 228, + "outcome": "failed", + "keywords": [ + "test_chat_streaming_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", + "parametrize", + "pytestmark", + "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", + "case_id": "case0" + }, + "setup": { + "duration": 0.016429082956165075, + "outcome": "passed" + }, + "call": { + "duration": 0.3677835420239717, + "outcome": "failed", + "crash": { + "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", + "lineno": 251, + "message": "IndexError: list index out of range" + }, + "traceback": [ + { + "path": "tests/verifications/openai_api/test_chat_completion.py", + "lineno": 251, + "message": "IndexError" + } + ], + "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_tool_calling(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n stream = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n 
tools=case[\"input\"][\"tools\"],\n stream=True,\n )\n \n # Accumulate partial tool_calls here\n tool_calls_buffer = {}\n current_id = None\n # Process streaming chunks\n for chunk in stream:\n> choice = chunk.choices[0]\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:251: IndexError" + }, + "teardown": { + "duration": 0.001610000035725534, "outcome": "passed" } } - ] + ], + "run_timestamp": 1744328795 }
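Every failure recorded in this results file shares one crash signature: `IndexError: list index out of range` raised at `chunk.choices[0]` while iterating a streamed response from the `together` provider. Below is a minimal sketch of that failure mode and a defensive guard, assuming the standard `openai` Python client; the client setup, model name, and message are illustrative and this is not the repository's fix:

```python
from openai import OpenAI

# Hypothetical client pointed at an OpenAI-compatible provider endpoint.
client = OpenAI()

stream = client.chat.completions.create(
    model="meta-llama/Llama-4-Scout-17B-16E-Instruct",  # model name taken from the report above
    messages=[{"role": "user", "content": "What is in this image?"}],
    stream=True,
)

content = ""
for chunk in stream:
    # Some providers emit chunks whose `choices` list is empty (e.g. trailing
    # usage or keep-alive chunks). Indexing `choices[0]` unconditionally is
    # exactly what raises the IndexError seen in each failed test above.
    if not chunk.choices:
        continue
    content += chunk.choices[0].delta.content or ""
```

The sketch only illustrates why the same `IndexError` recurs across the streaming image, structured-output, and tool-calling tests; whether the tests should tolerate empty-`choices` chunks or the provider should stop emitting them is a separate decision.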