diff --git a/tests/verifications/REPORT.md b/tests/verifications/REPORT.md
index 449499382..2309c6404 100644
--- a/tests/verifications/REPORT.md
+++ b/tests/verifications/REPORT.md
@@ -1,6 +1,6 @@
 # Test Results Report

-*Generated on: 2025-04-09 22:52:19*
+*Generated on: 2025-04-10 16:48:18*

 *This report was generated by running `python tests/verifications/generate_report.py`*

@@ -15,15 +15,15 @@

 | Provider | Pass Rate | Tests Passed | Total Tests |
 | --- | --- | --- | --- |
-| Together | 67.7% | 21 | 31 |
-| Fireworks | 90.3% | 28 | 31 |
-| Openai | 100.0% | 22 | 22 |
+| Together | 64.7% | 22 | 34 |
+| Fireworks | 82.4% | 28 | 34 |
+| Openai | 100.0% | 24 | 24 |

 ## Together

-*Tests run on: 2025-04-09 22:50:58*
+*Tests run on: 2025-04-10 16:46:35*

 ```bash
 # Run all tests for this provider:
@@ -56,10 +56,11 @@ pytest tests/verifications/openai_api/test_chat_completion.py --provider=togethe
 | test_chat_streaming_image | ⚪ | ❌ | ❌ |
 | test_chat_streaming_structured_output (calendar) | ✅ | ❌ | ❌ |
 | test_chat_streaming_structured_output (math) | ✅ | ❌ | ❌ |
+| test_chat_streaming_tool_calling | ✅ | ❌ | ❌ |

 ## Fireworks

-*Tests run on: 2025-04-09 22:50:02*
+*Tests run on: 2025-04-10 16:44:44*

 ```bash
 # Run all tests for this provider:
@@ -92,10 +93,11 @@ pytest tests/verifications/openai_api/test_chat_completion.py --provider=firewor
 | test_chat_streaming_image | ⚪ | ✅ | ✅ |
 | test_chat_streaming_structured_output (calendar) | ✅ | ✅ | ✅ |
 | test_chat_streaming_structured_output (math) | ✅ | ✅ | ✅ |
+| test_chat_streaming_tool_calling | ❌ | ❌ | ❌ |

 ## Openai

-*Tests run on: 2025-04-09 22:51:44*
+*Tests run on: 2025-04-10 16:47:28*

 ```bash
 # Run all tests for this provider:
@@ -127,3 +129,4 @@ pytest tests/verifications/openai_api/test_chat_completion.py --provider=openai
 | test_chat_streaming_image | ✅ | ✅ |
 | test_chat_streaming_structured_output (calendar) | ✅ | ✅ |
 | test_chat_streaming_structured_output (math) | ✅ | ✅ |
+| test_chat_streaming_tool_calling | ✅ | ✅ |
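With this change, each provider's "Tests run on" line above is derived from a `run_timestamp` field embedded in `tests/verifications/test_results/{provider}.json` rather than from a timestamp in the filename (see the `generate_report.py` diff below). A minimal sketch of reading that label back, assuming the constant-filename layout this diff introduces:

```python
import json
import time
from pathlib import Path

RESULTS_DIR = Path("tests/verifications/test_results")

def last_run_label(provider: str) -> str:
    """Return the 'Tests run on' label for a provider's latest results.

    Assumes one {provider}.json per provider with an embedded
    "run_timestamp" (Unix seconds), as introduced in this diff.
    """
    results = json.loads((RESULTS_DIR / f"{provider}.json").read_text())
    ts = results.get("run_timestamp")
    if ts is None:
        return "Unknown"
    return time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(ts))

# e.g. last_run_label("fireworks") -> "2025-04-10 16:44:44" on the machine
# that produced this report (localtime-dependent).
```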
diff --git a/tests/verifications/generate_report.py b/tests/verifications/generate_report.py
index 1c760ca19..6a7c39ee2 100755
--- a/tests/verifications/generate_report.py
+++ b/tests/verifications/generate_report.py
@@ -77,8 +77,9 @@ def run_tests(provider, keyword=None):
     print(f"Running tests for provider: {provider}")

     timestamp = int(time.time())
-    result_file = RESULTS_DIR / f"{provider}_{timestamp}.json"
-    temp_json_file = RESULTS_DIR / f"temp_{provider}_{timestamp}.json"
+    # Use a constant filename for the final result and temp file
+    result_file = RESULTS_DIR / f"{provider}.json"
+    temp_json_file = RESULTS_DIR / f"temp_{provider}.json"

     # Determine project root directory relative to this script
     project_root = Path(__file__).parent.parent.parent
@@ -106,11 +107,12 @@ def run_tests(provider, keyword=None):

     # Check if the JSON file was created
     if temp_json_file.exists():
-        # Read the JSON file and save it to our results format
        with open(temp_json_file, "r") as f:
            test_results = json.load(f)

-        # Save results to our own format with a trailing newline
+        test_results["run_timestamp"] = timestamp
+
+        # Save results to the final (overwritten) file
        with open(result_file, "w") as f:
            json.dump(test_results, f, indent=2)
            f.write("\n")  # Add a trailing newline for precommit
@@ -132,7 +134,7 @@ def run_tests(provider, keyword=None):

 def parse_results(
     result_file,
-) -> Tuple[DefaultDict[str, DefaultDict[str, Dict[str, bool]]], DefaultDict[str, Set[str]], Set[str]]:
+) -> Tuple[DefaultDict[str, DefaultDict[str, Dict[str, bool]]], DefaultDict[str, Set[str]], Set[str], str]:
     """Parse a single test results file.

     Returns:
@@ -140,11 +142,12 @@ def parse_results(
         - parsed_results: DefaultDict[provider, DefaultDict[model, Dict[test_name, pass_status]]]
         - providers_in_file: DefaultDict[provider, Set[model]] found in this file.
         - tests_in_file: Set[test_name] found in this file.
+        - run_timestamp: Timestamp when the test was run
     """
     if not os.path.exists(result_file):
         print(f"Results file does not exist: {result_file}")
         # Return empty defaultdicts/set matching the type hint
-        return defaultdict(lambda: defaultdict(dict)), defaultdict(set), set()
+        return defaultdict(lambda: defaultdict(dict)), defaultdict(set), set(), ""

     with open(result_file, "r") as f:
         results = json.load(f)
@@ -153,7 +156,16 @@
     parsed_results: DefaultDict[str, DefaultDict[str, Dict[str, bool]]] = defaultdict(lambda: defaultdict(dict))
     providers_in_file: DefaultDict[str, Set[str]] = defaultdict(set)
     tests_in_file: Set[str] = set()
-    provider: str = os.path.basename(result_file).split("_")[0]
+    # Extract provider from filename (e.g., "openai.json" -> "openai")
+    provider: str = result_file.stem
+
+    # Extract run timestamp from the JSON data
+    run_timestamp_unix = results.get("run_timestamp")
+    run_timestamp_str = (
+        time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(run_timestamp_unix))
+        if run_timestamp_unix is not None
+        else "Unknown"
+    )

     # Debug: Print summary of test results
     print(f"Test results summary for {provider}:")
@@ -167,7 +179,7 @@
     if "tests" not in results or not results["tests"]:
         print(f"No test results found in {result_file}")
         # Return empty defaultdicts/set matching the type hint
-        return defaultdict(lambda: defaultdict(dict)), defaultdict(set), set()
+        return defaultdict(lambda: defaultdict(dict)), defaultdict(set), set(), ""

     # Process the tests
     for test in results["tests"]:
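The stem-based provider lookup above changes what a result filename must look like. A small illustrative sketch of the before/after mapping (the timestamped path is the old file renamed later in this diff):

```python
from pathlib import Path

# Old layout: provider was the first "_"-separated token of the stem,
# e.g. fireworks_1744264202.json -> "fireworks".
old = Path("tests/verifications/test_results/fireworks_1744264202.json")
assert old.stem.split("_")[0] == "fireworks"

# New layout: one file per provider, so the stem *is* the provider name.
new = Path("tests/verifications/test_results/fireworks.json")
assert new.stem == "fireworks"
```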
@@ -225,59 +237,29 @@
     if not parsed_results.get(provider):
         print(f"Warning: No valid test results parsed for provider {provider} from file {result_file}")

-    return parsed_results, providers_in_file, tests_in_file
+    return parsed_results, providers_in_file, tests_in_file, run_timestamp_str


-def cleanup_old_results(providers_to_clean: Dict[str, Set[str]]):
-    """Clean up old test result files, keeping only the newest N per provider."""
-    # Use the passed-in providers dictionary
-    for provider in providers_to_clean.keys():
-        # Get all result files for this provider
-        provider_files = list(RESULTS_DIR.glob(f"{provider}_*.json"))
-
-        # Sort by timestamp (newest first)
-        provider_files.sort(key=lambda x: int(x.stem.split("_")[1]), reverse=True)
-
-        # Remove old files beyond the max to keep
-        if len(provider_files) > MAX_RESULTS_PER_PROVIDER:
-            for old_file in provider_files[MAX_RESULTS_PER_PROVIDER:]:
-                try:
-                    old_file.unlink()
-                    print(f"Removed old result file: {old_file}")
-                except Exception as e:
-                    print(f"Error removing file {old_file}: {e}")
-
-
-def get_latest_results_by_provider():
-    """Get the latest test result file for each provider"""
+def get_all_result_files_by_provider():
+    """Get all test result files, keyed by provider."""
     provider_results = {}

-    # Get all result files
     result_files = list(RESULTS_DIR.glob("*.json"))

-    # Extract all provider names from filenames
-    all_providers = set()
     for file in result_files:
-        # File format is provider_timestamp.json
-        parts = file.stem.split("_")
-        if len(parts) >= 2:
-            all_providers.add(parts[0])
-
-    # Group by provider
-    for provider in all_providers:
-        provider_files = [f for f in result_files if f.name.startswith(f"{provider}_")]
-
-        # Sort by timestamp (newest first)
-        provider_files.sort(key=lambda x: int(x.stem.split("_")[1]), reverse=True)
-
-        if provider_files:
-            provider_results[provider] = provider_files[0]
+        provider = file.stem
+        if provider:
+            provider_results[provider] = file

     return provider_results


 def generate_report(
-    results_dict: Dict[str, Any], providers: Dict[str, Set[str]], all_tests: Set[str], output_file=None
+    results_dict: Dict[str, Any],
+    providers: Dict[str, Set[str]],
+    all_tests: Set[str],
+    provider_timestamps: Dict[str, str],
+    output_file=None,
 ):
     """Generate the markdown report.

@@ -285,6 +267,7 @@
         results_dict: Aggregated results [provider][model][test_name] -> status.
         providers: Dict of all providers and their models {provider: {models}}.
         all_tests: Set of all test names found.
+        provider_timestamps: Dict of provider to timestamp when tests were run
         output_file: Optional path to save the report.
     """
     if output_file is None:
@@ -293,19 +276,6 @@
     else:
         output_file = Path(output_file)

-    # Get the timestamp from result files
-    provider_timestamps = {}
-    provider_results_files = get_latest_results_by_provider()
-    for provider, result_file in provider_results_files.items():
-        # Extract timestamp from filename (format: provider_timestamp.json)
-        try:
-            timestamp_str = result_file.stem.split("_")[1]
-            timestamp = int(timestamp_str)
-            formatted_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(timestamp))
-            provider_timestamps[provider] = formatted_time
-        except (IndexError, ValueError):
-            provider_timestamps[provider] = "Unknown"
-
     # Convert provider model sets to sorted lists (use passed-in providers dict)
     providers_sorted = {prov: sorted(models) for prov, models in providers.items()}

@@ -416,7 +386,7 @@
             else:
                 example_base_test_name = first_test_name

-            base_name = base_test_name_map.get(test, test)  # Get base name
+            base_name = base_test_name_map.get(first_test_name, first_test_name)  # Get base name
             case_count = base_test_case_counts.get(base_name, 1)  # Get count
             filter_str = f"{example_base_test_name} and {example_case_id}" if case_count > 1 else example_base_test_name

@@ -491,6 +461,7 @@ def main():
     # Initialize collections to aggregate results in main
     aggregated_providers = defaultdict(set)
     aggregated_tests = set()
+    provider_timestamps = {}

     if args.run_tests:
         # Get list of available providers from command line or use detected providers
@@ -512,28 +483,28 @@
             result_file = run_tests(provider, keyword=args.k)
             if result_file:
                 # Parse and aggregate results
-                parsed_results, providers_in_file, tests_in_file = parse_results(result_file)
+                parsed_results, providers_in_file, tests_in_file, run_timestamp = parse_results(result_file)
                 all_results.update(parsed_results)
                 for prov, models in providers_in_file.items():
                     aggregated_providers[prov].update(models)
+                    if run_timestamp:
+                        provider_timestamps[prov] = run_timestamp
                 aggregated_tests.update(tests_in_file)
     else:
         # Use existing results
-        provider_result_files = get_latest_results_by_provider()
+        provider_result_files = get_all_result_files_by_provider()

         for result_file in provider_result_files.values():
             # Parse and aggregate results
-            parsed_results, providers_in_file, tests_in_file = parse_results(result_file)
+            parsed_results, providers_in_file, tests_in_file, run_timestamp = parse_results(result_file)
             all_results.update(parsed_results)
             for prov, models in providers_in_file.items():
                 aggregated_providers[prov].update(models)
+                if run_timestamp:
+                    provider_timestamps[prov] = run_timestamp
             aggregated_tests.update(tests_in_file)

-    # Generate the report, passing aggregated data
-    generate_report(all_results, aggregated_providers, aggregated_tests, args.output)
-
-    # Cleanup, passing aggregated providers
-    cleanup_old_results(aggregated_providers)
+    generate_report(all_results, aggregated_providers, aggregated_tests, provider_timestamps, args.output)


 if __name__ == "__main__":
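For orientation before the (large) results-file diff that follows: the summary table at the top of REPORT.md can be recomputed from each result file's `summary` block. A rough sketch, assuming the report's "Total Tests" excludes skipped tests, which is consistent with the Fireworks numbers below (28 passed, 2 skipped, 36 collected, reported as 28/34 = 82.4%):

```python
import json
from pathlib import Path

def summary_row(provider: str) -> str:
    """Recompute a provider's row in the report's summary table."""
    data = json.loads(Path(f"tests/verifications/test_results/{provider}.json").read_text())
    s = data["summary"]
    total = s["total"] - s.get("skipped", 0)  # skipped tests are not counted
    return f"{100 * s['passed'] / total:.1f}% | {s['passed']} | {total}"

# fireworks.json below: passed=28, skipped=2, total=36 -> "82.4% | 28 | 34"
```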
diff --git a/tests/verifications/test_results/fireworks_1744264202.json b/tests/verifications/test_results/fireworks.json
similarity index 68%
rename from tests/verifications/test_results/fireworks_1744264202.json
rename to tests/verifications/test_results/fireworks.json
index d14738be9..061e44c08 100644
--- a/tests/verifications/test_results/fireworks_1744264202.json
+++ b/tests/verifications/test_results/fireworks.json
@@ -1,15 +1,15 @@
 {
-  "created": 1744264258.730061,
-  "duration": 53.86071586608887,
+  "created": 1744328795.171092,
+  "duration": 107.57908606529236,
   "exitcode": 1,
   "root": "/Users/erichuang/projects/llama-stack",
   "environment": {},
   "summary": {
     "passed": 28,
     "skipped": 2,
-    "failed": 3,
-    "total": 33,
-    "collected": 33
+    "failed": 6,
+    "total": 36,
+    "collected": 36
   },
   "collectors": [
     {
@@ -29,167 +29,182 @@
       {
         "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[accounts/fireworks/models/llama-v3p3-70b-instruct-earth]",
         "type": "Function",
-        "lineno": 72
+        "lineno": 73
       },
       {
         "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[accounts/fireworks/models/llama-v3p3-70b-instruct-saturn]",
         "type": "Function",
-        "lineno": 72
+        "lineno": 73
       },
      {
         "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[accounts/fireworks/models/llama4-scout-instruct-basic-earth]",
         "type": "Function",
-        "lineno": 72
+        "lineno": 73
       },
       {
         "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[accounts/fireworks/models/llama4-scout-instruct-basic-saturn]",
         "type": "Function",
-        "lineno": 72
+        "lineno": 73
       },
       {
         "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[accounts/fireworks/models/llama4-maverick-instruct-basic-earth]",
         "type": "Function",
-        "lineno": 72
+        "lineno": 73
       },
       {
         "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[accounts/fireworks/models/llama4-maverick-instruct-basic-saturn]",
         "type": "Function",
-        "lineno": 72
+        "lineno": 73
       },
       {
         "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[accounts/fireworks/models/llama-v3p3-70b-instruct-earth]",
         "type": "Function",
-        "lineno": 91
+        "lineno": 92
       },
       {
         "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[accounts/fireworks/models/llama-v3p3-70b-instruct-saturn]",
         "type": "Function",
-        "lineno": 91
+        "lineno": 92
       },
       {
         "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[accounts/fireworks/models/llama4-scout-instruct-basic-earth]",
         "type": "Function",
-        "lineno": 91
+        "lineno": 92
       },
       {
         "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[accounts/fireworks/models/llama4-scout-instruct-basic-saturn]",
         "type": "Function",
-        "lineno": 91
+        "lineno": 92
       },
       {
         "nodeid":
"tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[accounts/fireworks/models/llama4-maverick-instruct-basic-earth]", "type": "Function", - "lineno": 91 + "lineno": 92 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[accounts/fireworks/models/llama4-maverick-instruct-basic-saturn]", "type": "Function", - "lineno": 91 + "lineno": 92 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]", "type": "Function", - "lineno": 115 + "lineno": 116 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[accounts/fireworks/models/llama4-scout-instruct-basic-case0]", "type": "Function", - "lineno": 115 + "lineno": 116 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]", "type": "Function", - "lineno": 115 + "lineno": 116 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]", "type": "Function", - "lineno": 134 + "lineno": 135 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[accounts/fireworks/models/llama4-scout-instruct-basic-case0]", "type": "Function", - "lineno": 134 + "lineno": 135 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]", "type": "Function", - "lineno": 134 + "lineno": 135 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[accounts/fireworks/models/llama-v3p3-70b-instruct-calendar]", "type": "Function", - "lineno": 158 + "lineno": 159 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[accounts/fireworks/models/llama-v3p3-70b-instruct-math]", "type": "Function", - "lineno": 158 + "lineno": 159 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[accounts/fireworks/models/llama4-scout-instruct-basic-calendar]", "type": "Function", - "lineno": 158 + "lineno": 159 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[accounts/fireworks/models/llama4-scout-instruct-basic-math]", "type": "Function", - "lineno": 158 + "lineno": 159 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[accounts/fireworks/models/llama4-maverick-instruct-basic-calendar]", "type": "Function", - "lineno": 158 + "lineno": 159 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[accounts/fireworks/models/llama4-maverick-instruct-basic-math]", "type": "Function", - "lineno": 158 + "lineno": 159 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[accounts/fireworks/models/llama-v3p3-70b-instruct-calendar]", "type": "Function", - "lineno": 181 + "lineno": 182 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[accounts/fireworks/models/llama-v3p3-70b-instruct-math]", "type": "Function", - "lineno": 181 + "lineno": 182 }, { "nodeid": 
"tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[accounts/fireworks/models/llama4-scout-instruct-basic-calendar]", "type": "Function", - "lineno": 181 + "lineno": 182 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[accounts/fireworks/models/llama4-scout-instruct-basic-math]", "type": "Function", - "lineno": 181 + "lineno": 182 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[accounts/fireworks/models/llama4-maverick-instruct-basic-calendar]", "type": "Function", - "lineno": 181 + "lineno": 182 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[accounts/fireworks/models/llama4-maverick-instruct-basic-math]", "type": "Function", - "lineno": 181 + "lineno": 182 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]", "type": "Function", - "lineno": 203 + "lineno": 204 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-case0]", "type": "Function", - "lineno": 203 + "lineno": 204 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]", "type": "Function", - "lineno": 203 + "lineno": 204 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]", + "type": "Function", + "lineno": 228 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-case0]", + "type": "Function", + "lineno": 228 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]", + "type": "Function", + "lineno": 228 } ] } @@ -197,7 +212,7 @@ "tests": [ { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[accounts/fireworks/models/llama-v3p3-70b-instruct-earth]", - "lineno": 72, + "lineno": 73, "outcome": "passed", "keywords": [ "test_chat_non_streaming_basic[accounts/fireworks/models/llama-v3p3-70b-instruct-earth]", @@ -216,21 +231,21 @@ "case_id": "earth" }, "setup": { - "duration": 0.05236550001427531, + "duration": 0.2175025000469759, "outcome": "passed" }, "call": { - "duration": 0.5364967910572886, + "duration": 0.7433859170414507, "outcome": "passed" }, "teardown": { - "duration": 0.00015075004193931818, + "duration": 0.0001592918997630477, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[accounts/fireworks/models/llama-v3p3-70b-instruct-saturn]", - "lineno": 72, + "lineno": 73, "outcome": "passed", "keywords": [ "test_chat_non_streaming_basic[accounts/fireworks/models/llama-v3p3-70b-instruct-saturn]", @@ -249,21 +264,21 @@ "case_id": "saturn" }, "setup": { - "duration": 0.00699599995277822, + "duration": 0.007383499993011355, "outcome": "passed" }, "call": { - "duration": 0.5843954589217901, + "duration": 0.5949292909353971, "outcome": "passed" }, "teardown": { - "duration": 0.0003858329728245735, + "duration": 
0.00015891704242676497, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[accounts/fireworks/models/llama4-scout-instruct-basic-earth]", - "lineno": 72, + "lineno": 73, "outcome": "passed", "keywords": [ "test_chat_non_streaming_basic[accounts/fireworks/models/llama4-scout-instruct-basic-earth]", @@ -282,21 +297,21 @@ "case_id": "earth" }, "setup": { - "duration": 0.009176500025205314, + "duration": 0.010730999987572432, "outcome": "passed" }, "call": { - "duration": 0.9258683329680935, + "duration": 0.8945954169612378, "outcome": "passed" }, "teardown": { - "duration": 0.00015787500888109207, + "duration": 0.0003751249751076102, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[accounts/fireworks/models/llama4-scout-instruct-basic-saturn]", - "lineno": 72, + "lineno": 73, "outcome": "passed", "keywords": [ "test_chat_non_streaming_basic[accounts/fireworks/models/llama4-scout-instruct-basic-saturn]", @@ -315,21 +330,21 @@ "case_id": "saturn" }, "setup": { - "duration": 0.011275375029072165, + "duration": 0.01665666699409485, "outcome": "passed" }, "call": { - "duration": 0.6890578339807689, + "duration": 0.907927209045738, "outcome": "passed" }, "teardown": { - "duration": 0.0004926669644191861, + "duration": 0.00024874997325241566, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[accounts/fireworks/models/llama4-maverick-instruct-basic-earth]", - "lineno": 72, + "lineno": 73, "outcome": "passed", "keywords": [ "test_chat_non_streaming_basic[accounts/fireworks/models/llama4-maverick-instruct-basic-earth]", @@ -348,21 +363,21 @@ "case_id": "earth" }, "setup": { - "duration": 0.007520624902099371, + "duration": 0.01039199996739626, "outcome": "passed" }, "call": { - "duration": 0.6675686669768766, + "duration": 0.5971567500382662, "outcome": "passed" }, "teardown": { - "duration": 0.00016137503553181887, + "duration": 0.0003488330403342843, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[accounts/fireworks/models/llama4-maverick-instruct-basic-saturn]", - "lineno": 72, + "lineno": 73, "outcome": "passed", "keywords": [ "test_chat_non_streaming_basic[accounts/fireworks/models/llama4-maverick-instruct-basic-saturn]", @@ -381,21 +396,21 @@ "case_id": "saturn" }, "setup": { - "duration": 0.0076431670458987355, + "duration": 0.018627874902449548, "outcome": "passed" }, "call": { - "duration": 1.6813415409997106, + "duration": 2.0586736251134425, "outcome": "passed" }, "teardown": { - "duration": 0.0004928340204060078, + "duration": 0.00046974990982562304, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[accounts/fireworks/models/llama-v3p3-70b-instruct-earth]", - "lineno": 91, + "lineno": 92, "outcome": "passed", "keywords": [ "test_chat_streaming_basic[accounts/fireworks/models/llama-v3p3-70b-instruct-earth]", @@ -414,21 +429,21 @@ "case_id": "earth" }, "setup": { - "duration": 0.01302404107991606, + "duration": 0.01706262503284961, "outcome": "passed" }, "call": { - "duration": 1.3206909999717027, + "duration": 0.6679969580145553, "outcome": "passed" }, "teardown": { - "duration": 0.0002220839960500598, + "duration": 0.0004670419730246067, "outcome": "passed" } }, { "nodeid": 
"tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[accounts/fireworks/models/llama-v3p3-70b-instruct-saturn]", - "lineno": 91, + "lineno": 92, "outcome": "passed", "keywords": [ "test_chat_streaming_basic[accounts/fireworks/models/llama-v3p3-70b-instruct-saturn]", @@ -447,21 +462,21 @@ "case_id": "saturn" }, "setup": { - "duration": 0.0071772499941289425, + "duration": 0.025956374942325056, "outcome": "passed" }, "call": { - "duration": 0.4109888339880854, + "duration": 2.052679874934256, "outcome": "passed" }, "teardown": { - "duration": 0.0005431669997051358, + "duration": 0.00026958296075463295, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[accounts/fireworks/models/llama4-scout-instruct-basic-earth]", - "lineno": 91, + "lineno": 92, "outcome": "passed", "keywords": [ "test_chat_streaming_basic[accounts/fireworks/models/llama4-scout-instruct-basic-earth]", @@ -480,21 +495,21 @@ "case_id": "earth" }, "setup": { - "duration": 0.012043708004057407, + "duration": 0.015856957994401455, "outcome": "passed" }, "call": { - "duration": 0.4509220840409398, + "duration": 0.3096678329166025, "outcome": "passed" }, "teardown": { - "duration": 0.00016408402007073164, + "duration": 0.0007620420074090362, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[accounts/fireworks/models/llama4-scout-instruct-basic-saturn]", - "lineno": 91, + "lineno": 92, "outcome": "passed", "keywords": [ "test_chat_streaming_basic[accounts/fireworks/models/llama4-scout-instruct-basic-saturn]", @@ -513,21 +528,21 @@ "case_id": "saturn" }, "setup": { - "duration": 0.007165874936617911, + "duration": 0.013509334065020084, "outcome": "passed" }, "call": { - "duration": 0.6527335830032825, + "duration": 0.5914681670255959, "outcome": "passed" }, "teardown": { - "duration": 0.0006419579731300473, + "duration": 0.0002906669396907091, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[accounts/fireworks/models/llama4-maverick-instruct-basic-earth]", - "lineno": 91, + "lineno": 92, "outcome": "passed", "keywords": [ "test_chat_streaming_basic[accounts/fireworks/models/llama4-maverick-instruct-basic-earth]", @@ -546,21 +561,21 @@ "case_id": "earth" }, "setup": { - "duration": 0.007546542095951736, + "duration": 0.013216375024057925, "outcome": "passed" }, "call": { - "duration": 0.9360042089829221, + "duration": 1.8804527079919353, "outcome": "passed" }, "teardown": { - "duration": 0.00020483299158513546, + "duration": 0.0002026669681072235, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[accounts/fireworks/models/llama4-maverick-instruct-basic-saturn]", - "lineno": 91, + "lineno": 92, "outcome": "passed", "keywords": [ "test_chat_streaming_basic[accounts/fireworks/models/llama4-maverick-instruct-basic-saturn]", @@ -579,21 +594,21 @@ "case_id": "saturn" }, "setup": { - "duration": 0.046697250101715326, + "duration": 0.00827441702131182, "outcome": "passed" }, "call": { - "duration": 0.668349124956876, + "duration": 0.7407040420221165, "outcome": "passed" }, "teardown": { - "duration": 0.0005031249020248652, + "duration": 0.0005084159784018993, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]", 
- "lineno": 115, + "lineno": 116, "outcome": "skipped", "keywords": [ "test_chat_non_streaming_image[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]", @@ -612,22 +627,22 @@ "case_id": "case0" }, "setup": { - "duration": 0.012287458986975253, + "duration": 0.012424499960616231, "outcome": "passed" }, "call": { - "duration": 0.00015287497080862522, + "duration": 0.00032762496266514063, "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py', 124, 'Skipped: Skipping test_chat_non_streaming_image for model accounts/fireworks/models/llama-v3p3-70b-instruct on provider fireworks based on config.')" + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py', 125, 'Skipped: Skipping test_chat_non_streaming_image for model accounts/fireworks/models/llama-v3p3-70b-instruct on provider fireworks based on config.')" }, "teardown": { - "duration": 0.00012162502389401197, + "duration": 0.00032416603062301874, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[accounts/fireworks/models/llama4-scout-instruct-basic-case0]", - "lineno": 115, + "lineno": 116, "outcome": "passed", "keywords": [ "test_chat_non_streaming_image[accounts/fireworks/models/llama4-scout-instruct-basic-case0]", @@ -646,21 +661,21 @@ "case_id": "case0" }, "setup": { - "duration": 0.007204124936833978, + "duration": 0.02253958396613598, "outcome": "passed" }, "call": { - "duration": 1.8676417920505628, + "duration": 2.64042466704268, "outcome": "passed" }, "teardown": { - "duration": 0.0001557499635964632, + "duration": 0.0003636250039562583, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]", - "lineno": 115, + "lineno": 116, "outcome": "passed", "keywords": [ "test_chat_non_streaming_image[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]", @@ -679,21 +694,21 @@ "case_id": "case0" }, "setup": { - "duration": 0.008226625039242208, + "duration": 0.014634749968536198, "outcome": "passed" }, "call": { - "duration": 3.2724285409785807, + "duration": 5.126485540997237, "outcome": "passed" }, "teardown": { - "duration": 0.0002898330567404628, + "duration": 0.0002988330088555813, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]", - "lineno": 134, + "lineno": 135, "outcome": "skipped", "keywords": [ "test_chat_streaming_image[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]", @@ -712,22 +727,22 @@ "case_id": "case0" }, "setup": { - "duration": 0.011927249957807362, + "duration": 0.015854416065849364, "outcome": "passed" }, "call": { - "duration": 0.00017358292825520039, + "duration": 0.00038058299105614424, "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py', 143, 'Skipped: Skipping test_chat_streaming_image for model accounts/fireworks/models/llama-v3p3-70b-instruct on provider fireworks based on config.')" + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py', 144, 'Skipped: Skipping test_chat_streaming_image for model accounts/fireworks/models/llama-v3p3-70b-instruct on provider fireworks based on config.')" }, 
"teardown": { - "duration": 0.00014037499204277992, + "duration": 0.0002689170651137829, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[accounts/fireworks/models/llama4-scout-instruct-basic-case0]", - "lineno": 134, + "lineno": 135, "outcome": "passed", "keywords": [ "test_chat_streaming_image[accounts/fireworks/models/llama4-scout-instruct-basic-case0]", @@ -746,21 +761,21 @@ "case_id": "case0" }, "setup": { - "duration": 0.008731417008675635, + "duration": 0.011205915943719447, "outcome": "passed" }, "call": { - "duration": 2.8333610829431564, + "duration": 3.2596546669956297, "outcome": "passed" }, "teardown": { - "duration": 0.0005132080987095833, + "duration": 0.0006222500232979655, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]", - "lineno": 134, + "lineno": 135, "outcome": "passed", "keywords": [ "test_chat_streaming_image[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]", @@ -779,21 +794,21 @@ "case_id": "case0" }, "setup": { - "duration": 0.016569208004511893, + "duration": 0.016557667055167258, "outcome": "passed" }, "call": { - "duration": 2.302010750048794, + "duration": 4.930164708988741, "outcome": "passed" }, "teardown": { - "duration": 0.00016108399722725153, + "duration": 0.00048687495291233063, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[accounts/fireworks/models/llama-v3p3-70b-instruct-calendar]", - "lineno": 158, + "lineno": 159, "outcome": "passed", "keywords": [ "test_chat_non_streaming_structured_output[accounts/fireworks/models/llama-v3p3-70b-instruct-calendar]", @@ -812,21 +827,21 @@ "case_id": "calendar" }, "setup": { - "duration": 0.039960999973118305, + "duration": 0.00886166701093316, "outcome": "passed" }, "call": { - "duration": 7.661373125039972, + "duration": 0.8833738330285996, "outcome": "passed" }, "teardown": { - "duration": 0.00015833403449505568, + "duration": 0.00025583396200090647, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[accounts/fireworks/models/llama-v3p3-70b-instruct-math]", - "lineno": 158, + "lineno": 159, "outcome": "passed", "keywords": [ "test_chat_non_streaming_structured_output[accounts/fireworks/models/llama-v3p3-70b-instruct-math]", @@ -845,21 +860,21 @@ "case_id": "math" }, "setup": { - "duration": 0.006928625050932169, + "duration": 0.01297520799562335, "outcome": "passed" }, "call": { - "duration": 2.762534625013359, + "duration": 1.9960687910206616, "outcome": "passed" }, "teardown": { - "duration": 0.0006561250193044543, + "duration": 0.0005048330640420318, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[accounts/fireworks/models/llama4-scout-instruct-basic-calendar]", - "lineno": 158, + "lineno": 159, "outcome": "passed", "keywords": [ "test_chat_non_streaming_structured_output[accounts/fireworks/models/llama4-scout-instruct-basic-calendar]", @@ -878,21 +893,21 @@ "case_id": "calendar" }, "setup": { - "duration": 0.008602249901741743, + "duration": 0.007275875075720251, "outcome": "passed" }, "call": { - "duration": 0.8311484589939937, + "duration": 0.9094266659813002, "outcome": "passed" }, "teardown": { - "duration": 
0.0005021670367568731, + "duration": 0.00028041598852723837, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[accounts/fireworks/models/llama4-scout-instruct-basic-math]", - "lineno": 158, + "lineno": 159, "outcome": "passed", "keywords": [ "test_chat_non_streaming_structured_output[accounts/fireworks/models/llama4-scout-instruct-basic-math]", @@ -911,21 +926,21 @@ "case_id": "math" }, "setup": { - "duration": 0.015500334091484547, + "duration": 0.008899332955479622, "outcome": "passed" }, "call": { - "duration": 2.505719291046262, + "duration": 3.117967874975875, "outcome": "passed" }, "teardown": { - "duration": 0.0002619170118123293, + "duration": 0.00017600005958229303, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[accounts/fireworks/models/llama4-maverick-instruct-basic-calendar]", - "lineno": 158, + "lineno": 159, "outcome": "passed", "keywords": [ "test_chat_non_streaming_structured_output[accounts/fireworks/models/llama4-maverick-instruct-basic-calendar]", @@ -944,21 +959,21 @@ "case_id": "calendar" }, "setup": { - "duration": 0.01948041608557105, + "duration": 0.0073364999843761325, "outcome": "passed" }, "call": { - "duration": 0.6336237500654534, + "duration": 2.2714374579954892, "outcome": "passed" }, "teardown": { - "duration": 0.00016637507360428572, + "duration": 0.0001814159331843257, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[accounts/fireworks/models/llama4-maverick-instruct-basic-math]", - "lineno": 158, + "lineno": 159, "outcome": "passed", "keywords": [ "test_chat_non_streaming_structured_output[accounts/fireworks/models/llama4-maverick-instruct-basic-math]", @@ -977,21 +992,21 @@ "case_id": "math" }, "setup": { - "duration": 0.006810749997384846, + "duration": 0.010546459001488984, "outcome": "passed" }, "call": { - "duration": 1.9086956249084324, + "duration": 3.9954450000077486, "outcome": "passed" }, "teardown": { - "duration": 0.00018824997823685408, + "duration": 0.0002719159238040447, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[accounts/fireworks/models/llama-v3p3-70b-instruct-calendar]", - "lineno": 181, + "lineno": 182, "outcome": "passed", "keywords": [ "test_chat_streaming_structured_output[accounts/fireworks/models/llama-v3p3-70b-instruct-calendar]", @@ -1010,21 +1025,21 @@ "case_id": "calendar" }, "setup": { - "duration": 0.007881582947447896, + "duration": 0.012508000014349818, "outcome": "passed" }, "call": { - "duration": 0.7142562499502674, + "duration": 9.095425167004578, "outcome": "passed" }, "teardown": { - "duration": 0.0007035828894004226, + "duration": 0.00029200001154094934, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[accounts/fireworks/models/llama-v3p3-70b-instruct-math]", - "lineno": 181, + "lineno": 182, "outcome": "passed", "keywords": [ "test_chat_streaming_structured_output[accounts/fireworks/models/llama-v3p3-70b-instruct-math]", @@ -1043,21 +1058,21 @@ "case_id": "math" }, "setup": { - "duration": 0.00848070892971009, + "duration": 0.014769250061362982, "outcome": "passed" }, "call": { - "duration": 1.5210869159782305, + "duration": 1.9875252910424024, "outcome": "passed" }, "teardown": { - 
"duration": 0.00021216599270701408, + "duration": 0.0006288329605013132, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[accounts/fireworks/models/llama4-scout-instruct-basic-calendar]", - "lineno": 181, + "lineno": 182, "outcome": "passed", "keywords": [ "test_chat_streaming_structured_output[accounts/fireworks/models/llama4-scout-instruct-basic-calendar]", @@ -1076,21 +1091,21 @@ "case_id": "calendar" }, "setup": { - "duration": 0.009669666993431747, + "duration": 0.014440709026530385, "outcome": "passed" }, "call": { - "duration": 1.3105999580584466, + "duration": 1.2613736250204965, "outcome": "passed" }, "teardown": { - "duration": 0.000588166993111372, + "duration": 0.0001937919296324253, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[accounts/fireworks/models/llama4-scout-instruct-basic-math]", - "lineno": 181, + "lineno": 182, "outcome": "passed", "keywords": [ "test_chat_streaming_structured_output[accounts/fireworks/models/llama4-scout-instruct-basic-math]", @@ -1109,21 +1124,21 @@ "case_id": "math" }, "setup": { - "duration": 0.007745541981421411, + "duration": 0.0071510839043185115, "outcome": "passed" }, "call": { - "duration": 3.250162083073519, + "duration": 2.2953888749470934, "outcome": "passed" }, "teardown": { - "duration": 0.0001455000601708889, + "duration": 0.00016245793085545301, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[accounts/fireworks/models/llama4-maverick-instruct-basic-calendar]", - "lineno": 181, + "lineno": 182, "outcome": "passed", "keywords": [ "test_chat_streaming_structured_output[accounts/fireworks/models/llama4-maverick-instruct-basic-calendar]", @@ -1142,21 +1157,21 @@ "case_id": "calendar" }, "setup": { - "duration": 0.009726207936182618, + "duration": 0.007294666953384876, "outcome": "passed" }, "call": { - "duration": 0.5564592910232022, + "duration": 2.194703874993138, "outcome": "passed" }, "teardown": { - "duration": 0.00019470800179988146, + "duration": 0.00017604196909815073, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[accounts/fireworks/models/llama4-maverick-instruct-basic-math]", - "lineno": 181, + "lineno": 182, "outcome": "passed", "keywords": [ "test_chat_streaming_structured_output[accounts/fireworks/models/llama4-maverick-instruct-basic-math]", @@ -1175,21 +1190,21 @@ "case_id": "math" }, "setup": { - "duration": 0.018431040924042463, + "duration": 0.019950625021010637, "outcome": "passed" }, "call": { - "duration": 3.8501765420660377, + "duration": 8.4994609169662, "outcome": "passed" }, "teardown": { - "duration": 0.00015279196668416262, + "duration": 0.00026404205709695816, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]", - "lineno": 203, + "lineno": 204, "outcome": "failed", "keywords": [ "test_chat_non_streaming_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]", @@ -1208,34 +1223,34 @@ "case_id": "case0" }, "setup": { - "duration": 0.007509749964810908, + "duration": 0.011928000021725893, "outcome": "passed" }, "call": { - "duration": 0.4906975000631064, + "duration": 0.5664792089955881, "outcome": "failed", "crash": { "path": 
"/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 222, + "lineno": 223, "message": "TypeError: object of type 'NoneType' has no len()" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 222, + "lineno": 223, "message": "TypeError" } ], - "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama-v3p3-70b-instruct'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_non_streaming_tool_calling(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n stream=False,\n )\n \n assert response.choices[0].message.role == \"assistant\"\n> assert len(response.choices[0].message.tool_calls) > 0\nE TypeError: object of type 'NoneType' has no len()\n\ntests/verifications/openai_api/test_chat_completion.py:222: TypeError" + "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama-v3p3-70b-instruct'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_non_streaming_tool_calling(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n stream=False,\n )\n \n assert response.choices[0].message.role == \"assistant\"\n> assert len(response.choices[0].message.tool_calls) > 0\nE TypeError: object of type 'NoneType' has no len()\n\ntests/verifications/openai_api/test_chat_completion.py:223: TypeError" }, "teardown": { - "duration": 0.00023995805531740189, + "duration": 0.00023799994960427284, "outcome": "passed" } 
}, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-case0]", - "lineno": 203, + "lineno": 204, "outcome": "failed", "keywords": [ "test_chat_non_streaming_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-case0]", @@ -1254,34 +1269,34 @@ "case_id": "case0" }, "setup": { - "duration": 0.007144959061406553, + "duration": 0.006813624990172684, "outcome": "passed" }, "call": { - "duration": 3.818257624981925, + "duration": 3.170418416033499, "outcome": "failed", "crash": { "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 222, + "lineno": 223, "message": "TypeError: object of type 'NoneType' has no len()" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 222, + "lineno": 223, "message": "TypeError" } ], - "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-scout-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_non_streaming_tool_calling(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n stream=False,\n )\n \n assert response.choices[0].message.role == \"assistant\"\n> assert len(response.choices[0].message.tool_calls) > 0\nE TypeError: object of type 'NoneType' has no len()\n\ntests/verifications/openai_api/test_chat_completion.py:222: TypeError" + "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-scout-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_non_streaming_tool_calling(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n 
pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n stream=False,\n )\n \n assert response.choices[0].message.role == \"assistant\"\n> assert len(response.choices[0].message.tool_calls) > 0\nE TypeError: object of type 'NoneType' has no len()\n\ntests/verifications/openai_api/test_chat_completion.py:223: TypeError" }, "teardown": { - "duration": 0.0002668750239536166, + "duration": 0.0004129580920562148, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]", - "lineno": 203, + "lineno": 204, "outcome": "failed", "keywords": [ "test_chat_non_streaming_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]", @@ -1300,30 +1315,169 @@ "case_id": "case0" }, "setup": { - "duration": 0.015290249953977764, + "duration": 0.01656208303757012, "outcome": "passed" }, "call": { - "duration": 1.5883799999719486, + "duration": 22.76337137504015, "outcome": "failed", "crash": { "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 222, + "lineno": 223, "message": "TypeError: object of type 'NoneType' has no len()" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 222, + "lineno": 223, "message": "TypeError" } ], - "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-maverick-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_non_streaming_tool_calling(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n stream=False,\n )\n \n assert response.choices[0].message.role == \"assistant\"\n> assert len(response.choices[0].message.tool_calls) > 0\nE TypeError: object of type 'NoneType' has no len()\n\ntests/verifications/openai_api/test_chat_completion.py:222: TypeError" + "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-maverick-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = 
{'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_non_streaming_tool_calling(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n stream=False,\n )\n \n assert response.choices[0].message.role == \"assistant\"\n> assert len(response.choices[0].message.tool_calls) > 0\nE TypeError: object of type 'NoneType' has no len()\n\ntests/verifications/openai_api/test_chat_completion.py:223: TypeError" }, "teardown": { - "duration": 0.0008049579337239265, + "duration": 0.00038704206235706806, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]", + "lineno": 228, + "outcome": "failed", + "keywords": [ + "test_chat_streaming_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]", + "parametrize", + "pytestmark", + "accounts/fireworks/models/llama-v3p3-70b-instruct-case0", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "accounts/fireworks/models/llama-v3p3-70b-instruct", + "case_id": "case0" + }, + "setup": { + "duration": 0.015727541991509497, + "outcome": "passed" + }, + "call": { + "duration": 0.5719050420448184, + "outcome": "failed", + "crash": { + "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", + "lineno": 274, + "message": "assert 0 == 1\n + where 0 = len({})" + }, + "traceback": [ + { + "path": "tests/verifications/openai_api/test_chat_completion.py", + "lineno": 274, + "message": "AssertionError" + } + ], + "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama-v3p3-70b-instruct'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_tool_calling(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n stream = openai_client.chat.completions.create(\n 
model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n stream=True,\n )\n \n # Accumulate partial tool_calls here\n tool_calls_buffer = {}\n current_id = None\n # Process streaming chunks\n for chunk in stream:\n choice = chunk.choices[0]\n delta = choice.delta\n \n if delta.tool_calls is None:\n continue\n \n for tool_call_delta in delta.tool_calls:\n if tool_call_delta.id:\n current_id = tool_call_delta.id\n call_id = current_id\n func_delta = tool_call_delta.function\n \n if call_id not in tool_calls_buffer:\n tool_calls_buffer[call_id] = {\n \"id\": call_id,\n \"type\": tool_call_delta.type,\n \"name\": func_delta.name,\n \"arguments\": \"\",\n }\n \n if func_delta.arguments:\n tool_calls_buffer[call_id][\"arguments\"] += func_delta.arguments\n \n> assert len(tool_calls_buffer) == 1\nE assert 0 == 1\nE + where 0 = len({})\n\ntests/verifications/openai_api/test_chat_completion.py:274: AssertionError" + }, + "teardown": { + "duration": 0.0003532909322530031, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-case0]", + "lineno": 228, + "outcome": "failed", + "keywords": [ + "test_chat_streaming_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-case0]", + "parametrize", + "pytestmark", + "accounts/fireworks/models/llama4-scout-instruct-basic-case0", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "accounts/fireworks/models/llama4-scout-instruct-basic", + "case_id": "case0" + }, + "setup": { + "duration": 0.011914041941054165, + "outcome": "passed" + }, + "call": { + "duration": 5.403063916950487, + "outcome": "failed", + "crash": { + "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", + "lineno": 274, + "message": "assert 0 == 1\n + where 0 = len({})" + }, + "traceback": [ + { + "path": "tests/verifications/openai_api/test_chat_completion.py", + "lineno": 274, + "message": "AssertionError" + } + ], + "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-scout-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_tool_calling(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n stream = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n stream=True,\n )\n \n # Accumulate partial tool_calls here\n tool_calls_buffer = {}\n current_id = None\n # 
Process streaming chunks\n for chunk in stream:\n choice = chunk.choices[0]\n delta = choice.delta\n \n if delta.tool_calls is None:\n continue\n \n for tool_call_delta in delta.tool_calls:\n if tool_call_delta.id:\n current_id = tool_call_delta.id\n call_id = current_id\n func_delta = tool_call_delta.function\n \n if call_id not in tool_calls_buffer:\n tool_calls_buffer[call_id] = {\n \"id\": call_id,\n \"type\": tool_call_delta.type,\n \"name\": func_delta.name,\n \"arguments\": \"\",\n }\n \n if func_delta.arguments:\n tool_calls_buffer[call_id][\"arguments\"] += func_delta.arguments\n \n> assert len(tool_calls_buffer) == 1\nE assert 0 == 1\nE + where 0 = len({})\n\ntests/verifications/openai_api/test_chat_completion.py:274: AssertionError" + }, + "teardown": { + "duration": 0.0005193749675527215, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]", + "lineno": 228, + "outcome": "failed", + "keywords": [ + "test_chat_streaming_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]", + "parametrize", + "pytestmark", + "accounts/fireworks/models/llama4-maverick-instruct-basic-case0", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "accounts/fireworks/models/llama4-maverick-instruct-basic", + "case_id": "case0" + }, + "setup": { + "duration": 0.012608832912519574, + "outcome": "passed" + }, + "call": { + "duration": 7.587262416025624, + "outcome": "failed", + "crash": { + "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", + "lineno": 274, + "message": "assert 0 == 1\n + where 0 = len({})" + }, + "traceback": [ + { + "path": "tests/verifications/openai_api/test_chat_completion.py", + "lineno": 274, + "message": "AssertionError" + } + ], + "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-maverick-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_tool_calling(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n stream = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n stream=True,\n )\n \n # Accumulate partial tool_calls here\n tool_calls_buffer = {}\n current_id = None\n # Process streaming chunks\n for chunk in stream:\n choice = chunk.choices[0]\n delta = choice.delta\n \n if delta.tool_calls is None:\n continue\n \n for tool_call_delta in delta.tool_calls:\n 
if tool_call_delta.id:\n current_id = tool_call_delta.id\n call_id = current_id\n func_delta = tool_call_delta.function\n \n if call_id not in tool_calls_buffer:\n tool_calls_buffer[call_id] = {\n \"id\": call_id,\n \"type\": tool_call_delta.type,\n \"name\": func_delta.name,\n \"arguments\": \"\",\n }\n \n if func_delta.arguments:\n tool_calls_buffer[call_id][\"arguments\"] += func_delta.arguments\n \n> assert len(tool_calls_buffer) == 1\nE assert 0 == 1\nE + where 0 = len({})\n\ntests/verifications/openai_api/test_chat_completion.py:274: AssertionError" + }, + "teardown": { + "duration": 0.0008685829816386104, "outcome": "passed" } } - ] + ], + "run_timestamp": 1744328684 } diff --git a/tests/verifications/test_results/openai_1744264304.json b/tests/verifications/test_results/openai.json similarity index 77% rename from tests/verifications/test_results/openai_1744264304.json rename to tests/verifications/test_results/openai.json index fe9c2fcac..0c1892f7e 100644 --- a/tests/verifications/test_results/openai_1744264304.json +++ b/tests/verifications/test_results/openai.json @@ -1,13 +1,13 @@ { - "created": 1744264338.9923031, - "duration": 32.825536012649536, + "created": 1744328898.0248861, + "duration": 47.561042070388794, "exitcode": 0, "root": "/Users/erichuang/projects/llama-stack", "environment": {}, "summary": { - "passed": 22, - "total": 22, - "collected": 22 + "passed": 24, + "total": 24, + "collected": 24 }, "collectors": [ { @@ -27,112 +27,122 @@ { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[gpt-4o-earth]", "type": "Function", - "lineno": 72 + "lineno": 73 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[gpt-4o-saturn]", "type": "Function", - "lineno": 72 + "lineno": 73 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[gpt-4o-mini-earth]", "type": "Function", - "lineno": 72 + "lineno": 73 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[gpt-4o-mini-saturn]", "type": "Function", - "lineno": 72 + "lineno": 73 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[gpt-4o-earth]", "type": "Function", - "lineno": 91 + "lineno": 92 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[gpt-4o-saturn]", "type": "Function", - "lineno": 91 + "lineno": 92 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[gpt-4o-mini-earth]", "type": "Function", - "lineno": 91 + "lineno": 92 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[gpt-4o-mini-saturn]", "type": "Function", - "lineno": 91 + "lineno": 92 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[gpt-4o-case0]", "type": "Function", - "lineno": 115 + "lineno": 116 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[gpt-4o-mini-case0]", "type": "Function", - "lineno": 115 + "lineno": 116 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[gpt-4o-case0]", "type": "Function", - "lineno": 134 + "lineno": 135 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[gpt-4o-mini-case0]", "type": "Function", - "lineno": 134 + "lineno": 135 }, { "nodeid": 
"tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[gpt-4o-calendar]", "type": "Function", - "lineno": 158 + "lineno": 159 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[gpt-4o-math]", "type": "Function", - "lineno": 158 + "lineno": 159 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[gpt-4o-mini-calendar]", "type": "Function", - "lineno": 158 + "lineno": 159 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[gpt-4o-mini-math]", "type": "Function", - "lineno": 158 + "lineno": 159 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[gpt-4o-calendar]", "type": "Function", - "lineno": 181 + "lineno": 182 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[gpt-4o-math]", "type": "Function", - "lineno": 181 + "lineno": 182 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[gpt-4o-mini-calendar]", "type": "Function", - "lineno": 181 + "lineno": 182 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[gpt-4o-mini-math]", "type": "Function", - "lineno": 181 + "lineno": 182 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[gpt-4o-case0]", "type": "Function", - "lineno": 203 + "lineno": 204 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[gpt-4o-mini-case0]", "type": "Function", - "lineno": 203 + "lineno": 204 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[gpt-4o-case0]", + "type": "Function", + "lineno": 228 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[gpt-4o-mini-case0]", + "type": "Function", + "lineno": 228 } ] } @@ -140,7 +150,7 @@ "tests": [ { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[gpt-4o-earth]", - "lineno": 72, + "lineno": 73, "outcome": "passed", "keywords": [ "test_chat_non_streaming_basic[gpt-4o-earth]", @@ -159,21 +169,21 @@ "case_id": "earth" }, "setup": { - "duration": 0.05381445901002735, + "duration": 0.0694252080284059, "outcome": "passed" }, "call": { - "duration": 0.49848275003023446, + "duration": 0.5709165419684723, "outcome": "passed" }, "teardown": { - "duration": 0.00018287496641278267, + "duration": 0.0007626248989254236, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[gpt-4o-saturn]", - "lineno": 72, + "lineno": 73, "outcome": "passed", "keywords": [ "test_chat_non_streaming_basic[gpt-4o-saturn]", @@ -192,21 +202,21 @@ "case_id": "saturn" }, "setup": { - "duration": 0.007965500000864267, + "duration": 0.010281750001013279, "outcome": "passed" }, "call": { - "duration": 0.9293275829404593, + "duration": 0.6309260830748826, "outcome": "passed" }, "teardown": { - "duration": 0.00018229195848107338, + "duration": 0.0001824579667299986, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[gpt-4o-mini-earth]", - "lineno": 72, + "lineno": 73, "outcome": "passed", 
"keywords": [ "test_chat_non_streaming_basic[gpt-4o-mini-earth]", @@ -225,21 +235,21 @@ "case_id": "earth" }, "setup": { - "duration": 0.00875679193995893, + "duration": 0.007922374992631376, "outcome": "passed" }, "call": { - "duration": 0.5793640419142321, + "duration": 0.31756504194345325, "outcome": "passed" }, "teardown": { - "duration": 0.0005307920509949327, + "duration": 0.0005268750246614218, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[gpt-4o-mini-saturn]", - "lineno": 72, + "lineno": 73, "outcome": "passed", "keywords": [ "test_chat_non_streaming_basic[gpt-4o-mini-saturn]", @@ -258,21 +268,21 @@ "case_id": "saturn" }, "setup": { - "duration": 0.01076845801435411, + "duration": 0.01643404201604426, "outcome": "passed" }, "call": { - "duration": 0.8752291660057381, + "duration": 0.7479908330133185, "outcome": "passed" }, "teardown": { - "duration": 0.0004834589781239629, + "duration": 0.0004037501057609916, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[gpt-4o-earth]", - "lineno": 91, + "lineno": 92, "outcome": "passed", "keywords": [ "test_chat_streaming_basic[gpt-4o-earth]", @@ -291,21 +301,21 @@ "case_id": "earth" }, "setup": { - "duration": 0.01662245800253004, + "duration": 0.021671707974746823, "outcome": "passed" }, "call": { - "duration": 0.8336971249664202, + "duration": 0.6701172919711098, "outcome": "passed" }, "teardown": { - "duration": 0.0024086670018732548, + "duration": 0.0005569590721279383, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[gpt-4o-saturn]", - "lineno": 91, + "lineno": 92, "outcome": "passed", "keywords": [ "test_chat_streaming_basic[gpt-4o-saturn]", @@ -324,21 +334,21 @@ "case_id": "saturn" }, "setup": { - "duration": 0.009416291955858469, + "duration": 0.015847125090658665, "outcome": "passed" }, "call": { - "duration": 0.43594495789147913, + "duration": 0.636536999954842, "outcome": "passed" }, "teardown": { - "duration": 0.0009131249971687794, + "duration": 0.00029395800083875656, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[gpt-4o-mini-earth]", - "lineno": 91, + "lineno": 92, "outcome": "passed", "keywords": [ "test_chat_streaming_basic[gpt-4o-mini-earth]", @@ -357,21 +367,21 @@ "case_id": "earth" }, "setup": { - "duration": 0.013155042077414691, + "duration": 0.011792832985520363, "outcome": "passed" }, "call": { - "duration": 0.6119836670113727, + "duration": 0.5610962919890881, "outcome": "passed" }, "teardown": { - "duration": 0.00023804197553545237, + "duration": 0.0003578749019652605, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[gpt-4o-mini-saturn]", - "lineno": 91, + "lineno": 92, "outcome": "passed", "keywords": [ "test_chat_streaming_basic[gpt-4o-mini-saturn]", @@ -390,21 +400,21 @@ "case_id": "saturn" }, "setup": { - "duration": 0.009004916995763779, + "duration": 0.016500207944773138, "outcome": "passed" }, "call": { - "duration": 0.8327413749648258, + "duration": 0.8060244580265135, "outcome": "passed" }, "teardown": { - "duration": 0.00046841695439070463, + "duration": 0.0005296670133247972, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[gpt-4o-case0]", - "lineno": 115, + "lineno": 116, 
"outcome": "passed", "keywords": [ "test_chat_non_streaming_image[gpt-4o-case0]", @@ -423,21 +433,21 @@ "case_id": "case0" }, "setup": { - "duration": 0.009574208059348166, + "duration": 0.008338792016729712, "outcome": "passed" }, "call": { - "duration": 2.221839000005275, + "duration": 7.009252917021513, "outcome": "passed" }, "teardown": { - "duration": 0.00015945907216519117, + "duration": 0.0003042910248041153, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[gpt-4o-mini-case0]", - "lineno": 115, + "lineno": 116, "outcome": "passed", "keywords": [ "test_chat_non_streaming_image[gpt-4o-mini-case0]", @@ -456,21 +466,21 @@ "case_id": "case0" }, "setup": { - "duration": 0.0084402080392465, + "duration": 0.007238540914840996, "outcome": "passed" }, "call": { - "duration": 2.298736457945779, + "duration": 3.134693874977529, "outcome": "passed" }, "teardown": { - "duration": 0.0002423750702291727, + "duration": 0.0003104590578004718, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[gpt-4o-case0]", - "lineno": 134, + "lineno": 135, "outcome": "passed", "keywords": [ "test_chat_streaming_image[gpt-4o-case0]", @@ -489,21 +499,21 @@ "case_id": "case0" }, "setup": { - "duration": 0.007330416003242135, + "duration": 0.0161851670127362, "outcome": "passed" }, "call": { - "duration": 4.062959833070636, + "duration": 3.0745719589758664, "outcome": "passed" }, "teardown": { - "duration": 0.00015470804646611214, + "duration": 0.00022620800882577896, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[gpt-4o-mini-case0]", - "lineno": 134, + "lineno": 135, "outcome": "passed", "keywords": [ "test_chat_streaming_image[gpt-4o-mini-case0]", @@ -522,21 +532,21 @@ "case_id": "case0" }, "setup": { - "duration": 0.019998832955025136, + "duration": 0.013220708002336323, "outcome": "passed" }, "call": { - "duration": 2.609432084020227, + "duration": 3.624867417034693, "outcome": "passed" }, "teardown": { - "duration": 0.005618917057290673, + "duration": 0.00020633300300687551, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[gpt-4o-calendar]", - "lineno": 158, + "lineno": 159, "outcome": "passed", "keywords": [ "test_chat_non_streaming_structured_output[gpt-4o-calendar]", @@ -555,21 +565,21 @@ "case_id": "calendar" }, "setup": { - "duration": 0.00867662497330457, + "duration": 0.017596833989955485, "outcome": "passed" }, "call": { - "duration": 0.6856697499752045, + "duration": 1.248568250099197, "outcome": "passed" }, "teardown": { - "duration": 0.00018445902969688177, + "duration": 0.0004248750628903508, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[gpt-4o-math]", - "lineno": 158, + "lineno": 159, "outcome": "passed", "keywords": [ "test_chat_non_streaming_structured_output[gpt-4o-math]", @@ -588,21 +598,21 @@ "case_id": "math" }, "setup": { - "duration": 0.01139050000347197, + "duration": 0.01512012502644211, "outcome": "passed" }, "call": { - "duration": 2.764390083961189, + "duration": 8.170285542029887, "outcome": "passed" }, "teardown": { - "duration": 0.0003164170775562525, + "duration": 0.00043537491001188755, "outcome": "passed" } }, { "nodeid": 
"tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[gpt-4o-mini-calendar]", - "lineno": 158, + "lineno": 159, "outcome": "passed", "keywords": [ "test_chat_non_streaming_structured_output[gpt-4o-mini-calendar]", @@ -621,21 +631,21 @@ "case_id": "calendar" }, "setup": { - "duration": 0.01321374997496605, + "duration": 0.010376665974035859, "outcome": "passed" }, "call": { - "duration": 0.8284227909753099, + "duration": 0.756480542011559, "outcome": "passed" }, "teardown": { - "duration": 0.00030170800164341927, + "duration": 0.00025695806834846735, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[gpt-4o-mini-math]", - "lineno": 158, + "lineno": 159, "outcome": "passed", "keywords": [ "test_chat_non_streaming_structured_output[gpt-4o-mini-math]", @@ -654,21 +664,21 @@ "case_id": "math" }, "setup": { - "duration": 0.013477458036504686, + "duration": 0.006846625008620322, "outcome": "passed" }, "call": { - "duration": 2.4146235829684883, + "duration": 2.6833953330060467, "outcome": "passed" }, "teardown": { - "duration": 0.00025754200760275126, + "duration": 0.00022558309137821198, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[gpt-4o-calendar]", - "lineno": 181, + "lineno": 182, "outcome": "passed", "keywords": [ "test_chat_streaming_structured_output[gpt-4o-calendar]", @@ -687,21 +697,21 @@ "case_id": "calendar" }, "setup": { - "duration": 0.006940583931282163, + "duration": 0.009646040969528258, "outcome": "passed" }, "call": { - "duration": 0.5102092920569703, + "duration": 0.6117532079806551, "outcome": "passed" }, "teardown": { - "duration": 0.00023379107005894184, + "duration": 0.00015258300118148327, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[gpt-4o-math]", - "lineno": 181, + "lineno": 182, "outcome": "passed", "keywords": [ "test_chat_streaming_structured_output[gpt-4o-math]", @@ -720,21 +730,21 @@ "case_id": "math" }, "setup": { - "duration": 0.007166999974288046, + "duration": 0.012024458032101393, "outcome": "passed" }, "call": { - "duration": 3.5751801669830456, + "duration": 4.522625041077845, "outcome": "passed" }, "teardown": { - "duration": 0.00015041697770357132, + "duration": 0.0004230838967487216, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[gpt-4o-mini-calendar]", - "lineno": 181, + "lineno": 182, "outcome": "passed", "keywords": [ "test_chat_streaming_structured_output[gpt-4o-mini-calendar]", @@ -753,21 +763,21 @@ "case_id": "calendar" }, "setup": { - "duration": 0.010652625001966953, + "duration": 0.009566582972183824, "outcome": "passed" }, "call": { - "duration": 0.6648182499920949, + "duration": 2.5591942919418216, "outcome": "passed" }, "teardown": { - "duration": 0.0008647920330986381, + "duration": 0.0007555419579148293, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[gpt-4o-mini-math]", - "lineno": 181, + "lineno": 182, "outcome": "passed", "keywords": [ "test_chat_streaming_structured_output[gpt-4o-mini-math]", @@ -786,21 +796,21 @@ "case_id": "math" }, "setup": { - "duration": 0.007372208056040108, + "duration": 0.010828875005245209, "outcome": "passed" }, "call": { - "duration": 
2.80747462506406, + "duration": 2.495122667052783, "outcome": "passed" }, "teardown": { - "duration": 0.00028124998789280653, + "duration": 0.0002802090020850301, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[gpt-4o-case0]", - "lineno": 203, + "lineno": 204, "outcome": "passed", "keywords": [ "test_chat_non_streaming_tool_calling[gpt-4o-case0]", @@ -819,21 +829,21 @@ "case_id": "case0" }, "setup": { - "duration": 0.01625587500166148, + "duration": 0.012762792059220374, "outcome": "passed" }, "call": { - "duration": 0.6878769160248339, + "duration": 0.5655921660363674, "outcome": "passed" }, "teardown": { - "duration": 0.0002637499710544944, + "duration": 0.00022304197773337364, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[gpt-4o-mini-case0]", - "lineno": 203, + "lineno": 204, "outcome": "passed", "keywords": [ "test_chat_non_streaming_tool_calling[gpt-4o-mini-case0]", @@ -852,17 +862,84 @@ "case_id": "case0" }, "setup": { - "duration": 0.008817250025458634, + "duration": 0.03188708401285112, "outcome": "passed" }, "call": { - "duration": 0.7181202919455245, + "duration": 0.6159415419679135, "outcome": "passed" }, "teardown": { - "duration": 0.0017147079342976213, + "duration": 0.0005549580091610551, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[gpt-4o-case0]", + "lineno": 228, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_tool_calling[gpt-4o-case0]", + "parametrize", + "pytestmark", + "gpt-4o-case0", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "gpt-4o", + "case_id": "case0" + }, + "setup": { + "duration": 0.014768208027817309, + "outcome": "passed" + }, + "call": { + "duration": 0.47373537498060614, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0005811670562252402, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[gpt-4o-mini-case0]", + "lineno": 228, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_tool_calling[gpt-4o-mini-case0]", + "parametrize", + "pytestmark", + "gpt-4o-mini-case0", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "gpt-4o-mini", + "case_id": "case0" + }, + "setup": { + "duration": 0.010271625011228025, + "outcome": "passed" + }, + "call": { + "duration": 0.5656027499353513, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0025699170073494315, "outcome": "passed" } } - ] + ], + "run_timestamp": 1744328848 } diff --git a/tests/verifications/test_results/together_1744264258.json b/tests/verifications/test_results/together.json similarity index 77% rename from tests/verifications/test_results/together_1744264258.json rename to tests/verifications/test_results/together.json index c38dd52b5..2b23089e8 100644 --- a/tests/verifications/test_results/together_1744264258.json +++ b/tests/verifications/test_results/together.json @@ -1,15 +1,15 @@ { - "created": 1744264304.064288, - "duration": 42.470197916030884, + "created": 1744328847.853437, + "duration": 49.9419469833374, "exitcode": 1, "root": "/Users/erichuang/projects/llama-stack", "environment": {}, "summary": { - "passed": 21, - "failed": 10, + "passed": 22, + 
"failed": 12, "skipped": 2, - "total": 33, - "collected": 33 + "total": 36, + "collected": 36 }, "collectors": [ { @@ -29,167 +29,182 @@ { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-earth]", "type": "Function", - "lineno": 72 + "lineno": 73 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-saturn]", "type": "Function", - "lineno": 72 + "lineno": 73 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-earth]", "type": "Function", - "lineno": 72 + "lineno": 73 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-saturn]", "type": "Function", - "lineno": 72 + "lineno": 73 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-earth]", "type": "Function", - "lineno": 72 + "lineno": 73 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-saturn]", "type": "Function", - "lineno": 72 + "lineno": 73 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-earth]", "type": "Function", - "lineno": 91 + "lineno": 92 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-saturn]", "type": "Function", - "lineno": 91 + "lineno": 92 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-earth]", "type": "Function", - "lineno": 91 + "lineno": 92 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-saturn]", "type": "Function", - "lineno": 91 + "lineno": 92 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-earth]", "type": "Function", - "lineno": 91 + "lineno": 92 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-saturn]", "type": "Function", - "lineno": 91 + "lineno": 92 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", "type": "Function", - "lineno": 115 + "lineno": 116 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", "type": "Function", - "lineno": 115 + "lineno": 116 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", "type": "Function", - "lineno": 115 + "lineno": 116 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", "type": "Function", - "lineno": 134 + "lineno": 135 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", "type": 
"Function", - "lineno": 134 + "lineno": 135 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", "type": "Function", - "lineno": 134 + "lineno": 135 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-calendar]", "type": "Function", - "lineno": 158 + "lineno": 159 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-math]", "type": "Function", - "lineno": 158 + "lineno": 159 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-calendar]", "type": "Function", - "lineno": 158 + "lineno": 159 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-math]", "type": "Function", - "lineno": 158 + "lineno": 159 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-calendar]", "type": "Function", - "lineno": 158 + "lineno": 159 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-math]", "type": "Function", - "lineno": 158 + "lineno": 159 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-calendar]", "type": "Function", - "lineno": 181 + "lineno": 182 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-math]", "type": "Function", - "lineno": 181 + "lineno": 182 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-calendar]", "type": "Function", - "lineno": 181 + "lineno": 182 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-math]", "type": "Function", - "lineno": 181 + "lineno": 182 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-calendar]", "type": "Function", - "lineno": 181 + "lineno": 182 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-math]", "type": "Function", - "lineno": 181 + "lineno": 182 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", "type": "Function", - "lineno": 203 + "lineno": 204 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", "type": "Function", - "lineno": 203 + "lineno": 204 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", "type": "Function", - "lineno": 203 + "lineno": 204 + }, + { + "nodeid": 
"tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", + "type": "Function", + "lineno": 228 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", + "type": "Function", + "lineno": 228 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", + "type": "Function", + "lineno": 228 } ] } @@ -197,7 +212,7 @@ "tests": [ { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-earth]", - "lineno": 72, + "lineno": 73, "outcome": "passed", "keywords": [ "test_chat_non_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-earth]", @@ -216,21 +231,21 @@ "case_id": "earth" }, "setup": { - "duration": 0.06113254197407514, + "duration": 0.15774220903404057, "outcome": "passed" }, "call": { - "duration": 1.0720349580515176, + "duration": 0.5396400419995189, "outcome": "passed" }, "teardown": { - "duration": 0.00015966698992997408, + "duration": 0.0002977499971166253, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-saturn]", - "lineno": 72, + "lineno": 73, "outcome": "passed", "keywords": [ "test_chat_non_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-saturn]", @@ -249,21 +264,21 @@ "case_id": "saturn" }, "setup": { - "duration": 0.006908083101734519, + "duration": 0.015632833004929125, "outcome": "passed" }, "call": { - "duration": 0.5013210839824751, + "duration": 0.4675290420418605, "outcome": "passed" }, "teardown": { - "duration": 0.0005375830223783851, + "duration": 0.00029129208996891975, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-earth]", - "lineno": 72, + "lineno": 73, "outcome": "passed", "keywords": [ "test_chat_non_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-earth]", @@ -282,21 +297,21 @@ "case_id": "earth" }, "setup": { - "duration": 0.006910792086273432, + "duration": 0.01530187507160008, "outcome": "passed" }, "call": { - "duration": 0.5142245410243049, + "duration": 0.501894542016089, "outcome": "passed" }, "teardown": { - "duration": 0.0004069580463692546, + "duration": 0.0002060839906334877, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-saturn]", - "lineno": 72, + "lineno": 73, "outcome": "passed", "keywords": [ "test_chat_non_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-saturn]", @@ -315,21 +330,21 @@ "case_id": "saturn" }, "setup": { - "duration": 0.009730000048875809, + "duration": 0.014841833035461605, "outcome": "passed" }, "call": { - "duration": 0.40133179200347513, + "duration": 0.4202229160582647, "outcome": "passed" }, "teardown": { - "duration": 0.0004558749496936798, + "duration": 0.0005559159908443689, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-earth]", - "lineno": 72, + "lineno": 73, "outcome": "passed", "keywords": [ 
"test_chat_non_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-earth]", @@ -348,21 +363,21 @@ "case_id": "earth" }, "setup": { - "duration": 0.008247417048551142, + "duration": 0.008204624988138676, "outcome": "passed" }, "call": { - "duration": 0.7914331250358373, + "duration": 1.991508833016269, "outcome": "passed" }, "teardown": { - "duration": 0.00020262505859136581, + "duration": 0.000539042055606842, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-saturn]", - "lineno": 72, + "lineno": 73, "outcome": "passed", "keywords": [ "test_chat_non_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-saturn]", @@ -381,21 +396,21 @@ "case_id": "saturn" }, "setup": { - "duration": 0.00922900007572025, + "duration": 0.022528667002916336, "outcome": "passed" }, "call": { - "duration": 1.2742049579974264, + "duration": 0.37111237505450845, "outcome": "passed" }, "teardown": { - "duration": 0.000688415952026844, + "duration": 0.0005334159359335899, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-earth]", - "lineno": 91, + "lineno": 92, "outcome": "passed", "keywords": [ "test_chat_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-earth]", @@ -414,21 +429,21 @@ "case_id": "earth" }, "setup": { - "duration": 0.006949124974198639, + "duration": 0.00922920904122293, "outcome": "passed" }, "call": { - "duration": 0.4681705000111833, + "duration": 1.1684916669037193, "outcome": "passed" }, "teardown": { - "duration": 0.00017795804888010025, + "duration": 0.0002740409690886736, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-saturn]", - "lineno": 91, + "lineno": 92, "outcome": "passed", "keywords": [ "test_chat_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-saturn]", @@ -447,21 +462,21 @@ "case_id": "saturn" }, "setup": { - "duration": 0.008564374991692603, + "duration": 0.010883333045057952, "outcome": "passed" }, "call": { - "duration": 1.7430362500017509, + "duration": 0.4275277080014348, "outcome": "passed" }, "teardown": { - "duration": 0.00015312491450458765, + "duration": 0.00043112505227327347, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-earth]", - "lineno": 91, + "lineno": 92, "outcome": "failed", "keywords": [ "test_chat_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-earth]", @@ -480,34 +495,34 @@ "case_id": "earth" }, "setup": { - "duration": 0.007404124946333468, + "duration": 0.012945958063937724, "outcome": "passed" }, "call": { - "duration": 0.515926624997519, + "duration": 0.5551295839250088, "outcome": "failed", "crash": { "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 109, + "lineno": 110, "message": "IndexError: list index out of range" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 109, + "lineno": 110, "message": "IndexError" } ], - "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 
'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'case_id': 'earth', 'input': {'messages': [{'content': 'Which planet do humans live on?', 'role': 'user'}]}, 'output': 'Earth'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_basic\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_basic(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:109: IndexError" + "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'case_id': 'earth', 'input': {'messages': [{'content': 'Which planet do humans live on?', 'role': 'user'}]}, 'output': 'Earth'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_basic\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_basic(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:110: IndexError" }, "teardown": { - "duration": 0.0002389999572187662, + "duration": 0.0002744169905781746, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-saturn]", - "lineno": 91, + "lineno": 92, "outcome": "failed", "keywords": [ "test_chat_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-saturn]", @@ -526,34 +541,34 @@ "case_id": "saturn" }, "setup": { - "duration": 0.0071305419551208615, + "duration": 0.017372542060911655, "outcome": "passed" }, "call": { - "duration": 0.37054662499576807, + "duration": 0.3579877089941874, "outcome": "failed", "crash": { "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 109, + "lineno": 110, "message": "IndexError: list index out of range" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 109, + "lineno": 110, "message": "IndexError" } ], - "longrepr": "request = >\nopenai_client = \nmodel = 
'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'case_id': 'saturn', 'input': {'messages': [{'content': 'Which planet has rings around it with a name starting with letter S?', 'role': 'user'}]}, 'output': 'Saturn'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_basic\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_basic(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:109: IndexError" + "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'case_id': 'saturn', 'input': {'messages': [{'content': 'Which planet has rings around it with a name starting with letter S?', 'role': 'user'}]}, 'output': 'Saturn'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_basic\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_basic(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:110: IndexError" }, "teardown": { - "duration": 0.0006014580139890313, + "duration": 0.0005445419810712337, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-earth]", - "lineno": 91, + "lineno": 92, "outcome": "failed", "keywords": [ "test_chat_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-earth]", @@ -572,34 +587,34 @@ "case_id": "earth" }, "setup": { - "duration": 0.007489709067158401, + "duration": 0.014297832967713475, "outcome": "passed" }, "call": { - "duration": 0.7767745839664713, + "duration": 0.8067362919682637, "outcome": "failed", "crash": { "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 109, + "lineno": 110, "message": "IndexError: list 
index out of range" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 109, + "lineno": 110, "message": "IndexError" } ], - "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'case_id': 'earth', 'input': {'messages': [{'content': 'Which planet do humans live on?', 'role': 'user'}]}, 'output': 'Earth'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_basic\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_basic(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:109: IndexError" + "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'case_id': 'earth', 'input': {'messages': [{'content': 'Which planet do humans live on?', 'role': 'user'}]}, 'output': 'Earth'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_basic\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_basic(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:110: IndexError" }, "teardown": { - "duration": 0.00025491707492619753, + "duration": 0.0003220830112695694, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-saturn]", - "lineno": 91, + "lineno": 92, "outcome": "failed", "keywords": [ "test_chat_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-saturn]", @@ -618,34 +633,34 @@ "case_id": "saturn" }, "setup": { - "duration": 0.006736499955877662, + "duration": 0.008816750021651387, "outcome": "passed" }, "call": { - "duration": 0.43948554201051593, + "duration": 0.5383605000097305, "outcome": "failed", "crash": { 
"path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 109, + "lineno": 110, "message": "IndexError: list index out of range" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 109, + "lineno": 110, "message": "IndexError" } ], - "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'case_id': 'saturn', 'input': {'messages': [{'content': 'Which planet has rings around it with a name starting with letter S?', 'role': 'user'}]}, 'output': 'Saturn'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_basic\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_basic(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:109: IndexError" + "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'case_id': 'saturn', 'input': {'messages': [{'content': 'Which planet has rings around it with a name starting with letter S?', 'role': 'user'}]}, 'output': 'Saturn'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_basic\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_basic(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:110: IndexError" }, "teardown": { - "duration": 0.0002264160430058837, + "duration": 0.00018316600471735, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", - "lineno": 115, + "lineno": 116, "outcome": "skipped", "keywords": [ "test_chat_non_streaming_image[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", @@ -664,22 +679,22 @@ "case_id": 
"case0" }, "setup": { - "duration": 0.007171708042733371, + "duration": 0.0074389580404385924, "outcome": "passed" }, "call": { - "duration": 0.00013554200995713472, + "duration": 0.00014933396596461535, "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py', 124, 'Skipped: Skipping test_chat_non_streaming_image for model meta-llama/Llama-3.3-70B-Instruct-Turbo on provider together based on config.')" + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py', 125, 'Skipped: Skipping test_chat_non_streaming_image for model meta-llama/Llama-3.3-70B-Instruct-Turbo on provider together based on config.')" }, "teardown": { - "duration": 0.0001235839445143938, + "duration": 0.00012462493032217026, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", - "lineno": 115, + "lineno": 116, "outcome": "passed", "keywords": [ "test_chat_non_streaming_image[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", @@ -698,21 +713,21 @@ "case_id": "case0" }, "setup": { - "duration": 0.008639499894343317, + "duration": 0.013580625061877072, "outcome": "passed" }, "call": { - "duration": 1.4001279999502003, + "duration": 2.89831429196056, "outcome": "passed" }, "teardown": { - "duration": 0.00014812499284744263, + "duration": 0.000491458922624588, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", - "lineno": 115, + "lineno": 116, "outcome": "passed", "keywords": [ "test_chat_non_streaming_image[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", @@ -731,21 +746,21 @@ "case_id": "case0" }, "setup": { - "duration": 0.015450250008143485, + "duration": 0.008266666904091835, "outcome": "passed" }, "call": { - "duration": 3.3522649579681456, + "duration": 3.8873212080216035, "outcome": "passed" }, "teardown": { - "duration": 0.00041629199404269457, + "duration": 0.00016850000247359276, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", - "lineno": 134, + "lineno": 135, "outcome": "skipped", "keywords": [ "test_chat_streaming_image[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", @@ -764,22 +779,22 @@ "case_id": "case0" }, "setup": { - "duration": 0.007634000037796795, + "duration": 0.0080461660400033, "outcome": "passed" }, "call": { - "duration": 0.0001563339028507471, + "duration": 0.00014758307952433825, "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py', 143, 'Skipped: Skipping test_chat_streaming_image for model meta-llama/Llama-3.3-70B-Instruct-Turbo on provider together based on config.')" + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py', 144, 'Skipped: Skipping test_chat_streaming_image for model meta-llama/Llama-3.3-70B-Instruct-Turbo on provider together based on config.')" }, "teardown": { - "duration": 0.0001324999611824751, + "duration": 0.00012695800978690386, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", - "lineno": 134, + "lineno": 135, 
"outcome": "failed", "keywords": [ "test_chat_streaming_image[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", @@ -798,34 +813,34 @@ "case_id": "case0" }, "setup": { - "duration": 0.007050334010273218, + "duration": 0.00845700001809746, "outcome": "passed" }, "call": { - "duration": 1.7063317500287667, + "duration": 1.6604419159702957, "outcome": "failed", "crash": { "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 152, + "lineno": 153, "message": "IndexError: list index out of range" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 152, + "lineno": 153, "message": "IndexError" } ], - "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'input': {'messages': [{'content': [{'text': 'What is in this image?', 'type': 'text'}, {'image_url': {...}, 'type': 'image_url'}], 'role': 'user'}]}, 'output': 'llama'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_image\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_image(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:152: IndexError" + "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'input': {'messages': [{'content': [{'text': 'What is in this image?', 'type': 'text'}, {'image_url': {...}, 'type': 'image_url'}], 'role': 'user'}]}, 'output': 'llama'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_image\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_image(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:153: IndexError" }, "teardown": { - "duration": 0.0002109999768435955, + "duration": 
0.00033458403777331114, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", - "lineno": 134, + "lineno": 135, "outcome": "failed", "keywords": [ "test_chat_streaming_image[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", @@ -844,34 +859,34 @@ "case_id": "case0" }, "setup": { - "duration": 0.006729208980686963, + "duration": 0.012580333976075053, "outcome": "passed" }, "call": { - "duration": 3.829621708020568, + "duration": 4.728511792025529, "outcome": "failed", "crash": { "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 152, + "lineno": 153, "message": "IndexError: list index out of range" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 152, + "lineno": 153, "message": "IndexError" } ], - "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'input': {'messages': [{'content': [{'text': 'What is in this image?', 'type': 'text'}, {'image_url': {...}, 'type': 'image_url'}], 'role': 'user'}]}, 'output': 'llama'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_image\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_image(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:152: IndexError" + "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'input': {'messages': [{'content': [{'text': 'What is in this image?', 'type': 'text'}, {'image_url': {...}, 'type': 'image_url'}], 'role': 'user'}]}, 'output': 'llama'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_image\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_image(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in 
response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:153: IndexError" }, "teardown": { - "duration": 0.0002882500411942601, + "duration": 0.00023266696371138096, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-calendar]", - "lineno": 158, + "lineno": 159, "outcome": "passed", "keywords": [ "test_chat_non_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-calendar]", @@ -890,21 +905,21 @@ "case_id": "calendar" }, "setup": { - "duration": 0.007713916013017297, + "duration": 0.011554082971997559, "outcome": "passed" }, "call": { - "duration": 2.48285808309447, + "duration": 1.3857994999270886, "outcome": "passed" }, "teardown": { - "duration": 0.00020350003615021706, + "duration": 0.0003951250109821558, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-math]", - "lineno": 158, + "lineno": 159, "outcome": "passed", "keywords": [ "test_chat_non_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-math]", @@ -923,21 +938,21 @@ "case_id": "math" }, "setup": { - "duration": 0.010098082944750786, + "duration": 0.007673708954825997, "outcome": "passed" }, "call": { - "duration": 1.6994713749736547, + "duration": 3.082161583006382, "outcome": "passed" }, "teardown": { - "duration": 0.00014512497000396252, + "duration": 0.0002532500075176358, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-calendar]", - "lineno": 158, + "lineno": 159, "outcome": "passed", "keywords": [ "test_chat_non_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-calendar]", @@ -956,21 +971,21 @@ "case_id": "calendar" }, "setup": { - "duration": 0.006934792036190629, + "duration": 0.014791041961871088, "outcome": "passed" }, "call": { - "duration": 1.277176082949154, + "duration": 0.6918012499809265, "outcome": "passed" }, "teardown": { - "duration": 0.0004985419800505042, + "duration": 0.00027070799842476845, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-math]", - "lineno": 158, + "lineno": 159, "outcome": "passed", "keywords": [ "test_chat_non_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-math]", @@ -989,21 +1004,21 @@ "case_id": "math" }, "setup": { - "duration": 0.012558708898723125, + "duration": 0.014746625092811882, "outcome": "passed" }, "call": { - "duration": 2.442075416096486, + "duration": 3.5890139170223847, "outcome": "passed" }, "teardown": { - "duration": 0.0003505420172587037, + "duration": 0.00030137505382299423, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-calendar]", - "lineno": 158, + "lineno": 159, "outcome": "passed", "keywords": [ "test_chat_non_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-calendar]", @@ -1022,21 +1037,21 @@ "case_id": "calendar" }, "setup": { - "duration": 0.012642999994568527, + "duration": 0.036798374960199, "outcome": "passed" }, 
"call": { - "duration": 0.9305703329155222, + "duration": 0.6914895409718156, "outcome": "passed" }, "teardown": { - "duration": 0.00016004196368157864, + "duration": 0.00023716699797660112, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-math]", - "lineno": 158, + "lineno": 159, "outcome": "passed", "keywords": [ "test_chat_non_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-math]", @@ -1055,21 +1070,21 @@ "case_id": "math" }, "setup": { - "duration": 0.008792415959760547, + "duration": 0.05965254199691117, "outcome": "passed" }, "call": { - "duration": 2.194098167004995, + "duration": 2.609581291093491, "outcome": "passed" }, "teardown": { - "duration": 0.0003667499404400587, + "duration": 0.0002674580318853259, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-calendar]", - "lineno": 181, + "lineno": 182, "outcome": "passed", "keywords": [ "test_chat_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-calendar]", @@ -1088,21 +1103,21 @@ "case_id": "calendar" }, "setup": { - "duration": 0.01219504198525101, + "duration": 0.014533916022628546, "outcome": "passed" }, "call": { - "duration": 2.045097667025402, + "duration": 0.6227063750848174, "outcome": "passed" }, "teardown": { - "duration": 0.00029958400409668684, + "duration": 0.00019699998665601015, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-math]", - "lineno": 181, + "lineno": 182, "outcome": "passed", "keywords": [ "test_chat_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-math]", @@ -1121,21 +1136,21 @@ "case_id": "math" }, "setup": { - "duration": 0.014203459024429321, + "duration": 0.009818125050514936, "outcome": "passed" }, "call": { - "duration": 1.3079068749211729, + "duration": 5.144610875053331, "outcome": "passed" }, "teardown": { - "duration": 0.0001914579188451171, + "duration": 0.00045220903120934963, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-calendar]", - "lineno": 181, + "lineno": 182, "outcome": "failed", "keywords": [ "test_chat_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-calendar]", @@ -1154,34 +1169,34 @@ "case_id": "calendar" }, "setup": { - "duration": 0.04714570892974734, + "duration": 0.012392290984280407, "outcome": "passed" }, "call": { - "duration": 0.44743770791683346, + "duration": 0.777625665999949, "outcome": "failed", "crash": { "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 200, + "lineno": 201, "message": "IndexError: list index out of range" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 200, + "lineno": 201, "message": "IndexError" } ], - "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': 
['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'case_id': 'calendar', 'input': {'messages': [{'content': 'Extract the event information.', 'role': 'system'}, {'cont...articipants'], 'title': 'CalendarEvent', 'type': 'object'}}, 'type': 'json_schema'}}, 'output': 'valid_calendar_event'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_structured_output\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_structured_output(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n response_format=case[\"input\"][\"response_format\"],\n stream=True,\n )\n maybe_json_content = \"\"\n for chunk in response:\n> maybe_json_content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:200: IndexError" + "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'case_id': 'calendar', 'input': {'messages': [{'content': 'Extract the event information.', 'role': 'system'}, {'cont...articipants'], 'title': 'CalendarEvent', 'type': 'object'}}, 'type': 'json_schema'}}, 'output': 'valid_calendar_event'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_structured_output\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_structured_output(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n response_format=case[\"input\"][\"response_format\"],\n stream=True,\n )\n maybe_json_content = \"\"\n for chunk in response:\n> maybe_json_content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:201: IndexError" }, "teardown": { - "duration": 0.00022199994418770075, + "duration": 0.000559916952624917, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-math]", - "lineno": 181, + "lineno": 182, "outcome": "failed", "keywords": [ "test_chat_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-math]", @@ -1200,34 +1215,34 @@ "case_id": "math" }, "setup": { - "duration": 0.012237709015607834, + "duration": 0.010390624986030161, "outcome": "passed" }, "call": { - "duration": 3.180020791012794, + "duration": 2.680094916955568, "outcome": "failed", "crash": { "path": 
"/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 200, + "lineno": 201, "message": "IndexError: list index out of range" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 200, + "lineno": 201, "message": "IndexError" } ], - "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'case_id': 'math', 'input': {'messages': [{'content': 'You are a helpful math tutor. Guide the user through the solut... ['steps', 'final_answer'], 'title': 'MathReasoning', ...}}, 'type': 'json_schema'}}, 'output': 'valid_math_reasoning'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_structured_output\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_structured_output(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n response_format=case[\"input\"][\"response_format\"],\n stream=True,\n )\n maybe_json_content = \"\"\n for chunk in response:\n> maybe_json_content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:200: IndexError" + "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'case_id': 'math', 'input': {'messages': [{'content': 'You are a helpful math tutor. Guide the user through the solut... 
['steps', 'final_answer'], 'title': 'MathReasoning', ...}}, 'type': 'json_schema'}}, 'output': 'valid_math_reasoning'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_structured_output\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_structured_output(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n response_format=case[\"input\"][\"response_format\"],\n stream=True,\n )\n maybe_json_content = \"\"\n for chunk in response:\n> maybe_json_content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:201: IndexError" }, "teardown": { - "duration": 0.000273333047516644, + "duration": 0.00041987502481788397, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-calendar]", - "lineno": 181, + "lineno": 182, "outcome": "failed", "keywords": [ "test_chat_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-calendar]", @@ -1246,34 +1261,34 @@ "case_id": "calendar" }, "setup": { - "duration": 0.013312208000570536, + "duration": 0.01190529193263501, "outcome": "passed" }, "call": { - "duration": 0.4110311249969527, + "duration": 0.6690819580107927, "outcome": "failed", "crash": { "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 200, + "lineno": 201, "message": "IndexError: list index out of range" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 200, + "lineno": 201, "message": "IndexError" } ], - "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'case_id': 'calendar', 'input': {'messages': [{'content': 'Extract the event information.', 'role': 'system'}, {'cont...articipants'], 'title': 'CalendarEvent', 'type': 'object'}}, 'type': 'json_schema'}}, 'output': 'valid_calendar_event'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_structured_output\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_structured_output(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n response_format=case[\"input\"][\"response_format\"],\n stream=True,\n )\n maybe_json_content = \"\"\n for chunk in response:\n> maybe_json_content += chunk.choices[0].delta.content or \"\"\nE 
IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:200: IndexError" + "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'case_id': 'calendar', 'input': {'messages': [{'content': 'Extract the event information.', 'role': 'system'}, {'cont...articipants'], 'title': 'CalendarEvent', 'type': 'object'}}, 'type': 'json_schema'}}, 'output': 'valid_calendar_event'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_structured_output\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_structured_output(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n response_format=case[\"input\"][\"response_format\"],\n stream=True,\n )\n maybe_json_content = \"\"\n for chunk in response:\n> maybe_json_content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:201: IndexError" }, "teardown": { - "duration": 0.00022975006140768528, + "duration": 0.000247166957706213, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-math]", - "lineno": 181, + "lineno": 182, "outcome": "failed", "keywords": [ "test_chat_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-math]", @@ -1292,34 +1307,34 @@ "case_id": "math" }, "setup": { - "duration": 0.006676917080767453, + "duration": 0.009588208980858326, "outcome": "passed" }, "call": { - "duration": 2.316411833046004, + "duration": 2.4867218340514228, "outcome": "failed", "crash": { "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 200, + "lineno": 201, "message": "IndexError: list index out of range" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 200, + "lineno": 201, "message": "IndexError" } ], - "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'case_id': 'math', 'input': {'messages': [{'content': 'You are a helpful math tutor. Guide the user through the solut... 
['steps', 'final_answer'], 'title': 'MathReasoning', ...}}, 'type': 'json_schema'}}, 'output': 'valid_math_reasoning'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_structured_output\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_structured_output(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n response_format=case[\"input\"][\"response_format\"],\n stream=True,\n )\n maybe_json_content = \"\"\n for chunk in response:\n> maybe_json_content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:200: IndexError" + "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'case_id': 'math', 'input': {'messages': [{'content': 'You are a helpful math tutor. Guide the user through the solut... ['steps', 'final_answer'], 'title': 'MathReasoning', ...}}, 'type': 'json_schema'}}, 'output': 'valid_math_reasoning'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_structured_output\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_structured_output(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n response_format=case[\"input\"][\"response_format\"],\n stream=True,\n )\n maybe_json_content = \"\"\n for chunk in response:\n> maybe_json_content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:201: IndexError" }, "teardown": { - "duration": 0.000245374976657331, + "duration": 0.00022487505339086056, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", - "lineno": 203, + "lineno": 204, "outcome": "passed", "keywords": [ "test_chat_non_streaming_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", @@ -1338,21 +1353,21 @@ "case_id": "case0" }, "setup": { - "duration": 0.007064500008709729, + "duration": 0.008509417064487934, "outcome": "passed" }, "call": { - "duration": 0.606806542025879, + "duration": 0.45511841599363834, "outcome": "passed" }, "teardown": { - "duration": 0.00046320806723088026, + "duration": 0.00031033402774482965, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", - 
"lineno": 203, + "lineno": 204, "outcome": "passed", "keywords": [ "test_chat_non_streaming_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", @@ -1371,21 +1386,21 @@ "case_id": "case0" }, "setup": { - "duration": 0.009071375010535121, + "duration": 0.01352791697718203, "outcome": "passed" }, "call": { - "duration": 0.41908070899080485, + "duration": 0.7166531670372933, "outcome": "passed" }, "teardown": { - "duration": 0.00026074994821101427, + "duration": 0.00031470798421651125, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", - "lineno": 203, + "lineno": 204, "outcome": "passed", "keywords": [ "test_chat_non_streaming_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", @@ -1404,17 +1419,143 @@ "case_id": "case0" }, "setup": { - "duration": 0.0068333749659359455, + "duration": 0.01369225000962615, "outcome": "passed" }, "call": { - "duration": 0.8904451669659466, + "duration": 0.34134254103992134, "outcome": "passed" }, "teardown": { - "duration": 0.0005833340110257268, + "duration": 0.0002922919811680913, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", + "lineno": 228, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", + "parametrize", + "pytestmark", + "meta-llama/Llama-3.3-70B-Instruct-Turbo-case0", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo", + "case_id": "case0" + }, + "setup": { + "duration": 0.025748749962076545, + "outcome": "passed" + }, + "call": { + "duration": 0.7462511250050738, + "outcome": "passed" + }, + "teardown": { + "duration": 0.00030449999030679464, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", + "lineno": 228, + "outcome": "failed", + "keywords": [ + "test_chat_streaming_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", + "parametrize", + "pytestmark", + "meta-llama/Llama-4-Scout-17B-16E-Instruct-case0", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "case_id": "case0" + }, + "setup": { + "duration": 0.015131957945413888, + "outcome": "passed" + }, + "call": { + "duration": 0.4556894999695942, + "outcome": "failed", + "crash": { + "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", + "lineno": 251, + "message": "IndexError: list index out of range" + }, + "traceback": [ + { + "path": "tests/verifications/openai_api/test_chat_completion.py", + "lineno": 251, + "message": "IndexError" + } + ], + "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful 
assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_tool_calling(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n stream = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n stream=True,\n )\n \n # Accumulate partial tool_calls here\n tool_calls_buffer = {}\n current_id = None\n # Process streaming chunks\n for chunk in stream:\n> choice = chunk.choices[0]\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:251: IndexError" + }, + "teardown": { + "duration": 0.000539042055606842, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", + "lineno": 228, + "outcome": "failed", + "keywords": [ + "test_chat_streaming_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", + "parametrize", + "pytestmark", + "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", + "case_id": "case0" + }, + "setup": { + "duration": 0.016429082956165075, + "outcome": "passed" + }, + "call": { + "duration": 0.3677835420239717, + "outcome": "failed", + "crash": { + "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", + "lineno": 251, + "message": "IndexError: list index out of range" + }, + "traceback": [ + { + "path": "tests/verifications/openai_api/test_chat_completion.py", + "lineno": 251, + "message": "IndexError" + } + ], + "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_tool_calling(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n stream = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n 
tools=case[\"input\"][\"tools\"],\n stream=True,\n )\n \n # Accumulate partial tool_calls here\n tool_calls_buffer = {}\n current_id = None\n # Process streaming chunks\n for chunk in stream:\n> choice = chunk.choices[0]\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:251: IndexError" + }, + "teardown": { + "duration": 0.001610000035725534, "outcome": "passed" } } - ] + ], + "run_timestamp": 1744328795 }
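Every failure recorded in this results file shares one crash signature: `IndexError: list index out of range` raised at `chunk.choices[0]` while iterating a streamed response from the `together` provider. Below is a minimal sketch of that failure mode and a defensive guard, assuming the standard `openai` Python client; the client setup, model name, and message are illustrative and this is not the repository's fix:

```python
from openai import OpenAI

# Hypothetical client pointed at an OpenAI-compatible provider endpoint.
client = OpenAI()

stream = client.chat.completions.create(
    model="meta-llama/Llama-4-Scout-17B-16E-Instruct",  # model name taken from the report above
    messages=[{"role": "user", "content": "What is in this image?"}],
    stream=True,
)

content = ""
for chunk in stream:
    # Some providers emit chunks whose `choices` list is empty (e.g. trailing
    # usage or keep-alive chunks). Indexing `choices[0]` unconditionally is
    # exactly what raises the IndexError seen in each failed test above.
    if not chunk.choices:
        continue
    content += chunk.choices[0].delta.content or ""
```

The sketch only illustrates why the same `IndexError` recurs across the streaming image, structured-output, and tool-calling tests; whether the tests should tolerate empty-`choices` chunks or the provider should stop emitting them is a separate decision.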