forked from phoenix-oss/llama-stack-mirror
test(verification): overwrite test result instead of creating new ones (#1934)
# What does this PR do?

## Test Plan

(myenv) ➜ llama-stack python tests/verifications/generate_report.py --providers fireworks,together,openai --run-tests
This commit is contained in:
parent
a4cc4b7e31
commit
2fcb70b789
5 changed files with 926 additions and 580 deletions
|
@ -1,6 +1,6 @@
|
||||||
# Test Results Report
|
# Test Results Report
|
||||||
|
|
||||||
*Generated on: 2025-04-09 22:52:19*
|
*Generated on: 2025-04-10 16:48:18*
|
||||||
|
|
||||||
*This report was generated by running `python tests/verifications/generate_report.py`*
|
*This report was generated by running `python tests/verifications/generate_report.py`*
|
||||||
|
|
||||||
|
@ -15,15 +15,15 @@
|
||||||
|
|
||||||
| Provider | Pass Rate | Tests Passed | Total Tests |
|
| Provider | Pass Rate | Tests Passed | Total Tests |
|
||||||
| --- | --- | --- | --- |
|
| --- | --- | --- | --- |
|
||||||
| Together | 67.7% | 21 | 31 |
|
| Together | 64.7% | 22 | 34 |
|
||||||
| Fireworks | 90.3% | 28 | 31 |
|
| Fireworks | 82.4% | 28 | 34 |
|
||||||
| Openai | 100.0% | 22 | 22 |
|
| Openai | 100.0% | 24 | 24 |
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
## Together
|
## Together
|
||||||
|
|
||||||
*Tests run on: 2025-04-09 22:50:58*
|
*Tests run on: 2025-04-10 16:46:35*
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# Run all tests for this provider:
|
# Run all tests for this provider:
|
||||||
|
@ -56,10 +56,11 @@ pytest tests/verifications/openai_api/test_chat_completion.py --provider=togethe
|
||||||
| test_chat_streaming_image | ⚪ | ❌ | ❌ |
|
| test_chat_streaming_image | ⚪ | ❌ | ❌ |
|
||||||
| test_chat_streaming_structured_output (calendar) | ✅ | ❌ | ❌ |
|
| test_chat_streaming_structured_output (calendar) | ✅ | ❌ | ❌ |
|
||||||
| test_chat_streaming_structured_output (math) | ✅ | ❌ | ❌ |
|
| test_chat_streaming_structured_output (math) | ✅ | ❌ | ❌ |
|
||||||
|
| test_chat_streaming_tool_calling | ✅ | ❌ | ❌ |
|
||||||
|
|
||||||
## Fireworks
|
## Fireworks
|
||||||
|
|
||||||
*Tests run on: 2025-04-09 22:50:02*
|
*Tests run on: 2025-04-10 16:44:44*
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# Run all tests for this provider:
|
# Run all tests for this provider:
|
||||||
|
@ -92,10 +93,11 @@ pytest tests/verifications/openai_api/test_chat_completion.py --provider=firewor
|
||||||
| test_chat_streaming_image | ⚪ | ✅ | ✅ |
|
| test_chat_streaming_image | ⚪ | ✅ | ✅ |
|
||||||
| test_chat_streaming_structured_output (calendar) | ✅ | ✅ | ✅ |
|
| test_chat_streaming_structured_output (calendar) | ✅ | ✅ | ✅ |
|
||||||
| test_chat_streaming_structured_output (math) | ✅ | ✅ | ✅ |
|
| test_chat_streaming_structured_output (math) | ✅ | ✅ | ✅ |
|
||||||
|
| test_chat_streaming_tool_calling | ❌ | ❌ | ❌ |
|
||||||
|
|
||||||
## Openai
|
## Openai
|
||||||
|
|
||||||
*Tests run on: 2025-04-09 22:51:44*
|
*Tests run on: 2025-04-10 16:47:28*
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# Run all tests for this provider:
|
# Run all tests for this provider:
|
||||||
|
@ -127,3 +129,4 @@ pytest tests/verifications/openai_api/test_chat_completion.py --provider=openai
|
||||||
| test_chat_streaming_image | ✅ | ✅ |
|
| test_chat_streaming_image | ✅ | ✅ |
|
||||||
| test_chat_streaming_structured_output (calendar) | ✅ | ✅ |
|
| test_chat_streaming_structured_output (calendar) | ✅ | ✅ |
|
||||||
| test_chat_streaming_structured_output (math) | ✅ | ✅ |
|
| test_chat_streaming_structured_output (math) | ✅ | ✅ |
|
||||||
|
| test_chat_streaming_tool_calling | ✅ | ✅ |
|
||||||
|
|
|
@ -77,8 +77,9 @@ def run_tests(provider, keyword=None):
|
||||||
print(f"Running tests for provider: {provider}")
|
print(f"Running tests for provider: {provider}")
|
||||||
|
|
||||||
timestamp = int(time.time())
|
timestamp = int(time.time())
|
||||||
result_file = RESULTS_DIR / f"{provider}_{timestamp}.json"
|
# Use a constant filename for the final result and temp file
|
||||||
temp_json_file = RESULTS_DIR / f"temp_{provider}_{timestamp}.json"
|
result_file = RESULTS_DIR / f"{provider}.json"
|
||||||
|
temp_json_file = RESULTS_DIR / f"temp_{provider}.json"
|
||||||
|
|
||||||
# Determine project root directory relative to this script
|
# Determine project root directory relative to this script
|
||||||
project_root = Path(__file__).parent.parent.parent
|
project_root = Path(__file__).parent.parent.parent
|
||||||
|
@ -106,11 +107,12 @@ def run_tests(provider, keyword=None):
|
||||||
|
|
||||||
# Check if the JSON file was created
|
# Check if the JSON file was created
|
||||||
if temp_json_file.exists():
|
if temp_json_file.exists():
|
||||||
# Read the JSON file and save it to our results format
|
|
||||||
with open(temp_json_file, "r") as f:
|
with open(temp_json_file, "r") as f:
|
||||||
test_results = json.load(f)
|
test_results = json.load(f)
|
||||||
|
|
||||||
# Save results to our own format with a trailing newline
|
test_results["run_timestamp"] = timestamp
|
||||||
|
|
||||||
|
# Save results to the final (overwritten) file
|
||||||
with open(result_file, "w") as f:
|
with open(result_file, "w") as f:
|
||||||
json.dump(test_results, f, indent=2)
|
json.dump(test_results, f, indent=2)
|
||||||
f.write("\n") # Add a trailing newline for precommit
|
f.write("\n") # Add a trailing newline for precommit
|
||||||
|
@ -132,7 +134,7 @@ def run_tests(provider, keyword=None):
|
||||||
|
|
||||||
def parse_results(
|
def parse_results(
|
||||||
result_file,
|
result_file,
|
||||||
) -> Tuple[DefaultDict[str, DefaultDict[str, Dict[str, bool]]], DefaultDict[str, Set[str]], Set[str]]:
|
) -> Tuple[DefaultDict[str, DefaultDict[str, Dict[str, bool]]], DefaultDict[str, Set[str]], Set[str], str]:
|
||||||
"""Parse a single test results file.
|
"""Parse a single test results file.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
|
@ -140,11 +142,12 @@ def parse_results(
|
||||||
- parsed_results: DefaultDict[provider, DefaultDict[model, Dict[test_name, pass_status]]]
|
- parsed_results: DefaultDict[provider, DefaultDict[model, Dict[test_name, pass_status]]]
|
||||||
- providers_in_file: DefaultDict[provider, Set[model]] found in this file.
|
- providers_in_file: DefaultDict[provider, Set[model]] found in this file.
|
||||||
- tests_in_file: Set[test_name] found in this file.
|
- tests_in_file: Set[test_name] found in this file.
|
||||||
|
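- run_timestamp: Local time of the test run formatted as "%Y-%m-%d %H:%M:%S"; "Unknown" if the file has no "run_timestamp" field, or "" if the file is missing or contains no tests.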
|
||||||
"""
|
"""
|
||||||
if not os.path.exists(result_file):
|
if not os.path.exists(result_file):
|
||||||
print(f"Results file does not exist: {result_file}")
|
print(f"Results file does not exist: {result_file}")
|
||||||
# Return empty defaultdicts/set matching the type hint
|
# Return empty defaultdicts/set and an empty timestamp string, matching the type hint
|
||||||
return defaultdict(lambda: defaultdict(dict)), defaultdict(set), set()
|
return defaultdict(lambda: defaultdict(dict)), defaultdict(set), set(), ""
|
||||||
|
|
||||||
with open(result_file, "r") as f:
|
with open(result_file, "r") as f:
|
||||||
results = json.load(f)
|
results = json.load(f)
|
||||||
|
@ -153,7 +156,16 @@ def parse_results(
|
||||||
parsed_results: DefaultDict[str, DefaultDict[str, Dict[str, bool]]] = defaultdict(lambda: defaultdict(dict))
|
parsed_results: DefaultDict[str, DefaultDict[str, Dict[str, bool]]] = defaultdict(lambda: defaultdict(dict))
|
||||||
providers_in_file: DefaultDict[str, Set[str]] = defaultdict(set)
|
providers_in_file: DefaultDict[str, Set[str]] = defaultdict(set)
|
||||||
tests_in_file: Set[str] = set()
|
tests_in_file: Set[str] = set()
|
||||||
provider: str = os.path.basename(result_file).split("_")[0]
|
# Extract provider from filename (e.g., "openai.json" -> "openai")
|
||||||
|
provider: str = result_file.stem
|
||||||
|
|
||||||
|
# Extract run timestamp from the JSON data
|
||||||
|
run_timestamp_unix = results.get("run_timestamp")
|
||||||
|
run_timestamp_str = (
|
||||||
|
time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(run_timestamp_unix))
|
||||||
|
if run_timestamp_unix is not None
|
||||||
|
else "Unknown"
|
||||||
|
)
|
||||||
|
|
||||||
# Debug: Print summary of test results
|
# Debug: Print summary of test results
|
||||||
print(f"Test results summary for {provider}:")
|
print(f"Test results summary for {provider}:")
|
||||||
|
@ -167,7 +179,7 @@ def parse_results(
|
||||||
if "tests" not in results or not results["tests"]:
|
if "tests" not in results or not results["tests"]:
|
||||||
print(f"No test results found in {result_file}")
|
print(f"No test results found in {result_file}")
|
||||||
# Return empty defaultdicts/set matching the type hint
|
# Return empty defaultdicts/set and an empty timestamp string, matching the type hint
|
||||||
return defaultdict(lambda: defaultdict(dict)), defaultdict(set), set()
|
return defaultdict(lambda: defaultdict(dict)), defaultdict(set), set(), ""
|
||||||
|
|
||||||
# Process the tests
|
# Process the tests
|
||||||
for test in results["tests"]:
|
for test in results["tests"]:
|
||||||
|
@ -225,59 +237,29 @@ def parse_results(
|
||||||
if not parsed_results.get(provider):
|
if not parsed_results.get(provider):
|
||||||
print(f"Warning: No valid test results parsed for provider {provider} from file {result_file}")
|
print(f"Warning: No valid test results parsed for provider {provider} from file {result_file}")
|
||||||
|
|
||||||
return parsed_results, providers_in_file, tests_in_file
|
return parsed_results, providers_in_file, tests_in_file, run_timestamp_str
|
||||||
|
|
||||||
|
|
||||||
def cleanup_old_results(providers_to_clean: Dict[str, Set[str]]):
|
def get_all_result_files_by_provider():
|
||||||
"""Clean up old test result files, keeping only the newest N per provider."""
|
"""Get all test result files, keyed by provider."""
|
||||||
# Use the passed-in providers dictionary
|
|
||||||
for provider in providers_to_clean.keys():
|
|
||||||
# Get all result files for this provider
|
|
||||||
provider_files = list(RESULTS_DIR.glob(f"{provider}_*.json"))
|
|
||||||
|
|
||||||
# Sort by timestamp (newest first)
|
|
||||||
provider_files.sort(key=lambda x: int(x.stem.split("_")[1]), reverse=True)
|
|
||||||
|
|
||||||
# Remove old files beyond the max to keep
|
|
||||||
if len(provider_files) > MAX_RESULTS_PER_PROVIDER:
|
|
||||||
for old_file in provider_files[MAX_RESULTS_PER_PROVIDER:]:
|
|
||||||
try:
|
|
||||||
old_file.unlink()
|
|
||||||
print(f"Removed old result file: {old_file}")
|
|
||||||
except Exception as e:
|
|
||||||
print(f"Error removing file {old_file}: {e}")
|
|
||||||
|
|
||||||
|
|
||||||
def get_latest_results_by_provider():
|
|
||||||
"""Get the latest test result file for each provider"""
|
|
||||||
provider_results = {}
|
provider_results = {}
|
||||||
|
|
||||||
# Get all result files
|
|
||||||
result_files = list(RESULTS_DIR.glob("*.json"))
|
result_files = list(RESULTS_DIR.glob("*.json"))
|
||||||
|
|
||||||
# Extract all provider names from filenames
|
|
||||||
all_providers = set()
|
|
||||||
for file in result_files:
|
for file in result_files:
|
||||||
# File format is provider_timestamp.json
|
provider = file.stem
|
||||||
parts = file.stem.split("_")
|
if provider:
|
||||||
if len(parts) >= 2:
|
provider_results[provider] = file
|
||||||
all_providers.add(parts[0])
|
|
||||||
|
|
||||||
# Group by provider
|
|
||||||
for provider in all_providers:
|
|
||||||
provider_files = [f for f in result_files if f.name.startswith(f"{provider}_")]
|
|
||||||
|
|
||||||
# Sort by timestamp (newest first)
|
|
||||||
provider_files.sort(key=lambda x: int(x.stem.split("_")[1]), reverse=True)
|
|
||||||
|
|
||||||
if provider_files:
|
|
||||||
provider_results[provider] = provider_files[0]
|
|
||||||
|
|
||||||
return provider_results
|
return provider_results
|
||||||
|
|
||||||
|
|
||||||
def generate_report(
|
def generate_report(
|
||||||
results_dict: Dict[str, Any], providers: Dict[str, Set[str]], all_tests: Set[str], output_file=None
|
results_dict: Dict[str, Any],
|
||||||
|
providers: Dict[str, Set[str]],
|
||||||
|
all_tests: Set[str],
|
||||||
|
provider_timestamps: Dict[str, str],
|
||||||
|
output_file=None,
|
||||||
):
|
):
|
||||||
"""Generate the markdown report.
|
"""Generate the markdown report.
|
||||||
|
|
||||||
|
@ -285,6 +267,7 @@ def generate_report(
|
||||||
results_dict: Aggregated results [provider][model][test_name] -> status.
|
results_dict: Aggregated results [provider][model][test_name] -> status.
|
||||||
providers: Dict of all providers and their models {provider: {models}}.
|
providers: Dict of all providers and their models {provider: {models}}.
|
||||||
all_tests: Set of all test names found.
|
all_tests: Set of all test names found.
|
||||||
|
provider_timestamps: Dict of provider to timestamp when tests were run
|
||||||
output_file: Optional path to save the report.
|
output_file: Optional path to save the report.
|
||||||
"""
|
"""
|
||||||
if output_file is None:
|
if output_file is None:
|
||||||
|
@ -293,19 +276,6 @@ def generate_report(
|
||||||
else:
|
else:
|
||||||
output_file = Path(output_file)
|
output_file = Path(output_file)
|
||||||
|
|
||||||
# Get the timestamp from result files
|
|
||||||
provider_timestamps = {}
|
|
||||||
provider_results_files = get_latest_results_by_provider()
|
|
||||||
for provider, result_file in provider_results_files.items():
|
|
||||||
# Extract timestamp from filename (format: provider_timestamp.json)
|
|
||||||
try:
|
|
||||||
timestamp_str = result_file.stem.split("_")[1]
|
|
||||||
timestamp = int(timestamp_str)
|
|
||||||
formatted_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(timestamp))
|
|
||||||
provider_timestamps[provider] = formatted_time
|
|
||||||
except (IndexError, ValueError):
|
|
||||||
provider_timestamps[provider] = "Unknown"
|
|
||||||
|
|
||||||
# Convert provider model sets to sorted lists (use passed-in providers dict)
|
# Convert provider model sets to sorted lists (use passed-in providers dict)
|
||||||
providers_sorted = {prov: sorted(models) for prov, models in providers.items()}
|
providers_sorted = {prov: sorted(models) for prov, models in providers.items()}
|
||||||
|
|
||||||
|
@ -416,7 +386,7 @@ def generate_report(
|
||||||
else:
|
else:
|
||||||
example_base_test_name = first_test_name
|
example_base_test_name = first_test_name
|
||||||
|
|
||||||
base_name = base_test_name_map.get(test, test) # Get base name
|
base_name = base_test_name_map.get(first_test_name, first_test_name) # Get base name
|
||||||
case_count = base_test_case_counts.get(base_name, 1) # Get count
|
case_count = base_test_case_counts.get(base_name, 1) # Get count
|
||||||
filter_str = f"{example_base_test_name} and {example_case_id}" if case_count > 1 else example_base_test_name
|
filter_str = f"{example_base_test_name} and {example_case_id}" if case_count > 1 else example_base_test_name
|
||||||
|
|
||||||
|
@ -491,6 +461,7 @@ def main():
|
||||||
# Initialize collections to aggregate results in main
|
# Initialize collections to aggregate results in main
|
||||||
aggregated_providers = defaultdict(set)
|
aggregated_providers = defaultdict(set)
|
||||||
aggregated_tests = set()
|
aggregated_tests = set()
|
||||||
|
provider_timestamps = {}
|
||||||
|
|
||||||
if args.run_tests:
|
if args.run_tests:
|
||||||
# Get list of available providers from command line or use detected providers
|
# Get list of available providers from command line or use detected providers
|
||||||
|
@ -512,28 +483,28 @@ def main():
|
||||||
result_file = run_tests(provider, keyword=args.k)
|
result_file = run_tests(provider, keyword=args.k)
|
||||||
if result_file:
|
if result_file:
|
||||||
# Parse and aggregate results
|
# Parse and aggregate results
|
||||||
parsed_results, providers_in_file, tests_in_file = parse_results(result_file)
|
parsed_results, providers_in_file, tests_in_file, run_timestamp = parse_results(result_file)
|
||||||
all_results.update(parsed_results)
|
all_results.update(parsed_results)
|
||||||
for prov, models in providers_in_file.items():
|
for prov, models in providers_in_file.items():
|
||||||
aggregated_providers[prov].update(models)
|
aggregated_providers[prov].update(models)
|
||||||
|
if run_timestamp:
|
||||||
|
provider_timestamps[prov] = run_timestamp
|
||||||
aggregated_tests.update(tests_in_file)
|
aggregated_tests.update(tests_in_file)
|
||||||
else:
|
else:
|
||||||
# Use existing results
|
# Use existing results
|
||||||
provider_result_files = get_latest_results_by_provider()
|
provider_result_files = get_all_result_files_by_provider()
|
||||||
|
|
||||||
for result_file in provider_result_files.values():
|
for result_file in provider_result_files.values():
|
||||||
# Parse and aggregate results
|
# Parse and aggregate results
|
||||||
parsed_results, providers_in_file, tests_in_file = parse_results(result_file)
|
parsed_results, providers_in_file, tests_in_file, run_timestamp = parse_results(result_file)
|
||||||
all_results.update(parsed_results)
|
all_results.update(parsed_results)
|
||||||
for prov, models in providers_in_file.items():
|
for prov, models in providers_in_file.items():
|
||||||
aggregated_providers[prov].update(models)
|
aggregated_providers[prov].update(models)
|
||||||
|
if run_timestamp:
|
||||||
|
provider_timestamps[prov] = run_timestamp
|
||||||
aggregated_tests.update(tests_in_file)
|
aggregated_tests.update(tests_in_file)
|
||||||
|
|
||||||
# Generate the report, passing aggregated data
|
generate_report(all_results, aggregated_providers, aggregated_tests, provider_timestamps, args.output)
|
||||||
generate_report(all_results, aggregated_providers, aggregated_tests, args.output)
|
|
||||||
|
|
||||||
# Cleanup, passing aggregated providers
|
|
||||||
cleanup_old_results(aggregated_providers)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -1,13 +1,13 @@
|
||||||
{
|
{
|
||||||
"created": 1744264338.9923031,
|
"created": 1744328898.0248861,
|
||||||
"duration": 32.825536012649536,
|
"duration": 47.561042070388794,
|
||||||
"exitcode": 0,
|
"exitcode": 0,
|
||||||
"root": "/Users/erichuang/projects/llama-stack",
|
"root": "/Users/erichuang/projects/llama-stack",
|
||||||
"environment": {},
|
"environment": {},
|
||||||
"summary": {
|
"summary": {
|
||||||
"passed": 22,
|
"passed": 24,
|
||||||
"total": 22,
|
"total": 24,
|
||||||
"collected": 22
|
"collected": 24
|
||||||
},
|
},
|
||||||
"collectors": [
|
"collectors": [
|
||||||
{
|
{
|
||||||
|
@ -27,112 +27,122 @@
|
||||||
{
|
{
|
||||||
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[gpt-4o-earth]",
|
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[gpt-4o-earth]",
|
||||||
"type": "Function",
|
"type": "Function",
|
||||||
"lineno": 72
|
"lineno": 73
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[gpt-4o-saturn]",
|
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[gpt-4o-saturn]",
|
||||||
"type": "Function",
|
"type": "Function",
|
||||||
"lineno": 72
|
"lineno": 73
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[gpt-4o-mini-earth]",
|
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[gpt-4o-mini-earth]",
|
||||||
"type": "Function",
|
"type": "Function",
|
||||||
"lineno": 72
|
"lineno": 73
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[gpt-4o-mini-saturn]",
|
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[gpt-4o-mini-saturn]",
|
||||||
"type": "Function",
|
"type": "Function",
|
||||||
"lineno": 72
|
"lineno": 73
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[gpt-4o-earth]",
|
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[gpt-4o-earth]",
|
||||||
"type": "Function",
|
"type": "Function",
|
||||||
"lineno": 91
|
"lineno": 92
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[gpt-4o-saturn]",
|
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[gpt-4o-saturn]",
|
||||||
"type": "Function",
|
"type": "Function",
|
||||||
"lineno": 91
|
"lineno": 92
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[gpt-4o-mini-earth]",
|
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[gpt-4o-mini-earth]",
|
||||||
"type": "Function",
|
"type": "Function",
|
||||||
"lineno": 91
|
"lineno": 92
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[gpt-4o-mini-saturn]",
|
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[gpt-4o-mini-saturn]",
|
||||||
"type": "Function",
|
"type": "Function",
|
||||||
"lineno": 91
|
"lineno": 92
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[gpt-4o-case0]",
|
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[gpt-4o-case0]",
|
||||||
"type": "Function",
|
"type": "Function",
|
||||||
"lineno": 115
|
"lineno": 116
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[gpt-4o-mini-case0]",
|
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[gpt-4o-mini-case0]",
|
||||||
"type": "Function",
|
"type": "Function",
|
||||||
"lineno": 115
|
"lineno": 116
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[gpt-4o-case0]",
|
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[gpt-4o-case0]",
|
||||||
"type": "Function",
|
"type": "Function",
|
||||||
"lineno": 134
|
"lineno": 135
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[gpt-4o-mini-case0]",
|
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[gpt-4o-mini-case0]",
|
||||||
"type": "Function",
|
"type": "Function",
|
||||||
"lineno": 134
|
"lineno": 135
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[gpt-4o-calendar]",
|
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[gpt-4o-calendar]",
|
||||||
"type": "Function",
|
"type": "Function",
|
||||||
"lineno": 158
|
"lineno": 159
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[gpt-4o-math]",
|
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[gpt-4o-math]",
|
||||||
"type": "Function",
|
"type": "Function",
|
||||||
"lineno": 158
|
"lineno": 159
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[gpt-4o-mini-calendar]",
|
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[gpt-4o-mini-calendar]",
|
||||||
"type": "Function",
|
"type": "Function",
|
||||||
"lineno": 158
|
"lineno": 159
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[gpt-4o-mini-math]",
|
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[gpt-4o-mini-math]",
|
||||||
"type": "Function",
|
"type": "Function",
|
||||||
"lineno": 158
|
"lineno": 159
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[gpt-4o-calendar]",
|
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[gpt-4o-calendar]",
|
||||||
"type": "Function",
|
"type": "Function",
|
||||||
"lineno": 181
|
"lineno": 182
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[gpt-4o-math]",
|
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[gpt-4o-math]",
|
||||||
"type": "Function",
|
"type": "Function",
|
||||||
"lineno": 181
|
"lineno": 182
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[gpt-4o-mini-calendar]",
|
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[gpt-4o-mini-calendar]",
|
||||||
"type": "Function",
|
"type": "Function",
|
||||||
"lineno": 181
|
"lineno": 182
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[gpt-4o-mini-math]",
|
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[gpt-4o-mini-math]",
|
||||||
"type": "Function",
|
"type": "Function",
|
||||||
"lineno": 181
|
"lineno": 182
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[gpt-4o-case0]",
|
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[gpt-4o-case0]",
|
||||||
"type": "Function",
|
"type": "Function",
|
||||||
"lineno": 203
|
"lineno": 204
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[gpt-4o-mini-case0]",
|
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[gpt-4o-mini-case0]",
|
||||||
"type": "Function",
|
"type": "Function",
|
||||||
"lineno": 203
|
"lineno": 204
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[gpt-4o-case0]",
|
||||||
|
"type": "Function",
|
||||||
|
"lineno": 228
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[gpt-4o-mini-case0]",
|
||||||
|
"type": "Function",
|
||||||
|
"lineno": 228
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
@ -140,7 +150,7 @@
|
||||||
"tests": [
|
"tests": [
|
||||||
{
|
{
|
||||||
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[gpt-4o-earth]",
|
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[gpt-4o-earth]",
|
||||||
"lineno": 72,
|
"lineno": 73,
|
||||||
"outcome": "passed",
|
"outcome": "passed",
|
||||||
"keywords": [
|
"keywords": [
|
||||||
"test_chat_non_streaming_basic[gpt-4o-earth]",
|
"test_chat_non_streaming_basic[gpt-4o-earth]",
|
||||||
|
@ -159,21 +169,21 @@
|
||||||
"case_id": "earth"
|
"case_id": "earth"
|
||||||
},
|
},
|
||||||
"setup": {
|
"setup": {
|
||||||
"duration": 0.05381445901002735,
|
"duration": 0.0694252080284059,
|
||||||
"outcome": "passed"
|
"outcome": "passed"
|
||||||
},
|
},
|
||||||
"call": {
|
"call": {
|
||||||
"duration": 0.49848275003023446,
|
"duration": 0.5709165419684723,
|
||||||
"outcome": "passed"
|
"outcome": "passed"
|
||||||
},
|
},
|
||||||
"teardown": {
|
"teardown": {
|
||||||
"duration": 0.00018287496641278267,
|
"duration": 0.0007626248989254236,
|
||||||
"outcome": "passed"
|
"outcome": "passed"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[gpt-4o-saturn]",
|
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[gpt-4o-saturn]",
|
||||||
"lineno": 72,
|
"lineno": 73,
|
||||||
"outcome": "passed",
|
"outcome": "passed",
|
||||||
"keywords": [
|
"keywords": [
|
||||||
"test_chat_non_streaming_basic[gpt-4o-saturn]",
|
"test_chat_non_streaming_basic[gpt-4o-saturn]",
|
||||||
|
@ -192,21 +202,21 @@
|
||||||
"case_id": "saturn"
|
"case_id": "saturn"
|
||||||
},
|
},
|
||||||
"setup": {
|
"setup": {
|
||||||
"duration": 0.007965500000864267,
|
"duration": 0.010281750001013279,
|
||||||
"outcome": "passed"
|
"outcome": "passed"
|
||||||
},
|
},
|
||||||
"call": {
|
"call": {
|
||||||
"duration": 0.9293275829404593,
|
"duration": 0.6309260830748826,
|
||||||
"outcome": "passed"
|
"outcome": "passed"
|
||||||
},
|
},
|
||||||
"teardown": {
|
"teardown": {
|
||||||
"duration": 0.00018229195848107338,
|
"duration": 0.0001824579667299986,
|
||||||
"outcome": "passed"
|
"outcome": "passed"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[gpt-4o-mini-earth]",
|
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[gpt-4o-mini-earth]",
|
||||||
"lineno": 72,
|
"lineno": 73,
|
||||||
"outcome": "passed",
|
"outcome": "passed",
|
||||||
"keywords": [
|
"keywords": [
|
||||||
"test_chat_non_streaming_basic[gpt-4o-mini-earth]",
|
"test_chat_non_streaming_basic[gpt-4o-mini-earth]",
|
||||||
|
@ -225,21 +235,21 @@
|
||||||
"case_id": "earth"
|
"case_id": "earth"
|
||||||
},
|
},
|
||||||
"setup": {
|
"setup": {
|
||||||
"duration": 0.00875679193995893,
|
"duration": 0.007922374992631376,
|
||||||
"outcome": "passed"
|
"outcome": "passed"
|
||||||
},
|
},
|
||||||
"call": {
|
"call": {
|
||||||
"duration": 0.5793640419142321,
|
"duration": 0.31756504194345325,
|
||||||
"outcome": "passed"
|
"outcome": "passed"
|
||||||
},
|
},
|
||||||
"teardown": {
|
"teardown": {
|
||||||
"duration": 0.0005307920509949327,
|
"duration": 0.0005268750246614218,
|
||||||
"outcome": "passed"
|
"outcome": "passed"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[gpt-4o-mini-saturn]",
|
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[gpt-4o-mini-saturn]",
|
||||||
"lineno": 72,
|
"lineno": 73,
|
||||||
"outcome": "passed",
|
"outcome": "passed",
|
||||||
"keywords": [
|
"keywords": [
|
||||||
"test_chat_non_streaming_basic[gpt-4o-mini-saturn]",
|
"test_chat_non_streaming_basic[gpt-4o-mini-saturn]",
|
||||||
|
@ -258,21 +268,21 @@
|
||||||
"case_id": "saturn"
|
"case_id": "saturn"
|
||||||
},
|
},
|
||||||
"setup": {
|
"setup": {
|
||||||
"duration": 0.01076845801435411,
|
"duration": 0.01643404201604426,
|
||||||
"outcome": "passed"
|
"outcome": "passed"
|
||||||
},
|
},
|
||||||
"call": {
|
"call": {
|
||||||
"duration": 0.8752291660057381,
|
"duration": 0.7479908330133185,
|
||||||
"outcome": "passed"
|
"outcome": "passed"
|
||||||
},
|
},
|
||||||
"teardown": {
|
"teardown": {
|
||||||
"duration": 0.0004834589781239629,
|
"duration": 0.0004037501057609916,
|
||||||
"outcome": "passed"
|
"outcome": "passed"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[gpt-4o-earth]",
|
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[gpt-4o-earth]",
|
||||||
"lineno": 91,
|
"lineno": 92,
|
||||||
"outcome": "passed",
|
"outcome": "passed",
|
||||||
"keywords": [
|
"keywords": [
|
||||||
"test_chat_streaming_basic[gpt-4o-earth]",
|
"test_chat_streaming_basic[gpt-4o-earth]",
|
||||||
|
@ -291,21 +301,21 @@
|
||||||
"case_id": "earth"
|
"case_id": "earth"
|
||||||
},
|
},
|
||||||
"setup": {
|
"setup": {
|
||||||
"duration": 0.01662245800253004,
|
"duration": 0.021671707974746823,
|
||||||
"outcome": "passed"
|
"outcome": "passed"
|
||||||
},
|
},
|
||||||
"call": {
|
"call": {
|
||||||
"duration": 0.8336971249664202,
|
"duration": 0.6701172919711098,
|
||||||
"outcome": "passed"
|
"outcome": "passed"
|
||||||
},
|
},
|
||||||
"teardown": {
|
"teardown": {
|
||||||
"duration": 0.0024086670018732548,
|
"duration": 0.0005569590721279383,
|
||||||
"outcome": "passed"
|
"outcome": "passed"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[gpt-4o-saturn]",
|
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[gpt-4o-saturn]",
|
||||||
"lineno": 91,
|
"lineno": 92,
|
||||||
"outcome": "passed",
|
"outcome": "passed",
|
||||||
"keywords": [
|
"keywords": [
|
||||||
"test_chat_streaming_basic[gpt-4o-saturn]",
|
"test_chat_streaming_basic[gpt-4o-saturn]",
|
||||||
|
@ -324,21 +334,21 @@
|
||||||
"case_id": "saturn"
|
"case_id": "saturn"
|
||||||
},
|
},
|
||||||
"setup": {
|
"setup": {
|
||||||
"duration": 0.009416291955858469,
|
"duration": 0.015847125090658665,
|
||||||
"outcome": "passed"
|
"outcome": "passed"
|
||||||
},
|
},
|
||||||
"call": {
|
"call": {
|
||||||
"duration": 0.43594495789147913,
|
"duration": 0.636536999954842,
|
||||||
"outcome": "passed"
|
"outcome": "passed"
|
||||||
},
|
},
|
||||||
"teardown": {
|
"teardown": {
|
||||||
"duration": 0.0009131249971687794,
|
"duration": 0.00029395800083875656,
|
||||||
"outcome": "passed"
|
"outcome": "passed"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[gpt-4o-mini-earth]",
|
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[gpt-4o-mini-earth]",
|
||||||
"lineno": 91,
|
"lineno": 92,
|
||||||
"outcome": "passed",
|
"outcome": "passed",
|
||||||
"keywords": [
|
"keywords": [
|
||||||
"test_chat_streaming_basic[gpt-4o-mini-earth]",
|
"test_chat_streaming_basic[gpt-4o-mini-earth]",
|
||||||
|
@ -357,21 +367,21 @@
|
||||||
"case_id": "earth"
|
"case_id": "earth"
|
||||||
},
|
},
|
||||||
"setup": {
|
"setup": {
|
||||||
"duration": 0.013155042077414691,
|
"duration": 0.011792832985520363,
|
||||||
"outcome": "passed"
|
"outcome": "passed"
|
||||||
},
|
},
|
||||||
"call": {
|
"call": {
|
||||||
"duration": 0.6119836670113727,
|
"duration": 0.5610962919890881,
|
||||||
"outcome": "passed"
|
"outcome": "passed"
|
||||||
},
|
},
|
||||||
"teardown": {
|
"teardown": {
|
||||||
"duration": 0.00023804197553545237,
|
"duration": 0.0003578749019652605,
|
||||||
"outcome": "passed"
|
"outcome": "passed"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[gpt-4o-mini-saturn]",
|
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[gpt-4o-mini-saturn]",
|
||||||
"lineno": 91,
|
"lineno": 92,
|
||||||
"outcome": "passed",
|
"outcome": "passed",
|
||||||
"keywords": [
|
"keywords": [
|
||||||
"test_chat_streaming_basic[gpt-4o-mini-saturn]",
|
"test_chat_streaming_basic[gpt-4o-mini-saturn]",
|
||||||
|
@ -390,21 +400,21 @@
|
||||||
"case_id": "saturn"
|
"case_id": "saturn"
|
||||||
},
|
},
|
||||||
"setup": {
|
"setup": {
|
||||||
"duration": 0.009004916995763779,
|
"duration": 0.016500207944773138,
|
||||||
"outcome": "passed"
|
"outcome": "passed"
|
||||||
},
|
},
|
||||||
"call": {
|
"call": {
|
||||||
"duration": 0.8327413749648258,
|
"duration": 0.8060244580265135,
|
||||||
"outcome": "passed"
|
"outcome": "passed"
|
||||||
},
|
},
|
||||||
"teardown": {
|
"teardown": {
|
||||||
"duration": 0.00046841695439070463,
|
"duration": 0.0005296670133247972,
|
||||||
"outcome": "passed"
|
"outcome": "passed"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[gpt-4o-case0]",
|
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[gpt-4o-case0]",
|
||||||
"lineno": 115,
|
"lineno": 116,
|
||||||
"outcome": "passed",
|
"outcome": "passed",
|
||||||
"keywords": [
|
"keywords": [
|
||||||
"test_chat_non_streaming_image[gpt-4o-case0]",
|
"test_chat_non_streaming_image[gpt-4o-case0]",
|
||||||
|
@ -423,21 +433,21 @@
|
||||||
"case_id": "case0"
|
"case_id": "case0"
|
||||||
},
|
},
|
||||||
"setup": {
|
"setup": {
|
||||||
"duration": 0.009574208059348166,
|
"duration": 0.008338792016729712,
|
||||||
"outcome": "passed"
|
"outcome": "passed"
|
||||||
},
|
},
|
||||||
"call": {
|
"call": {
|
||||||
"duration": 2.221839000005275,
|
"duration": 7.009252917021513,
|
||||||
"outcome": "passed"
|
"outcome": "passed"
|
||||||
},
|
},
|
||||||
"teardown": {
|
"teardown": {
|
||||||
"duration": 0.00015945907216519117,
|
"duration": 0.0003042910248041153,
|
||||||
"outcome": "passed"
|
"outcome": "passed"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[gpt-4o-mini-case0]",
|
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[gpt-4o-mini-case0]",
|
||||||
"lineno": 115,
|
"lineno": 116,
|
||||||
"outcome": "passed",
|
"outcome": "passed",
|
||||||
"keywords": [
|
"keywords": [
|
||||||
"test_chat_non_streaming_image[gpt-4o-mini-case0]",
|
"test_chat_non_streaming_image[gpt-4o-mini-case0]",
|
||||||
|
@ -456,21 +466,21 @@
|
||||||
"case_id": "case0"
|
"case_id": "case0"
|
||||||
},
|
},
|
||||||
"setup": {
|
"setup": {
|
||||||
"duration": 0.0084402080392465,
|
"duration": 0.007238540914840996,
|
||||||
"outcome": "passed"
|
"outcome": "passed"
|
||||||
},
|
},
|
||||||
"call": {
|
"call": {
|
||||||
"duration": 2.298736457945779,
|
"duration": 3.134693874977529,
|
||||||
"outcome": "passed"
|
"outcome": "passed"
|
||||||
},
|
},
|
||||||
"teardown": {
|
"teardown": {
|
||||||
"duration": 0.0002423750702291727,
|
"duration": 0.0003104590578004718,
|
||||||
"outcome": "passed"
|
"outcome": "passed"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[gpt-4o-case0]",
|
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[gpt-4o-case0]",
|
||||||
"lineno": 134,
|
"lineno": 135,
|
||||||
"outcome": "passed",
|
"outcome": "passed",
|
||||||
"keywords": [
|
"keywords": [
|
||||||
"test_chat_streaming_image[gpt-4o-case0]",
|
"test_chat_streaming_image[gpt-4o-case0]",
|
||||||
|
@ -489,21 +499,21 @@
|
||||||
"case_id": "case0"
|
"case_id": "case0"
|
||||||
},
|
},
|
||||||
"setup": {
|
"setup": {
|
||||||
"duration": 0.007330416003242135,
|
"duration": 0.0161851670127362,
|
||||||
"outcome": "passed"
|
"outcome": "passed"
|
||||||
},
|
},
|
||||||
"call": {
|
"call": {
|
||||||
"duration": 4.062959833070636,
|
"duration": 3.0745719589758664,
|
||||||
"outcome": "passed"
|
"outcome": "passed"
|
||||||
},
|
},
|
||||||
"teardown": {
|
"teardown": {
|
||||||
"duration": 0.00015470804646611214,
|
"duration": 0.00022620800882577896,
|
||||||
"outcome": "passed"
|
"outcome": "passed"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[gpt-4o-mini-case0]",
|
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[gpt-4o-mini-case0]",
|
||||||
"lineno": 134,
|
"lineno": 135,
|
||||||
"outcome": "passed",
|
"outcome": "passed",
|
||||||
"keywords": [
|
"keywords": [
|
||||||
"test_chat_streaming_image[gpt-4o-mini-case0]",
|
"test_chat_streaming_image[gpt-4o-mini-case0]",
|
||||||
|
@ -522,21 +532,21 @@
|
||||||
"case_id": "case0"
|
"case_id": "case0"
|
||||||
},
|
},
|
||||||
"setup": {
|
"setup": {
|
||||||
"duration": 0.019998832955025136,
|
"duration": 0.013220708002336323,
|
||||||
"outcome": "passed"
|
"outcome": "passed"
|
||||||
},
|
},
|
||||||
"call": {
|
"call": {
|
||||||
"duration": 2.609432084020227,
|
"duration": 3.624867417034693,
|
||||||
"outcome": "passed"
|
"outcome": "passed"
|
||||||
},
|
},
|
||||||
"teardown": {
|
"teardown": {
|
||||||
"duration": 0.005618917057290673,
|
"duration": 0.00020633300300687551,
|
||||||
"outcome": "passed"
|
"outcome": "passed"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[gpt-4o-calendar]",
|
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[gpt-4o-calendar]",
|
||||||
"lineno": 158,
|
"lineno": 159,
|
||||||
"outcome": "passed",
|
"outcome": "passed",
|
||||||
"keywords": [
|
"keywords": [
|
||||||
"test_chat_non_streaming_structured_output[gpt-4o-calendar]",
|
"test_chat_non_streaming_structured_output[gpt-4o-calendar]",
|
||||||
|
@ -555,21 +565,21 @@
|
||||||
"case_id": "calendar"
|
"case_id": "calendar"
|
||||||
},
|
},
|
||||||
"setup": {
|
"setup": {
|
||||||
"duration": 0.00867662497330457,
|
"duration": 0.017596833989955485,
|
||||||
"outcome": "passed"
|
"outcome": "passed"
|
||||||
},
|
},
|
||||||
"call": {
|
"call": {
|
||||||
"duration": 0.6856697499752045,
|
"duration": 1.248568250099197,
|
||||||
"outcome": "passed"
|
"outcome": "passed"
|
||||||
},
|
},
|
||||||
"teardown": {
|
"teardown": {
|
||||||
"duration": 0.00018445902969688177,
|
"duration": 0.0004248750628903508,
|
||||||
"outcome": "passed"
|
"outcome": "passed"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[gpt-4o-math]",
|
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[gpt-4o-math]",
|
||||||
"lineno": 158,
|
"lineno": 159,
|
||||||
"outcome": "passed",
|
"outcome": "passed",
|
||||||
"keywords": [
|
"keywords": [
|
||||||
"test_chat_non_streaming_structured_output[gpt-4o-math]",
|
"test_chat_non_streaming_structured_output[gpt-4o-math]",
|
||||||
|
@ -588,21 +598,21 @@
|
||||||
"case_id": "math"
|
"case_id": "math"
|
||||||
},
|
},
|
||||||
"setup": {
|
"setup": {
|
||||||
"duration": 0.01139050000347197,
|
"duration": 0.01512012502644211,
|
||||||
"outcome": "passed"
|
"outcome": "passed"
|
||||||
},
|
},
|
||||||
"call": {
|
"call": {
|
||||||
"duration": 2.764390083961189,
|
"duration": 8.170285542029887,
|
||||||
"outcome": "passed"
|
"outcome": "passed"
|
||||||
},
|
},
|
||||||
"teardown": {
|
"teardown": {
|
||||||
"duration": 0.0003164170775562525,
|
"duration": 0.00043537491001188755,
|
||||||
"outcome": "passed"
|
"outcome": "passed"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[gpt-4o-mini-calendar]",
|
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[gpt-4o-mini-calendar]",
|
||||||
"lineno": 158,
|
"lineno": 159,
|
||||||
"outcome": "passed",
|
"outcome": "passed",
|
||||||
"keywords": [
|
"keywords": [
|
||||||
"test_chat_non_streaming_structured_output[gpt-4o-mini-calendar]",
|
"test_chat_non_streaming_structured_output[gpt-4o-mini-calendar]",
|
||||||
|
@ -621,21 +631,21 @@
|
||||||
"case_id": "calendar"
|
"case_id": "calendar"
|
||||||
},
|
},
|
||||||
"setup": {
|
"setup": {
|
||||||
"duration": 0.01321374997496605,
|
"duration": 0.010376665974035859,
|
||||||
"outcome": "passed"
|
"outcome": "passed"
|
||||||
},
|
},
|
||||||
"call": {
|
"call": {
|
||||||
"duration": 0.8284227909753099,
|
"duration": 0.756480542011559,
|
||||||
"outcome": "passed"
|
"outcome": "passed"
|
||||||
},
|
},
|
||||||
"teardown": {
|
"teardown": {
|
||||||
"duration": 0.00030170800164341927,
|
"duration": 0.00025695806834846735,
|
||||||
"outcome": "passed"
|
"outcome": "passed"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[gpt-4o-mini-math]",
|
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[gpt-4o-mini-math]",
|
||||||
"lineno": 158,
|
"lineno": 159,
|
||||||
"outcome": "passed",
|
"outcome": "passed",
|
||||||
"keywords": [
|
"keywords": [
|
||||||
"test_chat_non_streaming_structured_output[gpt-4o-mini-math]",
|
"test_chat_non_streaming_structured_output[gpt-4o-mini-math]",
|
||||||
|
@ -654,21 +664,21 @@
|
||||||
"case_id": "math"
|
"case_id": "math"
|
||||||
},
|
},
|
||||||
"setup": {
|
"setup": {
|
||||||
"duration": 0.013477458036504686,
|
"duration": 0.006846625008620322,
|
||||||
"outcome": "passed"
|
"outcome": "passed"
|
||||||
},
|
},
|
||||||
"call": {
|
"call": {
|
||||||
"duration": 2.4146235829684883,
|
"duration": 2.6833953330060467,
|
||||||
"outcome": "passed"
|
"outcome": "passed"
|
||||||
},
|
},
|
||||||
"teardown": {
|
"teardown": {
|
||||||
"duration": 0.00025754200760275126,
|
"duration": 0.00022558309137821198,
|
||||||
"outcome": "passed"
|
"outcome": "passed"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[gpt-4o-calendar]",
|
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[gpt-4o-calendar]",
|
||||||
"lineno": 181,
|
"lineno": 182,
|
||||||
"outcome": "passed",
|
"outcome": "passed",
|
||||||
"keywords": [
|
"keywords": [
|
||||||
"test_chat_streaming_structured_output[gpt-4o-calendar]",
|
"test_chat_streaming_structured_output[gpt-4o-calendar]",
|
||||||
|
@ -687,21 +697,21 @@
|
||||||
"case_id": "calendar"
|
"case_id": "calendar"
|
||||||
},
|
},
|
||||||
"setup": {
|
"setup": {
|
||||||
"duration": 0.006940583931282163,
|
"duration": 0.009646040969528258,
|
||||||
"outcome": "passed"
|
"outcome": "passed"
|
||||||
},
|
},
|
||||||
"call": {
|
"call": {
|
||||||
"duration": 0.5102092920569703,
|
"duration": 0.6117532079806551,
|
||||||
"outcome": "passed"
|
"outcome": "passed"
|
||||||
},
|
},
|
||||||
"teardown": {
|
"teardown": {
|
||||||
"duration": 0.00023379107005894184,
|
"duration": 0.00015258300118148327,
|
||||||
"outcome": "passed"
|
"outcome": "passed"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[gpt-4o-math]",
|
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[gpt-4o-math]",
|
||||||
"lineno": 181,
|
"lineno": 182,
|
||||||
"outcome": "passed",
|
"outcome": "passed",
|
||||||
"keywords": [
|
"keywords": [
|
||||||
"test_chat_streaming_structured_output[gpt-4o-math]",
|
"test_chat_streaming_structured_output[gpt-4o-math]",
|
||||||
|
@ -720,21 +730,21 @@
|
||||||
"case_id": "math"
|
"case_id": "math"
|
||||||
},
|
},
|
||||||
"setup": {
|
"setup": {
|
||||||
"duration": 0.007166999974288046,
|
"duration": 0.012024458032101393,
|
||||||
"outcome": "passed"
|
"outcome": "passed"
|
||||||
},
|
},
|
||||||
"call": {
|
"call": {
|
||||||
"duration": 3.5751801669830456,
|
"duration": 4.522625041077845,
|
||||||
"outcome": "passed"
|
"outcome": "passed"
|
||||||
},
|
},
|
||||||
"teardown": {
|
"teardown": {
|
||||||
"duration": 0.00015041697770357132,
|
"duration": 0.0004230838967487216,
|
||||||
"outcome": "passed"
|
"outcome": "passed"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[gpt-4o-mini-calendar]",
|
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[gpt-4o-mini-calendar]",
|
||||||
"lineno": 181,
|
"lineno": 182,
|
||||||
"outcome": "passed",
|
"outcome": "passed",
|
||||||
"keywords": [
|
"keywords": [
|
||||||
"test_chat_streaming_structured_output[gpt-4o-mini-calendar]",
|
"test_chat_streaming_structured_output[gpt-4o-mini-calendar]",
|
||||||
|
@ -753,21 +763,21 @@
|
||||||
"case_id": "calendar"
|
"case_id": "calendar"
|
||||||
},
|
},
|
||||||
"setup": {
|
"setup": {
|
||||||
"duration": 0.010652625001966953,
|
"duration": 0.009566582972183824,
|
||||||
"outcome": "passed"
|
"outcome": "passed"
|
||||||
},
|
},
|
||||||
"call": {
|
"call": {
|
||||||
"duration": 0.6648182499920949,
|
"duration": 2.5591942919418216,
|
||||||
"outcome": "passed"
|
"outcome": "passed"
|
||||||
},
|
},
|
||||||
"teardown": {
|
"teardown": {
|
||||||
"duration": 0.0008647920330986381,
|
"duration": 0.0007555419579148293,
|
||||||
"outcome": "passed"
|
"outcome": "passed"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[gpt-4o-mini-math]",
|
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[gpt-4o-mini-math]",
|
||||||
"lineno": 181,
|
"lineno": 182,
|
||||||
"outcome": "passed",
|
"outcome": "passed",
|
||||||
"keywords": [
|
"keywords": [
|
||||||
"test_chat_streaming_structured_output[gpt-4o-mini-math]",
|
"test_chat_streaming_structured_output[gpt-4o-mini-math]",
|
||||||
|
@ -786,21 +796,21 @@
|
||||||
"case_id": "math"
|
"case_id": "math"
|
||||||
},
|
},
|
||||||
"setup": {
|
"setup": {
|
||||||
"duration": 0.007372208056040108,
|
"duration": 0.010828875005245209,
|
||||||
"outcome": "passed"
|
"outcome": "passed"
|
||||||
},
|
},
|
||||||
"call": {
|
"call": {
|
||||||
"duration": 2.80747462506406,
|
"duration": 2.495122667052783,
|
||||||
"outcome": "passed"
|
"outcome": "passed"
|
||||||
},
|
},
|
||||||
"teardown": {
|
"teardown": {
|
||||||
"duration": 0.00028124998789280653,
|
"duration": 0.0002802090020850301,
|
||||||
"outcome": "passed"
|
"outcome": "passed"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[gpt-4o-case0]",
|
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[gpt-4o-case0]",
|
||||||
"lineno": 203,
|
"lineno": 204,
|
||||||
"outcome": "passed",
|
"outcome": "passed",
|
||||||
"keywords": [
|
"keywords": [
|
||||||
"test_chat_non_streaming_tool_calling[gpt-4o-case0]",
|
"test_chat_non_streaming_tool_calling[gpt-4o-case0]",
|
||||||
|
@ -819,21 +829,21 @@
|
||||||
"case_id": "case0"
|
"case_id": "case0"
|
||||||
},
|
},
|
||||||
"setup": {
|
"setup": {
|
||||||
"duration": 0.01625587500166148,
|
"duration": 0.012762792059220374,
|
||||||
"outcome": "passed"
|
"outcome": "passed"
|
||||||
},
|
},
|
||||||
"call": {
|
"call": {
|
||||||
"duration": 0.6878769160248339,
|
"duration": 0.5655921660363674,
|
||||||
"outcome": "passed"
|
"outcome": "passed"
|
||||||
},
|
},
|
||||||
"teardown": {
|
"teardown": {
|
||||||
"duration": 0.0002637499710544944,
|
"duration": 0.00022304197773337364,
|
||||||
"outcome": "passed"
|
"outcome": "passed"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[gpt-4o-mini-case0]",
|
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[gpt-4o-mini-case0]",
|
||||||
"lineno": 203,
|
"lineno": 204,
|
||||||
"outcome": "passed",
|
"outcome": "passed",
|
||||||
"keywords": [
|
"keywords": [
|
||||||
"test_chat_non_streaming_tool_calling[gpt-4o-mini-case0]",
|
"test_chat_non_streaming_tool_calling[gpt-4o-mini-case0]",
|
||||||
|
@ -852,17 +862,84 @@
|
||||||
"case_id": "case0"
|
"case_id": "case0"
|
||||||
},
|
},
|
||||||
"setup": {
|
"setup": {
|
||||||
"duration": 0.008817250025458634,
|
"duration": 0.03188708401285112,
|
||||||
"outcome": "passed"
|
"outcome": "passed"
|
||||||
},
|
},
|
||||||
"call": {
|
"call": {
|
||||||
"duration": 0.7181202919455245,
|
"duration": 0.6159415419679135,
|
||||||
"outcome": "passed"
|
"outcome": "passed"
|
||||||
},
|
},
|
||||||
"teardown": {
|
"teardown": {
|
||||||
"duration": 0.0017147079342976213,
|
"duration": 0.0005549580091610551,
|
||||||
|
"outcome": "passed"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[gpt-4o-case0]",
|
||||||
|
"lineno": 228,
|
||||||
|
"outcome": "passed",
|
||||||
|
"keywords": [
|
||||||
|
"test_chat_streaming_tool_calling[gpt-4o-case0]",
|
||||||
|
"parametrize",
|
||||||
|
"pytestmark",
|
||||||
|
"gpt-4o-case0",
|
||||||
|
"test_chat_completion.py",
|
||||||
|
"openai_api",
|
||||||
|
"verifications",
|
||||||
|
"tests",
|
||||||
|
"llama-stack",
|
||||||
|
""
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"model": "gpt-4o",
|
||||||
|
"case_id": "case0"
|
||||||
|
},
|
||||||
|
"setup": {
|
||||||
|
"duration": 0.014768208027817309,
|
||||||
|
"outcome": "passed"
|
||||||
|
},
|
||||||
|
"call": {
|
||||||
|
"duration": 0.47373537498060614,
|
||||||
|
"outcome": "passed"
|
||||||
|
},
|
||||||
|
"teardown": {
|
||||||
|
"duration": 0.0005811670562252402,
|
||||||
|
"outcome": "passed"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[gpt-4o-mini-case0]",
|
||||||
|
"lineno": 228,
|
||||||
|
"outcome": "passed",
|
||||||
|
"keywords": [
|
||||||
|
"test_chat_streaming_tool_calling[gpt-4o-mini-case0]",
|
||||||
|
"parametrize",
|
||||||
|
"pytestmark",
|
||||||
|
"gpt-4o-mini-case0",
|
||||||
|
"test_chat_completion.py",
|
||||||
|
"openai_api",
|
||||||
|
"verifications",
|
||||||
|
"tests",
|
||||||
|
"llama-stack",
|
||||||
|
""
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"model": "gpt-4o-mini",
|
||||||
|
"case_id": "case0"
|
||||||
|
},
|
||||||
|
"setup": {
|
||||||
|
"duration": 0.010271625011228025,
|
||||||
|
"outcome": "passed"
|
||||||
|
},
|
||||||
|
"call": {
|
||||||
|
"duration": 0.5656027499353513,
|
||||||
|
"outcome": "passed"
|
||||||
|
},
|
||||||
|
"teardown": {
|
||||||
|
"duration": 0.0025699170073494315,
|
||||||
"outcome": "passed"
|
"outcome": "passed"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
]
|
],
|
||||||
|
"run_timestamp": 1744328848
|
||||||
}
|
}
|
File diff suppressed because it is too large
Load diff
Loading…
Add table
Add a link
Reference in a new issue