From 1f2df59ecee2070e49053173d57b1ee44a5f049e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Han?= Date: Thu, 10 Apr 2025 18:37:48 +0200 Subject: [PATCH 01/10] docs: fix model name (#1926) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # What does this PR do? Use llama3.2:3b for consistency. Signed-off-by: Sébastien Han --- docs/source/getting_started/index.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/source/getting_started/index.md b/docs/source/getting_started/index.md index e9ad51961..82329e60e 100644 --- a/docs/source/getting_started/index.md +++ b/docs/source/getting_started/index.md @@ -9,10 +9,10 @@ In this guide, we'll walk through how to build a RAG agent locally using Llama S ### 1. Download a Llama model with Ollama ```bash -ollama pull llama3.2:3b-instruct-fp16 +ollama pull llama3.2:3b ``` -This will instruct the Ollama service to download the Llama 3.2 3B Instruct model, which we'll use in the rest of this guide. +This will instruct the Ollama service to download the Llama 3.2 3B model, which we'll use in the rest of this guide. ```{admonition} Note :class: tip @@ -176,7 +176,7 @@ python inference.py ``` Sample output: ``` -Model: llama3.2:3b-instruct-fp16 +Model: llama3.2:3b Here is a haiku about coding: Lines of code unfold From 09a83b1ec1767242b7949532b07f68ac5b1c97b5 Mon Sep 17 00:00:00 2001 From: Francisco Arceo Date: Thu, 10 Apr 2025 10:38:57 -0600 Subject: [PATCH 02/10] docs: Updating background color for code in darkmode (#1930) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # What does this PR do? A small quality of life adjustment to make the code background for darkmode black. Makes it much easier to differentiate between code and non-code text. 
From: Screenshot 2025-04-10 at 9 22 23 AM To: Screenshot 2025-04-10 at 9 22 43 AM The CSS was sourced from here: https://github.com/MrDogeBro/sphinx_rtd_dark_mode/blob/main/sphinx_rtd_dark_mode/static/dark_mode_css/dark.css Signed-off-by: Francisco Javier Arceo --- docs/_static/css/my_theme.css | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docs/_static/css/my_theme.css b/docs/_static/css/my_theme.css index ccd7d2060..470452661 100644 --- a/docs/_static/css/my_theme.css +++ b/docs/_static/css/my_theme.css @@ -16,3 +16,7 @@ .hide-title h1 { display: none; } + +html[data-theme="dark"] .rst-content div[class^="highlight"] { + background-color: #0b0b0b; +} From 14146e4b3f2757b03f449d74b3498d17353bdcb5 Mon Sep 17 00:00:00 2001 From: ehhuang Date: Thu, 10 Apr 2025 10:26:19 -0700 Subject: [PATCH 03/10] feat(verification): various improvements (#1921) # What does this PR do? - provider and their models now live in config.yaml - better distinguish different cases within a test - add model key to surface provider's model_id - include example command to rerun single test case ## Test Plan image --- tests/verifications/REPORT.md | 125 +- tests/verifications/conf/cerebras.yaml | 10 + tests/verifications/conf/fireworks.yaml | 14 + tests/verifications/conf/groq.yaml | 14 + tests/verifications/conf/openai.yaml | 9 + tests/verifications/conf/together.yaml | 14 + tests/verifications/conftest.py | 67 +- tests/verifications/generate_report.py | 415 +-- .../verifications/openai/fixtures/fixtures.py | 97 - .../openai/test_chat_completion.py | 202 -- .../{openai => openai_api}/__init__.py | 0 .../fixtures/__init__.py | 0 .../openai_api/fixtures/fixtures.py | 105 + .../{openai => openai_api}/fixtures/load.py | 0 .../fixtures/test_cases/chat_completion.yaml | 53 +- .../openai_api/test_chat_completion.py | 271 ++ .../test_results/fireworks_1744154308.json | 2744 ---------------- .../test_results/fireworks_1744264202.json | 1329 ++++++++ .../test_results/openai_1744154522.json | 2672 
---------------- .../test_results/openai_1744264304.json | 868 +++++ .../test_results/together_1744154399.json | 2830 ----------------- .../test_results/together_1744264258.json | 1420 +++++++++ 22 files changed, 4449 insertions(+), 8810 deletions(-) create mode 100644 tests/verifications/conf/cerebras.yaml create mode 100644 tests/verifications/conf/fireworks.yaml create mode 100644 tests/verifications/conf/groq.yaml create mode 100644 tests/verifications/conf/openai.yaml create mode 100644 tests/verifications/conf/together.yaml delete mode 100644 tests/verifications/openai/fixtures/fixtures.py delete mode 100644 tests/verifications/openai/test_chat_completion.py rename tests/verifications/{openai => openai_api}/__init__.py (100%) rename tests/verifications/{openai => openai_api}/fixtures/__init__.py (100%) create mode 100644 tests/verifications/openai_api/fixtures/fixtures.py rename tests/verifications/{openai => openai_api}/fixtures/load.py (100%) rename tests/verifications/{openai => openai_api}/fixtures/test_cases/chat_completion.yaml (78%) create mode 100644 tests/verifications/openai_api/test_chat_completion.py delete mode 100644 tests/verifications/test_results/fireworks_1744154308.json create mode 100644 tests/verifications/test_results/fireworks_1744264202.json delete mode 100644 tests/verifications/test_results/openai_1744154522.json create mode 100644 tests/verifications/test_results/openai_1744264304.json delete mode 100644 tests/verifications/test_results/together_1744154399.json create mode 100644 tests/verifications/test_results/together_1744264258.json diff --git a/tests/verifications/REPORT.md b/tests/verifications/REPORT.md index d5715ae21..449499382 100644 --- a/tests/verifications/REPORT.md +++ b/tests/verifications/REPORT.md @@ -1,6 +1,6 @@ # Test Results Report -*Generated on: 2025-04-08 21:14:02* +*Generated on: 2025-04-09 22:52:19* *This report was generated by running `python tests/verifications/generate_report.py`* @@ -23,66 +23,107 @@ ## 
Together -*Tests run on: 2025-04-08 16:19:59* +*Tests run on: 2025-04-09 22:50:58* ```bash -pytest tests/verifications/openai/test_chat_completion.py --provider=together -v +# Run all tests for this provider: +pytest tests/verifications/openai_api/test_chat_completion.py --provider=together -v + +# Example: Run only the 'earth' case of test_chat_non_streaming_basic: +pytest tests/verifications/openai_api/test_chat_completion.py --provider=together -k "test_chat_non_streaming_basic and earth" ``` -| Test | Llama-3.3-70B-Instruct | Llama-4-Maverick-17B-128E-Instruct | Llama-4-Scout-17B-16E-Instruct | + +**Model Key (Together)** + +| Display Name | Full Model ID | +| --- | --- | +| Llama-3.3-70B-Instruct | `meta-llama/Llama-3.3-70B-Instruct-Turbo` | +| Llama-4-Maverick-Instruct | `meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8` | +| Llama-4-Scout-Instruct | `meta-llama/Llama-4-Scout-17B-16E-Instruct` | + + +| Test | Llama-3.3-70B-Instruct | Llama-4-Maverick-Instruct | Llama-4-Scout-Instruct | | --- | --- | --- | --- | -| test_chat_non_streaming_basic (case 0) | ✅ | ✅ | ✅ | -| test_chat_non_streaming_basic (case 1) | ✅ | ✅ | ✅ | -| test_chat_non_streaming_image (case 0) | ⚪ | ✅ | ✅ | -| test_chat_non_streaming_structured_output (case 0) | ✅ | ✅ | ✅ | -| test_chat_non_streaming_structured_output (case 1) | ✅ | ✅ | ✅ | -| test_chat_non_streaming_tool_calling (case 0) | ✅ | ✅ | ✅ | -| test_chat_streaming_basic (case 0) | ✅ | ❌ | ❌ | -| test_chat_streaming_basic (case 1) | ✅ | ❌ | ❌ | -| test_chat_streaming_image (case 0) | ⚪ | ❌ | ❌ | -| test_chat_streaming_structured_output (case 0) | ✅ | ❌ | ❌ | -| test_chat_streaming_structured_output (case 1) | ✅ | ❌ | ❌ | +| test_chat_non_streaming_basic (earth) | ✅ | ✅ | ✅ | +| test_chat_non_streaming_basic (saturn) | ✅ | ✅ | ✅ | +| test_chat_non_streaming_image | ⚪ | ✅ | ✅ | +| test_chat_non_streaming_structured_output (calendar) | ✅ | ✅ | ✅ | +| test_chat_non_streaming_structured_output (math) | ✅ | ✅ | ✅ | +| 
test_chat_non_streaming_tool_calling | ✅ | ✅ | ✅ | +| test_chat_streaming_basic (earth) | ✅ | ❌ | ❌ | +| test_chat_streaming_basic (saturn) | ✅ | ❌ | ❌ | +| test_chat_streaming_image | ⚪ | ❌ | ❌ | +| test_chat_streaming_structured_output (calendar) | ✅ | ❌ | ❌ | +| test_chat_streaming_structured_output (math) | ✅ | ❌ | ❌ | ## Fireworks -*Tests run on: 2025-04-08 16:18:28* +*Tests run on: 2025-04-09 22:50:02* ```bash -pytest tests/verifications/openai/test_chat_completion.py --provider=fireworks -v +# Run all tests for this provider: +pytest tests/verifications/openai_api/test_chat_completion.py --provider=fireworks -v + +# Example: Run only the 'earth' case of test_chat_non_streaming_basic: +pytest tests/verifications/openai_api/test_chat_completion.py --provider=fireworks -k "test_chat_non_streaming_basic and earth" ``` -| Test | Llama-3.3-70B-Instruct | Llama-4-Maverick-17B-128E-Instruct | Llama-4-Scout-17B-16E-Instruct | + +**Model Key (Fireworks)** + +| Display Name | Full Model ID | +| --- | --- | +| Llama-3.3-70B-Instruct | `accounts/fireworks/models/llama-v3p3-70b-instruct` | +| Llama-4-Maverick-Instruct | `accounts/fireworks/models/llama4-maverick-instruct-basic` | +| Llama-4-Scout-Instruct | `accounts/fireworks/models/llama4-scout-instruct-basic` | + + +| Test | Llama-3.3-70B-Instruct | Llama-4-Maverick-Instruct | Llama-4-Scout-Instruct | | --- | --- | --- | --- | -| test_chat_non_streaming_basic (case 0) | ✅ | ✅ | ✅ | -| test_chat_non_streaming_basic (case 1) | ✅ | ✅ | ✅ | -| test_chat_non_streaming_image (case 0) | ⚪ | ✅ | ✅ | -| test_chat_non_streaming_structured_output (case 0) | ✅ | ✅ | ✅ | -| test_chat_non_streaming_structured_output (case 1) | ✅ | ✅ | ✅ | -| test_chat_non_streaming_tool_calling (case 0) | ✅ | ❌ | ❌ | -| test_chat_streaming_basic (case 0) | ✅ | ✅ | ✅ | -| test_chat_streaming_basic (case 1) | ✅ | ✅ | ✅ | -| test_chat_streaming_image (case 0) | ⚪ | ✅ | ✅ | -| test_chat_streaming_structured_output (case 0) | ✅ | ✅ | ✅ | -| 
test_chat_streaming_structured_output (case 1) | ❌ | ✅ | ✅ | +| test_chat_non_streaming_basic (earth) | ✅ | ✅ | ✅ | +| test_chat_non_streaming_basic (saturn) | ✅ | ✅ | ✅ | +| test_chat_non_streaming_image | ⚪ | ✅ | ✅ | +| test_chat_non_streaming_structured_output (calendar) | ✅ | ✅ | ✅ | +| test_chat_non_streaming_structured_output (math) | ✅ | ✅ | ✅ | +| test_chat_non_streaming_tool_calling | ❌ | ❌ | ❌ | +| test_chat_streaming_basic (earth) | ✅ | ✅ | ✅ | +| test_chat_streaming_basic (saturn) | ✅ | ✅ | ✅ | +| test_chat_streaming_image | ⚪ | ✅ | ✅ | +| test_chat_streaming_structured_output (calendar) | ✅ | ✅ | ✅ | +| test_chat_streaming_structured_output (math) | ✅ | ✅ | ✅ | ## Openai -*Tests run on: 2025-04-08 16:22:02* +*Tests run on: 2025-04-09 22:51:44* ```bash -pytest tests/verifications/openai/test_chat_completion.py --provider=openai -v +# Run all tests for this provider: +pytest tests/verifications/openai_api/test_chat_completion.py --provider=openai -v + +# Example: Run only the 'earth' case of test_chat_non_streaming_basic: +pytest tests/verifications/openai_api/test_chat_completion.py --provider=openai -k "test_chat_non_streaming_basic and earth" ``` + +**Model Key (Openai)** + +| Display Name | Full Model ID | +| --- | --- | +| gpt-4o | `gpt-4o` | +| gpt-4o-mini | `gpt-4o-mini` | + + | Test | gpt-4o | gpt-4o-mini | | --- | --- | --- | -| test_chat_non_streaming_basic (case 0) | ✅ | ✅ | -| test_chat_non_streaming_basic (case 1) | ✅ | ✅ | -| test_chat_non_streaming_image (case 0) | ✅ | ✅ | -| test_chat_non_streaming_structured_output (case 0) | ✅ | ✅ | -| test_chat_non_streaming_structured_output (case 1) | ✅ | ✅ | -| test_chat_non_streaming_tool_calling (case 0) | ✅ | ✅ | -| test_chat_streaming_basic (case 0) | ✅ | ✅ | -| test_chat_streaming_basic (case 1) | ✅ | ✅ | -| test_chat_streaming_image (case 0) | ✅ | ✅ | -| test_chat_streaming_structured_output (case 0) | ✅ | ✅ | -| test_chat_streaming_structured_output (case 1) | ✅ | ✅ | +| 
test_chat_non_streaming_basic (earth) | ✅ | ✅ | +| test_chat_non_streaming_basic (saturn) | ✅ | ✅ | +| test_chat_non_streaming_image | ✅ | ✅ | +| test_chat_non_streaming_structured_output (calendar) | ✅ | ✅ | +| test_chat_non_streaming_structured_output (math) | ✅ | ✅ | +| test_chat_non_streaming_tool_calling | ✅ | ✅ | +| test_chat_streaming_basic (earth) | ✅ | ✅ | +| test_chat_streaming_basic (saturn) | ✅ | ✅ | +| test_chat_streaming_image | ✅ | ✅ | +| test_chat_streaming_structured_output (calendar) | ✅ | ✅ | +| test_chat_streaming_structured_output (math) | ✅ | ✅ | diff --git a/tests/verifications/conf/cerebras.yaml b/tests/verifications/conf/cerebras.yaml new file mode 100644 index 000000000..32a60e766 --- /dev/null +++ b/tests/verifications/conf/cerebras.yaml @@ -0,0 +1,10 @@ +base_url: https://api.cerebras.ai/v1 +api_key_var: CEREBRAS_API_KEY +models: +- llama-3.3-70b +model_display_names: + llama-3.3-70b: Llama-3.3-70B-Instruct +test_exclusions: + llama-3.3-70b: + - test_chat_non_streaming_image + - test_chat_streaming_image \ No newline at end of file diff --git a/tests/verifications/conf/fireworks.yaml b/tests/verifications/conf/fireworks.yaml new file mode 100644 index 000000000..30d6e4d75 --- /dev/null +++ b/tests/verifications/conf/fireworks.yaml @@ -0,0 +1,14 @@ +base_url: https://api.fireworks.ai/inference/v1 +api_key_var: FIREWORKS_API_KEY +models: +- accounts/fireworks/models/llama-v3p3-70b-instruct +- accounts/fireworks/models/llama4-scout-instruct-basic +- accounts/fireworks/models/llama4-maverick-instruct-basic +model_display_names: + accounts/fireworks/models/llama-v3p3-70b-instruct: Llama-3.3-70B-Instruct + accounts/fireworks/models/llama4-scout-instruct-basic: Llama-4-Scout-Instruct + accounts/fireworks/models/llama4-maverick-instruct-basic: Llama-4-Maverick-Instruct +test_exclusions: + accounts/fireworks/models/llama-v3p3-70b-instruct: + - test_chat_non_streaming_image + - test_chat_streaming_image \ No newline at end of file diff --git 
a/tests/verifications/conf/groq.yaml b/tests/verifications/conf/groq.yaml new file mode 100644 index 000000000..ef31a66e5 --- /dev/null +++ b/tests/verifications/conf/groq.yaml @@ -0,0 +1,14 @@ +base_url: https://api.groq.com/openai/v1 +api_key_var: GROQ_API_KEY +models: +- llama-3.3-70b-versatile +- llama-4-scout-17b-16e-instruct +- llama-4-maverick-17b-128e-instruct +model_display_names: + llama-3.3-70b-versatile: Llama-3.3-70B-Instruct + llama-4-scout-17b-16e-instruct: Llama-4-Scout-Instruct + llama-4-maverick-17b-128e-instruct: Llama-4-Maverick-Instruct +test_exclusions: + llama-3.3-70b-versatile: + - test_chat_non_streaming_image + - test_chat_streaming_image \ No newline at end of file diff --git a/tests/verifications/conf/openai.yaml b/tests/verifications/conf/openai.yaml new file mode 100644 index 000000000..89ae698f3 --- /dev/null +++ b/tests/verifications/conf/openai.yaml @@ -0,0 +1,9 @@ +base_url: https://api.openai.com/v1 +api_key_var: OPENAI_API_KEY +models: +- gpt-4o +- gpt-4o-mini +model_display_names: + gpt-4o: gpt-4o + gpt-4o-mini: gpt-4o-mini +test_exclusions: {} \ No newline at end of file diff --git a/tests/verifications/conf/together.yaml b/tests/verifications/conf/together.yaml new file mode 100644 index 000000000..80e86fa77 --- /dev/null +++ b/tests/verifications/conf/together.yaml @@ -0,0 +1,14 @@ +base_url: https://api.together.xyz/v1 +api_key_var: TOGETHER_API_KEY +models: +- meta-llama/Llama-3.3-70B-Instruct-Turbo +- meta-llama/Llama-4-Scout-17B-16E-Instruct +- meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8 +model_display_names: + meta-llama/Llama-3.3-70B-Instruct-Turbo: Llama-3.3-70B-Instruct + meta-llama/Llama-4-Scout-17B-16E-Instruct: Llama-4-Scout-Instruct + meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8: Llama-4-Maverick-Instruct +test_exclusions: + meta-llama/Llama-3.3-70B-Instruct-Turbo: + - test_chat_non_streaming_image + - test_chat_streaming_image \ No newline at end of file diff --git a/tests/verifications/conftest.py 
b/tests/verifications/conftest.py index 08967e834..0b4a6feb7 100644 --- a/tests/verifications/conftest.py +++ b/tests/verifications/conftest.py @@ -4,6 +4,10 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. +import re + +import pytest + def pytest_addoption(parser): parser.addoption( @@ -14,7 +18,7 @@ def pytest_addoption(parser): parser.addoption( "--api-key", action="store", - help="API key", + help="API key to use for the provider", ) parser.addoption( "--provider", @@ -24,5 +28,64 @@ def pytest_addoption(parser): pytest_plugins = [ - "tests.verifications.openai.fixtures.fixtures", + "pytest_jsonreport", + "tests.verifications.openai_api.fixtures.fixtures", + "tests.verifications.openai_api.fixtures.load", ] + + +@pytest.hookimpl(optionalhook=True) +def pytest_json_runtest_metadata(item, call): + """Add model and case_id to pytest-json report metadata.""" + metadata = {} + nodeid = item.nodeid + + # 1. Extract model from callspec if available + model = item.callspec.params.get("model") if hasattr(item, "callspec") else None + if model: + metadata["model"] = model + else: + # Fallback: Try parsing from nodeid (less reliable) + match_model = re.search(r"\[(.*?)-", nodeid) + if match_model: + model = match_model.group(1) # Store model even if found via fallback + metadata["model"] = model + else: + print(f"Warning: Could not determine model for test {nodeid}") + model = None # Ensure model is None if not found + + # 2. Extract case_id using the known model string if possible + if model: + # Construct a regex pattern to find the case_id *after* the model name and a hyphen. + # Escape the model name in case it contains regex special characters. 
+ pattern = re.escape(model) + r"-(.*?)\]$" + match_case = re.search(pattern, nodeid) + if match_case: + case_id = match_case.group(1) + metadata["case_id"] = case_id + else: + # Fallback if the pattern didn't match (e.g., nodeid format unexpected) + # Try the old less specific regex as a last resort. + match_case_fallback = re.search(r"-(.*?)\]$", nodeid) + if match_case_fallback: + case_id = match_case_fallback.group(1) + metadata["case_id"] = case_id + print(f"Warning: Used fallback regex to parse case_id from nodeid {nodeid}") + else: + print(f"Warning: Could not parse case_id from nodeid {nodeid} even with fallback.") + if "case" in (item.callspec.params if hasattr(item, "callspec") else {}): + metadata["case_id"] = "parsing_failed" + elif "case" in (item.callspec.params if hasattr(item, "callspec") else {}): + # Cannot reliably parse case_id without model, but we know it's a case test. + # Try the generic fallback regex. + match_case_fallback = re.search(r"-(.*?)\]$", nodeid) + if match_case_fallback: + case_id = match_case_fallback.group(1) + metadata["case_id"] = case_id + print(f"Warning: Used fallback regex to parse case_id from nodeid {nodeid} (model unknown)") + else: + print(f"Warning: Could not parse case_id from nodeid {nodeid} (model unknown)") + metadata["case_id"] = "parsing_failed_no_model" + # else: Not a test with a model or case param we need to handle. + + return metadata diff --git a/tests/verifications/generate_report.py b/tests/verifications/generate_report.py index 98a5930da..1c760ca19 100755 --- a/tests/verifications/generate_report.py +++ b/tests/verifications/generate_report.py @@ -4,27 +4,48 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
+# /// script +# requires-python = ">=3.10" +# dependencies = [ +# "pytest-json-report", +# "pyyaml", +# ] +# /// """ Test Report Generator -Requirements: - pip install pytest-json-report +Description: + This script runs pytest tests (specifically designed for OpenAI API compatibility checks) + for different providers, aggregates the results from JSON reports, and generates + a markdown summary report (REPORT.md). + + It automatically cleans up old test result files, keeping only the latest + per provider. + + +Configuration: + - Provider details (models, display names) are loaded from the per-provider YAML files in `tests/verifications/conf/`. + - Test cases are defined in YAML files within `tests/verifications/openai_api/fixtures/test_cases/`. + - Test results are stored in `tests/verifications/test_results/`. Usage: - # Generate a report using existing test results + # Generate a report using the latest existing test results python tests/verifications/generate_report.py - # Run tests and generate a report + # Run tests for all configured providers and generate a report python tests/verifications/generate_report.py --run-tests - # Run tests for specific providers + # Run tests only for specific providers (space-separated) python tests/verifications/generate_report.py --run-tests --providers fireworks openai + # Run tests matching a keyword expression (uses pytest -k) + python tests/verifications/generate_report.py --run-tests --providers fireworks --k "streaming" + + # Run a specific test case for a provider + python tests/verifications/generate_report.py --run-tests --providers fireworks --k "test_chat_streaming_basic and earth" + # Save the report to a custom location python tests/verifications/generate_report.py --output custom_report.md - - # Clean up old test result files - python tests/verifications/generate_report.py --cleanup """ import argparse @@ -35,6 +56,9 @@ import subprocess import time from collections import defaultdict from pathlib import Path +from typing import 
Any, DefaultDict, Dict, Set, Tuple + +from tests.verifications.openai_api.fixtures.fixtures import _load_all_verification_configs # Define the root directory for test results RESULTS_DIR = Path(__file__).parent / "test_results" @@ -43,17 +67,12 @@ RESULTS_DIR.mkdir(exist_ok=True) # Maximum number of test result files to keep per provider MAX_RESULTS_PER_PROVIDER = 1 -# Custom order of providers PROVIDER_ORDER = ["together", "fireworks", "groq", "cerebras", "openai"] -# Dictionary to store providers and their models (will be populated dynamically) -PROVIDERS = defaultdict(set) - -# Tests will be dynamically extracted from results -ALL_TESTS = set() +VERIFICATION_CONFIG = _load_all_verification_configs() -def run_tests(provider): +def run_tests(provider, keyword=None): """Run pytest for a specific provider and save results""" print(f"Running tests for provider: {provider}") @@ -61,20 +80,28 @@ def run_tests(provider): result_file = RESULTS_DIR / f"{provider}_{timestamp}.json" temp_json_file = RESULTS_DIR / f"temp_{provider}_{timestamp}.json" + # Determine project root directory relative to this script + project_root = Path(__file__).parent.parent.parent + # Run pytest with JSON output cmd = [ "python", "-m", "pytest", - "tests/verifications/openai/test_chat_completion.py", + "tests/verifications/openai_api/test_chat_completion.py", f"--provider={provider}", "-v", "--json-report", f"--json-report-file={temp_json_file}", ] + # Append -k argument if provided + if keyword: + cmd.extend(["-k", keyword]) + try: - result = subprocess.run(cmd, capture_output=True, text=True) + # Run subprocess with cwd set to project root + result = subprocess.run(cmd, capture_output=True, text=True, cwd=project_root) print(f"Pytest exit code: {result.returncode}") # Check if the JSON file was created @@ -103,18 +130,30 @@ def run_tests(provider): return None -def parse_results(result_file): - """Parse the test results file and extract pass/fail by model and test""" +def parse_results( + 
result_file, +) -> Tuple[DefaultDict[str, DefaultDict[str, Dict[str, bool]]], DefaultDict[str, Set[str]], Set[str]]: + """Parse a single test results file. + + Returns: + Tuple containing: + - parsed_results: DefaultDict[provider, DefaultDict[model, Dict[test_name, pass_status]]] + - providers_in_file: DefaultDict[provider, Set[model]] found in this file. + - tests_in_file: Set[test_name] found in this file. + """ if not os.path.exists(result_file): print(f"Results file does not exist: {result_file}") - return {} + # Return empty defaultdicts/set matching the type hint + return defaultdict(lambda: defaultdict(dict)), defaultdict(set), set() with open(result_file, "r") as f: results = json.load(f) - # Initialize results dictionary - parsed_results = defaultdict(lambda: defaultdict(dict)) - provider = os.path.basename(result_file).split("_")[0] + # Initialize results dictionary with specific types + parsed_results: DefaultDict[str, DefaultDict[str, Dict[str, bool]]] = defaultdict(lambda: defaultdict(dict)) + providers_in_file: DefaultDict[str, Set[str]] = defaultdict(set) + tests_in_file: Set[str] = set() + provider: str = os.path.basename(result_file).split("_")[0] # Debug: Print summary of test results print(f"Test results summary for {provider}:") @@ -127,124 +166,72 @@ def parse_results(result_file): # Extract test results if "tests" not in results or not results["tests"]: print(f"No test results found in {result_file}") - return parsed_results + # Return empty defaultdicts/set matching the type hint + return defaultdict(lambda: defaultdict(dict)), defaultdict(set), set() - # Map for normalizing model names - model_name_map = { - "Llama-3.3-8B-Instruct": "Llama-3.3-8B-Instruct", - "Llama-3.3-70B-Instruct": "Llama-3.3-70B-Instruct", - "Llama-3.2-11B-Vision-Instruct": "Llama-3.2-11B-Vision-Instruct", - "Llama-4-Scout-17B-16E": "Llama-4-Scout-17B-16E-Instruct", - "Llama-4-Scout-17B-16E-Instruct": "Llama-4-Scout-17B-16E-Instruct", - "Llama-4-Maverick-17B-128E": 
"Llama-4-Maverick-17B-128E-Instruct", - "Llama-4-Maverick-17B-128E-Instruct": "Llama-4-Maverick-17B-128E-Instruct", - "gpt-4o": "gpt-4o", - "gpt-4o-mini": "gpt-4o-mini", - } - - # Keep track of all models found for this provider - provider_models = set() - - # Track all unique test cases for each base test - test_case_counts = defaultdict(int) - - # First pass: count the number of cases for each test + # Process the tests for test in results["tests"]: test_id = test.get("nodeid", "") - if "call" in test: - test_name = test_id.split("::")[1].split("[")[0] - input_output_match = re.search(r"\[input_output(\d+)-", test_id) - if input_output_match: - test_case_counts[test_name] += 1 + if not (call_phase := test.get("call")): + continue + call_outcome = call_phase.get("outcome") + if call_outcome not in ("passed", "failed"): + continue - # Second pass: process the tests with case numbers only for tests with multiple cases - for test in results["tests"]: - test_id = test.get("nodeid", "") - outcome = test.get("outcome", "") + # --- Extract data from metadata --- + metadata = test.get("metadata", {}) + model = metadata.get("model") + case_id = metadata.get("case_id") # String ID (if provided) + case_index = metadata.get("case_index") # Integer index (if no ID provided) - # Only process tests that have been executed (not setup errors) - if "call" in test: - # Regular test that actually ran - test_name = test_id.split("::")[1].split("[")[0] + # Check if we have a model and at least one case identifier + if not model or (case_id is None and case_index is None): + print( + f"Warning: Missing 'model' or case identifier ('case_id'/'case_index') metadata for test: {test_id}. Skipping." 
+ ) + continue - # Extract input_output parameter to differentiate between test cases - input_output_match = re.search(r"\[input_output(\d+)-", test_id) - input_output_index = input_output_match.group(1) if input_output_match else "" + try: + test_name_base = test_id.split("::")[1].split("[")[0] + except (IndexError, ValueError) as e: + print(f"Warning: Could not parse base test name for {test_id}. Error: {e}. Skipping.") + continue - # Create a more detailed test name with case number only if there are multiple cases - detailed_test_name = test_name - if input_output_index and test_case_counts[test_name] > 1: - detailed_test_name = f"{test_name} (case {input_output_index})" + # Construct detailed test name using ID or index + if case_id is not None: + detailed_test_name = f"{test_name_base} ({case_id})" + elif case_index == 0: + # If case_id is missing and index is 0, assume single case, use base name only + detailed_test_name = test_name_base + elif case_index is not None: # case_index > 0 + # Use case_index for naming if case_id wasn't provided and index > 0 + detailed_test_name = f"{test_name_base} (case{case_index})" + else: + # This case should be prevented by the earlier check, but handle defensively + print(f"Error: No case identifier found for test {test_id} after initial check. 
Skipping.") + continue - # Track all unique test names - ALL_TESTS.add(detailed_test_name) + # Populate collections for this file + tests_in_file.add(detailed_test_name) + providers_in_file[provider].add(model) - # Extract model name from test_id using a more robust pattern - model_match = re.search(r"\[input_output\d+-([^\]]+)\]", test_id) - if model_match: - raw_model = model_match.group(1) - model = model_name_map.get(raw_model, raw_model) + if call_outcome == "passed": + parsed_results[provider][model][detailed_test_name] = True + elif call_outcome == "failed": + parsed_results[provider][model][detailed_test_name] = False - # Add to set of known models for this provider - provider_models.add(model) + # Final Summary Warning (Optional) + if not parsed_results.get(provider): + print(f"Warning: No valid test results parsed for provider {provider} from file {result_file}") - # Also update the global PROVIDERS dictionary - PROVIDERS[provider].add(model) - - # Store the result - if outcome == "passed": - parsed_results[provider][model][detailed_test_name] = True - else: - parsed_results[provider][model][detailed_test_name] = False - - print(f"Parsed test result: {detailed_test_name} for model {model}: {outcome}") - elif outcome == "error" and "setup" in test and test.get("setup", {}).get("outcome") == "failed": - # This is a setup failure, which likely means a configuration issue - # Extract the base test name and model name - parts = test_id.split("::") - if len(parts) > 1: - test_name = parts[1].split("[")[0] - - # Extract input_output parameter to differentiate between test cases - input_output_match = re.search(r"\[input_output(\d+)-", test_id) - input_output_index = input_output_match.group(1) if input_output_match else "" - - # Create a more detailed test name with case number only if there are multiple cases - detailed_test_name = test_name - if input_output_index and test_case_counts[test_name] > 1: - detailed_test_name = f"{test_name} (case 
{input_output_index})" - - if detailed_test_name in ALL_TESTS: - # Use a more robust pattern for model extraction - model_match = re.search(r"\[input_output\d+-([^\]]+)\]", test_id) - if model_match: - raw_model = model_match.group(1) - model = model_name_map.get(raw_model, raw_model) - - # Add to set of known models for this provider - provider_models.add(model) - - # Also update the global PROVIDERS dictionary - PROVIDERS[provider].add(model) - - # Mark setup failures as false (failed) - parsed_results[provider][model][detailed_test_name] = False - print(f"Parsed setup failure: {detailed_test_name} for model {model}") - - # Debug: Print parsed results - if not parsed_results[provider]: - print(f"Warning: No test results parsed for provider {provider}") - else: - for model, tests in parsed_results[provider].items(): - print(f"Model {model}: {len(tests)} test results") - - return parsed_results + return parsed_results, providers_in_file, tests_in_file -def cleanup_old_results(): - """Clean up old test result files, keeping only the newest N per provider""" - for provider in PROVIDERS.keys(): +def cleanup_old_results(providers_to_clean: Dict[str, Set[str]]): + """Clean up old test result files, keeping only the newest N per provider.""" + # Use the passed-in providers dictionary + for provider in providers_to_clean.keys(): # Get all result files for this provider provider_files = list(RESULTS_DIR.glob(f"{provider}_*.json")) @@ -289,8 +276,17 @@ def get_latest_results_by_provider(): return provider_results -def generate_report(results_dict, output_file=None): - """Generate the markdown report""" +def generate_report( + results_dict: Dict[str, Any], providers: Dict[str, Set[str]], all_tests: Set[str], output_file=None +): + """Generate the markdown report. + + Args: + results_dict: Aggregated results [provider][model][test_name] -> status. + providers: Dict of all providers and their models {provider: {models}}. + all_tests: Set of all test names found. 
+ output_file: Optional path to save the report. + """ if output_file is None: # Default to creating the report in the same directory as this script output_file = Path(__file__).parent / "REPORT.md" @@ -299,8 +295,8 @@ def generate_report(results_dict, output_file=None): # Get the timestamp from result files provider_timestamps = {} - provider_results = get_latest_results_by_provider() - for provider, result_file in provider_results.items(): + provider_results_files = get_latest_results_by_provider() + for provider, result_file in provider_results_files.items(): # Extract timestamp from filename (format: provider_timestamp.json) try: timestamp_str = result_file.stem.split("_")[1] @@ -310,12 +306,33 @@ def generate_report(results_dict, output_file=None): except (IndexError, ValueError): provider_timestamps[provider] = "Unknown" - # Convert provider model sets to sorted lists - for provider in PROVIDERS: - PROVIDERS[provider] = sorted(PROVIDERS[provider]) + # Convert provider model sets to sorted lists (use passed-in providers dict) + providers_sorted = {prov: sorted(models) for prov, models in providers.items()} - # Sort tests alphabetically - sorted_tests = sorted(ALL_TESTS) + # Sort tests alphabetically (use passed-in all_tests set) + sorted_tests = sorted(all_tests) + + # Calculate counts for each base test name + base_test_case_counts: DefaultDict[str, int] = defaultdict(int) + base_test_name_map: Dict[str, str] = {} + for test_name in sorted_tests: + match = re.match(r"^(.*?)( \([^)]+\))?$", test_name) + if match: + base_name = match.group(1).strip() + base_test_case_counts[base_name] += 1 + base_test_name_map[test_name] = base_name + else: + # Should not happen with current naming, but handle defensively + base_test_case_counts[test_name] += 1 + base_test_name_map[test_name] = test_name + + if not sorted_tests: + print("Warning: No test results found to generate a report.") + # Optionally create an empty report or return early + with open(output_file, "w") as 
f: + f.write("# Test Results Report\n\nNo test results found.\n") + print(f"Generated empty report: {output_file}") + return report = ["# Test Results Report\n"] report.append(f"*Generated on: {time.strftime('%Y-%m-%d %H:%M:%S')}*\n") @@ -336,19 +353,15 @@ def generate_report(results_dict, output_file=None): # Add a summary section report.append("## Summary\n") - # Count total tests and passes + # Count total tests and passes (use passed-in providers and all_tests) total_tests = 0 passed_tests = 0 provider_totals = {} - - # Prepare summary data - for provider in PROVIDERS.keys(): + for provider, models in providers_sorted.items(): provider_passed = 0 provider_total = 0 - if provider in results_dict: - provider_models = PROVIDERS[provider] - for model in provider_models: + for model in models: if model in results_dict[provider]: model_results = results_dict[provider][model] for test in sorted_tests: @@ -358,33 +371,26 @@ def generate_report(results_dict, output_file=None): if model_results[test]: provider_passed += 1 passed_tests += 1 - provider_totals[provider] = (provider_passed, provider_total) - # Add summary table + # Add summary table (use passed-in providers dict) report.append("| Provider | Pass Rate | Tests Passed | Total Tests |") report.append("| --- | --- | --- | --- |") - - # Use the custom order for summary table - for provider in [p for p in PROVIDER_ORDER if p in PROVIDERS]: + for provider in [p for p in PROVIDER_ORDER if p in providers]: # Check against keys of passed-in dict passed, total = provider_totals.get(provider, (0, 0)) pass_rate = f"{(passed / total * 100):.1f}%" if total > 0 else "N/A" report.append(f"| {provider.capitalize()} | {pass_rate} | {passed} | {total} |") - - # Add providers not in the custom order - for provider in [p for p in PROVIDERS if p not in PROVIDER_ORDER]: + for provider in [p for p in providers if p not in PROVIDER_ORDER]: # Check against keys of passed-in dict passed, total = provider_totals.get(provider, (0, 0)) 
pass_rate = f"{(passed / total * 100):.1f}%" if total > 0 else "N/A" report.append(f"| {provider.capitalize()} | {pass_rate} | {passed} | {total} |") - report.append("\n") - # Process each provider in the custom order, then any additional providers for provider in sorted( - PROVIDERS.keys(), key=lambda p: (PROVIDER_ORDER.index(p) if p in PROVIDER_ORDER else float("inf"), p) + providers_sorted.keys(), key=lambda p: (PROVIDER_ORDER.index(p) if p in PROVIDER_ORDER else float("inf"), p) ): - if not PROVIDERS[provider]: - # Skip providers with no models + provider_models = providers_sorted[provider] # Use sorted models + if not provider_models: continue report.append(f"\n## {provider.capitalize()}\n") @@ -394,34 +400,70 @@ def generate_report(results_dict, output_file=None): report.append(f"*Tests run on: {provider_timestamps[provider]}*\n") # Add test command for reproducing results - test_cmd = f"pytest tests/verifications/openai/test_chat_completion.py --provider={provider} -v" - report.append(f"```bash\n{test_cmd}\n```\n") + test_cmd_all = f"pytest tests/verifications/openai_api/test_chat_completion.py --provider={provider} -v" + report.append(f"```bash\n# Run all tests for this provider:\n{test_cmd_all}\n") - # Get the relevant models for this provider - provider_models = PROVIDERS[provider] + # Find an example test with a case ID + example_base_test_name = None + example_case_id = None + # Get first test as fallback base, handle empty list + first_test_name = sorted_tests[0] if sorted_tests else "unknown_test" - # Create table header with models as columns - header = "| Test | " + " | ".join(provider_models) + " |" + match = re.match(r"^(.*?) 
\((.*?)\)$", first_test_name) + if match: + example_base_test_name = match.group(1).strip() + example_case_id = match.group(2).strip() + else: + example_base_test_name = first_test_name + + base_name = base_test_name_map.get(test, test) # Get base name + case_count = base_test_case_counts.get(base_name, 1) # Get count + filter_str = f"{example_base_test_name} and {example_case_id}" if case_count > 1 else example_base_test_name + + test_cmd_specific_case = ( + f'pytest tests/verifications/openai_api/test_chat_completion.py --provider={provider} -k "{filter_str}"' + ) + report.append( + f"# Example: Run only the '{example_case_id}' case of {example_base_test_name}:\n{test_cmd_specific_case}\n```\n" + ) + + # Get display names (use passed-in providers dict) + provider_config = VERIFICATION_CONFIG.get("providers", {}).get(provider, {}) + display_name_map = provider_config.get("model_display_names", {}) + + # Add Model Key Table (use provider_models) + report.append(f"\n**Model Key ({provider.capitalize()})**\n") + provider_key_lines = ["| Display Name | Full Model ID |", "| --- | --- |"] + for model_id in provider_models: + display_name = display_name_map.get(model_id, model_id) + provider_key_lines.append(f"| {display_name} | `{model_id}` |") + report.extend(provider_key_lines) + report.append("\n") + + # Create results table header (use provider_models) + display_names = [display_name_map.get(m, m) for m in provider_models] + header = "| Test | " + " | ".join(display_names) + " |" separator = "| --- | " + " | ".join(["---"] * len(provider_models)) + " |" - report.append(header) report.append(separator) - # Get results for this provider - provider_results = results_dict.get(provider, {}) + # Get results for this provider from results_dict + provider_results_data = results_dict.get(provider, {}) - # Add rows for each test + # Add rows for each test (use sorted_tests) for test in sorted_tests: - row = f"| {test} |" + # Determine display name based on case count + 
base_name = base_test_name_map.get(test, test) # Get base name + case_count = base_test_case_counts.get(base_name, 1) # Get count + display_test_name = base_name if case_count == 1 else test # Choose display name + row = f"| {display_test_name} |" # Use display name - # Add results for each model in this test - for model in provider_models: - if model in provider_results and test in provider_results[model]: - result = pass_icon if provider_results[model][test] else fail_icon + for model_id in provider_models: + if model_id in provider_results_data and test in provider_results_data[model_id]: + result = pass_icon if provider_results_data[model_id][test] else fail_icon else: result = na_icon row += f" {result} |" - report.append(row) # Write to file @@ -442,9 +484,13 @@ def main(): help="Specify providers to test (comma-separated or space-separated, default: all)", ) parser.add_argument("--output", type=str, help="Output file location (default: tests/verifications/REPORT.md)") + parser.add_argument("--k", type=str, help="Keyword expression to filter tests (passed to pytest -k)") args = parser.parse_args() all_results = {} + # Initialize collections to aggregate results in main + aggregated_providers = defaultdict(set) + aggregated_tests = set() if args.run_tests: # Get list of available providers from command line or use detected providers @@ -463,22 +509,31 @@ def main(): for provider in test_providers: provider = provider.strip() # Remove any whitespace - result_file = run_tests(provider) + result_file = run_tests(provider, keyword=args.k) if result_file: - provider_results = parse_results(result_file) - all_results.update(provider_results) + # Parse and aggregate results + parsed_results, providers_in_file, tests_in_file = parse_results(result_file) + all_results.update(parsed_results) + for prov, models in providers_in_file.items(): + aggregated_providers[prov].update(models) + aggregated_tests.update(tests_in_file) else: # Use existing results 
provider_result_files = get_latest_results_by_provider() for result_file in provider_result_files.values(): - provider_results = parse_results(result_file) - all_results.update(provider_results) + # Parse and aggregate results + parsed_results, providers_in_file, tests_in_file = parse_results(result_file) + all_results.update(parsed_results) + for prov, models in providers_in_file.items(): + aggregated_providers[prov].update(models) + aggregated_tests.update(tests_in_file) - # Generate the report - generate_report(all_results, args.output) + # Generate the report, passing aggregated data + generate_report(all_results, aggregated_providers, aggregated_tests, args.output) - cleanup_old_results() + # Cleanup, passing aggregated providers + cleanup_old_results(aggregated_providers) if __name__ == "__main__": diff --git a/tests/verifications/openai/fixtures/fixtures.py b/tests/verifications/openai/fixtures/fixtures.py deleted file mode 100644 index b86de3662..000000000 --- a/tests/verifications/openai/fixtures/fixtures.py +++ /dev/null @@ -1,97 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -import os - -import pytest -from openai import OpenAI - - -@pytest.fixture -def providers_model_mapping(): - """ - Mapping from model names used in test cases to provider's model names. 
- """ - return { - "fireworks": { - "Llama-3.3-70B-Instruct": "accounts/fireworks/models/llama-v3p1-70b-instruct", - "Llama-3.2-11B-Vision-Instruct": "accounts/fireworks/models/llama-v3p2-11b-vision-instruct", - "Llama-4-Scout-17B-16E-Instruct": "accounts/fireworks/models/llama4-scout-instruct-basic", - "Llama-4-Maverick-17B-128E-Instruct": "accounts/fireworks/models/llama4-maverick-instruct-basic", - }, - "together": { - "Llama-3.3-70B-Instruct": "meta-llama/Llama-3.3-70B-Instruct-Turbo", - "Llama-3.2-11B-Vision-Instruct": "meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo", - "Llama-4-Scout-17B-16E-Instruct": "meta-llama/Llama-4-Scout-17B-16E-Instruct", - "Llama-4-Maverick-17B-128E-Instruct": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", - }, - "groq": { - "Llama-3.3-70B-Instruct": "llama-3.3-70b-versatile", - "Llama-3.2-11B-Vision-Instruct": "llama-3.2-11b-vision-preview", - "Llama-4-Scout-17B-16E-Instruct": "llama-4-scout-17b-16e-instruct", - "Llama-4-Maverick-17B-128E-Instruct": "llama-4-maverick-17b-128e-instruct", - }, - "cerebras": { - "Llama-3.3-70B-Instruct": "llama-3.3-70b", - }, - "openai": { - "gpt-4o": "gpt-4o", - "gpt-4o-mini": "gpt-4o-mini", - }, - } - - -@pytest.fixture -def provider_metadata(): - return { - "fireworks": ("https://api.fireworks.ai/inference/v1", "FIREWORKS_API_KEY"), - "together": ("https://api.together.xyz/v1", "TOGETHER_API_KEY"), - "groq": ("https://api.groq.com/openai/v1", "GROQ_API_KEY"), - "cerebras": ("https://api.cerebras.ai/v1", "CEREBRAS_API_KEY"), - "openai": ("https://api.openai.com/v1", "OPENAI_API_KEY"), - } - - -@pytest.fixture -def provider(request, provider_metadata): - provider = request.config.getoption("--provider") - base_url = request.config.getoption("--base-url") - - if provider and base_url and provider_metadata[provider][0] != base_url: - raise ValueError(f"Provider {provider} is not supported for base URL {base_url}") - - if not provider: - if not base_url: - raise ValueError("Provider and base URL 
are not provided") - for provider, metadata in provider_metadata.items(): - if metadata[0] == base_url: - provider = provider - break - - return provider - - -@pytest.fixture -def base_url(request, provider, provider_metadata): - return request.config.getoption("--base-url") or provider_metadata[provider][0] - - -@pytest.fixture -def api_key(request, provider, provider_metadata): - return request.config.getoption("--api-key") or os.getenv(provider_metadata[provider][1]) - - -@pytest.fixture -def model_mapping(provider, providers_model_mapping): - return providers_model_mapping[provider] - - -@pytest.fixture -def openai_client(base_url, api_key): - return OpenAI( - base_url=base_url, - api_key=api_key, - ) diff --git a/tests/verifications/openai/test_chat_completion.py b/tests/verifications/openai/test_chat_completion.py deleted file mode 100644 index c6a10de7b..000000000 --- a/tests/verifications/openai/test_chat_completion.py +++ /dev/null @@ -1,202 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
- -from typing import Any - -import pytest -from pydantic import BaseModel - -from tests.verifications.openai.fixtures.load import load_test_cases - -chat_completion_test_cases = load_test_cases("chat_completion") - - -@pytest.fixture -def correct_model_name(model, provider, providers_model_mapping): - """Return the provider-specific model name based on the generic model name.""" - mapping = providers_model_mapping[provider] - if model not in mapping: - pytest.skip(f"Provider {provider} does not support model {model}") - return mapping[model] - - -@pytest.mark.parametrize("model", chat_completion_test_cases["test_chat_basic"]["test_params"]["model"]) -@pytest.mark.parametrize( - "input_output", - chat_completion_test_cases["test_chat_basic"]["test_params"]["input_output"], -) -def test_chat_non_streaming_basic(openai_client, input_output, correct_model_name): - response = openai_client.chat.completions.create( - model=correct_model_name, - messages=input_output["input"]["messages"], - stream=False, - ) - assert response.choices[0].message.role == "assistant" - assert input_output["output"].lower() in response.choices[0].message.content.lower() - - -@pytest.mark.parametrize("model", chat_completion_test_cases["test_chat_basic"]["test_params"]["model"]) -@pytest.mark.parametrize( - "input_output", - chat_completion_test_cases["test_chat_basic"]["test_params"]["input_output"], -) -def test_chat_streaming_basic(openai_client, input_output, correct_model_name): - response = openai_client.chat.completions.create( - model=correct_model_name, - messages=input_output["input"]["messages"], - stream=True, - ) - content = "" - for chunk in response: - content += chunk.choices[0].delta.content or "" - - # TODO: add detailed type validation - - assert input_output["output"].lower() in content.lower() - - -@pytest.mark.parametrize("model", chat_completion_test_cases["test_chat_image"]["test_params"]["model"]) -@pytest.mark.parametrize( - "input_output", - 
chat_completion_test_cases["test_chat_image"]["test_params"]["input_output"], -) -def test_chat_non_streaming_image(openai_client, input_output, correct_model_name): - response = openai_client.chat.completions.create( - model=correct_model_name, - messages=input_output["input"]["messages"], - stream=False, - ) - assert response.choices[0].message.role == "assistant" - assert input_output["output"].lower() in response.choices[0].message.content.lower() - - -@pytest.mark.parametrize("model", chat_completion_test_cases["test_chat_image"]["test_params"]["model"]) -@pytest.mark.parametrize( - "input_output", - chat_completion_test_cases["test_chat_image"]["test_params"]["input_output"], -) -def test_chat_streaming_image(openai_client, input_output, correct_model_name): - response = openai_client.chat.completions.create( - model=correct_model_name, - messages=input_output["input"]["messages"], - stream=True, - ) - content = "" - for chunk in response: - content += chunk.choices[0].delta.content or "" - - # TODO: add detailed type validation - - assert input_output["output"].lower() in content.lower() - - -@pytest.mark.parametrize( - "model", - chat_completion_test_cases["test_chat_structured_output"]["test_params"]["model"], -) -@pytest.mark.parametrize( - "input_output", - chat_completion_test_cases["test_chat_structured_output"]["test_params"]["input_output"], -) -def test_chat_non_streaming_structured_output(openai_client, input_output, correct_model_name): - response = openai_client.chat.completions.create( - model=correct_model_name, - messages=input_output["input"]["messages"], - response_format=input_output["input"]["response_format"], - stream=False, - ) - - assert response.choices[0].message.role == "assistant" - maybe_json_content = response.choices[0].message.content - - validate_structured_output(maybe_json_content, input_output["output"]) - - -@pytest.mark.parametrize( - "model", - 
chat_completion_test_cases["test_chat_structured_output"]["test_params"]["model"], -) -@pytest.mark.parametrize( - "input_output", - chat_completion_test_cases["test_chat_structured_output"]["test_params"]["input_output"], -) -def test_chat_streaming_structured_output(openai_client, input_output, correct_model_name): - response = openai_client.chat.completions.create( - model=correct_model_name, - messages=input_output["input"]["messages"], - response_format=input_output["input"]["response_format"], - stream=True, - ) - maybe_json_content = "" - for chunk in response: - maybe_json_content += chunk.choices[0].delta.content or "" - validate_structured_output(maybe_json_content, input_output["output"]) - - -@pytest.mark.parametrize( - "model", - chat_completion_test_cases["test_tool_calling"]["test_params"]["model"], -) -@pytest.mark.parametrize( - "input_output", - chat_completion_test_cases["test_tool_calling"]["test_params"]["input_output"], -) -def test_chat_non_streaming_tool_calling(openai_client, input_output, correct_model_name): - response = openai_client.chat.completions.create( - model=correct_model_name, - messages=input_output["input"]["messages"], - tools=input_output["input"]["tools"], - stream=False, - ) - - assert response.choices[0].message.role == "assistant" - assert len(response.choices[0].message.tool_calls) > 0 - assert input_output["output"] == "get_weather_tool_call" - assert response.choices[0].message.tool_calls[0].function.name == "get_weather" - # TODO: add detailed type validation - - -def get_structured_output(maybe_json_content: str, schema_name: str) -> Any | None: - if schema_name == "valid_calendar_event": - - class CalendarEvent(BaseModel): - name: str - date: str - participants: list[str] - - try: - calendar_event = CalendarEvent.model_validate_json(maybe_json_content) - return calendar_event - except Exception: - return None - elif schema_name == "valid_math_reasoning": - - class Step(BaseModel): - explanation: str - output: str - 
- class MathReasoning(BaseModel): - steps: list[Step] - final_answer: str - - try: - math_reasoning = MathReasoning.model_validate_json(maybe_json_content) - return math_reasoning - except Exception: - return None - - return None - - -def validate_structured_output(maybe_json_content: str, schema_name: str) -> None: - structured_output = get_structured_output(maybe_json_content, schema_name) - assert structured_output is not None - if schema_name == "valid_calendar_event": - assert structured_output.name is not None - assert structured_output.date is not None - assert len(structured_output.participants) == 2 - elif schema_name == "valid_math_reasoning": - assert len(structured_output.final_answer) > 0 diff --git a/tests/verifications/openai/__init__.py b/tests/verifications/openai_api/__init__.py similarity index 100% rename from tests/verifications/openai/__init__.py rename to tests/verifications/openai_api/__init__.py diff --git a/tests/verifications/openai/fixtures/__init__.py b/tests/verifications/openai_api/fixtures/__init__.py similarity index 100% rename from tests/verifications/openai/fixtures/__init__.py rename to tests/verifications/openai_api/fixtures/__init__.py diff --git a/tests/verifications/openai_api/fixtures/fixtures.py b/tests/verifications/openai_api/fixtures/fixtures.py new file mode 100644 index 000000000..4f8c2e017 --- /dev/null +++ b/tests/verifications/openai_api/fixtures/fixtures.py @@ -0,0 +1,105 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
+ +import os +from pathlib import Path + +import pytest +import yaml +from openai import OpenAI + + +# --- Helper Function to Load Config --- +def _load_all_verification_configs(): + """Load and aggregate verification configs from the conf/ directory.""" + # Note: Path is relative to *this* file (fixtures.py) + conf_dir = Path(__file__).parent.parent.parent / "conf" + if not conf_dir.is_dir(): + # Use pytest.fail if called during test collection, otherwise raise error + # For simplicity here, we'll raise an error, assuming direct calls + # are less likely or can handle it. + raise FileNotFoundError(f"Verification config directory not found at {conf_dir}") + + all_provider_configs = {} + yaml_files = list(conf_dir.glob("*.yaml")) + if not yaml_files: + raise FileNotFoundError(f"No YAML configuration files found in {conf_dir}") + + for config_path in yaml_files: + provider_name = config_path.stem + try: + with open(config_path, "r") as f: + provider_config = yaml.safe_load(f) + if provider_config: + all_provider_configs[provider_name] = provider_config + else: + # Log warning if possible, or just skip empty files silently + print(f"Warning: Config file {config_path} is empty or invalid.") + except Exception as e: + raise IOError(f"Error loading config file {config_path}: {e}") from e + + return {"providers": all_provider_configs} + + +# --- End Helper Function --- + + +@pytest.fixture(scope="session") +def verification_config(): + """Pytest fixture to provide the loaded verification config.""" + try: + return _load_all_verification_configs() + except (FileNotFoundError, IOError) as e: + pytest.fail(str(e)) # Fail test collection if config loading fails + + +@pytest.fixture +def provider(request, verification_config): + provider = request.config.getoption("--provider") + base_url = request.config.getoption("--base-url") + + if provider and base_url and verification_config["providers"][provider]["base_url"] != base_url: + raise ValueError(f"Provider {provider} is not 
supported for base URL {base_url}") + + if not provider: + if not base_url: + raise ValueError("Provider and base URL are not provided") + for provider, metadata in verification_config["providers"].items(): + if metadata["base_url"] == base_url: + provider = provider + break + + return provider + + +@pytest.fixture +def base_url(request, provider, verification_config): + return request.config.getoption("--base-url") or verification_config["providers"][provider]["base_url"] + + +@pytest.fixture +def api_key(request, provider, verification_config): + provider_conf = verification_config.get("providers", {}).get(provider, {}) + api_key_env_var = provider_conf.get("api_key_var") + + key_from_option = request.config.getoption("--api-key") + key_from_env = os.getenv(api_key_env_var) if api_key_env_var else None + + final_key = key_from_option or key_from_env + return final_key + + +@pytest.fixture +def model_mapping(provider, providers_model_mapping): + return providers_model_mapping[provider] + + +@pytest.fixture +def openai_client(base_url, api_key): + return OpenAI( + base_url=base_url, + api_key=api_key, + ) diff --git a/tests/verifications/openai/fixtures/load.py b/tests/verifications/openai_api/fixtures/load.py similarity index 100% rename from tests/verifications/openai/fixtures/load.py rename to tests/verifications/openai_api/fixtures/load.py diff --git a/tests/verifications/openai/fixtures/test_cases/chat_completion.yaml b/tests/verifications/openai_api/fixtures/test_cases/chat_completion.yaml similarity index 78% rename from tests/verifications/openai/fixtures/test_cases/chat_completion.yaml rename to tests/verifications/openai_api/fixtures/test_cases/chat_completion.yaml index 2c302a704..78ea8245d 100644 --- a/tests/verifications/openai/fixtures/test_cases/chat_completion.yaml +++ b/tests/verifications/openai_api/fixtures/test_cases/chat_completion.yaml @@ -1,31 +1,24 @@ test_chat_basic: test_name: test_chat_basic test_params: - input_output: - - input: + case: 
+ - case_id: "earth" + input: messages: - content: Which planet do humans live on? role: user output: Earth - - input: + - case_id: "saturn" + input: messages: - content: Which planet has rings around it with a name starting with letter S? role: user output: Saturn - model: - - Llama-3.3-8B-Instruct - - Llama-3.3-70B-Instruct - - Llama-4-Scout-17B-16E - - Llama-4-Scout-17B-16E-Instruct - - Llama-4-Maverick-17B-128E - - Llama-4-Maverick-17B-128E-Instruct - - gpt-4o - - gpt-4o-mini test_chat_image: test_name: test_chat_image test_params: - input_output: + case: - input: messages: - content: @@ -36,18 +29,12 @@ test_chat_image: type: image_url role: user output: llama - model: - - Llama-4-Scout-17B-16E - - Llama-4-Scout-17B-16E-Instruct - - Llama-4-Maverick-17B-128E - - Llama-4-Maverick-17B-128E-Instruct - - gpt-4o - - gpt-4o-mini test_chat_structured_output: test_name: test_chat_structured_output test_params: - input_output: - - input: + case: + - case_id: "calendar" + input: messages: - content: Extract the event information. role: system @@ -77,7 +64,8 @@ test_chat_structured_output: type: object type: json_schema output: valid_calendar_event - - input: + - case_id: "math" + input: messages: - content: You are a helpful math tutor. Guide the user through the solution step by step. @@ -118,19 +106,10 @@ test_chat_structured_output: type: object type: json_schema output: valid_math_reasoning - model: - - Llama-3.3-8B-Instruct - - Llama-3.3-70B-Instruct - - Llama-4-Scout-17B-16E - - Llama-4-Scout-17B-16E-Instruct - - Llama-4-Maverick-17B-128E - - Llama-4-Maverick-17B-128E-Instruct - - gpt-4o - - gpt-4o-mini test_tool_calling: test_name: test_tool_calling test_params: - input_output: + case: - input: messages: - content: You are a helpful assistant that can use tools to get information. 
@@ -152,11 +131,3 @@ test_tool_calling: type: object type: function output: get_weather_tool_call - model: - - Llama-3.3-70B-Instruct - - Llama-4-Scout-17B-16E - - Llama-4-Scout-17B-16E-Instruct - - Llama-4-Maverick-17B-128E - - Llama-4-Maverick-17B-128E-Instruct - - gpt-4o - - gpt-4o-mini diff --git a/tests/verifications/openai_api/test_chat_completion.py b/tests/verifications/openai_api/test_chat_completion.py new file mode 100644 index 000000000..dc08ec944 --- /dev/null +++ b/tests/verifications/openai_api/test_chat_completion.py @@ -0,0 +1,271 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +import re +from typing import Any + +import pytest +from pydantic import BaseModel + +from tests.verifications.openai_api.fixtures.fixtures import _load_all_verification_configs +from tests.verifications.openai_api.fixtures.load import load_test_cases + +chat_completion_test_cases = load_test_cases("chat_completion") + + +def case_id_generator(case): + """Generate a test ID from the case's 'case_id' field, or use a default.""" + case_id = case.get("case_id") + if isinstance(case_id, (str, int)): + return re.sub(r"\\W|^(?=\\d)", "_", str(case_id)) + return None + + +def pytest_generate_tests(metafunc): + """Dynamically parametrize tests based on the selected provider and config.""" + if "model" in metafunc.fixturenames: + provider = metafunc.config.getoption("provider") + if not provider: + print("Warning: --provider not specified. 
Skipping model parametrization.") + metafunc.parametrize("model", []) + return + + try: + config_data = _load_all_verification_configs() + except (FileNotFoundError, IOError) as e: + print(f"ERROR loading verification configs: {e}") + config_data = {"providers": {}} + + provider_config = config_data.get("providers", {}).get(provider) + if provider_config: + models = provider_config.get("models", []) + if models: + metafunc.parametrize("model", models) + else: + print(f"Warning: No models found for provider '{provider}' in config.") + metafunc.parametrize("model", []) # Parametrize empty if no models found + else: + print(f"Warning: Provider '{provider}' not found in config. No models parametrized.") + metafunc.parametrize("model", []) # Parametrize empty if provider not found + + +def should_skip_test(verification_config, provider, model, test_name_base): + """Check if a test should be skipped based on config exclusions.""" + provider_config = verification_config.get("providers", {}).get(provider) + if not provider_config: + return False # No config for provider, don't skip + + exclusions = provider_config.get("test_exclusions", {}).get(model, []) + return test_name_base in exclusions + + +# Helper to get the base test name from the request object +def get_base_test_name(request): + return request.node.originalname + + +# --- Test Functions --- + + +@pytest.mark.parametrize( + "case", + chat_completion_test_cases["test_chat_basic"]["test_params"]["case"], + ids=case_id_generator, +) +def test_chat_non_streaming_basic(request, openai_client, model, provider, verification_config, case): + test_name_base = get_base_test_name(request) + if should_skip_test(verification_config, provider, model, test_name_base): + pytest.skip(f"Skipping {test_name_base} for model {model} on provider {provider} based on config.") + + response = openai_client.chat.completions.create( + model=model, + messages=case["input"]["messages"], + stream=False, + ) + assert 
response.choices[0].message.role == "assistant" + assert case["output"].lower() in response.choices[0].message.content.lower() + + +@pytest.mark.parametrize( + "case", + chat_completion_test_cases["test_chat_basic"]["test_params"]["case"], + ids=case_id_generator, +) +def test_chat_streaming_basic(request, openai_client, model, provider, verification_config, case): + test_name_base = get_base_test_name(request) + if should_skip_test(verification_config, provider, model, test_name_base): + pytest.skip(f"Skipping {test_name_base} for model {model} on provider {provider} based on config.") + + response = openai_client.chat.completions.create( + model=model, + messages=case["input"]["messages"], + stream=True, + ) + content = "" + for chunk in response: + content += chunk.choices[0].delta.content or "" + + # TODO: add detailed type validation + + assert case["output"].lower() in content.lower() + + +@pytest.mark.parametrize( + "case", + chat_completion_test_cases["test_chat_image"]["test_params"]["case"], + ids=case_id_generator, +) +def test_chat_non_streaming_image(request, openai_client, model, provider, verification_config, case): + test_name_base = get_base_test_name(request) + if should_skip_test(verification_config, provider, model, test_name_base): + pytest.skip(f"Skipping {test_name_base} for model {model} on provider {provider} based on config.") + + response = openai_client.chat.completions.create( + model=model, + messages=case["input"]["messages"], + stream=False, + ) + assert response.choices[0].message.role == "assistant" + assert case["output"].lower() in response.choices[0].message.content.lower() + + +@pytest.mark.parametrize( + "case", + chat_completion_test_cases["test_chat_image"]["test_params"]["case"], + ids=case_id_generator, +) +def test_chat_streaming_image(request, openai_client, model, provider, verification_config, case): + test_name_base = get_base_test_name(request) + if should_skip_test(verification_config, provider, model, 
test_name_base): + pytest.skip(f"Skipping {test_name_base} for model {model} on provider {provider} based on config.") + + response = openai_client.chat.completions.create( + model=model, + messages=case["input"]["messages"], + stream=True, + ) + content = "" + for chunk in response: + content += chunk.choices[0].delta.content or "" + + # TODO: add detailed type validation + + assert case["output"].lower() in content.lower() + + +@pytest.mark.parametrize( + "case", + chat_completion_test_cases["test_chat_structured_output"]["test_params"]["case"], + ids=case_id_generator, +) +def test_chat_non_streaming_structured_output(request, openai_client, model, provider, verification_config, case): + test_name_base = get_base_test_name(request) + if should_skip_test(verification_config, provider, model, test_name_base): + pytest.skip(f"Skipping {test_name_base} for model {model} on provider {provider} based on config.") + + response = openai_client.chat.completions.create( + model=model, + messages=case["input"]["messages"], + response_format=case["input"]["response_format"], + stream=False, + ) + + assert response.choices[0].message.role == "assistant" + maybe_json_content = response.choices[0].message.content + + validate_structured_output(maybe_json_content, case["output"]) + + +@pytest.mark.parametrize( + "case", + chat_completion_test_cases["test_chat_structured_output"]["test_params"]["case"], + ids=case_id_generator, +) +def test_chat_streaming_structured_output(request, openai_client, model, provider, verification_config, case): + test_name_base = get_base_test_name(request) + if should_skip_test(verification_config, provider, model, test_name_base): + pytest.skip(f"Skipping {test_name_base} for model {model} on provider {provider} based on config.") + + response = openai_client.chat.completions.create( + model=model, + messages=case["input"]["messages"], + response_format=case["input"]["response_format"], + stream=True, + ) + maybe_json_content = "" + for chunk in 
response: + maybe_json_content += chunk.choices[0].delta.content or "" + validate_structured_output(maybe_json_content, case["output"]) + + +@pytest.mark.parametrize( + "case", + chat_completion_test_cases["test_tool_calling"]["test_params"]["case"], + ids=case_id_generator, +) +def test_chat_non_streaming_tool_calling(request, openai_client, model, provider, verification_config, case): + test_name_base = get_base_test_name(request) + if should_skip_test(verification_config, provider, model, test_name_base): + pytest.skip(f"Skipping {test_name_base} for model {model} on provider {provider} based on config.") + + response = openai_client.chat.completions.create( + model=model, + messages=case["input"]["messages"], + tools=case["input"]["tools"], + stream=False, + ) + + assert response.choices[0].message.role == "assistant" + assert len(response.choices[0].message.tool_calls) > 0 + assert case["output"] == "get_weather_tool_call" + assert response.choices[0].message.tool_calls[0].function.name == "get_weather" + # TODO: add detailed type validation + + +# --- Helper functions (structured output validation) --- + + +def get_structured_output(maybe_json_content: str, schema_name: str) -> Any | None: + if schema_name == "valid_calendar_event": + + class CalendarEvent(BaseModel): + name: str + date: str + participants: list[str] + + try: + calendar_event = CalendarEvent.model_validate_json(maybe_json_content) + return calendar_event + except Exception: + return None + elif schema_name == "valid_math_reasoning": + + class Step(BaseModel): + explanation: str + output: str + + class MathReasoning(BaseModel): + steps: list[Step] + final_answer: str + + try: + math_reasoning = MathReasoning.model_validate_json(maybe_json_content) + return math_reasoning + except Exception: + return None + + return None + + +def validate_structured_output(maybe_json_content: str, schema_name: str) -> None: + structured_output = get_structured_output(maybe_json_content, schema_name) + assert 
structured_output is not None + if schema_name == "valid_calendar_event": + assert structured_output.name is not None + assert structured_output.date is not None + assert len(structured_output.participants) == 2 + elif schema_name == "valid_math_reasoning": + assert len(structured_output.final_answer) > 0 diff --git a/tests/verifications/test_results/fireworks_1744154308.json b/tests/verifications/test_results/fireworks_1744154308.json deleted file mode 100644 index 691f6e474..000000000 --- a/tests/verifications/test_results/fireworks_1744154308.json +++ /dev/null @@ -1,2744 +0,0 @@ -{ - "created": 1744154399.039055, - "duration": 87.73799800872803, - "exitcode": 1, - "root": "/Users/erichuang/projects/llama-stack", - "environment": {}, - "summary": { - "skipped": 52, - "passed": 28, - "failed": 3, - "total": 83, - "collected": 83 - }, - "collectors": [ - { - "nodeid": "", - "outcome": "passed", - "result": [ - { - "nodeid": "tests/verifications/openai/test_chat_completion.py", - "type": "Module" - } - ] - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py", - "outcome": "passed", - "result": [ - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-3.3-8B-Instruct]", - "type": "Function", - "lineno": 25 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-3.3-70B-Instruct]", - "type": "Function", - "lineno": 25 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-4-Scout-17B-16E]", - "type": "Function", - "lineno": 25 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "type": "Function", - "lineno": 25 - }, - { - "nodeid": 
"tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-4-Maverick-17B-128E]", - "type": "Function", - "lineno": 25 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "type": "Function", - "lineno": 25 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-gpt-4o]", - "type": "Function", - "lineno": 25 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-gpt-4o-mini]", - "type": "Function", - "lineno": 25 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-3.3-8B-Instruct]", - "type": "Function", - "lineno": 25 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-3.3-70B-Instruct]", - "type": "Function", - "lineno": 25 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-4-Scout-17B-16E]", - "type": "Function", - "lineno": 25 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-4-Scout-17B-16E-Instruct]", - "type": "Function", - "lineno": 25 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-4-Maverick-17B-128E]", - "type": "Function", - "lineno": 25 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-4-Maverick-17B-128E-Instruct]", - "type": "Function", - "lineno": 25 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-gpt-4o]", - "type": "Function", - "lineno": 25 - }, - { - "nodeid": 
"tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-gpt-4o-mini]", - "type": "Function", - "lineno": 25 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-3.3-8B-Instruct]", - "type": "Function", - "lineno": 40 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-3.3-70B-Instruct]", - "type": "Function", - "lineno": 40 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-4-Scout-17B-16E]", - "type": "Function", - "lineno": 40 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "type": "Function", - "lineno": 40 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-4-Maverick-17B-128E]", - "type": "Function", - "lineno": 40 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "type": "Function", - "lineno": 40 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-gpt-4o]", - "type": "Function", - "lineno": 40 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-gpt-4o-mini]", - "type": "Function", - "lineno": 40 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-3.3-8B-Instruct]", - "type": "Function", - "lineno": 40 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-3.3-70B-Instruct]", - "type": "Function", - "lineno": 40 - }, - { - "nodeid": 
"tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-4-Scout-17B-16E]", - "type": "Function", - "lineno": 40 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-4-Scout-17B-16E-Instruct]", - "type": "Function", - "lineno": 40 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-4-Maverick-17B-128E]", - "type": "Function", - "lineno": 40 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-4-Maverick-17B-128E-Instruct]", - "type": "Function", - "lineno": 40 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-gpt-4o]", - "type": "Function", - "lineno": 40 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-gpt-4o-mini]", - "type": "Function", - "lineno": 40 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-Llama-4-Scout-17B-16E]", - "type": "Function", - "lineno": 60 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "type": "Function", - "lineno": 60 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-Llama-4-Maverick-17B-128E]", - "type": "Function", - "lineno": 60 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "type": "Function", - "lineno": 60 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-gpt-4o]", - "type": "Function", - "lineno": 60 - }, - { - "nodeid": 
"tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-gpt-4o-mini]", - "type": "Function", - "lineno": 60 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-Llama-4-Scout-17B-16E]", - "type": "Function", - "lineno": 75 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "type": "Function", - "lineno": 75 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-Llama-4-Maverick-17B-128E]", - "type": "Function", - "lineno": 75 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "type": "Function", - "lineno": 75 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-gpt-4o]", - "type": "Function", - "lineno": 75 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-gpt-4o-mini]", - "type": "Function", - "lineno": 75 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-3.3-8B-Instruct]", - "type": "Function", - "lineno": 95 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-3.3-70B-Instruct]", - "type": "Function", - "lineno": 95 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E]", - "type": "Function", - "lineno": 95 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "type": "Function", - "lineno": 95 - }, - { - 
"nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E]", - "type": "Function", - "lineno": 95 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "type": "Function", - "lineno": 95 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-gpt-4o]", - "type": "Function", - "lineno": 95 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-gpt-4o-mini]", - "type": "Function", - "lineno": 95 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-3.3-8B-Instruct]", - "type": "Function", - "lineno": 95 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-3.3-70B-Instruct]", - "type": "Function", - "lineno": 95 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E]", - "type": "Function", - "lineno": 95 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E-Instruct]", - "type": "Function", - "lineno": 95 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E]", - "type": "Function", - "lineno": 95 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E-Instruct]", - "type": "Function", - "lineno": 95 - }, - { - "nodeid": 
"tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-gpt-4o]", - "type": "Function", - "lineno": 95 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-gpt-4o-mini]", - "type": "Function", - "lineno": 95 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-3.3-8B-Instruct]", - "type": "Function", - "lineno": 117 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-3.3-70B-Instruct]", - "type": "Function", - "lineno": 117 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E]", - "type": "Function", - "lineno": 117 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "type": "Function", - "lineno": 117 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E]", - "type": "Function", - "lineno": 117 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "type": "Function", - "lineno": 117 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-gpt-4o]", - "type": "Function", - "lineno": 117 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-gpt-4o-mini]", - "type": "Function", - "lineno": 117 - }, - { - "nodeid": 
"tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-3.3-8B-Instruct]", - "type": "Function", - "lineno": 117 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-3.3-70B-Instruct]", - "type": "Function", - "lineno": 117 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E]", - "type": "Function", - "lineno": 117 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E-Instruct]", - "type": "Function", - "lineno": 117 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E]", - "type": "Function", - "lineno": 117 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E-Instruct]", - "type": "Function", - "lineno": 117 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-gpt-4o]", - "type": "Function", - "lineno": 117 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-gpt-4o-mini]", - "type": "Function", - "lineno": 117 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-Llama-3.3-70B-Instruct]", - "type": "Function", - "lineno": 138 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-Llama-4-Scout-17B-16E]", - "type": "Function", - "lineno": 138 - }, - { - "nodeid": 
"tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "type": "Function", - "lineno": 138 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-Llama-4-Maverick-17B-128E]", - "type": "Function", - "lineno": 138 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "type": "Function", - "lineno": 138 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-gpt-4o]", - "type": "Function", - "lineno": 138 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-gpt-4o-mini]", - "type": "Function", - "lineno": 138 - } - ] - } - ], - "tests": [ - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-3.3-8B-Instruct]", - "lineno": 25, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_basic[input_output0-Llama-3.3-8B-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-3.3-8B-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.17320987500716, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider fireworks does not support model Llama-3.3-8B-Instruct')" - }, - "teardown": { - "duration": 0.000177707988768816, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-3.3-70B-Instruct]", - "lineno": 25, - "outcome": "passed", - "keywords": [ - 
"test_chat_non_streaming_basic[input_output0-Llama-3.3-70B-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-3.3-70B-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.009193749981932342, - "outcome": "passed" - }, - "call": { - "duration": 1.1473859580000862, - "outcome": "passed" - }, - "teardown": { - "duration": 0.00043337501119822264, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-4-Scout-17B-16E]", - "lineno": 25, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_basic[input_output0-Llama-4-Scout-17B-16E]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Scout-17B-16E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.01645291701424867, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider fireworks does not support model Llama-4-Scout-17B-16E')" - }, - "teardown": { - "duration": 0.0002898749662563205, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "lineno": 25, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_basic[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Scout-17B-16E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.01562033302616328, - "outcome": "passed" - }, - "call": { - "duration": 0.8782661251025274, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0002795408945530653, - "outcome": "passed" - } - }, - { - "nodeid": 
"tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-4-Maverick-17B-128E]", - "lineno": 25, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_basic[input_output0-Llama-4-Maverick-17B-128E]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Maverick-17B-128E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.008571124984882772, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider fireworks does not support model Llama-4-Maverick-17B-128E')" - }, - "teardown": { - "duration": 0.0003043749602511525, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "lineno": 25, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_basic[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Maverick-17B-128E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.00842841702979058, - "outcome": "passed" - }, - "call": { - "duration": 1.3863223339430988, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0009970410028472543, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-gpt-4o]", - "lineno": 25, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_basic[input_output0-gpt-4o]", - "parametrize", - "pytestmark", - "input_output0-gpt-4o", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.007089875056408346, - "outcome": "skipped", - "longrepr": 
"('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider fireworks does not support model gpt-4o')" - }, - "teardown": { - "duration": 0.00017958390526473522, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-gpt-4o-mini]", - "lineno": 25, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_basic[input_output0-gpt-4o-mini]", - "parametrize", - "pytestmark", - "input_output0-gpt-4o-mini", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.005809499998576939, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider fireworks does not support model gpt-4o-mini')" - }, - "teardown": { - "duration": 0.00016495899762958288, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-3.3-8B-Instruct]", - "lineno": 25, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_basic[input_output1-Llama-3.3-8B-Instruct]", - "parametrize", - "pytestmark", - "input_output1-Llama-3.3-8B-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.0119722920935601, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider fireworks does not support model Llama-3.3-8B-Instruct')" - }, - "teardown": { - "duration": 0.00016962504014372826, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-3.3-70B-Instruct]", - "lineno": 25, - "outcome": "passed", - "keywords": [ - 
"test_chat_non_streaming_basic[input_output1-Llama-3.3-70B-Instruct]", - "parametrize", - "pytestmark", - "input_output1-Llama-3.3-70B-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.005716291954740882, - "outcome": "passed" - }, - "call": { - "duration": 0.6822018750244752, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0005292498972266912, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-4-Scout-17B-16E]", - "lineno": 25, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_basic[input_output1-Llama-4-Scout-17B-16E]", - "parametrize", - "pytestmark", - "input_output1-Llama-4-Scout-17B-16E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.025827708072029054, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider fireworks does not support model Llama-4-Scout-17B-16E')" - }, - "teardown": { - "duration": 0.000295999925583601, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-4-Scout-17B-16E-Instruct]", - "lineno": 25, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_basic[input_output1-Llama-4-Scout-17B-16E-Instruct]", - "parametrize", - "pytestmark", - "input_output1-Llama-4-Scout-17B-16E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.010980832972563803, - "outcome": "passed" - }, - "call": { - "duration": 0.7537062909686938, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0008091670460999012, - "outcome": "passed" - } - }, - { - "nodeid": 
"tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-4-Maverick-17B-128E]", - "lineno": 25, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_basic[input_output1-Llama-4-Maverick-17B-128E]", - "parametrize", - "pytestmark", - "input_output1-Llama-4-Maverick-17B-128E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.006567832897417247, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider fireworks does not support model Llama-4-Maverick-17B-128E')" - }, - "teardown": { - "duration": 0.0001545000122860074, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-4-Maverick-17B-128E-Instruct]", - "lineno": 25, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_basic[input_output1-Llama-4-Maverick-17B-128E-Instruct]", - "parametrize", - "pytestmark", - "input_output1-Llama-4-Maverick-17B-128E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.005985083989799023, - "outcome": "passed" - }, - "call": { - "duration": 0.7263387079583481, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0006324589485302567, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-gpt-4o]", - "lineno": 25, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_basic[input_output1-gpt-4o]", - "parametrize", - "pytestmark", - "input_output1-gpt-4o", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.0171962499152869, - "outcome": "skipped", - "longrepr": 
"('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider fireworks does not support model gpt-4o')" - }, - "teardown": { - "duration": 0.000780042028054595, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-gpt-4o-mini]", - "lineno": 25, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_basic[input_output1-gpt-4o-mini]", - "parametrize", - "pytestmark", - "input_output1-gpt-4o-mini", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.01365620899014175, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider fireworks does not support model gpt-4o-mini')" - }, - "teardown": { - "duration": 0.00016758404672145844, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-3.3-8B-Instruct]", - "lineno": 40, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_basic[input_output0-Llama-3.3-8B-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-3.3-8B-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.0064070840599015355, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider fireworks does not support model Llama-3.3-8B-Instruct')" - }, - "teardown": { - "duration": 0.0002031669719144702, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-3.3-70B-Instruct]", - "lineno": 40, - "outcome": "passed", - "keywords": [ - 
"test_chat_streaming_basic[input_output0-Llama-3.3-70B-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-3.3-70B-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.010951624950394034, - "outcome": "passed" - }, - "call": { - "duration": 0.5433399169705808, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0013178749941289425, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-4-Scout-17B-16E]", - "lineno": 40, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_basic[input_output0-Llama-4-Scout-17B-16E]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Scout-17B-16E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.022056750021874905, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider fireworks does not support model Llama-4-Scout-17B-16E')" - }, - "teardown": { - "duration": 0.0006570409750565886, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "lineno": 40, - "outcome": "passed", - "keywords": [ - "test_chat_streaming_basic[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Scout-17B-16E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.008314333041198552, - "outcome": "passed" - }, - "call": { - "duration": 0.7779882500180975, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0006799160037189722, - "outcome": "passed" - } - }, - { - "nodeid": 
"tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-4-Maverick-17B-128E]", - "lineno": 40, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_basic[input_output0-Llama-4-Maverick-17B-128E]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Maverick-17B-128E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.03601404093205929, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider fireworks does not support model Llama-4-Maverick-17B-128E')" - }, - "teardown": { - "duration": 0.000610582996159792, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "lineno": 40, - "outcome": "passed", - "keywords": [ - "test_chat_streaming_basic[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Maverick-17B-128E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.014321292052045465, - "outcome": "passed" - }, - "call": { - "duration": 1.0243758750148118, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0010485410457476974, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-gpt-4o]", - "lineno": 40, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_basic[input_output0-gpt-4o]", - "parametrize", - "pytestmark", - "input_output0-gpt-4o", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.021133000031113625, - "outcome": "skipped", - "longrepr": 
"('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider fireworks does not support model gpt-4o')" - }, - "teardown": { - "duration": 0.0005400830414146185, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-gpt-4o-mini]", - "lineno": 40, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_basic[input_output0-gpt-4o-mini]", - "parametrize", - "pytestmark", - "input_output0-gpt-4o-mini", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.007212458993308246, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider fireworks does not support model gpt-4o-mini')" - }, - "teardown": { - "duration": 0.00026770797558128834, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-3.3-8B-Instruct]", - "lineno": 40, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_basic[input_output1-Llama-3.3-8B-Instruct]", - "parametrize", - "pytestmark", - "input_output1-Llama-3.3-8B-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.012334750033915043, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider fireworks does not support model Llama-3.3-8B-Instruct')" - }, - "teardown": { - "duration": 0.00042683398351073265, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-3.3-70B-Instruct]", - "lineno": 40, - "outcome": "passed", - "keywords": [ - 
"test_chat_streaming_basic[input_output1-Llama-3.3-70B-Instruct]", - "parametrize", - "pytestmark", - "input_output1-Llama-3.3-70B-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.011477917083539069, - "outcome": "passed" - }, - "call": { - "duration": 1.670572166913189, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0005759169580414891, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-4-Scout-17B-16E]", - "lineno": 40, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_basic[input_output1-Llama-4-Scout-17B-16E]", - "parametrize", - "pytestmark", - "input_output1-Llama-4-Scout-17B-16E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.024620208074338734, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider fireworks does not support model Llama-4-Scout-17B-16E')" - }, - "teardown": { - "duration": 0.0005166250048205256, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-4-Scout-17B-16E-Instruct]", - "lineno": 40, - "outcome": "passed", - "keywords": [ - "test_chat_streaming_basic[input_output1-Llama-4-Scout-17B-16E-Instruct]", - "parametrize", - "pytestmark", - "input_output1-Llama-4-Scout-17B-16E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.008708957931958139, - "outcome": "passed" - }, - "call": { - "duration": 0.6654335829662159, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0002927089808508754, - "outcome": "passed" - } - }, - { - "nodeid": 
"tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-4-Maverick-17B-128E]", - "lineno": 40, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_basic[input_output1-Llama-4-Maverick-17B-128E]", - "parametrize", - "pytestmark", - "input_output1-Llama-4-Maverick-17B-128E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.018128167022950947, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider fireworks does not support model Llama-4-Maverick-17B-128E')" - }, - "teardown": { - "duration": 0.0001929170684888959, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-4-Maverick-17B-128E-Instruct]", - "lineno": 40, - "outcome": "passed", - "keywords": [ - "test_chat_streaming_basic[input_output1-Llama-4-Maverick-17B-128E-Instruct]", - "parametrize", - "pytestmark", - "input_output1-Llama-4-Maverick-17B-128E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.0063874589977785945, - "outcome": "passed" - }, - "call": { - "duration": 0.8047525839647278, - "outcome": "passed" - }, - "teardown": { - "duration": 0.00039245898369699717, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-gpt-4o]", - "lineno": 40, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_basic[input_output1-gpt-4o]", - "parametrize", - "pytestmark", - "input_output1-gpt-4o", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.01366533397231251, - "outcome": "skipped", - "longrepr": 
"('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider fireworks does not support model gpt-4o')" - }, - "teardown": { - "duration": 0.00028241705149412155, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-gpt-4o-mini]", - "lineno": 40, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_basic[input_output1-gpt-4o-mini]", - "parametrize", - "pytestmark", - "input_output1-gpt-4o-mini", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.010844790958799422, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider fireworks does not support model gpt-4o-mini')" - }, - "teardown": { - "duration": 0.000258082989603281, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-Llama-4-Scout-17B-16E]", - "lineno": 60, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_image[input_output0-Llama-4-Scout-17B-16E]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Scout-17B-16E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.00936354196164757, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 61, 'Skipped: Provider fireworks does not support model Llama-4-Scout-17B-16E')" - }, - "teardown": { - "duration": 0.00020533299539238214, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "lineno": 60, - "outcome": "passed", - "keywords": [ - 
"test_chat_non_streaming_image[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Scout-17B-16E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.008578249951824546, - "outcome": "passed" - }, - "call": { - "duration": 2.6288582499837503, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0006052498938515782, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-Llama-4-Maverick-17B-128E]", - "lineno": 60, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_image[input_output0-Llama-4-Maverick-17B-128E]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Maverick-17B-128E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.02061279199551791, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 61, 'Skipped: Provider fireworks does not support model Llama-4-Maverick-17B-128E')" - }, - "teardown": { - "duration": 0.00029320805333554745, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "lineno": 60, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_image[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Maverick-17B-128E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.00995812495239079, - "outcome": "passed" - }, - "call": { - "duration": 3.0904540000483394, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0003214169992133975, - "outcome": "passed" - 
} - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-gpt-4o]", - "lineno": 60, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_image[input_output0-gpt-4o]", - "parametrize", - "pytestmark", - "input_output0-gpt-4o", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.0261635419446975, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 61, 'Skipped: Provider fireworks does not support model gpt-4o')" - }, - "teardown": { - "duration": 0.00032716698478907347, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-gpt-4o-mini]", - "lineno": 60, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_image[input_output0-gpt-4o-mini]", - "parametrize", - "pytestmark", - "input_output0-gpt-4o-mini", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.027220541960559785, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 61, 'Skipped: Provider fireworks does not support model gpt-4o-mini')" - }, - "teardown": { - "duration": 0.0003192499279975891, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-Llama-4-Scout-17B-16E]", - "lineno": 75, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_image[input_output0-Llama-4-Scout-17B-16E]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Scout-17B-16E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.010883458075113595, - "outcome": "skipped", - 
"longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 76, 'Skipped: Provider fireworks does not support model Llama-4-Scout-17B-16E')" - }, - "teardown": { - "duration": 0.0002687909873202443, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "lineno": 75, - "outcome": "passed", - "keywords": [ - "test_chat_streaming_image[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Scout-17B-16E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.0171177500160411, - "outcome": "passed" - }, - "call": { - "duration": 1.6752691670553759, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0004877089522778988, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-Llama-4-Maverick-17B-128E]", - "lineno": 75, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_image[input_output0-Llama-4-Maverick-17B-128E]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Maverick-17B-128E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.011608208995312452, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 76, 'Skipped: Provider fireworks does not support model Llama-4-Maverick-17B-128E')" - }, - "teardown": { - "duration": 0.00017137499526143074, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "lineno": 75, - "outcome": "passed", - "keywords": [ - 
"test_chat_streaming_image[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Maverick-17B-128E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.009284624946303666, - "outcome": "passed" - }, - "call": { - "duration": 3.537356249988079, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0005068340105935931, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-gpt-4o]", - "lineno": 75, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_image[input_output0-gpt-4o]", - "parametrize", - "pytestmark", - "input_output0-gpt-4o", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.016660499968566, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 76, 'Skipped: Provider fireworks does not support model gpt-4o')" - }, - "teardown": { - "duration": 0.00029341597110033035, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-gpt-4o-mini]", - "lineno": 75, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_image[input_output0-gpt-4o-mini]", - "parametrize", - "pytestmark", - "input_output0-gpt-4o-mini", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.01374066702555865, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 76, 'Skipped: Provider fireworks does not support model gpt-4o-mini')" - }, - "teardown": { - "duration": 0.0002625000197440386, - "outcome": "passed" - } - }, - { - "nodeid": 
"tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-3.3-8B-Instruct]", - "lineno": 95, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_structured_output[input_output0-Llama-3.3-8B-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-3.3-8B-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.013120374991558492, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider fireworks does not support model Llama-3.3-8B-Instruct')" - }, - "teardown": { - "duration": 0.00021954195108264685, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-3.3-70B-Instruct]", - "lineno": 95, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_structured_output[input_output0-Llama-3.3-70B-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-3.3-70B-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.015080374898388982, - "outcome": "passed" - }, - "call": { - "duration": 1.157175041968003, - "outcome": "passed" - }, - "teardown": { - "duration": 0.000495875021442771, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E]", - "lineno": 95, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Scout-17B-16E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 
0.013946042046882212, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider fireworks does not support model Llama-4-Scout-17B-16E')" - }, - "teardown": { - "duration": 0.0002954580122604966, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "lineno": 95, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Scout-17B-16E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.011617792071774602, - "outcome": "passed" - }, - "call": { - "duration": 0.9537639999762177, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0004819999448955059, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E]", - "lineno": 95, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Maverick-17B-128E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.027436082949861884, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider fireworks does not support model Llama-4-Maverick-17B-128E')" - }, - "teardown": { - "duration": 0.00030274991877377033, - "outcome": "passed" - } - }, - { - "nodeid": 
"tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "lineno": 95, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Maverick-17B-128E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.016110333963297307, - "outcome": "passed" - }, - "call": { - "duration": 0.8493227910948917, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0004883749643340707, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-gpt-4o]", - "lineno": 95, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_structured_output[input_output0-gpt-4o]", - "parametrize", - "pytestmark", - "input_output0-gpt-4o", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.017850833013653755, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider fireworks does not support model gpt-4o')" - }, - "teardown": { - "duration": 0.0003287500003352761, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-gpt-4o-mini]", - "lineno": 95, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_structured_output[input_output0-gpt-4o-mini]", - "parametrize", - "pytestmark", - "input_output0-gpt-4o-mini", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.012523208046332002, - "outcome": "skipped", - "longrepr": 
"('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider fireworks does not support model gpt-4o-mini')" - }, - "teardown": { - "duration": 0.00023500004317611456, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-3.3-8B-Instruct]", - "lineno": 95, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_structured_output[input_output1-Llama-3.3-8B-Instruct]", - "parametrize", - "pytestmark", - "input_output1-Llama-3.3-8B-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.007516667013987899, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider fireworks does not support model Llama-3.3-8B-Instruct')" - }, - "teardown": { - "duration": 0.00018912507221102715, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-3.3-70B-Instruct]", - "lineno": 95, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_structured_output[input_output1-Llama-3.3-70B-Instruct]", - "parametrize", - "pytestmark", - "input_output1-Llama-3.3-70B-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.007337165996432304, - "outcome": "passed" - }, - "call": { - "duration": 3.124099582899362, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0006703329272568226, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E]", - "lineno": 95, - "outcome": "skipped", - "keywords": [ - 
"test_chat_non_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E]", - "parametrize", - "pytestmark", - "input_output1-Llama-4-Scout-17B-16E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.014259999967180192, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider fireworks does not support model Llama-4-Scout-17B-16E')" - }, - "teardown": { - "duration": 0.00030262500513345003, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E-Instruct]", - "lineno": 95, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E-Instruct]", - "parametrize", - "pytestmark", - "input_output1-Llama-4-Scout-17B-16E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.010863124975003302, - "outcome": "passed" - }, - "call": { - "duration": 1.3330956250429153, - "outcome": "passed" - }, - "teardown": { - "duration": 0.00018679199274629354, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E]", - "lineno": 95, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E]", - "parametrize", - "pytestmark", - "input_output1-Llama-4-Maverick-17B-128E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.005797958001494408, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: 
Provider fireworks does not support model Llama-4-Maverick-17B-128E')" - }, - "teardown": { - "duration": 0.00017529097385704517, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E-Instruct]", - "lineno": 95, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E-Instruct]", - "parametrize", - "pytestmark", - "input_output1-Llama-4-Maverick-17B-128E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.005647709011100233, - "outcome": "passed" - }, - "call": { - "duration": 3.2295467499643564, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0005654999986290932, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-gpt-4o]", - "lineno": 95, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_structured_output[input_output1-gpt-4o]", - "parametrize", - "pytestmark", - "input_output1-gpt-4o", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.007151791942305863, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider fireworks does not support model gpt-4o')" - }, - "teardown": { - "duration": 0.00015316694043576717, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-gpt-4o-mini]", - "lineno": 95, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_structured_output[input_output1-gpt-4o-mini]", - "parametrize", - "pytestmark", - "input_output1-gpt-4o-mini", - 
"test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.006435790914110839, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider fireworks does not support model gpt-4o-mini')" - }, - "teardown": { - "duration": 0.00015954102855175734, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-3.3-8B-Instruct]", - "lineno": 117, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_structured_output[input_output0-Llama-3.3-8B-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-3.3-8B-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.006164791993796825, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider fireworks does not support model Llama-3.3-8B-Instruct')" - }, - "teardown": { - "duration": 0.00014074996579438448, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-3.3-70B-Instruct]", - "lineno": 117, - "outcome": "passed", - "keywords": [ - "test_chat_streaming_structured_output[input_output0-Llama-3.3-70B-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-3.3-70B-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.010064583038911223, - "outcome": "passed" - }, - "call": { - "duration": 1.1676458748988807, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0002513329964131117, - "outcome": "passed" - } - }, - { - "nodeid": 
"tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E]", - "lineno": 117, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Scout-17B-16E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.011011417023837566, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider fireworks does not support model Llama-4-Scout-17B-16E')" - }, - "teardown": { - "duration": 0.00020608294289559126, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "lineno": 117, - "outcome": "passed", - "keywords": [ - "test_chat_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Scout-17B-16E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.011654542060568929, - "outcome": "passed" - }, - "call": { - "duration": 0.7950789160095155, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0002690000692382455, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E]", - "lineno": 117, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Maverick-17B-128E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 
0.0066834589233621955, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider fireworks does not support model Llama-4-Maverick-17B-128E')" - }, - "teardown": { - "duration": 0.00017270795069634914, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "lineno": 117, - "outcome": "passed", - "keywords": [ - "test_chat_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Maverick-17B-128E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.011390416999347508, - "outcome": "passed" - }, - "call": { - "duration": 0.7844940840732306, - "outcome": "passed" - }, - "teardown": { - "duration": 0.000511458027176559, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-gpt-4o]", - "lineno": 117, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_structured_output[input_output0-gpt-4o]", - "parametrize", - "pytestmark", - "input_output0-gpt-4o", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.005813500029034913, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider fireworks does not support model gpt-4o')" - }, - "teardown": { - "duration": 0.00015495799016207457, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-gpt-4o-mini]", - "lineno": 117, - "outcome": "skipped", - "keywords": [ - 
"test_chat_streaming_structured_output[input_output0-gpt-4o-mini]", - "parametrize", - "pytestmark", - "input_output0-gpt-4o-mini", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.0075639160349965096, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider fireworks does not support model gpt-4o-mini')" - }, - "teardown": { - "duration": 0.00014358304906636477, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-3.3-8B-Instruct]", - "lineno": 117, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_structured_output[input_output1-Llama-3.3-8B-Instruct]", - "parametrize", - "pytestmark", - "input_output1-Llama-3.3-8B-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.008526541059836745, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider fireworks does not support model Llama-3.3-8B-Instruct')" - }, - "teardown": { - "duration": 0.00015841599088162184, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-3.3-70B-Instruct]", - "lineno": 117, - "outcome": "failed", - "keywords": [ - "test_chat_streaming_structured_output[input_output1-Llama-3.3-70B-Instruct]", - "parametrize", - "pytestmark", - "input_output1-Llama-3.3-70B-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.007805416011251509, - "outcome": "passed" - }, - "call": { - "duration": 13.25898533302825, - "outcome": "failed", - "crash": { - 
"path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py", - "lineno": 196, - "message": "assert None is not None" - }, - "traceback": [ - { - "path": "tests/verifications/openai/test_chat_completion.py", - "lineno": 136, - "message": "" - }, - { - "path": "tests/verifications/openai/test_chat_completion.py", - "lineno": 196, - "message": "AssertionError" - } - ], - "longrepr": "openai_client = \ninput_output = {'input': {'messages': [{'content': 'You are a helpful math tutor. Guide the user through the solution step by step.',... ['steps', 'final_answer'], 'title': 'MathReasoning', ...}}, 'type': 'json_schema'}}, 'output': 'valid_math_reasoning'}\ncorrect_model_name = 'accounts/fireworks/models/llama-v3p1-70b-instruct'\n\n @pytest.mark.parametrize(\n \"model\",\n chat_completion_test_cases[\"test_chat_structured_output\"][\"test_params\"][\"model\"],\n )\n @pytest.mark.parametrize(\n \"input_output\",\n chat_completion_test_cases[\"test_chat_structured_output\"][\"test_params\"][\"input_output\"],\n )\n def test_chat_streaming_structured_output(openai_client, input_output, correct_model_name):\n response = openai_client.chat.completions.create(\n model=correct_model_name,\n messages=input_output[\"input\"][\"messages\"],\n response_format=input_output[\"input\"][\"response_format\"],\n stream=True,\n )\n maybe_json_content = \"\"\n for chunk in response:\n maybe_json_content += chunk.choices[0].delta.content or \"\"\n> validate_structured_output(maybe_json_content, input_output[\"output\"])\n\ntests/verifications/openai/test_chat_completion.py:136: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\nmaybe_json_content = '{ \"final_answer\": \"}To solve the equation 8x + 7 = -23, we need to isolate the variable x. 
We can do this by followin...tassistantassistantassistantassistantassistantassistantassistantassistantassistantassistantassistantassistantassistant'\nschema_name = 'valid_math_reasoning'\n\n def validate_structured_output(maybe_json_content: str, schema_name: str) -> None:\n structured_output = get_structured_output(maybe_json_content, schema_name)\n> assert structured_output is not None\nE assert None is not None\n\ntests/verifications/openai/test_chat_completion.py:196: AssertionError" - }, - "teardown": { - "duration": 0.00022583396639674902, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E]", - "lineno": 117, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E]", - "parametrize", - "pytestmark", - "input_output1-Llama-4-Scout-17B-16E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.006412541959434748, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider fireworks does not support model Llama-4-Scout-17B-16E')" - }, - "teardown": { - "duration": 0.0001449589617550373, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E-Instruct]", - "lineno": 117, - "outcome": "passed", - "keywords": [ - "test_chat_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E-Instruct]", - "parametrize", - "pytestmark", - "input_output1-Llama-4-Scout-17B-16E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.010353000019676983, - "outcome": "passed" - }, - "call": { - "duration": 4.559281209018081, - 
"outcome": "passed" - }, - "teardown": { - "duration": 0.00021179206669330597, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E]", - "lineno": 117, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E]", - "parametrize", - "pytestmark", - "input_output1-Llama-4-Maverick-17B-128E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.011320417048409581, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider fireworks does not support model Llama-4-Maverick-17B-128E')" - }, - "teardown": { - "duration": 0.0001623749267309904, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E-Instruct]", - "lineno": 117, - "outcome": "passed", - "keywords": [ - "test_chat_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E-Instruct]", - "parametrize", - "pytestmark", - "input_output1-Llama-4-Maverick-17B-128E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.005637791007757187, - "outcome": "passed" - }, - "call": { - "duration": 2.9282109580235556, - "outcome": "passed" - }, - "teardown": { - "duration": 0.00019149994477629662, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-gpt-4o]", - "lineno": 117, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_structured_output[input_output1-gpt-4o]", - "parametrize", - "pytestmark", - "input_output1-gpt-4o", - 
"test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.021475916961207986, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider fireworks does not support model gpt-4o')" - }, - "teardown": { - "duration": 0.0002605828922241926, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-gpt-4o-mini]", - "lineno": 117, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_structured_output[input_output1-gpt-4o-mini]", - "parametrize", - "pytestmark", - "input_output1-gpt-4o-mini", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.012046082993037999, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider fireworks does not support model gpt-4o-mini')" - }, - "teardown": { - "duration": 0.00016966694965958595, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-Llama-3.3-70B-Instruct]", - "lineno": 138, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_tool_calling[input_output0-Llama-3.3-70B-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-3.3-70B-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.00782629195600748, - "outcome": "passed" - }, - "call": { - "duration": 0.9290615000063553, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0004110001027584076, - "outcome": "passed" - } - }, - { - "nodeid": 
"tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-Llama-4-Scout-17B-16E]", - "lineno": 138, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_tool_calling[input_output0-Llama-4-Scout-17B-16E]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Scout-17B-16E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.00842183397617191, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 139, 'Skipped: Provider fireworks does not support model Llama-4-Scout-17B-16E')" - }, - "teardown": { - "duration": 0.00023745803628116846, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "lineno": 138, - "outcome": "failed", - "keywords": [ - "test_chat_non_streaming_tool_calling[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Scout-17B-16E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.010762874968349934, - "outcome": "passed" - }, - "call": { - "duration": 23.62101216695737, - "outcome": "failed", - "crash": { - "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py", - "lineno": 156, - "message": "TypeError: object of type 'NoneType' has no len()" - }, - "traceback": [ - { - "path": "tests/verifications/openai/test_chat_completion.py", - "lineno": 156, - "message": "TypeError" - } - ], - "longrepr": "openai_client = \ninput_output = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 
'function'}]}, 'output': 'get_weather_tool_call'}\ncorrect_model_name = 'accounts/fireworks/models/llama4-scout-instruct-basic'\n\n @pytest.mark.parametrize(\n \"model\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"model\"],\n )\n @pytest.mark.parametrize(\n \"input_output\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"input_output\"],\n )\n def test_chat_non_streaming_tool_calling(openai_client, input_output, correct_model_name):\n response = openai_client.chat.completions.create(\n model=correct_model_name,\n messages=input_output[\"input\"][\"messages\"],\n tools=input_output[\"input\"][\"tools\"],\n stream=False,\n )\n \n assert response.choices[0].message.role == \"assistant\"\n> assert len(response.choices[0].message.tool_calls) > 0\nE TypeError: object of type 'NoneType' has no len()\n\ntests/verifications/openai/test_chat_completion.py:156: TypeError" - }, - "teardown": { - "duration": 0.0004520840011537075, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-Llama-4-Maverick-17B-128E]", - "lineno": 138, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_tool_calling[input_output0-Llama-4-Maverick-17B-128E]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Maverick-17B-128E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.00953104195650667, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 139, 'Skipped: Provider fireworks does not support model Llama-4-Maverick-17B-128E')" - }, - "teardown": { - "duration": 0.00017912499606609344, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - 
"lineno": 138, - "outcome": "failed", - "keywords": [ - "test_chat_non_streaming_tool_calling[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Maverick-17B-128E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.010302042006514966, - "outcome": "passed" - }, - "call": { - "duration": 5.55651158397086, - "outcome": "failed", - "crash": { - "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py", - "lineno": 156, - "message": "TypeError: object of type 'NoneType' has no len()" - }, - "traceback": [ - { - "path": "tests/verifications/openai/test_chat_completion.py", - "lineno": 156, - "message": "TypeError" - } - ], - "longrepr": "openai_client = \ninput_output = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\ncorrect_model_name = 'accounts/fireworks/models/llama4-maverick-instruct-basic'\n\n @pytest.mark.parametrize(\n \"model\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"model\"],\n )\n @pytest.mark.parametrize(\n \"input_output\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"input_output\"],\n )\n def test_chat_non_streaming_tool_calling(openai_client, input_output, correct_model_name):\n response = openai_client.chat.completions.create(\n model=correct_model_name,\n messages=input_output[\"input\"][\"messages\"],\n tools=input_output[\"input\"][\"tools\"],\n stream=False,\n )\n \n assert response.choices[0].message.role == \"assistant\"\n> assert len(response.choices[0].message.tool_calls) > 0\nE TypeError: object of type 'NoneType' has no len()\n\ntests/verifications/openai/test_chat_completion.py:156: TypeError" - }, - "teardown": 
{ - "duration": 0.0003929579397663474, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-gpt-4o]", - "lineno": 138, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_tool_calling[input_output0-gpt-4o]", - "parametrize", - "pytestmark", - "input_output0-gpt-4o", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.01593891705852002, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 139, 'Skipped: Provider fireworks does not support model gpt-4o')" - }, - "teardown": { - "duration": 0.0003579579060897231, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-gpt-4o-mini]", - "lineno": 138, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_tool_calling[input_output0-gpt-4o-mini]", - "parametrize", - "pytestmark", - "input_output0-gpt-4o-mini", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.01874550001230091, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 139, 'Skipped: Provider fireworks does not support model gpt-4o-mini')" - }, - "teardown": { - "duration": 0.00031995808240026236, - "outcome": "passed" - } - } - ] -} diff --git a/tests/verifications/test_results/fireworks_1744264202.json b/tests/verifications/test_results/fireworks_1744264202.json new file mode 100644 index 000000000..d14738be9 --- /dev/null +++ b/tests/verifications/test_results/fireworks_1744264202.json @@ -0,0 +1,1329 @@ +{ + "created": 1744264258.730061, + "duration": 53.86071586608887, + "exitcode": 1, + "root": "/Users/erichuang/projects/llama-stack", 
+ "environment": {}, + "summary": { + "passed": 28, + "skipped": 2, + "failed": 3, + "total": 33, + "collected": 33 + }, + "collectors": [ + { + "nodeid": "", + "outcome": "passed", + "result": [ + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py", + "type": "Module" + } + ] + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py", + "outcome": "passed", + "result": [ + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[accounts/fireworks/models/llama-v3p3-70b-instruct-earth]", + "type": "Function", + "lineno": 72 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[accounts/fireworks/models/llama-v3p3-70b-instruct-saturn]", + "type": "Function", + "lineno": 72 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[accounts/fireworks/models/llama4-scout-instruct-basic-earth]", + "type": "Function", + "lineno": 72 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[accounts/fireworks/models/llama4-scout-instruct-basic-saturn]", + "type": "Function", + "lineno": 72 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[accounts/fireworks/models/llama4-maverick-instruct-basic-earth]", + "type": "Function", + "lineno": 72 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[accounts/fireworks/models/llama4-maverick-instruct-basic-saturn]", + "type": "Function", + "lineno": 72 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[accounts/fireworks/models/llama-v3p3-70b-instruct-earth]", + "type": "Function", + "lineno": 91 + }, + { + "nodeid": 
"tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[accounts/fireworks/models/llama-v3p3-70b-instruct-saturn]", + "type": "Function", + "lineno": 91 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[accounts/fireworks/models/llama4-scout-instruct-basic-earth]", + "type": "Function", + "lineno": 91 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[accounts/fireworks/models/llama4-scout-instruct-basic-saturn]", + "type": "Function", + "lineno": 91 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[accounts/fireworks/models/llama4-maverick-instruct-basic-earth]", + "type": "Function", + "lineno": 91 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[accounts/fireworks/models/llama4-maverick-instruct-basic-saturn]", + "type": "Function", + "lineno": 91 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]", + "type": "Function", + "lineno": 115 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[accounts/fireworks/models/llama4-scout-instruct-basic-case0]", + "type": "Function", + "lineno": 115 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]", + "type": "Function", + "lineno": 115 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]", + "type": "Function", + "lineno": 134 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[accounts/fireworks/models/llama4-scout-instruct-basic-case0]", + 
"type": "Function", + "lineno": 134 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]", + "type": "Function", + "lineno": 134 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[accounts/fireworks/models/llama-v3p3-70b-instruct-calendar]", + "type": "Function", + "lineno": 158 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[accounts/fireworks/models/llama-v3p3-70b-instruct-math]", + "type": "Function", + "lineno": 158 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[accounts/fireworks/models/llama4-scout-instruct-basic-calendar]", + "type": "Function", + "lineno": 158 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[accounts/fireworks/models/llama4-scout-instruct-basic-math]", + "type": "Function", + "lineno": 158 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[accounts/fireworks/models/llama4-maverick-instruct-basic-calendar]", + "type": "Function", + "lineno": 158 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[accounts/fireworks/models/llama4-maverick-instruct-basic-math]", + "type": "Function", + "lineno": 158 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[accounts/fireworks/models/llama-v3p3-70b-instruct-calendar]", + "type": "Function", + "lineno": 181 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[accounts/fireworks/models/llama-v3p3-70b-instruct-math]", + "type": "Function", + "lineno": 
181 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[accounts/fireworks/models/llama4-scout-instruct-basic-calendar]", + "type": "Function", + "lineno": 181 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[accounts/fireworks/models/llama4-scout-instruct-basic-math]", + "type": "Function", + "lineno": 181 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[accounts/fireworks/models/llama4-maverick-instruct-basic-calendar]", + "type": "Function", + "lineno": 181 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[accounts/fireworks/models/llama4-maverick-instruct-basic-math]", + "type": "Function", + "lineno": 181 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]", + "type": "Function", + "lineno": 203 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-case0]", + "type": "Function", + "lineno": 203 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]", + "type": "Function", + "lineno": 203 + } + ] + } + ], + "tests": [ + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[accounts/fireworks/models/llama-v3p3-70b-instruct-earth]", + "lineno": 72, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_basic[accounts/fireworks/models/llama-v3p3-70b-instruct-earth]", + "parametrize", + "pytestmark", + "accounts/fireworks/models/llama-v3p3-70b-instruct-earth", + "test_chat_completion.py", + 
"openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "accounts/fireworks/models/llama-v3p3-70b-instruct", + "case_id": "earth" + }, + "setup": { + "duration": 0.05236550001427531, + "outcome": "passed" + }, + "call": { + "duration": 0.5364967910572886, + "outcome": "passed" + }, + "teardown": { + "duration": 0.00015075004193931818, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[accounts/fireworks/models/llama-v3p3-70b-instruct-saturn]", + "lineno": 72, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_basic[accounts/fireworks/models/llama-v3p3-70b-instruct-saturn]", + "parametrize", + "pytestmark", + "accounts/fireworks/models/llama-v3p3-70b-instruct-saturn", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "accounts/fireworks/models/llama-v3p3-70b-instruct", + "case_id": "saturn" + }, + "setup": { + "duration": 0.00699599995277822, + "outcome": "passed" + }, + "call": { + "duration": 0.5843954589217901, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0003858329728245735, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[accounts/fireworks/models/llama4-scout-instruct-basic-earth]", + "lineno": 72, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_basic[accounts/fireworks/models/llama4-scout-instruct-basic-earth]", + "parametrize", + "pytestmark", + "accounts/fireworks/models/llama4-scout-instruct-basic-earth", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "accounts/fireworks/models/llama4-scout-instruct-basic", + "case_id": "earth" + }, + "setup": { + "duration": 0.009176500025205314, + "outcome": "passed" + }, + "call": { + "duration": 
0.9258683329680935, + "outcome": "passed" + }, + "teardown": { + "duration": 0.00015787500888109207, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[accounts/fireworks/models/llama4-scout-instruct-basic-saturn]", + "lineno": 72, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_basic[accounts/fireworks/models/llama4-scout-instruct-basic-saturn]", + "parametrize", + "pytestmark", + "accounts/fireworks/models/llama4-scout-instruct-basic-saturn", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "accounts/fireworks/models/llama4-scout-instruct-basic", + "case_id": "saturn" + }, + "setup": { + "duration": 0.011275375029072165, + "outcome": "passed" + }, + "call": { + "duration": 0.6890578339807689, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0004926669644191861, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[accounts/fireworks/models/llama4-maverick-instruct-basic-earth]", + "lineno": 72, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_basic[accounts/fireworks/models/llama4-maverick-instruct-basic-earth]", + "parametrize", + "pytestmark", + "accounts/fireworks/models/llama4-maverick-instruct-basic-earth", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "accounts/fireworks/models/llama4-maverick-instruct-basic", + "case_id": "earth" + }, + "setup": { + "duration": 0.007520624902099371, + "outcome": "passed" + }, + "call": { + "duration": 0.6675686669768766, + "outcome": "passed" + }, + "teardown": { + "duration": 0.00016137503553181887, + "outcome": "passed" + } + }, + { + "nodeid": 
"tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[accounts/fireworks/models/llama4-maverick-instruct-basic-saturn]", + "lineno": 72, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_basic[accounts/fireworks/models/llama4-maverick-instruct-basic-saturn]", + "parametrize", + "pytestmark", + "accounts/fireworks/models/llama4-maverick-instruct-basic-saturn", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "accounts/fireworks/models/llama4-maverick-instruct-basic", + "case_id": "saturn" + }, + "setup": { + "duration": 0.0076431670458987355, + "outcome": "passed" + }, + "call": { + "duration": 1.6813415409997106, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0004928340204060078, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[accounts/fireworks/models/llama-v3p3-70b-instruct-earth]", + "lineno": 91, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_basic[accounts/fireworks/models/llama-v3p3-70b-instruct-earth]", + "parametrize", + "pytestmark", + "accounts/fireworks/models/llama-v3p3-70b-instruct-earth", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "accounts/fireworks/models/llama-v3p3-70b-instruct", + "case_id": "earth" + }, + "setup": { + "duration": 0.01302404107991606, + "outcome": "passed" + }, + "call": { + "duration": 1.3206909999717027, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0002220839960500598, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[accounts/fireworks/models/llama-v3p3-70b-instruct-saturn]", + "lineno": 91, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_basic[accounts/fireworks/models/llama-v3p3-70b-instruct-saturn]", 
+ "parametrize", + "pytestmark", + "accounts/fireworks/models/llama-v3p3-70b-instruct-saturn", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "accounts/fireworks/models/llama-v3p3-70b-instruct", + "case_id": "saturn" + }, + "setup": { + "duration": 0.0071772499941289425, + "outcome": "passed" + }, + "call": { + "duration": 0.4109888339880854, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0005431669997051358, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[accounts/fireworks/models/llama4-scout-instruct-basic-earth]", + "lineno": 91, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_basic[accounts/fireworks/models/llama4-scout-instruct-basic-earth]", + "parametrize", + "pytestmark", + "accounts/fireworks/models/llama4-scout-instruct-basic-earth", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "accounts/fireworks/models/llama4-scout-instruct-basic", + "case_id": "earth" + }, + "setup": { + "duration": 0.012043708004057407, + "outcome": "passed" + }, + "call": { + "duration": 0.4509220840409398, + "outcome": "passed" + }, + "teardown": { + "duration": 0.00016408402007073164, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[accounts/fireworks/models/llama4-scout-instruct-basic-saturn]", + "lineno": 91, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_basic[accounts/fireworks/models/llama4-scout-instruct-basic-saturn]", + "parametrize", + "pytestmark", + "accounts/fireworks/models/llama4-scout-instruct-basic-saturn", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "accounts/fireworks/models/llama4-scout-instruct-basic", + "case_id": 
"saturn" + }, + "setup": { + "duration": 0.007165874936617911, + "outcome": "passed" + }, + "call": { + "duration": 0.6527335830032825, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0006419579731300473, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[accounts/fireworks/models/llama4-maverick-instruct-basic-earth]", + "lineno": 91, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_basic[accounts/fireworks/models/llama4-maverick-instruct-basic-earth]", + "parametrize", + "pytestmark", + "accounts/fireworks/models/llama4-maverick-instruct-basic-earth", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "accounts/fireworks/models/llama4-maverick-instruct-basic", + "case_id": "earth" + }, + "setup": { + "duration": 0.007546542095951736, + "outcome": "passed" + }, + "call": { + "duration": 0.9360042089829221, + "outcome": "passed" + }, + "teardown": { + "duration": 0.00020483299158513546, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[accounts/fireworks/models/llama4-maverick-instruct-basic-saturn]", + "lineno": 91, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_basic[accounts/fireworks/models/llama4-maverick-instruct-basic-saturn]", + "parametrize", + "pytestmark", + "accounts/fireworks/models/llama4-maverick-instruct-basic-saturn", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "accounts/fireworks/models/llama4-maverick-instruct-basic", + "case_id": "saturn" + }, + "setup": { + "duration": 0.046697250101715326, + "outcome": "passed" + }, + "call": { + "duration": 0.668349124956876, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0005031249020248652, + "outcome": "passed" + } + }, + { + "nodeid": 
"tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]", + "lineno": 115, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_image[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]", + "parametrize", + "pytestmark", + "accounts/fireworks/models/llama-v3p3-70b-instruct-case0", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "accounts/fireworks/models/llama-v3p3-70b-instruct", + "case_id": "case0" + }, + "setup": { + "duration": 0.012287458986975253, + "outcome": "passed" + }, + "call": { + "duration": 0.00015287497080862522, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py', 124, 'Skipped: Skipping test_chat_non_streaming_image for model accounts/fireworks/models/llama-v3p3-70b-instruct on provider fireworks based on config.')" + }, + "teardown": { + "duration": 0.00012162502389401197, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[accounts/fireworks/models/llama4-scout-instruct-basic-case0]", + "lineno": 115, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_image[accounts/fireworks/models/llama4-scout-instruct-basic-case0]", + "parametrize", + "pytestmark", + "accounts/fireworks/models/llama4-scout-instruct-basic-case0", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "accounts/fireworks/models/llama4-scout-instruct-basic", + "case_id": "case0" + }, + "setup": { + "duration": 0.007204124936833978, + "outcome": "passed" + }, + "call": { + "duration": 1.8676417920505628, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0001557499635964632, + "outcome": "passed" + } + }, + { + "nodeid": 
"tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]", + "lineno": 115, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_image[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]", + "parametrize", + "pytestmark", + "accounts/fireworks/models/llama4-maverick-instruct-basic-case0", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "accounts/fireworks/models/llama4-maverick-instruct-basic", + "case_id": "case0" + }, + "setup": { + "duration": 0.008226625039242208, + "outcome": "passed" + }, + "call": { + "duration": 3.2724285409785807, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0002898330567404628, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]", + "lineno": 134, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_image[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]", + "parametrize", + "pytestmark", + "accounts/fireworks/models/llama-v3p3-70b-instruct-case0", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "accounts/fireworks/models/llama-v3p3-70b-instruct", + "case_id": "case0" + }, + "setup": { + "duration": 0.011927249957807362, + "outcome": "passed" + }, + "call": { + "duration": 0.00017358292825520039, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py', 143, 'Skipped: Skipping test_chat_streaming_image for model accounts/fireworks/models/llama-v3p3-70b-instruct on provider fireworks based on config.')" + }, + "teardown": { + "duration": 0.00014037499204277992, + "outcome": "passed" + } + }, + { + "nodeid": 
"tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[accounts/fireworks/models/llama4-scout-instruct-basic-case0]", + "lineno": 134, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_image[accounts/fireworks/models/llama4-scout-instruct-basic-case0]", + "parametrize", + "pytestmark", + "accounts/fireworks/models/llama4-scout-instruct-basic-case0", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "accounts/fireworks/models/llama4-scout-instruct-basic", + "case_id": "case0" + }, + "setup": { + "duration": 0.008731417008675635, + "outcome": "passed" + }, + "call": { + "duration": 2.8333610829431564, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0005132080987095833, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]", + "lineno": 134, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_image[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]", + "parametrize", + "pytestmark", + "accounts/fireworks/models/llama4-maverick-instruct-basic-case0", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "accounts/fireworks/models/llama4-maverick-instruct-basic", + "case_id": "case0" + }, + "setup": { + "duration": 0.016569208004511893, + "outcome": "passed" + }, + "call": { + "duration": 2.302010750048794, + "outcome": "passed" + }, + "teardown": { + "duration": 0.00016108399722725153, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[accounts/fireworks/models/llama-v3p3-70b-instruct-calendar]", + "lineno": 158, + "outcome": "passed", + "keywords": [ + 
"test_chat_non_streaming_structured_output[accounts/fireworks/models/llama-v3p3-70b-instruct-calendar]", + "parametrize", + "pytestmark", + "accounts/fireworks/models/llama-v3p3-70b-instruct-calendar", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "accounts/fireworks/models/llama-v3p3-70b-instruct", + "case_id": "calendar" + }, + "setup": { + "duration": 0.039960999973118305, + "outcome": "passed" + }, + "call": { + "duration": 7.661373125039972, + "outcome": "passed" + }, + "teardown": { + "duration": 0.00015833403449505568, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[accounts/fireworks/models/llama-v3p3-70b-instruct-math]", + "lineno": 158, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_structured_output[accounts/fireworks/models/llama-v3p3-70b-instruct-math]", + "parametrize", + "pytestmark", + "accounts/fireworks/models/llama-v3p3-70b-instruct-math", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "accounts/fireworks/models/llama-v3p3-70b-instruct", + "case_id": "math" + }, + "setup": { + "duration": 0.006928625050932169, + "outcome": "passed" + }, + "call": { + "duration": 2.762534625013359, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0006561250193044543, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[accounts/fireworks/models/llama4-scout-instruct-basic-calendar]", + "lineno": 158, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_structured_output[accounts/fireworks/models/llama4-scout-instruct-basic-calendar]", + "parametrize", + "pytestmark", + "accounts/fireworks/models/llama4-scout-instruct-basic-calendar", + "test_chat_completion.py", + 
"openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "accounts/fireworks/models/llama4-scout-instruct-basic", + "case_id": "calendar" + }, + "setup": { + "duration": 0.008602249901741743, + "outcome": "passed" + }, + "call": { + "duration": 0.8311484589939937, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0005021670367568731, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[accounts/fireworks/models/llama4-scout-instruct-basic-math]", + "lineno": 158, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_structured_output[accounts/fireworks/models/llama4-scout-instruct-basic-math]", + "parametrize", + "pytestmark", + "accounts/fireworks/models/llama4-scout-instruct-basic-math", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "accounts/fireworks/models/llama4-scout-instruct-basic", + "case_id": "math" + }, + "setup": { + "duration": 0.015500334091484547, + "outcome": "passed" + }, + "call": { + "duration": 2.505719291046262, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0002619170118123293, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[accounts/fireworks/models/llama4-maverick-instruct-basic-calendar]", + "lineno": 158, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_structured_output[accounts/fireworks/models/llama4-maverick-instruct-basic-calendar]", + "parametrize", + "pytestmark", + "accounts/fireworks/models/llama4-maverick-instruct-basic-calendar", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "accounts/fireworks/models/llama4-maverick-instruct-basic", + "case_id": "calendar" + }, + "setup": { + "duration": 
0.01948041608557105, + "outcome": "passed" + }, + "call": { + "duration": 0.6336237500654534, + "outcome": "passed" + }, + "teardown": { + "duration": 0.00016637507360428572, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[accounts/fireworks/models/llama4-maverick-instruct-basic-math]", + "lineno": 158, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_structured_output[accounts/fireworks/models/llama4-maverick-instruct-basic-math]", + "parametrize", + "pytestmark", + "accounts/fireworks/models/llama4-maverick-instruct-basic-math", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "accounts/fireworks/models/llama4-maverick-instruct-basic", + "case_id": "math" + }, + "setup": { + "duration": 0.006810749997384846, + "outcome": "passed" + }, + "call": { + "duration": 1.9086956249084324, + "outcome": "passed" + }, + "teardown": { + "duration": 0.00018824997823685408, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[accounts/fireworks/models/llama-v3p3-70b-instruct-calendar]", + "lineno": 181, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_structured_output[accounts/fireworks/models/llama-v3p3-70b-instruct-calendar]", + "parametrize", + "pytestmark", + "accounts/fireworks/models/llama-v3p3-70b-instruct-calendar", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "accounts/fireworks/models/llama-v3p3-70b-instruct", + "case_id": "calendar" + }, + "setup": { + "duration": 0.007881582947447896, + "outcome": "passed" + }, + "call": { + "duration": 0.7142562499502674, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0007035828894004226, + "outcome": "passed" + } + }, + { + "nodeid": 
"tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[accounts/fireworks/models/llama-v3p3-70b-instruct-math]", + "lineno": 181, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_structured_output[accounts/fireworks/models/llama-v3p3-70b-instruct-math]", + "parametrize", + "pytestmark", + "accounts/fireworks/models/llama-v3p3-70b-instruct-math", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "accounts/fireworks/models/llama-v3p3-70b-instruct", + "case_id": "math" + }, + "setup": { + "duration": 0.00848070892971009, + "outcome": "passed" + }, + "call": { + "duration": 1.5210869159782305, + "outcome": "passed" + }, + "teardown": { + "duration": 0.00021216599270701408, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[accounts/fireworks/models/llama4-scout-instruct-basic-calendar]", + "lineno": 181, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_structured_output[accounts/fireworks/models/llama4-scout-instruct-basic-calendar]", + "parametrize", + "pytestmark", + "accounts/fireworks/models/llama4-scout-instruct-basic-calendar", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "accounts/fireworks/models/llama4-scout-instruct-basic", + "case_id": "calendar" + }, + "setup": { + "duration": 0.009669666993431747, + "outcome": "passed" + }, + "call": { + "duration": 1.3105999580584466, + "outcome": "passed" + }, + "teardown": { + "duration": 0.000588166993111372, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[accounts/fireworks/models/llama4-scout-instruct-basic-math]", + "lineno": 181, + "outcome": "passed", + "keywords": [ + 
"test_chat_streaming_structured_output[accounts/fireworks/models/llama4-scout-instruct-basic-math]", + "parametrize", + "pytestmark", + "accounts/fireworks/models/llama4-scout-instruct-basic-math", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "accounts/fireworks/models/llama4-scout-instruct-basic", + "case_id": "math" + }, + "setup": { + "duration": 0.007745541981421411, + "outcome": "passed" + }, + "call": { + "duration": 3.250162083073519, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0001455000601708889, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[accounts/fireworks/models/llama4-maverick-instruct-basic-calendar]", + "lineno": 181, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_structured_output[accounts/fireworks/models/llama4-maverick-instruct-basic-calendar]", + "parametrize", + "pytestmark", + "accounts/fireworks/models/llama4-maverick-instruct-basic-calendar", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "accounts/fireworks/models/llama4-maverick-instruct-basic", + "case_id": "calendar" + }, + "setup": { + "duration": 0.009726207936182618, + "outcome": "passed" + }, + "call": { + "duration": 0.5564592910232022, + "outcome": "passed" + }, + "teardown": { + "duration": 0.00019470800179988146, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[accounts/fireworks/models/llama4-maverick-instruct-basic-math]", + "lineno": 181, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_structured_output[accounts/fireworks/models/llama4-maverick-instruct-basic-math]", + "parametrize", + "pytestmark", + "accounts/fireworks/models/llama4-maverick-instruct-basic-math", + 
"test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "accounts/fireworks/models/llama4-maverick-instruct-basic", + "case_id": "math" + }, + "setup": { + "duration": 0.018431040924042463, + "outcome": "passed" + }, + "call": { + "duration": 3.8501765420660377, + "outcome": "passed" + }, + "teardown": { + "duration": 0.00015279196668416262, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]", + "lineno": 203, + "outcome": "failed", + "keywords": [ + "test_chat_non_streaming_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]", + "parametrize", + "pytestmark", + "accounts/fireworks/models/llama-v3p3-70b-instruct-case0", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "accounts/fireworks/models/llama-v3p3-70b-instruct", + "case_id": "case0" + }, + "setup": { + "duration": 0.007509749964810908, + "outcome": "passed" + }, + "call": { + "duration": 0.4906975000631064, + "outcome": "failed", + "crash": { + "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", + "lineno": 222, + "message": "TypeError: object of type 'NoneType' has no len()" + }, + "traceback": [ + { + "path": "tests/verifications/openai_api/test_chat_completion.py", + "lineno": 222, + "message": "TypeError" + } + ], + "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama-v3p3-70b-instruct'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'input': {'messages': 
[{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_non_streaming_tool_calling(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n stream=False,\n )\n \n assert response.choices[0].message.role == \"assistant\"\n> assert len(response.choices[0].message.tool_calls) > 0\nE TypeError: object of type 'NoneType' has no len()\n\ntests/verifications/openai_api/test_chat_completion.py:222: TypeError" + }, + "teardown": { + "duration": 0.00023995805531740189, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-case0]", + "lineno": 203, + "outcome": "failed", + "keywords": [ + "test_chat_non_streaming_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-case0]", + "parametrize", + "pytestmark", + "accounts/fireworks/models/llama4-scout-instruct-basic-case0", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "accounts/fireworks/models/llama4-scout-instruct-basic", + "case_id": "case0" + }, + "setup": { + "duration": 0.007144959061406553, + "outcome": "passed" + }, + "call": { + "duration": 3.818257624981925, + "outcome": "failed", 
+ "crash": { + "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", + "lineno": 222, + "message": "TypeError: object of type 'NoneType' has no len()" + }, + "traceback": [ + { + "path": "tests/verifications/openai_api/test_chat_completion.py", + "lineno": 222, + "message": "TypeError" + } + ], + "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-scout-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_non_streaming_tool_calling(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n stream=False,\n )\n \n assert response.choices[0].message.role == \"assistant\"\n> assert len(response.choices[0].message.tool_calls) > 0\nE TypeError: object of type 'NoneType' has no len()\n\ntests/verifications/openai_api/test_chat_completion.py:222: TypeError" + }, + "teardown": { + "duration": 0.0002668750239536166, + "outcome": "passed" + } + }, + { + 
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]", + "lineno": 203, + "outcome": "failed", + "keywords": [ + "test_chat_non_streaming_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]", + "parametrize", + "pytestmark", + "accounts/fireworks/models/llama4-maverick-instruct-basic-case0", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "accounts/fireworks/models/llama4-maverick-instruct-basic", + "case_id": "case0" + }, + "setup": { + "duration": 0.015290249953977764, + "outcome": "passed" + }, + "call": { + "duration": 1.5883799999719486, + "outcome": "failed", + "crash": { + "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", + "lineno": 222, + "message": "TypeError: object of type 'NoneType' has no len()" + }, + "traceback": [ + { + "path": "tests/verifications/openai_api/test_chat_completion.py", + "lineno": 222, + "message": "TypeError" + } + ], + "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-maverick-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_non_streaming_tool_calling(request, 
openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n stream=False,\n )\n \n assert response.choices[0].message.role == \"assistant\"\n> assert len(response.choices[0].message.tool_calls) > 0\nE TypeError: object of type 'NoneType' has no len()\n\ntests/verifications/openai_api/test_chat_completion.py:222: TypeError" + }, + "teardown": { + "duration": 0.0008049579337239265, + "outcome": "passed" + } + } + ] +} diff --git a/tests/verifications/test_results/openai_1744154522.json b/tests/verifications/test_results/openai_1744154522.json deleted file mode 100644 index 310f3500d..000000000 --- a/tests/verifications/test_results/openai_1744154522.json +++ /dev/null @@ -1,2672 +0,0 @@ -{ - "created": 1744154576.251519, - "duration": 51.50739002227783, - "exitcode": 0, - "root": "/Users/erichuang/projects/llama-stack", - "environment": {}, - "summary": { - "skipped": 61, - "passed": 22, - "total": 83, - "collected": 83 - }, - "collectors": [ - { - "nodeid": "", - "outcome": "passed", - "result": [ - { - "nodeid": "tests/verifications/openai/test_chat_completion.py", - "type": "Module" - } - ] - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py", - "outcome": "passed", - "result": [ - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-3.3-8B-Instruct]", - "type": "Function", - "lineno": 25 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-3.3-70B-Instruct]", - "type": "Function", - "lineno": 25 - }, - { - "nodeid": 
"tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-4-Scout-17B-16E]", - "type": "Function", - "lineno": 25 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "type": "Function", - "lineno": 25 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-4-Maverick-17B-128E]", - "type": "Function", - "lineno": 25 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "type": "Function", - "lineno": 25 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-gpt-4o]", - "type": "Function", - "lineno": 25 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-gpt-4o-mini]", - "type": "Function", - "lineno": 25 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-3.3-8B-Instruct]", - "type": "Function", - "lineno": 25 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-3.3-70B-Instruct]", - "type": "Function", - "lineno": 25 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-4-Scout-17B-16E]", - "type": "Function", - "lineno": 25 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-4-Scout-17B-16E-Instruct]", - "type": "Function", - "lineno": 25 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-4-Maverick-17B-128E]", - "type": "Function", - "lineno": 25 - }, - { - "nodeid": 
"tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-4-Maverick-17B-128E-Instruct]", - "type": "Function", - "lineno": 25 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-gpt-4o]", - "type": "Function", - "lineno": 25 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-gpt-4o-mini]", - "type": "Function", - "lineno": 25 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-3.3-8B-Instruct]", - "type": "Function", - "lineno": 40 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-3.3-70B-Instruct]", - "type": "Function", - "lineno": 40 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-4-Scout-17B-16E]", - "type": "Function", - "lineno": 40 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "type": "Function", - "lineno": 40 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-4-Maverick-17B-128E]", - "type": "Function", - "lineno": 40 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "type": "Function", - "lineno": 40 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-gpt-4o]", - "type": "Function", - "lineno": 40 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-gpt-4o-mini]", - "type": "Function", - "lineno": 40 - }, - { - "nodeid": 
"tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-3.3-8B-Instruct]", - "type": "Function", - "lineno": 40 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-3.3-70B-Instruct]", - "type": "Function", - "lineno": 40 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-4-Scout-17B-16E]", - "type": "Function", - "lineno": 40 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-4-Scout-17B-16E-Instruct]", - "type": "Function", - "lineno": 40 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-4-Maverick-17B-128E]", - "type": "Function", - "lineno": 40 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-4-Maverick-17B-128E-Instruct]", - "type": "Function", - "lineno": 40 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-gpt-4o]", - "type": "Function", - "lineno": 40 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-gpt-4o-mini]", - "type": "Function", - "lineno": 40 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-Llama-4-Scout-17B-16E]", - "type": "Function", - "lineno": 60 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "type": "Function", - "lineno": 60 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-Llama-4-Maverick-17B-128E]", - "type": "Function", - "lineno": 60 - }, - { - "nodeid": 
"tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "type": "Function", - "lineno": 60 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-gpt-4o]", - "type": "Function", - "lineno": 60 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-gpt-4o-mini]", - "type": "Function", - "lineno": 60 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-Llama-4-Scout-17B-16E]", - "type": "Function", - "lineno": 75 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "type": "Function", - "lineno": 75 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-Llama-4-Maverick-17B-128E]", - "type": "Function", - "lineno": 75 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "type": "Function", - "lineno": 75 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-gpt-4o]", - "type": "Function", - "lineno": 75 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-gpt-4o-mini]", - "type": "Function", - "lineno": 75 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-3.3-8B-Instruct]", - "type": "Function", - "lineno": 95 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-3.3-70B-Instruct]", - "type": "Function", - "lineno": 95 - }, - { - "nodeid": 
"tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E]", - "type": "Function", - "lineno": 95 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "type": "Function", - "lineno": 95 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E]", - "type": "Function", - "lineno": 95 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "type": "Function", - "lineno": 95 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-gpt-4o]", - "type": "Function", - "lineno": 95 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-gpt-4o-mini]", - "type": "Function", - "lineno": 95 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-3.3-8B-Instruct]", - "type": "Function", - "lineno": 95 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-3.3-70B-Instruct]", - "type": "Function", - "lineno": 95 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E]", - "type": "Function", - "lineno": 95 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E-Instruct]", - "type": "Function", - "lineno": 95 - }, - { - "nodeid": 
"tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E]", - "type": "Function", - "lineno": 95 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E-Instruct]", - "type": "Function", - "lineno": 95 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-gpt-4o]", - "type": "Function", - "lineno": 95 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-gpt-4o-mini]", - "type": "Function", - "lineno": 95 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-3.3-8B-Instruct]", - "type": "Function", - "lineno": 117 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-3.3-70B-Instruct]", - "type": "Function", - "lineno": 117 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E]", - "type": "Function", - "lineno": 117 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "type": "Function", - "lineno": 117 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E]", - "type": "Function", - "lineno": 117 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "type": "Function", - "lineno": 117 - }, - { - "nodeid": 
"tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-gpt-4o]", - "type": "Function", - "lineno": 117 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-gpt-4o-mini]", - "type": "Function", - "lineno": 117 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-3.3-8B-Instruct]", - "type": "Function", - "lineno": 117 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-3.3-70B-Instruct]", - "type": "Function", - "lineno": 117 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E]", - "type": "Function", - "lineno": 117 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E-Instruct]", - "type": "Function", - "lineno": 117 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E]", - "type": "Function", - "lineno": 117 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E-Instruct]", - "type": "Function", - "lineno": 117 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-gpt-4o]", - "type": "Function", - "lineno": 117 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-gpt-4o-mini]", - "type": "Function", - "lineno": 117 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-Llama-3.3-70B-Instruct]", - 
"type": "Function", - "lineno": 138 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-Llama-4-Scout-17B-16E]", - "type": "Function", - "lineno": 138 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "type": "Function", - "lineno": 138 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-Llama-4-Maverick-17B-128E]", - "type": "Function", - "lineno": 138 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "type": "Function", - "lineno": 138 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-gpt-4o]", - "type": "Function", - "lineno": 138 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-gpt-4o-mini]", - "type": "Function", - "lineno": 138 - } - ] - } - ], - "tests": [ - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-3.3-8B-Instruct]", - "lineno": 25, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_basic[input_output0-Llama-3.3-8B-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-3.3-8B-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.0531630830373615, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider openai does not support model Llama-3.3-8B-Instruct')" - }, - "teardown": { - "duration": 0.0001657919492572546, - "outcome": "passed" - } - }, - { - "nodeid": 
"tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-3.3-70B-Instruct]", - "lineno": 25, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_basic[input_output0-Llama-3.3-70B-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-3.3-70B-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.006063499953597784, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider openai does not support model Llama-3.3-70B-Instruct')" - }, - "teardown": { - "duration": 0.00014004099648445845, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-4-Scout-17B-16E]", - "lineno": 25, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_basic[input_output0-Llama-4-Scout-17B-16E]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Scout-17B-16E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.005356832989491522, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider openai does not support model Llama-4-Scout-17B-16E')" - }, - "teardown": { - "duration": 0.00016508297994732857, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "lineno": 25, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_basic[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Scout-17B-16E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", 
- "llama-stack", - "" - ], - "setup": { - "duration": 0.006139832898043096, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider openai does not support model Llama-4-Scout-17B-16E-Instruct')" - }, - "teardown": { - "duration": 0.00014450005255639553, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-4-Maverick-17B-128E]", - "lineno": 25, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_basic[input_output0-Llama-4-Maverick-17B-128E]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Maverick-17B-128E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.00542324990965426, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider openai does not support model Llama-4-Maverick-17B-128E')" - }, - "teardown": { - "duration": 0.00014112505596131086, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "lineno": 25, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_basic[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Maverick-17B-128E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.004965625004842877, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider openai does not support model Llama-4-Maverick-17B-128E-Instruct')" - }, - "teardown": { - "duration": 0.00013720791321247816, - 
"outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-gpt-4o]", - "lineno": 25, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_basic[input_output0-gpt-4o]", - "parametrize", - "pytestmark", - "input_output0-gpt-4o", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.005054084002040327, - "outcome": "passed" - }, - "call": { - "duration": 0.6271341659594327, - "outcome": "passed" - }, - "teardown": { - "duration": 0.00043925002682954073, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-gpt-4o-mini]", - "lineno": 25, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_basic[input_output0-gpt-4o-mini]", - "parametrize", - "pytestmark", - "input_output0-gpt-4o-mini", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.0159178749890998, - "outcome": "passed" - }, - "call": { - "duration": 0.44088316697161645, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0006467089988291264, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-3.3-8B-Instruct]", - "lineno": 25, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_basic[input_output1-Llama-3.3-8B-Instruct]", - "parametrize", - "pytestmark", - "input_output1-Llama-3.3-8B-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.016705541987903416, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider openai does not support model Llama-3.3-8B-Instruct')" - }, - 
"teardown": { - "duration": 0.0005769169656559825, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-3.3-70B-Instruct]", - "lineno": 25, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_basic[input_output1-Llama-3.3-70B-Instruct]", - "parametrize", - "pytestmark", - "input_output1-Llama-3.3-70B-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.012067249976098537, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider openai does not support model Llama-3.3-70B-Instruct')" - }, - "teardown": { - "duration": 0.00016683305148035288, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-4-Scout-17B-16E]", - "lineno": 25, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_basic[input_output1-Llama-4-Scout-17B-16E]", - "parametrize", - "pytestmark", - "input_output1-Llama-4-Scout-17B-16E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.009295083000324667, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider openai does not support model Llama-4-Scout-17B-16E')" - }, - "teardown": { - "duration": 0.00017204193864017725, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-4-Scout-17B-16E-Instruct]", - "lineno": 25, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_basic[input_output1-Llama-4-Scout-17B-16E-Instruct]", - "parametrize", - "pytestmark", - 
"input_output1-Llama-4-Scout-17B-16E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.009534333017654717, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider openai does not support model Llama-4-Scout-17B-16E-Instruct')" - }, - "teardown": { - "duration": 0.00020175008103251457, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-4-Maverick-17B-128E]", - "lineno": 25, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_basic[input_output1-Llama-4-Maverick-17B-128E]", - "parametrize", - "pytestmark", - "input_output1-Llama-4-Maverick-17B-128E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.006628665956668556, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider openai does not support model Llama-4-Maverick-17B-128E')" - }, - "teardown": { - "duration": 0.0003687090938910842, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-4-Maverick-17B-128E-Instruct]", - "lineno": 25, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_basic[input_output1-Llama-4-Maverick-17B-128E-Instruct]", - "parametrize", - "pytestmark", - "input_output1-Llama-4-Maverick-17B-128E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.0061322919791564345, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider openai does 
not support model Llama-4-Maverick-17B-128E-Instruct')" - }, - "teardown": { - "duration": 0.0003664169926196337, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-gpt-4o]", - "lineno": 25, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_basic[input_output1-gpt-4o]", - "parametrize", - "pytestmark", - "input_output1-gpt-4o", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.00623433303553611, - "outcome": "passed" - }, - "call": { - "duration": 0.7898445830214769, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0006602079374715686, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-gpt-4o-mini]", - "lineno": 25, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_basic[input_output1-gpt-4o-mini]", - "parametrize", - "pytestmark", - "input_output1-gpt-4o-mini", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.014758958015590906, - "outcome": "passed" - }, - "call": { - "duration": 1.1555478329537436, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0011781250359490514, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-3.3-8B-Instruct]", - "lineno": 40, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_basic[input_output0-Llama-3.3-8B-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-3.3-8B-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.03454475000035018, - "outcome": "skipped", - "longrepr": 
"('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider openai does not support model Llama-3.3-8B-Instruct')" - }, - "teardown": { - "duration": 0.000967124942690134, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-3.3-70B-Instruct]", - "lineno": 40, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_basic[input_output0-Llama-3.3-70B-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-3.3-70B-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.025206666090525687, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider openai does not support model Llama-3.3-70B-Instruct')" - }, - "teardown": { - "duration": 0.000189624959602952, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-4-Scout-17B-16E]", - "lineno": 40, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_basic[input_output0-Llama-4-Scout-17B-16E]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Scout-17B-16E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.014331333106383681, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider openai does not support model Llama-4-Scout-17B-16E')" - }, - "teardown": { - "duration": 0.00023133307695388794, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "lineno": 40, - "outcome": 
"skipped", - "keywords": [ - "test_chat_streaming_basic[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Scout-17B-16E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.009339665994048119, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider openai does not support model Llama-4-Scout-17B-16E-Instruct')" - }, - "teardown": { - "duration": 0.00020329200197011232, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-4-Maverick-17B-128E]", - "lineno": 40, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_basic[input_output0-Llama-4-Maverick-17B-128E]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Maverick-17B-128E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.010387042071670294, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider openai does not support model Llama-4-Maverick-17B-128E')" - }, - "teardown": { - "duration": 0.00018254201859235764, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "lineno": 40, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_basic[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Maverick-17B-128E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.012297999928705394, - "outcome": "skipped", - "longrepr": 
"('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider openai does not support model Llama-4-Maverick-17B-128E-Instruct')" - }, - "teardown": { - "duration": 0.00018662505317479372, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-gpt-4o]", - "lineno": 40, - "outcome": "passed", - "keywords": [ - "test_chat_streaming_basic[input_output0-gpt-4o]", - "parametrize", - "pytestmark", - "input_output0-gpt-4o", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.006984042003750801, - "outcome": "passed" - }, - "call": { - "duration": 0.32529433304443955, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0033042499562725425, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-gpt-4o-mini]", - "lineno": 40, - "outcome": "passed", - "keywords": [ - "test_chat_streaming_basic[input_output0-gpt-4o-mini]", - "parametrize", - "pytestmark", - "input_output0-gpt-4o-mini", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.01832079200539738, - "outcome": "passed" - }, - "call": { - "duration": 0.48440287495031953, - "outcome": "passed" - }, - "teardown": { - "duration": 0.00047233293298631907, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-3.3-8B-Instruct]", - "lineno": 40, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_basic[input_output1-Llama-3.3-8B-Instruct]", - "parametrize", - "pytestmark", - "input_output1-Llama-3.3-8B-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 
0.02893691696226597, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider openai does not support model Llama-3.3-8B-Instruct')" - }, - "teardown": { - "duration": 0.0001747499918565154, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-3.3-70B-Instruct]", - "lineno": 40, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_basic[input_output1-Llama-3.3-70B-Instruct]", - "parametrize", - "pytestmark", - "input_output1-Llama-3.3-70B-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.006553041050210595, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider openai does not support model Llama-3.3-70B-Instruct')" - }, - "teardown": { - "duration": 0.00016829196829348803, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-4-Scout-17B-16E]", - "lineno": 40, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_basic[input_output1-Llama-4-Scout-17B-16E]", - "parametrize", - "pytestmark", - "input_output1-Llama-4-Scout-17B-16E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.013746666954830289, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider openai does not support model Llama-4-Scout-17B-16E')" - }, - "teardown": { - "duration": 0.00019237503875046968, - "outcome": "passed" - } - }, - { - "nodeid": 
"tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-4-Scout-17B-16E-Instruct]", - "lineno": 40, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_basic[input_output1-Llama-4-Scout-17B-16E-Instruct]", - "parametrize", - "pytestmark", - "input_output1-Llama-4-Scout-17B-16E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.007175332983024418, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider openai does not support model Llama-4-Scout-17B-16E-Instruct')" - }, - "teardown": { - "duration": 0.0001873329747468233, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-4-Maverick-17B-128E]", - "lineno": 40, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_basic[input_output1-Llama-4-Maverick-17B-128E]", - "parametrize", - "pytestmark", - "input_output1-Llama-4-Maverick-17B-128E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.006127291941083968, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider openai does not support model Llama-4-Maverick-17B-128E')" - }, - "teardown": { - "duration": 0.00019004102796316147, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-4-Maverick-17B-128E-Instruct]", - "lineno": 40, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_basic[input_output1-Llama-4-Maverick-17B-128E-Instruct]", - "parametrize", - "pytestmark", - "input_output1-Llama-4-Maverick-17B-128E-Instruct", - "test_chat_completion.py", - 
"openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.006421791040338576, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider openai does not support model Llama-4-Maverick-17B-128E-Instruct')" - }, - "teardown": { - "duration": 0.0001611249754205346, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-gpt-4o]", - "lineno": 40, - "outcome": "passed", - "keywords": [ - "test_chat_streaming_basic[input_output1-gpt-4o]", - "parametrize", - "pytestmark", - "input_output1-gpt-4o", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.009806249989196658, - "outcome": "passed" - }, - "call": { - "duration": 0.9556747920578346, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0004937920020893216, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-gpt-4o-mini]", - "lineno": 40, - "outcome": "passed", - "keywords": [ - "test_chat_streaming_basic[input_output1-gpt-4o-mini]", - "parametrize", - "pytestmark", - "input_output1-gpt-4o-mini", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.03146500000730157, - "outcome": "passed" - }, - "call": { - "duration": 1.082494750036858, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0006242080125957727, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-Llama-4-Scout-17B-16E]", - "lineno": 60, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_image[input_output0-Llama-4-Scout-17B-16E]", - "parametrize", - "pytestmark", - 
"input_output0-Llama-4-Scout-17B-16E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.021534667001105845, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 61, 'Skipped: Provider openai does not support model Llama-4-Scout-17B-16E')" - }, - "teardown": { - "duration": 0.0003469999646767974, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "lineno": 60, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_image[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Scout-17B-16E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.025929750059731305, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 61, 'Skipped: Provider openai does not support model Llama-4-Scout-17B-16E-Instruct')" - }, - "teardown": { - "duration": 0.0008774169255048037, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-Llama-4-Maverick-17B-128E]", - "lineno": 60, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_image[input_output0-Llama-4-Maverick-17B-128E]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Maverick-17B-128E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.012507125036790967, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 61, 'Skipped: Provider openai does not support model 
Llama-4-Maverick-17B-128E')" - }, - "teardown": { - "duration": 0.00022008304949849844, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "lineno": 60, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_image[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Maverick-17B-128E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.008156375028192997, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 61, 'Skipped: Provider openai does not support model Llama-4-Maverick-17B-128E-Instruct')" - }, - "teardown": { - "duration": 0.0002079169498756528, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-gpt-4o]", - "lineno": 60, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_image[input_output0-gpt-4o]", - "parametrize", - "pytestmark", - "input_output0-gpt-4o", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.012587749981321394, - "outcome": "passed" - }, - "call": { - "duration": 2.7379885419504717, - "outcome": "passed" - }, - "teardown": { - "duration": 0.00044579198583960533, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-gpt-4o-mini]", - "lineno": 60, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_image[input_output0-gpt-4o-mini]", - "parametrize", - "pytestmark", - "input_output0-gpt-4o-mini", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - 
"setup": { - "duration": 0.017111250082962215, - "outcome": "passed" - }, - "call": { - "duration": 2.599374584038742, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0009177909232676029, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-Llama-4-Scout-17B-16E]", - "lineno": 75, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_image[input_output0-Llama-4-Scout-17B-16E]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Scout-17B-16E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.02198700001463294, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 76, 'Skipped: Provider openai does not support model Llama-4-Scout-17B-16E')" - }, - "teardown": { - "duration": 0.00042749999556690454, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "lineno": 75, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_image[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Scout-17B-16E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.015032917028293014, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 76, 'Skipped: Provider openai does not support model Llama-4-Scout-17B-16E-Instruct')" - }, - "teardown": { - "duration": 0.00041016703471541405, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-Llama-4-Maverick-17B-128E]", - "lineno": 75, - "outcome": 
"skipped", - "keywords": [ - "test_chat_streaming_image[input_output0-Llama-4-Maverick-17B-128E]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Maverick-17B-128E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.013976250076666474, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 76, 'Skipped: Provider openai does not support model Llama-4-Maverick-17B-128E')" - }, - "teardown": { - "duration": 0.00027600000612437725, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "lineno": 75, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_image[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Maverick-17B-128E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.00799729092977941, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 76, 'Skipped: Provider openai does not support model Llama-4-Maverick-17B-128E-Instruct')" - }, - "teardown": { - "duration": 0.00020320899784564972, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-gpt-4o]", - "lineno": 75, - "outcome": "passed", - "keywords": [ - "test_chat_streaming_image[input_output0-gpt-4o]", - "parametrize", - "pytestmark", - "input_output0-gpt-4o", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.010483540943823755, - "outcome": "passed" - }, - "call": { - "duration": 4.249965250026435, - "outcome": "passed" - }, - 
"teardown": { - "duration": 0.0008596250554546714, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-gpt-4o-mini]", - "lineno": 75, - "outcome": "passed", - "keywords": [ - "test_chat_streaming_image[input_output0-gpt-4o-mini]", - "parametrize", - "pytestmark", - "input_output0-gpt-4o-mini", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.018141582957468927, - "outcome": "passed" - }, - "call": { - "duration": 2.297856790944934, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0005075830267742276, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-3.3-8B-Instruct]", - "lineno": 95, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_structured_output[input_output0-Llama-3.3-8B-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-3.3-8B-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.017144332989118993, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider openai does not support model Llama-3.3-8B-Instruct')" - }, - "teardown": { - "duration": 0.0006829580524936318, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-3.3-70B-Instruct]", - "lineno": 95, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_structured_output[input_output0-Llama-3.3-70B-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-3.3-70B-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": 
{ - "duration": 0.009827250032685697, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider openai does not support model Llama-3.3-70B-Instruct')" - }, - "teardown": { - "duration": 0.00024204188957810402, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E]", - "lineno": 95, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Scout-17B-16E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.006737958989106119, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider openai does not support model Llama-4-Scout-17B-16E')" - }, - "teardown": { - "duration": 0.00022729102056473494, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "lineno": 95, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Scout-17B-16E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.006030917051248252, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider openai does not support model Llama-4-Scout-17B-16E-Instruct')" - }, - "teardown": { - "duration": 0.00022229203023016453, - "outcome": "passed" - } - }, - 
{ - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E]", - "lineno": 95, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Maverick-17B-128E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.009183833957649767, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider openai does not support model Llama-4-Maverick-17B-128E')" - }, - "teardown": { - "duration": 0.00022629194427281618, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "lineno": 95, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Maverick-17B-128E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.007097500027157366, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider openai does not support model Llama-4-Maverick-17B-128E-Instruct')" - }, - "teardown": { - "duration": 0.00826825003605336, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-gpt-4o]", - "lineno": 95, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_structured_output[input_output0-gpt-4o]", - "parametrize", - "pytestmark", - 
"input_output0-gpt-4o", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.006604874972254038, - "outcome": "passed" - }, - "call": { - "duration": 1.4057738750707358, - "outcome": "passed" - }, - "teardown": { - "duration": 0.000506040989421308, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-gpt-4o-mini]", - "lineno": 95, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_structured_output[input_output0-gpt-4o-mini]", - "parametrize", - "pytestmark", - "input_output0-gpt-4o-mini", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.015966624952852726, - "outcome": "passed" - }, - "call": { - "duration": 0.540478374925442, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0009536249563097954, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-3.3-8B-Instruct]", - "lineno": 95, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_structured_output[input_output1-Llama-3.3-8B-Instruct]", - "parametrize", - "pytestmark", - "input_output1-Llama-3.3-8B-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.020631707971915603, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider openai does not support model Llama-3.3-8B-Instruct')" - }, - "teardown": { - "duration": 0.0004928340204060078, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-3.3-70B-Instruct]", - "lineno": 95, - 
"outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_structured_output[input_output1-Llama-3.3-70B-Instruct]", - "parametrize", - "pytestmark", - "input_output1-Llama-3.3-70B-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.016745459055528045, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider openai does not support model Llama-3.3-70B-Instruct')" - }, - "teardown": { - "duration": 0.0003412909572944045, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E]", - "lineno": 95, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E]", - "parametrize", - "pytestmark", - "input_output1-Llama-4-Scout-17B-16E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.012252667103894055, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider openai does not support model Llama-4-Scout-17B-16E')" - }, - "teardown": { - "duration": 0.00028650008607655764, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E-Instruct]", - "lineno": 95, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E-Instruct]", - "parametrize", - "pytestmark", - "input_output1-Llama-4-Scout-17B-16E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.01128904102370143, - 
"outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider openai does not support model Llama-4-Scout-17B-16E-Instruct')" - }, - "teardown": { - "duration": 0.00027041707653552294, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E]", - "lineno": 95, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E]", - "parametrize", - "pytestmark", - "input_output1-Llama-4-Maverick-17B-128E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.009191332967020571, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider openai does not support model Llama-4-Maverick-17B-128E')" - }, - "teardown": { - "duration": 0.0002074999501928687, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E-Instruct]", - "lineno": 95, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E-Instruct]", - "parametrize", - "pytestmark", - "input_output1-Llama-4-Maverick-17B-128E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.007687666919082403, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider openai does not support model Llama-4-Maverick-17B-128E-Instruct')" - }, - "teardown": { - "duration": 0.0002027079463005066, - "outcome": "passed" - } - }, - { 
- "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-gpt-4o]", - "lineno": 95, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_structured_output[input_output1-gpt-4o]", - "parametrize", - "pytestmark", - "input_output1-gpt-4o", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.007542708073742688, - "outcome": "passed" - }, - "call": { - "duration": 4.244797708000988, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0012778330128639936, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-gpt-4o-mini]", - "lineno": 95, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_structured_output[input_output1-gpt-4o-mini]", - "parametrize", - "pytestmark", - "input_output1-gpt-4o-mini", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.026919999974779785, - "outcome": "passed" - }, - "call": { - "duration": 9.006108874920756, - "outcome": "passed" - }, - "teardown": { - "duration": 0.00046324997674673796, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-3.3-8B-Instruct]", - "lineno": 117, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_structured_output[input_output0-Llama-3.3-8B-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-3.3-8B-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.01554666692391038, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider openai does not support model 
Llama-3.3-8B-Instruct')" - }, - "teardown": { - "duration": 0.0004023330984637141, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-3.3-70B-Instruct]", - "lineno": 117, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_structured_output[input_output0-Llama-3.3-70B-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-3.3-70B-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.007354958914220333, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider openai does not support model Llama-3.3-70B-Instruct')" - }, - "teardown": { - "duration": 0.0002900830004364252, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E]", - "lineno": 117, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Scout-17B-16E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.017274250043556094, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider openai does not support model Llama-4-Scout-17B-16E')" - }, - "teardown": { - "duration": 0.0002668329980224371, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "lineno": 117, - "outcome": "skipped", - "keywords": [ - 
"test_chat_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Scout-17B-16E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.006813667016103864, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider openai does not support model Llama-4-Scout-17B-16E-Instruct')" - }, - "teardown": { - "duration": 0.00024500000290572643, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E]", - "lineno": 117, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Maverick-17B-128E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.007385291974060237, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider openai does not support model Llama-4-Maverick-17B-128E')" - }, - "teardown": { - "duration": 0.00017024995759129524, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "lineno": 117, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Maverick-17B-128E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.00857366609852761, - 
"outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider openai does not support model Llama-4-Maverick-17B-128E-Instruct')" - }, - "teardown": { - "duration": 0.00016850000247359276, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-gpt-4o]", - "lineno": 117, - "outcome": "passed", - "keywords": [ - "test_chat_streaming_structured_output[input_output0-gpt-4o]", - "parametrize", - "pytestmark", - "input_output0-gpt-4o", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.005570041947066784, - "outcome": "passed" - }, - "call": { - "duration": 0.8564215000951663, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0004029169213026762, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-gpt-4o-mini]", - "lineno": 117, - "outcome": "passed", - "keywords": [ - "test_chat_streaming_structured_output[input_output0-gpt-4o-mini]", - "parametrize", - "pytestmark", - "input_output0-gpt-4o-mini", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.00786762498319149, - "outcome": "passed" - }, - "call": { - "duration": 0.6419672920601442, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0005102079594507813, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-3.3-8B-Instruct]", - "lineno": 117, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_structured_output[input_output1-Llama-3.3-8B-Instruct]", - "parametrize", - "pytestmark", - "input_output1-Llama-3.3-8B-Instruct", - "test_chat_completion.py", - 
"openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.017147499951533973, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider openai does not support model Llama-3.3-8B-Instruct')" - }, - "teardown": { - "duration": 0.00032350001856684685, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-3.3-70B-Instruct]", - "lineno": 117, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_structured_output[input_output1-Llama-3.3-70B-Instruct]", - "parametrize", - "pytestmark", - "input_output1-Llama-3.3-70B-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.01194737502373755, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider openai does not support model Llama-3.3-70B-Instruct')" - }, - "teardown": { - "duration": 0.0005004579434171319, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E]", - "lineno": 117, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E]", - "parametrize", - "pytestmark", - "input_output1-Llama-4-Scout-17B-16E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.010250666993670166, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider openai does not support model Llama-4-Scout-17B-16E')" - }, - "teardown": { - "duration": 0.00022554199676960707, - 
"outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E-Instruct]", - "lineno": 117, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E-Instruct]", - "parametrize", - "pytestmark", - "input_output1-Llama-4-Scout-17B-16E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.007847042055800557, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider openai does not support model Llama-4-Scout-17B-16E-Instruct')" - }, - "teardown": { - "duration": 0.000283458037301898, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E]", - "lineno": 117, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E]", - "parametrize", - "pytestmark", - "input_output1-Llama-4-Maverick-17B-128E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.008078000042587519, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider openai does not support model Llama-4-Maverick-17B-128E')" - }, - "teardown": { - "duration": 0.0001794169656932354, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E-Instruct]", - "lineno": 117, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E-Instruct]", - 
"parametrize", - "pytestmark", - "input_output1-Llama-4-Maverick-17B-128E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.007204750087112188, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider openai does not support model Llama-4-Maverick-17B-128E-Instruct')" - }, - "teardown": { - "duration": 0.00017725001089274883, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-gpt-4o]", - "lineno": 117, - "outcome": "passed", - "keywords": [ - "test_chat_streaming_structured_output[input_output1-gpt-4o]", - "parametrize", - "pytestmark", - "input_output1-gpt-4o", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.006797667010687292, - "outcome": "passed" - }, - "call": { - "duration": 5.411579457926564, - "outcome": "passed" - }, - "teardown": { - "duration": 0.001134666963480413, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-gpt-4o-mini]", - "lineno": 117, - "outcome": "passed", - "keywords": [ - "test_chat_streaming_structured_output[input_output1-gpt-4o-mini]", - "parametrize", - "pytestmark", - "input_output1-gpt-4o-mini", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.025059624924324453, - "outcome": "passed" - }, - "call": { - "duration": 9.112342999898829, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0009202499641105533, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-Llama-3.3-70B-Instruct]", - "lineno": 
138, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_tool_calling[input_output0-Llama-3.3-70B-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-3.3-70B-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.024287916952744126, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 139, 'Skipped: Provider openai does not support model Llama-3.3-70B-Instruct')" - }, - "teardown": { - "duration": 0.00015587499365210533, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-Llama-4-Scout-17B-16E]", - "lineno": 138, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_tool_calling[input_output0-Llama-4-Scout-17B-16E]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Scout-17B-16E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.006531457998789847, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 139, 'Skipped: Provider openai does not support model Llama-4-Scout-17B-16E')" - }, - "teardown": { - "duration": 0.00014670798555016518, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "lineno": 138, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_tool_calling[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Scout-17B-16E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.006190375075675547, - "outcome": 
"skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 139, 'Skipped: Provider openai does not support model Llama-4-Scout-17B-16E-Instruct')" - }, - "teardown": { - "duration": 0.0001603750279173255, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-Llama-4-Maverick-17B-128E]", - "lineno": 138, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_tool_calling[input_output0-Llama-4-Maverick-17B-128E]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Maverick-17B-128E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.005670750048011541, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 139, 'Skipped: Provider openai does not support model Llama-4-Maverick-17B-128E')" - }, - "teardown": { - "duration": 0.0001479999627918005, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "lineno": 138, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_tool_calling[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Maverick-17B-128E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.005662833107635379, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 139, 'Skipped: Provider openai does not support model Llama-4-Maverick-17B-128E-Instruct')" - }, - "teardown": { - "duration": 0.0001480829669162631, - "outcome": "passed" - } - }, - { - "nodeid": 
"tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-gpt-4o]", - "lineno": 138, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_tool_calling[input_output0-gpt-4o]", - "parametrize", - "pytestmark", - "input_output0-gpt-4o", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.00573637499473989, - "outcome": "passed" - }, - "call": { - "duration": 0.6269576249178499, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0010142088867723942, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-gpt-4o-mini]", - "lineno": 138, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_tool_calling[input_output0-gpt-4o-mini]", - "parametrize", - "pytestmark", - "input_output0-gpt-4o-mini", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.01623620803002268, - "outcome": "passed" - }, - "call": { - "duration": 0.7144521250156686, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0011040839599445462, - "outcome": "passed" - } - } - ] -} diff --git a/tests/verifications/test_results/openai_1744264304.json b/tests/verifications/test_results/openai_1744264304.json new file mode 100644 index 000000000..fe9c2fcac --- /dev/null +++ b/tests/verifications/test_results/openai_1744264304.json @@ -0,0 +1,868 @@ +{ + "created": 1744264338.9923031, + "duration": 32.825536012649536, + "exitcode": 0, + "root": "/Users/erichuang/projects/llama-stack", + "environment": {}, + "summary": { + "passed": 22, + "total": 22, + "collected": 22 + }, + "collectors": [ + { + "nodeid": "", + "outcome": "passed", + "result": [ + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py", + "type": "Module" + } + ] + }, + { + "nodeid": 
"tests/verifications/openai_api/test_chat_completion.py", + "outcome": "passed", + "result": [ + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[gpt-4o-earth]", + "type": "Function", + "lineno": 72 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[gpt-4o-saturn]", + "type": "Function", + "lineno": 72 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[gpt-4o-mini-earth]", + "type": "Function", + "lineno": 72 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[gpt-4o-mini-saturn]", + "type": "Function", + "lineno": 72 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[gpt-4o-earth]", + "type": "Function", + "lineno": 91 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[gpt-4o-saturn]", + "type": "Function", + "lineno": 91 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[gpt-4o-mini-earth]", + "type": "Function", + "lineno": 91 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[gpt-4o-mini-saturn]", + "type": "Function", + "lineno": 91 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[gpt-4o-case0]", + "type": "Function", + "lineno": 115 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[gpt-4o-mini-case0]", + "type": "Function", + "lineno": 115 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[gpt-4o-case0]", + "type": "Function", + "lineno": 134 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[gpt-4o-mini-case0]", + 
"type": "Function", + "lineno": 134 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[gpt-4o-calendar]", + "type": "Function", + "lineno": 158 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[gpt-4o-math]", + "type": "Function", + "lineno": 158 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[gpt-4o-mini-calendar]", + "type": "Function", + "lineno": 158 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[gpt-4o-mini-math]", + "type": "Function", + "lineno": 158 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[gpt-4o-calendar]", + "type": "Function", + "lineno": 181 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[gpt-4o-math]", + "type": "Function", + "lineno": 181 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[gpt-4o-mini-calendar]", + "type": "Function", + "lineno": 181 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[gpt-4o-mini-math]", + "type": "Function", + "lineno": 181 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[gpt-4o-case0]", + "type": "Function", + "lineno": 203 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[gpt-4o-mini-case0]", + "type": "Function", + "lineno": 203 + } + ] + } + ], + "tests": [ + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[gpt-4o-earth]", + "lineno": 72, + "outcome": "passed", + "keywords": [ + 
"test_chat_non_streaming_basic[gpt-4o-earth]", + "parametrize", + "pytestmark", + "gpt-4o-earth", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "gpt-4o", + "case_id": "earth" + }, + "setup": { + "duration": 0.05381445901002735, + "outcome": "passed" + }, + "call": { + "duration": 0.49848275003023446, + "outcome": "passed" + }, + "teardown": { + "duration": 0.00018287496641278267, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[gpt-4o-saturn]", + "lineno": 72, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_basic[gpt-4o-saturn]", + "parametrize", + "pytestmark", + "gpt-4o-saturn", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "gpt-4o", + "case_id": "saturn" + }, + "setup": { + "duration": 0.007965500000864267, + "outcome": "passed" + }, + "call": { + "duration": 0.9293275829404593, + "outcome": "passed" + }, + "teardown": { + "duration": 0.00018229195848107338, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[gpt-4o-mini-earth]", + "lineno": 72, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_basic[gpt-4o-mini-earth]", + "parametrize", + "pytestmark", + "gpt-4o-mini-earth", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "gpt-4o-mini", + "case_id": "earth" + }, + "setup": { + "duration": 0.00875679193995893, + "outcome": "passed" + }, + "call": { + "duration": 0.5793640419142321, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0005307920509949327, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[gpt-4o-mini-saturn]", + 
"lineno": 72, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_basic[gpt-4o-mini-saturn]", + "parametrize", + "pytestmark", + "gpt-4o-mini-saturn", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "gpt-4o-mini", + "case_id": "saturn" + }, + "setup": { + "duration": 0.01076845801435411, + "outcome": "passed" + }, + "call": { + "duration": 0.8752291660057381, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0004834589781239629, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[gpt-4o-earth]", + "lineno": 91, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_basic[gpt-4o-earth]", + "parametrize", + "pytestmark", + "gpt-4o-earth", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "gpt-4o", + "case_id": "earth" + }, + "setup": { + "duration": 0.01662245800253004, + "outcome": "passed" + }, + "call": { + "duration": 0.8336971249664202, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0024086670018732548, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[gpt-4o-saturn]", + "lineno": 91, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_basic[gpt-4o-saturn]", + "parametrize", + "pytestmark", + "gpt-4o-saturn", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "gpt-4o", + "case_id": "saturn" + }, + "setup": { + "duration": 0.009416291955858469, + "outcome": "passed" + }, + "call": { + "duration": 0.43594495789147913, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0009131249971687794, + "outcome": "passed" + } + }, + { + "nodeid": 
"tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[gpt-4o-mini-earth]", + "lineno": 91, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_basic[gpt-4o-mini-earth]", + "parametrize", + "pytestmark", + "gpt-4o-mini-earth", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "gpt-4o-mini", + "case_id": "earth" + }, + "setup": { + "duration": 0.013155042077414691, + "outcome": "passed" + }, + "call": { + "duration": 0.6119836670113727, + "outcome": "passed" + }, + "teardown": { + "duration": 0.00023804197553545237, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[gpt-4o-mini-saturn]", + "lineno": 91, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_basic[gpt-4o-mini-saturn]", + "parametrize", + "pytestmark", + "gpt-4o-mini-saturn", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "gpt-4o-mini", + "case_id": "saturn" + }, + "setup": { + "duration": 0.009004916995763779, + "outcome": "passed" + }, + "call": { + "duration": 0.8327413749648258, + "outcome": "passed" + }, + "teardown": { + "duration": 0.00046841695439070463, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[gpt-4o-case0]", + "lineno": 115, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_image[gpt-4o-case0]", + "parametrize", + "pytestmark", + "gpt-4o-case0", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "gpt-4o", + "case_id": "case0" + }, + "setup": { + "duration": 0.009574208059348166, + "outcome": "passed" + }, + "call": { + "duration": 2.221839000005275, + "outcome": "passed" + }, + "teardown": { + "duration": 0.00015945907216519117, 
+ "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[gpt-4o-mini-case0]", + "lineno": 115, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_image[gpt-4o-mini-case0]", + "parametrize", + "pytestmark", + "gpt-4o-mini-case0", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "gpt-4o-mini", + "case_id": "case0" + }, + "setup": { + "duration": 0.0084402080392465, + "outcome": "passed" + }, + "call": { + "duration": 2.298736457945779, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0002423750702291727, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[gpt-4o-case0]", + "lineno": 134, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_image[gpt-4o-case0]", + "parametrize", + "pytestmark", + "gpt-4o-case0", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "gpt-4o", + "case_id": "case0" + }, + "setup": { + "duration": 0.007330416003242135, + "outcome": "passed" + }, + "call": { + "duration": 4.062959833070636, + "outcome": "passed" + }, + "teardown": { + "duration": 0.00015470804646611214, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[gpt-4o-mini-case0]", + "lineno": 134, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_image[gpt-4o-mini-case0]", + "parametrize", + "pytestmark", + "gpt-4o-mini-case0", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "gpt-4o-mini", + "case_id": "case0" + }, + "setup": { + "duration": 0.019998832955025136, + "outcome": "passed" + }, + "call": { + "duration": 2.609432084020227, + "outcome": "passed" + }, + "teardown": 
{ + "duration": 0.005618917057290673, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[gpt-4o-calendar]", + "lineno": 158, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_structured_output[gpt-4o-calendar]", + "parametrize", + "pytestmark", + "gpt-4o-calendar", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "gpt-4o", + "case_id": "calendar" + }, + "setup": { + "duration": 0.00867662497330457, + "outcome": "passed" + }, + "call": { + "duration": 0.6856697499752045, + "outcome": "passed" + }, + "teardown": { + "duration": 0.00018445902969688177, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[gpt-4o-math]", + "lineno": 158, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_structured_output[gpt-4o-math]", + "parametrize", + "pytestmark", + "gpt-4o-math", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "gpt-4o", + "case_id": "math" + }, + "setup": { + "duration": 0.01139050000347197, + "outcome": "passed" + }, + "call": { + "duration": 2.764390083961189, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0003164170775562525, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[gpt-4o-mini-calendar]", + "lineno": 158, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_structured_output[gpt-4o-mini-calendar]", + "parametrize", + "pytestmark", + "gpt-4o-mini-calendar", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "gpt-4o-mini", + "case_id": "calendar" + }, + "setup": { + "duration": 
0.01321374997496605, + "outcome": "passed" + }, + "call": { + "duration": 0.8284227909753099, + "outcome": "passed" + }, + "teardown": { + "duration": 0.00030170800164341927, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[gpt-4o-mini-math]", + "lineno": 158, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_structured_output[gpt-4o-mini-math]", + "parametrize", + "pytestmark", + "gpt-4o-mini-math", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "gpt-4o-mini", + "case_id": "math" + }, + "setup": { + "duration": 0.013477458036504686, + "outcome": "passed" + }, + "call": { + "duration": 2.4146235829684883, + "outcome": "passed" + }, + "teardown": { + "duration": 0.00025754200760275126, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[gpt-4o-calendar]", + "lineno": 181, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_structured_output[gpt-4o-calendar]", + "parametrize", + "pytestmark", + "gpt-4o-calendar", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "gpt-4o", + "case_id": "calendar" + }, + "setup": { + "duration": 0.006940583931282163, + "outcome": "passed" + }, + "call": { + "duration": 0.5102092920569703, + "outcome": "passed" + }, + "teardown": { + "duration": 0.00023379107005894184, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[gpt-4o-math]", + "lineno": 181, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_structured_output[gpt-4o-math]", + "parametrize", + "pytestmark", + "gpt-4o-math", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + 
"llama-stack", + "" + ], + "metadata": { + "model": "gpt-4o", + "case_id": "math" + }, + "setup": { + "duration": 0.007166999974288046, + "outcome": "passed" + }, + "call": { + "duration": 3.5751801669830456, + "outcome": "passed" + }, + "teardown": { + "duration": 0.00015041697770357132, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[gpt-4o-mini-calendar]", + "lineno": 181, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_structured_output[gpt-4o-mini-calendar]", + "parametrize", + "pytestmark", + "gpt-4o-mini-calendar", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "gpt-4o-mini", + "case_id": "calendar" + }, + "setup": { + "duration": 0.010652625001966953, + "outcome": "passed" + }, + "call": { + "duration": 0.6648182499920949, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0008647920330986381, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[gpt-4o-mini-math]", + "lineno": 181, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_structured_output[gpt-4o-mini-math]", + "parametrize", + "pytestmark", + "gpt-4o-mini-math", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "gpt-4o-mini", + "case_id": "math" + }, + "setup": { + "duration": 0.007372208056040108, + "outcome": "passed" + }, + "call": { + "duration": 2.80747462506406, + "outcome": "passed" + }, + "teardown": { + "duration": 0.00028124998789280653, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[gpt-4o-case0]", + "lineno": 203, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_tool_calling[gpt-4o-case0]", + 
"parametrize", + "pytestmark", + "gpt-4o-case0", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "gpt-4o", + "case_id": "case0" + }, + "setup": { + "duration": 0.01625587500166148, + "outcome": "passed" + }, + "call": { + "duration": 0.6878769160248339, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0002637499710544944, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[gpt-4o-mini-case0]", + "lineno": 203, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_tool_calling[gpt-4o-mini-case0]", + "parametrize", + "pytestmark", + "gpt-4o-mini-case0", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "gpt-4o-mini", + "case_id": "case0" + }, + "setup": { + "duration": 0.008817250025458634, + "outcome": "passed" + }, + "call": { + "duration": 0.7181202919455245, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0017147079342976213, + "outcome": "passed" + } + } + ] +} diff --git a/tests/verifications/test_results/together_1744154399.json b/tests/verifications/test_results/together_1744154399.json deleted file mode 100644 index ae801e83b..000000000 --- a/tests/verifications/test_results/together_1744154399.json +++ /dev/null @@ -1,2830 +0,0 @@ -{ - "created": 1744154470.9868789, - "duration": 59.6187219619751, - "exitcode": 1, - "root": "/Users/erichuang/projects/llama-stack", - "environment": {}, - "summary": { - "skipped": 52, - "passed": 21, - "failed": 10, - "total": 83, - "collected": 83 - }, - "collectors": [ - { - "nodeid": "", - "outcome": "passed", - "result": [ - { - "nodeid": "tests/verifications/openai/test_chat_completion.py", - "type": "Module" - } - ] - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py", - "outcome": "passed", - "result": [ - { - "nodeid": 
"tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-3.3-8B-Instruct]", - "type": "Function", - "lineno": 25 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-3.3-70B-Instruct]", - "type": "Function", - "lineno": 25 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-4-Scout-17B-16E]", - "type": "Function", - "lineno": 25 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "type": "Function", - "lineno": 25 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-4-Maverick-17B-128E]", - "type": "Function", - "lineno": 25 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "type": "Function", - "lineno": 25 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-gpt-4o]", - "type": "Function", - "lineno": 25 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-gpt-4o-mini]", - "type": "Function", - "lineno": 25 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-3.3-8B-Instruct]", - "type": "Function", - "lineno": 25 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-3.3-70B-Instruct]", - "type": "Function", - "lineno": 25 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-4-Scout-17B-16E]", - "type": "Function", - "lineno": 25 - }, - { - "nodeid": 
"tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-4-Scout-17B-16E-Instruct]", - "type": "Function", - "lineno": 25 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-4-Maverick-17B-128E]", - "type": "Function", - "lineno": 25 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-4-Maverick-17B-128E-Instruct]", - "type": "Function", - "lineno": 25 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-gpt-4o]", - "type": "Function", - "lineno": 25 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-gpt-4o-mini]", - "type": "Function", - "lineno": 25 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-3.3-8B-Instruct]", - "type": "Function", - "lineno": 40 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-3.3-70B-Instruct]", - "type": "Function", - "lineno": 40 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-4-Scout-17B-16E]", - "type": "Function", - "lineno": 40 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "type": "Function", - "lineno": 40 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-4-Maverick-17B-128E]", - "type": "Function", - "lineno": 40 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "type": "Function", - "lineno": 40 - }, - { - "nodeid": 
"tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-gpt-4o]", - "type": "Function", - "lineno": 40 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-gpt-4o-mini]", - "type": "Function", - "lineno": 40 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-3.3-8B-Instruct]", - "type": "Function", - "lineno": 40 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-3.3-70B-Instruct]", - "type": "Function", - "lineno": 40 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-4-Scout-17B-16E]", - "type": "Function", - "lineno": 40 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-4-Scout-17B-16E-Instruct]", - "type": "Function", - "lineno": 40 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-4-Maverick-17B-128E]", - "type": "Function", - "lineno": 40 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-4-Maverick-17B-128E-Instruct]", - "type": "Function", - "lineno": 40 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-gpt-4o]", - "type": "Function", - "lineno": 40 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-gpt-4o-mini]", - "type": "Function", - "lineno": 40 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-Llama-4-Scout-17B-16E]", - "type": "Function", - "lineno": 60 - }, - { - "nodeid": 
"tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "type": "Function", - "lineno": 60 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-Llama-4-Maverick-17B-128E]", - "type": "Function", - "lineno": 60 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "type": "Function", - "lineno": 60 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-gpt-4o]", - "type": "Function", - "lineno": 60 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-gpt-4o-mini]", - "type": "Function", - "lineno": 60 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-Llama-4-Scout-17B-16E]", - "type": "Function", - "lineno": 75 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "type": "Function", - "lineno": 75 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-Llama-4-Maverick-17B-128E]", - "type": "Function", - "lineno": 75 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "type": "Function", - "lineno": 75 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-gpt-4o]", - "type": "Function", - "lineno": 75 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-gpt-4o-mini]", - "type": "Function", - "lineno": 75 - }, - { - "nodeid": 
"tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-3.3-8B-Instruct]", - "type": "Function", - "lineno": 95 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-3.3-70B-Instruct]", - "type": "Function", - "lineno": 95 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E]", - "type": "Function", - "lineno": 95 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "type": "Function", - "lineno": 95 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E]", - "type": "Function", - "lineno": 95 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "type": "Function", - "lineno": 95 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-gpt-4o]", - "type": "Function", - "lineno": 95 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-gpt-4o-mini]", - "type": "Function", - "lineno": 95 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-3.3-8B-Instruct]", - "type": "Function", - "lineno": 95 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-3.3-70B-Instruct]", - "type": "Function", - "lineno": 95 - }, - { - "nodeid": 
"tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E]", - "type": "Function", - "lineno": 95 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E-Instruct]", - "type": "Function", - "lineno": 95 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E]", - "type": "Function", - "lineno": 95 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E-Instruct]", - "type": "Function", - "lineno": 95 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-gpt-4o]", - "type": "Function", - "lineno": 95 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-gpt-4o-mini]", - "type": "Function", - "lineno": 95 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-3.3-8B-Instruct]", - "type": "Function", - "lineno": 117 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-3.3-70B-Instruct]", - "type": "Function", - "lineno": 117 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E]", - "type": "Function", - "lineno": 117 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "type": "Function", - "lineno": 117 - }, - { - "nodeid": 
"tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E]", - "type": "Function", - "lineno": 117 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "type": "Function", - "lineno": 117 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-gpt-4o]", - "type": "Function", - "lineno": 117 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-gpt-4o-mini]", - "type": "Function", - "lineno": 117 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-3.3-8B-Instruct]", - "type": "Function", - "lineno": 117 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-3.3-70B-Instruct]", - "type": "Function", - "lineno": 117 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E]", - "type": "Function", - "lineno": 117 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E-Instruct]", - "type": "Function", - "lineno": 117 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E]", - "type": "Function", - "lineno": 117 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E-Instruct]", - "type": "Function", - "lineno": 117 - }, - { - "nodeid": 
"tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-gpt-4o]", - "type": "Function", - "lineno": 117 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-gpt-4o-mini]", - "type": "Function", - "lineno": 117 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-Llama-3.3-70B-Instruct]", - "type": "Function", - "lineno": 138 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-Llama-4-Scout-17B-16E]", - "type": "Function", - "lineno": 138 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "type": "Function", - "lineno": 138 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-Llama-4-Maverick-17B-128E]", - "type": "Function", - "lineno": 138 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "type": "Function", - "lineno": 138 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-gpt-4o]", - "type": "Function", - "lineno": 138 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-gpt-4o-mini]", - "type": "Function", - "lineno": 138 - } - ] - } - ], - "tests": [ - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-3.3-8B-Instruct]", - "lineno": 25, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_basic[input_output0-Llama-3.3-8B-Instruct]", - "parametrize", - "pytestmark", - 
"input_output0-Llama-3.3-8B-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.39231995795853436, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider together does not support model Llama-3.3-8B-Instruct')" - }, - "teardown": { - "duration": 0.0002014160854741931, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-3.3-70B-Instruct]", - "lineno": 25, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_basic[input_output0-Llama-3.3-70B-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-3.3-70B-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.0071710830088704824, - "outcome": "passed" - }, - "call": { - "duration": 0.7968309168936685, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0004362498875707388, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-4-Scout-17B-16E]", - "lineno": 25, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_basic[input_output0-Llama-4-Scout-17B-16E]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Scout-17B-16E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.012780916062183678, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider together does not support model Llama-4-Scout-17B-16E')" - }, - "teardown": { - "duration": 0.00029158301185816526, - "outcome": "passed" - } - }, - { - "nodeid": 
"tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "lineno": 25, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_basic[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Scout-17B-16E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.013563874992541969, - "outcome": "passed" - }, - "call": { - "duration": 0.5071627920260653, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0005456249928101897, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-4-Maverick-17B-128E]", - "lineno": 25, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_basic[input_output0-Llama-4-Maverick-17B-128E]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Maverick-17B-128E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.020708917058072984, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider together does not support model Llama-4-Maverick-17B-128E')" - }, - "teardown": { - "duration": 0.00030325003899633884, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "lineno": 25, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_basic[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Maverick-17B-128E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 
0.014170082984492183, - "outcome": "passed" - }, - "call": { - "duration": 1.2383921250002459, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0009597090538591146, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-gpt-4o]", - "lineno": 25, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_basic[input_output0-gpt-4o]", - "parametrize", - "pytestmark", - "input_output0-gpt-4o", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.013402250013314188, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider together does not support model gpt-4o')" - }, - "teardown": { - "duration": 0.00028245802968740463, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-gpt-4o-mini]", - "lineno": 25, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_basic[input_output0-gpt-4o-mini]", - "parametrize", - "pytestmark", - "input_output0-gpt-4o-mini", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.008693707990460098, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider together does not support model gpt-4o-mini')" - }, - "teardown": { - "duration": 0.00016249995678663254, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-3.3-8B-Instruct]", - "lineno": 25, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_basic[input_output1-Llama-3.3-8B-Instruct]", - "parametrize", - "pytestmark", - 
"input_output1-Llama-3.3-8B-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.005904874997213483, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider together does not support model Llama-3.3-8B-Instruct')" - }, - "teardown": { - "duration": 0.0001960420049726963, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-3.3-70B-Instruct]", - "lineno": 25, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_basic[input_output1-Llama-3.3-70B-Instruct]", - "parametrize", - "pytestmark", - "input_output1-Llama-3.3-70B-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.006532749976031482, - "outcome": "passed" - }, - "call": { - "duration": 0.5410778749501333, - "outcome": "passed" - }, - "teardown": { - "duration": 0.00019516597967594862, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-4-Scout-17B-16E]", - "lineno": 25, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_basic[input_output1-Llama-4-Scout-17B-16E]", - "parametrize", - "pytestmark", - "input_output1-Llama-4-Scout-17B-16E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.009374375105835497, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider together does not support model Llama-4-Scout-17B-16E')" - }, - "teardown": { - "duration": 0.00015524995978921652, - "outcome": "passed" - } - }, - { - "nodeid": 
"tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-4-Scout-17B-16E-Instruct]", - "lineno": 25, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_basic[input_output1-Llama-4-Scout-17B-16E-Instruct]", - "parametrize", - "pytestmark", - "input_output1-Llama-4-Scout-17B-16E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.007205875008367002, - "outcome": "passed" - }, - "call": { - "duration": 0.42584729101508856, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0009506250498816371, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-4-Maverick-17B-128E]", - "lineno": 25, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_basic[input_output1-Llama-4-Maverick-17B-128E]", - "parametrize", - "pytestmark", - "input_output1-Llama-4-Maverick-17B-128E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.029625958995893598, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider together does not support model Llama-4-Maverick-17B-128E')" - }, - "teardown": { - "duration": 0.0001860830234363675, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-4-Maverick-17B-128E-Instruct]", - "lineno": 25, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_basic[input_output1-Llama-4-Maverick-17B-128E-Instruct]", - "parametrize", - "pytestmark", - "input_output1-Llama-4-Maverick-17B-128E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 
0.023576707928441465, - "outcome": "passed" - }, - "call": { - "duration": 1.2249365829629824, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0004278330598026514, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-gpt-4o]", - "lineno": 25, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_basic[input_output1-gpt-4o]", - "parametrize", - "pytestmark", - "input_output1-gpt-4o", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.014816291979514062, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider together does not support model gpt-4o')" - }, - "teardown": { - "duration": 0.00029558304231613874, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-gpt-4o-mini]", - "lineno": 25, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_basic[input_output1-gpt-4o-mini]", - "parametrize", - "pytestmark", - "input_output1-gpt-4o-mini", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.012769333901815116, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider together does not support model gpt-4o-mini')" - }, - "teardown": { - "duration": 0.00024329195730388165, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-3.3-8B-Instruct]", - "lineno": 40, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_basic[input_output0-Llama-3.3-8B-Instruct]", - "parametrize", - "pytestmark", - 
"input_output0-Llama-3.3-8B-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.009145625052042305, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider together does not support model Llama-3.3-8B-Instruct')" - }, - "teardown": { - "duration": 0.00021195888984948397, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-3.3-70B-Instruct]", - "lineno": 40, - "outcome": "passed", - "keywords": [ - "test_chat_streaming_basic[input_output0-Llama-3.3-70B-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-3.3-70B-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.0133140409598127, - "outcome": "passed" - }, - "call": { - "duration": 0.7228892090497538, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0004301250446587801, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-4-Scout-17B-16E]", - "lineno": 40, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_basic[input_output0-Llama-4-Scout-17B-16E]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Scout-17B-16E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.013998750015161932, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider together does not support model Llama-4-Scout-17B-16E')" - }, - "teardown": { - "duration": 0.0002961249556392431, - "outcome": "passed" - } - }, - { - "nodeid": 
"tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "lineno": 40, - "outcome": "failed", - "keywords": [ - "test_chat_streaming_basic[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Scout-17B-16E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.012570249964483082, - "outcome": "passed" - }, - "call": { - "duration": 0.7193170419195667, - "outcome": "failed", - "crash": { - "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py", - "lineno": 54, - "message": "IndexError: list index out of range" - }, - "traceback": [ - { - "path": "tests/verifications/openai/test_chat_completion.py", - "lineno": 54, - "message": "IndexError" - } - ], - "longrepr": "openai_client = \ninput_output = {'input': {'messages': [{'content': 'Which planet do humans live on?', 'role': 'user'}]}, 'output': 'Earth'}\ncorrect_model_name = 'meta-llama/Llama-4-Scout-17B-16E-Instruct'\n\n @pytest.mark.parametrize(\"model\", chat_completion_test_cases[\"test_chat_basic\"][\"test_params\"][\"model\"])\n @pytest.mark.parametrize(\n \"input_output\",\n chat_completion_test_cases[\"test_chat_basic\"][\"test_params\"][\"input_output\"],\n )\n def test_chat_streaming_basic(openai_client, input_output, correct_model_name):\n response = openai_client.chat.completions.create(\n model=correct_model_name,\n messages=input_output[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai/test_chat_completion.py:54: IndexError" - }, - "teardown": { - "duration": 0.00022504094522446394, - "outcome": "passed" - } - }, - { - "nodeid": 
"tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-4-Maverick-17B-128E]", - "lineno": 40, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_basic[input_output0-Llama-4-Maverick-17B-128E]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Maverick-17B-128E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.006660082959569991, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider together does not support model Llama-4-Maverick-17B-128E')" - }, - "teardown": { - "duration": 0.0001445829402655363, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "lineno": 40, - "outcome": "failed", - "keywords": [ - "test_chat_streaming_basic[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Maverick-17B-128E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.021228999947197735, - "outcome": "passed" - }, - "call": { - "duration": 1.5670281670754775, - "outcome": "failed", - "crash": { - "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py", - "lineno": 54, - "message": "IndexError: list index out of range" - }, - "traceback": [ - { - "path": "tests/verifications/openai/test_chat_completion.py", - "lineno": 54, - "message": "IndexError" - } - ], - "longrepr": "openai_client = \ninput_output = {'input': {'messages': [{'content': 'Which planet do humans live on?', 'role': 'user'}]}, 'output': 'Earth'}\ncorrect_model_name = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\n\n @pytest.mark.parametrize(\"model\", 
chat_completion_test_cases[\"test_chat_basic\"][\"test_params\"][\"model\"])\n @pytest.mark.parametrize(\n \"input_output\",\n chat_completion_test_cases[\"test_chat_basic\"][\"test_params\"][\"input_output\"],\n )\n def test_chat_streaming_basic(openai_client, input_output, correct_model_name):\n response = openai_client.chat.completions.create(\n model=correct_model_name,\n messages=input_output[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai/test_chat_completion.py:54: IndexError" - }, - "teardown": { - "duration": 0.0004656669916585088, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-gpt-4o]", - "lineno": 40, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_basic[input_output0-gpt-4o]", - "parametrize", - "pytestmark", - "input_output0-gpt-4o", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.009595917072147131, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider together does not support model gpt-4o')" - }, - "teardown": { - "duration": 0.00025625003036111593, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-gpt-4o-mini]", - "lineno": 40, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_basic[input_output0-gpt-4o-mini]", - "parametrize", - "pytestmark", - "input_output0-gpt-4o-mini", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.009242708911187947, - "outcome": "skipped", - "longrepr": 
"('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider together does not support model gpt-4o-mini')" - }, - "teardown": { - "duration": 0.0002484159776940942, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-3.3-8B-Instruct]", - "lineno": 40, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_basic[input_output1-Llama-3.3-8B-Instruct]", - "parametrize", - "pytestmark", - "input_output1-Llama-3.3-8B-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.00905474997125566, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider together does not support model Llama-3.3-8B-Instruct')" - }, - "teardown": { - "duration": 0.00023312494158744812, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-3.3-70B-Instruct]", - "lineno": 40, - "outcome": "passed", - "keywords": [ - "test_chat_streaming_basic[input_output1-Llama-3.3-70B-Instruct]", - "parametrize", - "pytestmark", - "input_output1-Llama-3.3-70B-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.007183165987953544, - "outcome": "passed" - }, - "call": { - "duration": 1.0667660840554163, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0005163750611245632, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-4-Scout-17B-16E]", - "lineno": 40, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_basic[input_output1-Llama-4-Scout-17B-16E]", - "parametrize", - "pytestmark", - 
"input_output1-Llama-4-Scout-17B-16E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.05233616603072733, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider together does not support model Llama-4-Scout-17B-16E')" - }, - "teardown": { - "duration": 0.0003471659729257226, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-4-Scout-17B-16E-Instruct]", - "lineno": 40, - "outcome": "failed", - "keywords": [ - "test_chat_streaming_basic[input_output1-Llama-4-Scout-17B-16E-Instruct]", - "parametrize", - "pytestmark", - "input_output1-Llama-4-Scout-17B-16E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.015932541922666132, - "outcome": "passed" - }, - "call": { - "duration": 0.41540695796720684, - "outcome": "failed", - "crash": { - "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py", - "lineno": 54, - "message": "IndexError: list index out of range" - }, - "traceback": [ - { - "path": "tests/verifications/openai/test_chat_completion.py", - "lineno": 54, - "message": "IndexError" - } - ], - "longrepr": "openai_client = \ninput_output = {'input': {'messages': [{'content': 'Which planet has rings around it with a name starting with letter S?', 'role': 'user'}]}, 'output': 'Saturn'}\ncorrect_model_name = 'meta-llama/Llama-4-Scout-17B-16E-Instruct'\n\n @pytest.mark.parametrize(\"model\", chat_completion_test_cases[\"test_chat_basic\"][\"test_params\"][\"model\"])\n @pytest.mark.parametrize(\n \"input_output\",\n chat_completion_test_cases[\"test_chat_basic\"][\"test_params\"][\"input_output\"],\n )\n def test_chat_streaming_basic(openai_client, input_output, 
correct_model_name):\n response = openai_client.chat.completions.create(\n model=correct_model_name,\n messages=input_output[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai/test_chat_completion.py:54: IndexError" - }, - "teardown": { - "duration": 0.0002845840062946081, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-4-Maverick-17B-128E]", - "lineno": 40, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_basic[input_output1-Llama-4-Maverick-17B-128E]", - "parametrize", - "pytestmark", - "input_output1-Llama-4-Maverick-17B-128E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.007243875064887106, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider together does not support model Llama-4-Maverick-17B-128E')" - }, - "teardown": { - "duration": 0.00016258296091109514, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-4-Maverick-17B-128E-Instruct]", - "lineno": 40, - "outcome": "failed", - "keywords": [ - "test_chat_streaming_basic[input_output1-Llama-4-Maverick-17B-128E-Instruct]", - "parametrize", - "pytestmark", - "input_output1-Llama-4-Maverick-17B-128E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.009275624994188547, - "outcome": "passed" - }, - "call": { - "duration": 1.43309554096777, - "outcome": "failed", - "crash": { - "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py", - "lineno": 
54, - "message": "IndexError: list index out of range" - }, - "traceback": [ - { - "path": "tests/verifications/openai/test_chat_completion.py", - "lineno": 54, - "message": "IndexError" - } - ], - "longrepr": "openai_client = \ninput_output = {'input': {'messages': [{'content': 'Which planet has rings around it with a name starting with letter S?', 'role': 'user'}]}, 'output': 'Saturn'}\ncorrect_model_name = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\n\n @pytest.mark.parametrize(\"model\", chat_completion_test_cases[\"test_chat_basic\"][\"test_params\"][\"model\"])\n @pytest.mark.parametrize(\n \"input_output\",\n chat_completion_test_cases[\"test_chat_basic\"][\"test_params\"][\"input_output\"],\n )\n def test_chat_streaming_basic(openai_client, input_output, correct_model_name):\n response = openai_client.chat.completions.create(\n model=correct_model_name,\n messages=input_output[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai/test_chat_completion.py:54: IndexError" - }, - "teardown": { - "duration": 0.0003690000157803297, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-gpt-4o]", - "lineno": 40, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_basic[input_output1-gpt-4o]", - "parametrize", - "pytestmark", - "input_output1-gpt-4o", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.011570582981221378, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider together does not support model gpt-4o')" - }, - "teardown": { - "duration": 0.00024937500711530447, - "outcome": "passed" - } - }, - { - "nodeid": 
"tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-gpt-4o-mini]", - "lineno": 40, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_basic[input_output1-gpt-4o-mini]", - "parametrize", - "pytestmark", - "input_output1-gpt-4o-mini", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.010756584000773728, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider together does not support model gpt-4o-mini')" - }, - "teardown": { - "duration": 0.00026183295994997025, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-Llama-4-Scout-17B-16E]", - "lineno": 60, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_image[input_output0-Llama-4-Scout-17B-16E]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Scout-17B-16E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.008863041992299259, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 61, 'Skipped: Provider together does not support model Llama-4-Scout-17B-16E')" - }, - "teardown": { - "duration": 0.00023283297196030617, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "lineno": 60, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_image[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Scout-17B-16E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - 
"duration": 0.007975792046636343, - "outcome": "passed" - }, - "call": { - "duration": 2.1585817909799516, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0005107080796733499, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-Llama-4-Maverick-17B-128E]", - "lineno": 60, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_image[input_output0-Llama-4-Maverick-17B-128E]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Maverick-17B-128E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.05228079203516245, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 61, 'Skipped: Provider together does not support model Llama-4-Maverick-17B-128E')" - }, - "teardown": { - "duration": 0.0017226670170202851, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "lineno": 60, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_image[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Maverick-17B-128E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.009964749915525317, - "outcome": "passed" - }, - "call": { - "duration": 4.6593364590080455, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0009852920193225145, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-gpt-4o]", - "lineno": 60, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_image[input_output0-gpt-4o]", - "parametrize", - "pytestmark", 
- "input_output0-gpt-4o", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.023214041953906417, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 61, 'Skipped: Provider together does not support model gpt-4o')" - }, - "teardown": { - "duration": 0.0003567079547792673, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-gpt-4o-mini]", - "lineno": 60, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_image[input_output0-gpt-4o-mini]", - "parametrize", - "pytestmark", - "input_output0-gpt-4o-mini", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.01705008395947516, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 61, 'Skipped: Provider together does not support model gpt-4o-mini')" - }, - "teardown": { - "duration": 0.0003085409989580512, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-Llama-4-Scout-17B-16E]", - "lineno": 75, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_image[input_output0-Llama-4-Scout-17B-16E]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Scout-17B-16E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.014711958006955683, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 76, 'Skipped: Provider together does not support model Llama-4-Scout-17B-16E')" - }, - "teardown": { - "duration": 0.0003121249610558152, - "outcome": "passed" - } - }, - { - 
"nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "lineno": 75, - "outcome": "failed", - "keywords": [ - "test_chat_streaming_image[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Scout-17B-16E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.01843333407305181, - "outcome": "passed" - }, - "call": { - "duration": 2.8683876669965684, - "outcome": "failed", - "crash": { - "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py", - "lineno": 89, - "message": "IndexError: list index out of range" - }, - "traceback": [ - { - "path": "tests/verifications/openai/test_chat_completion.py", - "lineno": 89, - "message": "IndexError" - } - ], - "longrepr": "openai_client = \ninput_output = {'input': {'messages': [{'content': [{'text': 'What is in this image?', 'type': 'text'}, {'image_url': {...}, 'type': 'image_url'}], 'role': 'user'}]}, 'output': 'llama'}\ncorrect_model_name = 'meta-llama/Llama-4-Scout-17B-16E-Instruct'\n\n @pytest.mark.parametrize(\"model\", chat_completion_test_cases[\"test_chat_image\"][\"test_params\"][\"model\"])\n @pytest.mark.parametrize(\n \"input_output\",\n chat_completion_test_cases[\"test_chat_image\"][\"test_params\"][\"input_output\"],\n )\n def test_chat_streaming_image(openai_client, input_output, correct_model_name):\n response = openai_client.chat.completions.create(\n model=correct_model_name,\n messages=input_output[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai/test_chat_completion.py:89: IndexError" - }, - "teardown": { - "duration": 0.00028662499971687794, - "outcome": "passed" - } - }, - { - "nodeid": 
"tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-Llama-4-Maverick-17B-128E]", - "lineno": 75, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_image[input_output0-Llama-4-Maverick-17B-128E]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Maverick-17B-128E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.00653208396397531, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 76, 'Skipped: Provider together does not support model Llama-4-Maverick-17B-128E')" - }, - "teardown": { - "duration": 0.00021291698794811964, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "lineno": 75, - "outcome": "failed", - "keywords": [ - "test_chat_streaming_image[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Maverick-17B-128E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.006028458010405302, - "outcome": "passed" - }, - "call": { - "duration": 4.981105040991679, - "outcome": "failed", - "crash": { - "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py", - "lineno": 89, - "message": "IndexError: list index out of range" - }, - "traceback": [ - { - "path": "tests/verifications/openai/test_chat_completion.py", - "lineno": 89, - "message": "IndexError" - } - ], - "longrepr": "openai_client = \ninput_output = {'input': {'messages': [{'content': [{'text': 'What is in this image?', 'type': 'text'}, {'image_url': {...}, 'type': 'image_url'}], 'role': 'user'}]}, 'output': 'llama'}\ncorrect_model_name = 
'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\n\n @pytest.mark.parametrize(\"model\", chat_completion_test_cases[\"test_chat_image\"][\"test_params\"][\"model\"])\n @pytest.mark.parametrize(\n \"input_output\",\n chat_completion_test_cases[\"test_chat_image\"][\"test_params\"][\"input_output\"],\n )\n def test_chat_streaming_image(openai_client, input_output, correct_model_name):\n response = openai_client.chat.completions.create(\n model=correct_model_name,\n messages=input_output[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai/test_chat_completion.py:89: IndexError" - }, - "teardown": { - "duration": 0.0010110830189660192, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-gpt-4o]", - "lineno": 75, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_image[input_output0-gpt-4o]", - "parametrize", - "pytestmark", - "input_output0-gpt-4o", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.01591233303770423, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 76, 'Skipped: Provider together does not support model gpt-4o')" - }, - "teardown": { - "duration": 0.0003783750580623746, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-gpt-4o-mini]", - "lineno": 75, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_image[input_output0-gpt-4o-mini]", - "parametrize", - "pytestmark", - "input_output0-gpt-4o-mini", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.010691000032238662, 
- "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 76, 'Skipped: Provider together does not support model gpt-4o-mini')" - }, - "teardown": { - "duration": 0.00027445796877145767, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-3.3-8B-Instruct]", - "lineno": 95, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_structured_output[input_output0-Llama-3.3-8B-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-3.3-8B-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.01258529198821634, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider together does not support model Llama-3.3-8B-Instruct')" - }, - "teardown": { - "duration": 0.0002044580178335309, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-3.3-70B-Instruct]", - "lineno": 95, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_structured_output[input_output0-Llama-3.3-70B-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-3.3-70B-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.010904791066423059, - "outcome": "passed" - }, - "call": { - "duration": 0.8311828339938074, - "outcome": "passed" - }, - "teardown": { - "duration": 0.00048687495291233063, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E]", - "lineno": 95, - "outcome": "skipped", - 
"keywords": [ - "test_chat_non_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Scout-17B-16E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.029216791968792677, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider together does not support model Llama-4-Scout-17B-16E')" - }, - "teardown": { - "duration": 0.0002269580727443099, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "lineno": 95, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Scout-17B-16E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.013182583032175899, - "outcome": "passed" - }, - "call": { - "duration": 1.7446029160637408, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0008087089518085122, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E]", - "lineno": 95, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Maverick-17B-128E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.02009516698308289, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 
'Skipped: Provider together does not support model Llama-4-Maverick-17B-128E')" - }, - "teardown": { - "duration": 0.000320291961543262, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "lineno": 95, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Maverick-17B-128E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.015216833096928895, - "outcome": "passed" - }, - "call": { - "duration": 0.8049291669158265, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0005109170451760292, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-gpt-4o]", - "lineno": 95, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_structured_output[input_output0-gpt-4o]", - "parametrize", - "pytestmark", - "input_output0-gpt-4o", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.0171551660168916, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider together does not support model gpt-4o')" - }, - "teardown": { - "duration": 0.0005707499803975224, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-gpt-4o-mini]", - "lineno": 95, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_structured_output[input_output0-gpt-4o-mini]", - "parametrize", - "pytestmark", - "input_output0-gpt-4o-mini", - 
"test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.01131124992389232, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider together does not support model gpt-4o-mini')" - }, - "teardown": { - "duration": 0.0003044159384444356, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-3.3-8B-Instruct]", - "lineno": 95, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_structured_output[input_output1-Llama-3.3-8B-Instruct]", - "parametrize", - "pytestmark", - "input_output1-Llama-3.3-8B-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.0054290409898385406, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider together does not support model Llama-3.3-8B-Instruct')" - }, - "teardown": { - "duration": 0.00014645792543888092, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-3.3-70B-Instruct]", - "lineno": 95, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_structured_output[input_output1-Llama-3.3-70B-Instruct]", - "parametrize", - "pytestmark", - "input_output1-Llama-3.3-70B-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.011368000064976513, - "outcome": "passed" - }, - "call": { - "duration": 4.363120499998331, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0003998749889433384, - "outcome": "passed" - } - }, - { - "nodeid": 
"tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E]", - "lineno": 95, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E]", - "parametrize", - "pytestmark", - "input_output1-Llama-4-Scout-17B-16E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.04945958300959319, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider together does not support model Llama-4-Scout-17B-16E')" - }, - "teardown": { - "duration": 0.0002401659730821848, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E-Instruct]", - "lineno": 95, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E-Instruct]", - "parametrize", - "pytestmark", - "input_output1-Llama-4-Scout-17B-16E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.011090958025306463, - "outcome": "passed" - }, - "call": { - "duration": 4.699277375009842, - "outcome": "passed" - }, - "teardown": { - "duration": 0.000689250067807734, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E]", - "lineno": 95, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E]", - "parametrize", - "pytestmark", - "input_output1-Llama-4-Maverick-17B-128E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": 
{ - "duration": 0.020744459005072713, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider together does not support model Llama-4-Maverick-17B-128E')" - }, - "teardown": { - "duration": 0.0001836250303313136, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E-Instruct]", - "lineno": 95, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E-Instruct]", - "parametrize", - "pytestmark", - "input_output1-Llama-4-Maverick-17B-128E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.005926624988205731, - "outcome": "passed" - }, - "call": { - "duration": 2.7814464160474017, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0009554170537739992, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-gpt-4o]", - "lineno": 95, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_structured_output[input_output1-gpt-4o]", - "parametrize", - "pytestmark", - "input_output1-gpt-4o", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.03027112502604723, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider together does not support model gpt-4o')" - }, - "teardown": { - "duration": 0.0003245410043746233, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-gpt-4o-mini]", - "lineno": 95, - "outcome": 
"skipped", - "keywords": [ - "test_chat_non_streaming_structured_output[input_output1-gpt-4o-mini]", - "parametrize", - "pytestmark", - "input_output1-gpt-4o-mini", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.009138708002865314, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider together does not support model gpt-4o-mini')" - }, - "teardown": { - "duration": 0.0001919999485835433, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-3.3-8B-Instruct]", - "lineno": 117, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_structured_output[input_output0-Llama-3.3-8B-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-3.3-8B-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.0064505410846322775, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider together does not support model Llama-3.3-8B-Instruct')" - }, - "teardown": { - "duration": 0.00015720794908702374, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-3.3-70B-Instruct]", - "lineno": 117, - "outcome": "passed", - "keywords": [ - "test_chat_streaming_structured_output[input_output0-Llama-3.3-70B-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-3.3-70B-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.00582624995149672, - "outcome": "passed" - }, - "call": { - "duration": 0.8302567919017747, - 
"outcome": "passed" - }, - "teardown": { - "duration": 0.00020354206208139658, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E]", - "lineno": 117, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Scout-17B-16E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.014151416951790452, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider together does not support model Llama-4-Scout-17B-16E')" - }, - "teardown": { - "duration": 0.00034970801789313555, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "lineno": 117, - "outcome": "failed", - "keywords": [ - "test_chat_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Scout-17B-16E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.012150791939347982, - "outcome": "passed" - }, - "call": { - "duration": 0.7078855830477551, - "outcome": "failed", - "crash": { - "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py", - "lineno": 135, - "message": "IndexError: list index out of range" - }, - "traceback": [ - { - "path": "tests/verifications/openai/test_chat_completion.py", - "lineno": 135, - "message": "IndexError" - } - ], - "longrepr": "openai_client = \ninput_output = {'input': {'messages': [{'content': 'Extract the event information.', 'role': 
'system'}, {'content': 'Alice and Bob ar...articipants'], 'title': 'CalendarEvent', 'type': 'object'}}, 'type': 'json_schema'}}, 'output': 'valid_calendar_event'}\ncorrect_model_name = 'meta-llama/Llama-4-Scout-17B-16E-Instruct'\n\n @pytest.mark.parametrize(\n \"model\",\n chat_completion_test_cases[\"test_chat_structured_output\"][\"test_params\"][\"model\"],\n )\n @pytest.mark.parametrize(\n \"input_output\",\n chat_completion_test_cases[\"test_chat_structured_output\"][\"test_params\"][\"input_output\"],\n )\n def test_chat_streaming_structured_output(openai_client, input_output, correct_model_name):\n response = openai_client.chat.completions.create(\n model=correct_model_name,\n messages=input_output[\"input\"][\"messages\"],\n response_format=input_output[\"input\"][\"response_format\"],\n stream=True,\n )\n maybe_json_content = \"\"\n for chunk in response:\n> maybe_json_content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai/test_chat_completion.py:135: IndexError" - }, - "teardown": { - "duration": 0.0008542909054085612, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E]", - "lineno": 117, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Maverick-17B-128E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.022667833953164518, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider together does not support model Llama-4-Maverick-17B-128E')" - }, - "teardown": { - "duration": 0.0006820419803261757, - "outcome": "passed" - } - }, - { - "nodeid": 
"tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "lineno": 117, - "outcome": "failed", - "keywords": [ - "test_chat_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Maverick-17B-128E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.01285991701297462, - "outcome": "passed" - }, - "call": { - "duration": 0.6888671671040356, - "outcome": "failed", - "crash": { - "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py", - "lineno": 135, - "message": "IndexError: list index out of range" - }, - "traceback": [ - { - "path": "tests/verifications/openai/test_chat_completion.py", - "lineno": 135, - "message": "IndexError" - } - ], - "longrepr": "openai_client = \ninput_output = {'input': {'messages': [{'content': 'Extract the event information.', 'role': 'system'}, {'content': 'Alice and Bob ar...articipants'], 'title': 'CalendarEvent', 'type': 'object'}}, 'type': 'json_schema'}}, 'output': 'valid_calendar_event'}\ncorrect_model_name = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\n\n @pytest.mark.parametrize(\n \"model\",\n chat_completion_test_cases[\"test_chat_structured_output\"][\"test_params\"][\"model\"],\n )\n @pytest.mark.parametrize(\n \"input_output\",\n chat_completion_test_cases[\"test_chat_structured_output\"][\"test_params\"][\"input_output\"],\n )\n def test_chat_streaming_structured_output(openai_client, input_output, correct_model_name):\n response = openai_client.chat.completions.create(\n model=correct_model_name,\n messages=input_output[\"input\"][\"messages\"],\n response_format=input_output[\"input\"][\"response_format\"],\n stream=True,\n )\n maybe_json_content = \"\"\n for chunk in response:\n> maybe_json_content += 
chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai/test_chat_completion.py:135: IndexError" - }, - "teardown": { - "duration": 0.0007953330641612411, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-gpt-4o]", - "lineno": 117, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_structured_output[input_output0-gpt-4o]", - "parametrize", - "pytestmark", - "input_output0-gpt-4o", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.015029000001959503, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider together does not support model gpt-4o')" - }, - "teardown": { - "duration": 0.00015666603576391935, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-gpt-4o-mini]", - "lineno": 117, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_structured_output[input_output0-gpt-4o-mini]", - "parametrize", - "pytestmark", - "input_output0-gpt-4o-mini", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.00622316705994308, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider together does not support model gpt-4o-mini')" - }, - "teardown": { - "duration": 0.0001533749746158719, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-3.3-8B-Instruct]", - "lineno": 117, - "outcome": "skipped", - "keywords": [ - 
"test_chat_streaming_structured_output[input_output1-Llama-3.3-8B-Instruct]", - "parametrize", - "pytestmark", - "input_output1-Llama-3.3-8B-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.005598834017291665, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider together does not support model Llama-3.3-8B-Instruct')" - }, - "teardown": { - "duration": 0.00013062497600913048, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-3.3-70B-Instruct]", - "lineno": 117, - "outcome": "passed", - "keywords": [ - "test_chat_streaming_structured_output[input_output1-Llama-3.3-70B-Instruct]", - "parametrize", - "pytestmark", - "input_output1-Llama-3.3-70B-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.005876541952602565, - "outcome": "passed" - }, - "call": { - "duration": 7.561108374968171, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0004579999949783087, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E]", - "lineno": 117, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E]", - "parametrize", - "pytestmark", - "input_output1-Llama-4-Scout-17B-16E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.018791542039252818, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider together does not support model 
Llama-4-Scout-17B-16E')" - }, - "teardown": { - "duration": 0.0004900830099359155, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E-Instruct]", - "lineno": 117, - "outcome": "failed", - "keywords": [ - "test_chat_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E-Instruct]", - "parametrize", - "pytestmark", - "input_output1-Llama-4-Scout-17B-16E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.0065952910808846354, - "outcome": "passed" - }, - "call": { - "duration": 2.6826554159633815, - "outcome": "failed", - "crash": { - "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py", - "lineno": 135, - "message": "IndexError: list index out of range" - }, - "traceback": [ - { - "path": "tests/verifications/openai/test_chat_completion.py", - "lineno": 135, - "message": "IndexError" - } - ], - "longrepr": "openai_client = \ninput_output = {'input': {'messages': [{'content': 'You are a helpful math tutor. Guide the user through the solution step by step.',... 
['steps', 'final_answer'], 'title': 'MathReasoning', ...}}, 'type': 'json_schema'}}, 'output': 'valid_math_reasoning'}\ncorrect_model_name = 'meta-llama/Llama-4-Scout-17B-16E-Instruct'\n\n @pytest.mark.parametrize(\n \"model\",\n chat_completion_test_cases[\"test_chat_structured_output\"][\"test_params\"][\"model\"],\n )\n @pytest.mark.parametrize(\n \"input_output\",\n chat_completion_test_cases[\"test_chat_structured_output\"][\"test_params\"][\"input_output\"],\n )\n def test_chat_streaming_structured_output(openai_client, input_output, correct_model_name):\n response = openai_client.chat.completions.create(\n model=correct_model_name,\n messages=input_output[\"input\"][\"messages\"],\n response_format=input_output[\"input\"][\"response_format\"],\n stream=True,\n )\n maybe_json_content = \"\"\n for chunk in response:\n> maybe_json_content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai/test_chat_completion.py:135: IndexError" - }, - "teardown": { - "duration": 0.0009669580031186342, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E]", - "lineno": 117, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E]", - "parametrize", - "pytestmark", - "input_output1-Llama-4-Maverick-17B-128E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.019489208003506064, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider together does not support model Llama-4-Maverick-17B-128E')" - }, - "teardown": { - "duration": 0.0007419160101562738, - "outcome": "passed" - } - }, - { - "nodeid": 
"tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E-Instruct]", - "lineno": 117, - "outcome": "failed", - "keywords": [ - "test_chat_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E-Instruct]", - "parametrize", - "pytestmark", - "input_output1-Llama-4-Maverick-17B-128E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.012299792026169598, - "outcome": "passed" - }, - "call": { - "duration": 2.829678333015181, - "outcome": "failed", - "crash": { - "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py", - "lineno": 135, - "message": "IndexError: list index out of range" - }, - "traceback": [ - { - "path": "tests/verifications/openai/test_chat_completion.py", - "lineno": 135, - "message": "IndexError" - } - ], - "longrepr": "openai_client = \ninput_output = {'input': {'messages': [{'content': 'You are a helpful math tutor. Guide the user through the solution step by step.',... 
['steps', 'final_answer'], 'title': 'MathReasoning', ...}}, 'type': 'json_schema'}}, 'output': 'valid_math_reasoning'}\ncorrect_model_name = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\n\n @pytest.mark.parametrize(\n \"model\",\n chat_completion_test_cases[\"test_chat_structured_output\"][\"test_params\"][\"model\"],\n )\n @pytest.mark.parametrize(\n \"input_output\",\n chat_completion_test_cases[\"test_chat_structured_output\"][\"test_params\"][\"input_output\"],\n )\n def test_chat_streaming_structured_output(openai_client, input_output, correct_model_name):\n response = openai_client.chat.completions.create(\n model=correct_model_name,\n messages=input_output[\"input\"][\"messages\"],\n response_format=input_output[\"input\"][\"response_format\"],\n stream=True,\n )\n maybe_json_content = \"\"\n for chunk in response:\n> maybe_json_content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai/test_chat_completion.py:135: IndexError" - }, - "teardown": { - "duration": 0.0010418329620733857, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-gpt-4o]", - "lineno": 117, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_structured_output[input_output1-gpt-4o]", - "parametrize", - "pytestmark", - "input_output1-gpt-4o", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.016189916990697384, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider together does not support model gpt-4o')" - }, - "teardown": { - "duration": 0.00027966592460870743, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-gpt-4o-mini]", - "lineno": 
117, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_structured_output[input_output1-gpt-4o-mini]", - "parametrize", - "pytestmark", - "input_output1-gpt-4o-mini", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.010247125057503581, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider together does not support model gpt-4o-mini')" - }, - "teardown": { - "duration": 0.00023291702382266521, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-Llama-3.3-70B-Instruct]", - "lineno": 138, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_tool_calling[input_output0-Llama-3.3-70B-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-3.3-70B-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.012632582918740809, - "outcome": "passed" - }, - "call": { - "duration": 0.40774812502786517, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0007319580763578415, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-Llama-4-Scout-17B-16E]", - "lineno": 138, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_tool_calling[input_output0-Llama-4-Scout-17B-16E]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Scout-17B-16E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.019890791969373822, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 139, 'Skipped: Provider together does not support model 
Llama-4-Scout-17B-16E')" - }, - "teardown": { - "duration": 0.0006391670322045684, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "lineno": 138, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_tool_calling[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Scout-17B-16E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.0178165000397712, - "outcome": "passed" - }, - "call": { - "duration": 0.38229950005188584, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0010000420734286308, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-Llama-4-Maverick-17B-128E]", - "lineno": 138, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_tool_calling[input_output0-Llama-4-Maverick-17B-128E]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Maverick-17B-128E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.024259291938506067, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 139, 'Skipped: Provider together does not support model Llama-4-Maverick-17B-128E')" - }, - "teardown": { - "duration": 0.0003602079814299941, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "lineno": 138, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_tool_calling[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "parametrize", - "pytestmark", - 
"input_output0-Llama-4-Maverick-17B-128E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.012425708002410829, - "outcome": "passed" - }, - "call": { - "duration": 0.7610744580160826, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0005935420049354434, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-gpt-4o]", - "lineno": 138, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_tool_calling[input_output0-gpt-4o]", - "parametrize", - "pytestmark", - "input_output0-gpt-4o", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.018717541941441596, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 139, 'Skipped: Provider together does not support model gpt-4o')" - }, - "teardown": { - "duration": 0.000659791985526681, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-gpt-4o-mini]", - "lineno": 138, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_tool_calling[input_output0-gpt-4o-mini]", - "parametrize", - "pytestmark", - "input_output0-gpt-4o-mini", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.012784749967977405, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 139, 'Skipped: Provider together does not support model gpt-4o-mini')" - }, - "teardown": { - "duration": 0.0002145830076187849, - "outcome": "passed" - } - } - ] -} diff --git a/tests/verifications/test_results/together_1744264258.json 
b/tests/verifications/test_results/together_1744264258.json new file mode 100644 index 000000000..c38dd52b5 --- /dev/null +++ b/tests/verifications/test_results/together_1744264258.json @@ -0,0 +1,1420 @@ +{ + "created": 1744264304.064288, + "duration": 42.470197916030884, + "exitcode": 1, + "root": "/Users/erichuang/projects/llama-stack", + "environment": {}, + "summary": { + "passed": 21, + "failed": 10, + "skipped": 2, + "total": 33, + "collected": 33 + }, + "collectors": [ + { + "nodeid": "", + "outcome": "passed", + "result": [ + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py", + "type": "Module" + } + ] + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py", + "outcome": "passed", + "result": [ + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-earth]", + "type": "Function", + "lineno": 72 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-saturn]", + "type": "Function", + "lineno": 72 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-earth]", + "type": "Function", + "lineno": 72 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-saturn]", + "type": "Function", + "lineno": 72 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-earth]", + "type": "Function", + "lineno": 72 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-saturn]", + "type": "Function", + "lineno": 72 + }, + { + "nodeid": 
"tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-earth]", + "type": "Function", + "lineno": 91 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-saturn]", + "type": "Function", + "lineno": 91 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-earth]", + "type": "Function", + "lineno": 91 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-saturn]", + "type": "Function", + "lineno": 91 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-earth]", + "type": "Function", + "lineno": 91 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-saturn]", + "type": "Function", + "lineno": 91 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", + "type": "Function", + "lineno": 115 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", + "type": "Function", + "lineno": 115 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", + "type": "Function", + "lineno": 115 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", + "type": "Function", + "lineno": 134 + }, + { + "nodeid": 
"tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", + "type": "Function", + "lineno": 134 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", + "type": "Function", + "lineno": 134 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-calendar]", + "type": "Function", + "lineno": 158 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-math]", + "type": "Function", + "lineno": 158 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-calendar]", + "type": "Function", + "lineno": 158 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-math]", + "type": "Function", + "lineno": 158 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-calendar]", + "type": "Function", + "lineno": 158 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-math]", + "type": "Function", + "lineno": 158 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-calendar]", + "type": "Function", + "lineno": 181 + }, + { + "nodeid": 
"tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-math]", + "type": "Function", + "lineno": 181 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-calendar]", + "type": "Function", + "lineno": 181 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-math]", + "type": "Function", + "lineno": 181 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-calendar]", + "type": "Function", + "lineno": 181 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-math]", + "type": "Function", + "lineno": 181 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", + "type": "Function", + "lineno": 203 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", + "type": "Function", + "lineno": 203 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", + "type": "Function", + "lineno": 203 + } + ] + } + ], + "tests": [ + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-earth]", + "lineno": 72, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-earth]", + "parametrize", + "pytestmark", + 
"meta-llama/Llama-3.3-70B-Instruct-Turbo-earth", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo", + "case_id": "earth" + }, + "setup": { + "duration": 0.06113254197407514, + "outcome": "passed" + }, + "call": { + "duration": 1.0720349580515176, + "outcome": "passed" + }, + "teardown": { + "duration": 0.00015966698992997408, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-saturn]", + "lineno": 72, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-saturn]", + "parametrize", + "pytestmark", + "meta-llama/Llama-3.3-70B-Instruct-Turbo-saturn", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo", + "case_id": "saturn" + }, + "setup": { + "duration": 0.006908083101734519, + "outcome": "passed" + }, + "call": { + "duration": 0.5013210839824751, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0005375830223783851, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-earth]", + "lineno": 72, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-earth]", + "parametrize", + "pytestmark", + "meta-llama/Llama-4-Scout-17B-16E-Instruct-earth", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "case_id": "earth" + }, + "setup": { + "duration": 0.006910792086273432, + "outcome": "passed" + }, + "call": { + "duration": 0.5142245410243049, + 
"outcome": "passed" + }, + "teardown": { + "duration": 0.0004069580463692546, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-saturn]", + "lineno": 72, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-saturn]", + "parametrize", + "pytestmark", + "meta-llama/Llama-4-Scout-17B-16E-Instruct-saturn", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "case_id": "saturn" + }, + "setup": { + "duration": 0.009730000048875809, + "outcome": "passed" + }, + "call": { + "duration": 0.40133179200347513, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0004558749496936798, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-earth]", + "lineno": 72, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-earth]", + "parametrize", + "pytestmark", + "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-earth", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", + "case_id": "earth" + }, + "setup": { + "duration": 0.008247417048551142, + "outcome": "passed" + }, + "call": { + "duration": 0.7914331250358373, + "outcome": "passed" + }, + "teardown": { + "duration": 0.00020262505859136581, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-saturn]", + "lineno": 72, + "outcome": "passed", + "keywords": [ + 
"test_chat_non_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-saturn]", + "parametrize", + "pytestmark", + "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-saturn", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", + "case_id": "saturn" + }, + "setup": { + "duration": 0.00922900007572025, + "outcome": "passed" + }, + "call": { + "duration": 1.2742049579974264, + "outcome": "passed" + }, + "teardown": { + "duration": 0.000688415952026844, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-earth]", + "lineno": 91, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-earth]", + "parametrize", + "pytestmark", + "meta-llama/Llama-3.3-70B-Instruct-Turbo-earth", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo", + "case_id": "earth" + }, + "setup": { + "duration": 0.006949124974198639, + "outcome": "passed" + }, + "call": { + "duration": 0.4681705000111833, + "outcome": "passed" + }, + "teardown": { + "duration": 0.00017795804888010025, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-saturn]", + "lineno": 91, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-saturn]", + "parametrize", + "pytestmark", + "meta-llama/Llama-3.3-70B-Instruct-Turbo-saturn", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo", + "case_id": "saturn" + }, + 
"setup": { + "duration": 0.008564374991692603, + "outcome": "passed" + }, + "call": { + "duration": 1.7430362500017509, + "outcome": "passed" + }, + "teardown": { + "duration": 0.00015312491450458765, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-earth]", + "lineno": 91, + "outcome": "failed", + "keywords": [ + "test_chat_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-earth]", + "parametrize", + "pytestmark", + "meta-llama/Llama-4-Scout-17B-16E-Instruct-earth", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "case_id": "earth" + }, + "setup": { + "duration": 0.007404124946333468, + "outcome": "passed" + }, + "call": { + "duration": 0.515926624997519, + "outcome": "failed", + "crash": { + "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", + "lineno": 109, + "message": "IndexError: list index out of range" + }, + "traceback": [ + { + "path": "tests/verifications/openai_api/test_chat_completion.py", + "lineno": 109, + "message": "IndexError" + } + ], + "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'case_id': 'earth', 'input': {'messages': [{'content': 'Which planet do humans live on?', 'role': 'user'}]}, 'output': 'Earth'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_basic\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_basic(request, 
openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:109: IndexError" + }, + "teardown": { + "duration": 0.0002389999572187662, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-saturn]", + "lineno": 91, + "outcome": "failed", + "keywords": [ + "test_chat_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-saturn]", + "parametrize", + "pytestmark", + "meta-llama/Llama-4-Scout-17B-16E-Instruct-saturn", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "case_id": "saturn" + }, + "setup": { + "duration": 0.0071305419551208615, + "outcome": "passed" + }, + "call": { + "duration": 0.37054662499576807, + "outcome": "failed", + "crash": { + "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", + "lineno": 109, + "message": "IndexError: list index out of range" + }, + "traceback": [ + { + "path": "tests/verifications/openai_api/test_chat_completion.py", + "lineno": 109, + "message": "IndexError" + } + ], + "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 
'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'case_id': 'saturn', 'input': {'messages': [{'content': 'Which planet has rings around it with a name starting with letter S?', 'role': 'user'}]}, 'output': 'Saturn'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_basic\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_basic(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:109: IndexError" + }, + "teardown": { + "duration": 0.0006014580139890313, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-earth]", + "lineno": 91, + "outcome": "failed", + "keywords": [ + "test_chat_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-earth]", + "parametrize", + "pytestmark", + "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-earth", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", + "case_id": "earth" + }, + "setup": { + "duration": 0.007489709067158401, + "outcome": "passed" + }, + "call": { + "duration": 0.7767745839664713, + "outcome": "failed", + 
"crash": { + "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", + "lineno": 109, + "message": "IndexError: list index out of range" + }, + "traceback": [ + { + "path": "tests/verifications/openai_api/test_chat_completion.py", + "lineno": 109, + "message": "IndexError" + } + ], + "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'case_id': 'earth', 'input': {'messages': [{'content': 'Which planet do humans live on?', 'role': 'user'}]}, 'output': 'Earth'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_basic\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_basic(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:109: IndexError" + }, + "teardown": { + "duration": 0.00025491707492619753, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-saturn]", + "lineno": 91, + "outcome": "failed", + "keywords": [ + 
"test_chat_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-saturn]", + "parametrize", + "pytestmark", + "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-saturn", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", + "case_id": "saturn" + }, + "setup": { + "duration": 0.006736499955877662, + "outcome": "passed" + }, + "call": { + "duration": 0.43948554201051593, + "outcome": "failed", + "crash": { + "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", + "lineno": 109, + "message": "IndexError: list index out of range" + }, + "traceback": [ + { + "path": "tests/verifications/openai_api/test_chat_completion.py", + "lineno": 109, + "message": "IndexError" + } + ], + "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'case_id': 'saturn', 'input': {'messages': [{'content': 'Which planet has rings around it with a name starting with letter S?', 'role': 'user'}]}, 'output': 'Saturn'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_basic\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_basic(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n 
messages=case[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:109: IndexError" + }, + "teardown": { + "duration": 0.0002264160430058837, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", + "lineno": 115, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_image[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", + "parametrize", + "pytestmark", + "meta-llama/Llama-3.3-70B-Instruct-Turbo-case0", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo", + "case_id": "case0" + }, + "setup": { + "duration": 0.007171708042733371, + "outcome": "passed" + }, + "call": { + "duration": 0.00013554200995713472, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py', 124, 'Skipped: Skipping test_chat_non_streaming_image for model meta-llama/Llama-3.3-70B-Instruct-Turbo on provider together based on config.')" + }, + "teardown": { + "duration": 0.0001235839445143938, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", + "lineno": 115, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_image[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", + "parametrize", + "pytestmark", + "meta-llama/Llama-4-Scout-17B-16E-Instruct-case0", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "case_id": 
"case0" + }, + "setup": { + "duration": 0.008639499894343317, + "outcome": "passed" + }, + "call": { + "duration": 1.4001279999502003, + "outcome": "passed" + }, + "teardown": { + "duration": 0.00014812499284744263, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", + "lineno": 115, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_image[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", + "parametrize", + "pytestmark", + "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", + "case_id": "case0" + }, + "setup": { + "duration": 0.015450250008143485, + "outcome": "passed" + }, + "call": { + "duration": 3.3522649579681456, + "outcome": "passed" + }, + "teardown": { + "duration": 0.00041629199404269457, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", + "lineno": 134, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_image[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", + "parametrize", + "pytestmark", + "meta-llama/Llama-3.3-70B-Instruct-Turbo-case0", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo", + "case_id": "case0" + }, + "setup": { + "duration": 0.007634000037796795, + "outcome": "passed" + }, + "call": { + "duration": 0.0001563339028507471, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py', 143, 'Skipped: Skipping test_chat_streaming_image for model 
meta-llama/Llama-3.3-70B-Instruct-Turbo on provider together based on config.')" + }, + "teardown": { + "duration": 0.0001324999611824751, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", + "lineno": 134, + "outcome": "failed", + "keywords": [ + "test_chat_streaming_image[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", + "parametrize", + "pytestmark", + "meta-llama/Llama-4-Scout-17B-16E-Instruct-case0", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "case_id": "case0" + }, + "setup": { + "duration": 0.007050334010273218, + "outcome": "passed" + }, + "call": { + "duration": 1.7063317500287667, + "outcome": "failed", + "crash": { + "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", + "lineno": 152, + "message": "IndexError: list index out of range" + }, + "traceback": [ + { + "path": "tests/verifications/openai_api/test_chat_completion.py", + "lineno": 152, + "message": "IndexError" + } + ], + "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'input': {'messages': [{'content': [{'text': 'What is in this image?', 'type': 'text'}, {'image_url': {...}, 'type': 'image_url'}], 'role': 'user'}]}, 'output': 'llama'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_image\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_image(request, openai_client, model, 
provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:152: IndexError" + }, + "teardown": { + "duration": 0.0002109999768435955, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", + "lineno": 134, + "outcome": "failed", + "keywords": [ + "test_chat_streaming_image[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", + "parametrize", + "pytestmark", + "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", + "case_id": "case0" + }, + "setup": { + "duration": 0.006729208980686963, + "outcome": "passed" + }, + "call": { + "duration": 3.829621708020568, + "outcome": "failed", + "crash": { + "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", + "lineno": 152, + "message": "IndexError: list index out of range" + }, + "traceback": [ + { + "path": "tests/verifications/openai_api/test_chat_completion.py", + "lineno": 152, + "message": "IndexError" + } + ], + "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 
'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'input': {'messages': [{'content': [{'text': 'What is in this image?', 'type': 'text'}, {'image_url': {...}, 'type': 'image_url'}], 'role': 'user'}]}, 'output': 'llama'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_image\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_image(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:152: IndexError" + }, + "teardown": { + "duration": 0.0002882500411942601, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-calendar]", + "lineno": 158, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-calendar]", + "parametrize", + "pytestmark", + "meta-llama/Llama-3.3-70B-Instruct-Turbo-calendar", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo", + "case_id": "calendar" + }, + "setup": { + "duration": 0.007713916013017297, + "outcome": "passed" + }, + "call": { + "duration": 2.48285808309447, + 
"outcome": "passed" + }, + "teardown": { + "duration": 0.00020350003615021706, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-math]", + "lineno": 158, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-math]", + "parametrize", + "pytestmark", + "meta-llama/Llama-3.3-70B-Instruct-Turbo-math", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo", + "case_id": "math" + }, + "setup": { + "duration": 0.010098082944750786, + "outcome": "passed" + }, + "call": { + "duration": 1.6994713749736547, + "outcome": "passed" + }, + "teardown": { + "duration": 0.00014512497000396252, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-calendar]", + "lineno": 158, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-calendar]", + "parametrize", + "pytestmark", + "meta-llama/Llama-4-Scout-17B-16E-Instruct-calendar", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "case_id": "calendar" + }, + "setup": { + "duration": 0.006934792036190629, + "outcome": "passed" + }, + "call": { + "duration": 1.277176082949154, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0004985419800505042, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-math]", + "lineno": 158, + "outcome": "passed", + 
"keywords": [ + "test_chat_non_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-math]", + "parametrize", + "pytestmark", + "meta-llama/Llama-4-Scout-17B-16E-Instruct-math", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "case_id": "math" + }, + "setup": { + "duration": 0.012558708898723125, + "outcome": "passed" + }, + "call": { + "duration": 2.442075416096486, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0003505420172587037, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-calendar]", + "lineno": 158, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-calendar]", + "parametrize", + "pytestmark", + "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-calendar", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", + "case_id": "calendar" + }, + "setup": { + "duration": 0.012642999994568527, + "outcome": "passed" + }, + "call": { + "duration": 0.9305703329155222, + "outcome": "passed" + }, + "teardown": { + "duration": 0.00016004196368157864, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-math]", + "lineno": 158, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-math]", + "parametrize", + "pytestmark", + "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-math", + "test_chat_completion.py", + "openai_api", + "verifications", + 
"tests", + "llama-stack", + "" + ], + "metadata": { + "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", + "case_id": "math" + }, + "setup": { + "duration": 0.008792415959760547, + "outcome": "passed" + }, + "call": { + "duration": 2.194098167004995, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0003667499404400587, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-calendar]", + "lineno": 181, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-calendar]", + "parametrize", + "pytestmark", + "meta-llama/Llama-3.3-70B-Instruct-Turbo-calendar", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo", + "case_id": "calendar" + }, + "setup": { + "duration": 0.01219504198525101, + "outcome": "passed" + }, + "call": { + "duration": 2.045097667025402, + "outcome": "passed" + }, + "teardown": { + "duration": 0.00029958400409668684, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-math]", + "lineno": 181, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-math]", + "parametrize", + "pytestmark", + "meta-llama/Llama-3.3-70B-Instruct-Turbo-math", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo", + "case_id": "math" + }, + "setup": { + "duration": 0.014203459024429321, + "outcome": "passed" + }, + "call": { + "duration": 1.3079068749211729, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0001914579188451171, + 
"outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-calendar]", + "lineno": 181, + "outcome": "failed", + "keywords": [ + "test_chat_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-calendar]", + "parametrize", + "pytestmark", + "meta-llama/Llama-4-Scout-17B-16E-Instruct-calendar", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "case_id": "calendar" + }, + "setup": { + "duration": 0.04714570892974734, + "outcome": "passed" + }, + "call": { + "duration": 0.44743770791683346, + "outcome": "failed", + "crash": { + "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", + "lineno": 200, + "message": "IndexError: list index out of range" + }, + "traceback": [ + { + "path": "tests/verifications/openai_api/test_chat_completion.py", + "lineno": 200, + "message": "IndexError" + } + ], + "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'case_id': 'calendar', 'input': {'messages': [{'content': 'Extract the event information.', 'role': 'system'}, {'cont...articipants'], 'title': 'CalendarEvent', 'type': 'object'}}, 'type': 'json_schema'}}, 'output': 'valid_calendar_event'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_structured_output\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_structured_output(request, openai_client, model, provider, 
verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n response_format=case[\"input\"][\"response_format\"],\n stream=True,\n )\n maybe_json_content = \"\"\n for chunk in response:\n> maybe_json_content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:200: IndexError" + }, + "teardown": { + "duration": 0.00022199994418770075, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-math]", + "lineno": 181, + "outcome": "failed", + "keywords": [ + "test_chat_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-math]", + "parametrize", + "pytestmark", + "meta-llama/Llama-4-Scout-17B-16E-Instruct-math", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "case_id": "math" + }, + "setup": { + "duration": 0.012237709015607834, + "outcome": "passed" + }, + "call": { + "duration": 3.180020791012794, + "outcome": "failed", + "crash": { + "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", + "lineno": 200, + "message": "IndexError: list index out of range" + }, + "traceback": [ + { + "path": "tests/verifications/openai_api/test_chat_completion.py", + "lineno": 200, + "message": "IndexError" + } + ], + "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': 
{'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'case_id': 'math', 'input': {'messages': [{'content': 'You are a helpful math tutor. Guide the user through the solut... ['steps', 'final_answer'], 'title': 'MathReasoning', ...}}, 'type': 'json_schema'}}, 'output': 'valid_math_reasoning'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_structured_output\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_structured_output(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n response_format=case[\"input\"][\"response_format\"],\n stream=True,\n )\n maybe_json_content = \"\"\n for chunk in response:\n> maybe_json_content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:200: IndexError" + }, + "teardown": { + "duration": 0.000273333047516644, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-calendar]", + "lineno": 181, + "outcome": "failed", + "keywords": [ + "test_chat_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-calendar]", + "parametrize", + "pytestmark", + "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-calendar", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + 
"llama-stack", + "" + ], + "metadata": { + "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", + "case_id": "calendar" + }, + "setup": { + "duration": 0.013312208000570536, + "outcome": "passed" + }, + "call": { + "duration": 0.4110311249969527, + "outcome": "failed", + "crash": { + "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", + "lineno": 200, + "message": "IndexError: list index out of range" + }, + "traceback": [ + { + "path": "tests/verifications/openai_api/test_chat_completion.py", + "lineno": 200, + "message": "IndexError" + } + ], + "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'case_id': 'calendar', 'input': {'messages': [{'content': 'Extract the event information.', 'role': 'system'}, {'cont...articipants'], 'title': 'CalendarEvent', 'type': 'object'}}, 'type': 'json_schema'}}, 'output': 'valid_calendar_event'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_structured_output\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_structured_output(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n response_format=case[\"input\"][\"response_format\"],\n stream=True,\n )\n maybe_json_content = \"\"\n for chunk in response:\n> 
maybe_json_content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:200: IndexError" + }, + "teardown": { + "duration": 0.00022975006140768528, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-math]", + "lineno": 181, + "outcome": "failed", + "keywords": [ + "test_chat_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-math]", + "parametrize", + "pytestmark", + "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-math", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", + "case_id": "math" + }, + "setup": { + "duration": 0.006676917080767453, + "outcome": "passed" + }, + "call": { + "duration": 2.316411833046004, + "outcome": "failed", + "crash": { + "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", + "lineno": 200, + "message": "IndexError: list index out of range" + }, + "traceback": [ + { + "path": "tests/verifications/openai_api/test_chat_completion.py", + "lineno": 200, + "message": "IndexError" + } + ], + "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'case_id': 'math', 'input': {'messages': [{'content': 'You are a helpful math tutor. Guide the user through the solut... 
['steps', 'final_answer'], 'title': 'MathReasoning', ...}}, 'type': 'json_schema'}}, 'output': 'valid_math_reasoning'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_structured_output\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_structured_output(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n response_format=case[\"input\"][\"response_format\"],\n stream=True,\n )\n maybe_json_content = \"\"\n for chunk in response:\n> maybe_json_content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:200: IndexError" + }, + "teardown": { + "duration": 0.000245374976657331, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", + "lineno": 203, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", + "parametrize", + "pytestmark", + "meta-llama/Llama-3.3-70B-Instruct-Turbo-case0", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo", + "case_id": "case0" + }, + "setup": { + "duration": 0.007064500008709729, + "outcome": "passed" + }, + "call": { + "duration": 0.606806542025879, + "outcome": "passed" + }, + "teardown": { + "duration": 0.00046320806723088026, + "outcome": "passed" + } + }, + { + "nodeid": 
"tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", + "lineno": 203, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", + "parametrize", + "pytestmark", + "meta-llama/Llama-4-Scout-17B-16E-Instruct-case0", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "case_id": "case0" + }, + "setup": { + "duration": 0.009071375010535121, + "outcome": "passed" + }, + "call": { + "duration": 0.41908070899080485, + "outcome": "passed" + }, + "teardown": { + "duration": 0.00026074994821101427, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", + "lineno": 203, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", + "parametrize", + "pytestmark", + "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", + "case_id": "case0" + }, + "setup": { + "duration": 0.0068333749659359455, + "outcome": "passed" + }, + "call": { + "duration": 0.8904451669659466, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0005833340110257268, + "outcome": "passed" + } + } + ] +} From de6ec5803e18e336c936c5d5f8d9d8a9302b14bf Mon Sep 17 00:00:00 2001 From: Francisco Arceo Date: Thu, 10 Apr 2025 11:37:31 -0600 Subject: [PATCH 04/10] fix: Fix linter failures from #1921 (#1932) # What does this PR do? 
fix: Fix linter failures from #1921 Signed-off-by: Francisco Javier Arceo --- tests/verifications/conf/cerebras.yaml | 2 +- tests/verifications/conf/fireworks.yaml | 2 +- tests/verifications/conf/groq.yaml | 2 +- tests/verifications/conf/openai.yaml | 2 +- tests/verifications/conf/together.yaml | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/verifications/conf/cerebras.yaml b/tests/verifications/conf/cerebras.yaml index 32a60e766..5b19b4916 100644 --- a/tests/verifications/conf/cerebras.yaml +++ b/tests/verifications/conf/cerebras.yaml @@ -7,4 +7,4 @@ model_display_names: test_exclusions: llama-3.3-70b: - test_chat_non_streaming_image - - test_chat_streaming_image \ No newline at end of file + - test_chat_streaming_image diff --git a/tests/verifications/conf/fireworks.yaml b/tests/verifications/conf/fireworks.yaml index 30d6e4d75..f55b707ba 100644 --- a/tests/verifications/conf/fireworks.yaml +++ b/tests/verifications/conf/fireworks.yaml @@ -11,4 +11,4 @@ model_display_names: test_exclusions: accounts/fireworks/models/llama-v3p3-70b-instruct: - test_chat_non_streaming_image - - test_chat_streaming_image \ No newline at end of file + - test_chat_streaming_image diff --git a/tests/verifications/conf/groq.yaml b/tests/verifications/conf/groq.yaml index ef31a66e5..7871036dc 100644 --- a/tests/verifications/conf/groq.yaml +++ b/tests/verifications/conf/groq.yaml @@ -11,4 +11,4 @@ model_display_names: test_exclusions: llama-3.3-70b-versatile: - test_chat_non_streaming_image - - test_chat_streaming_image \ No newline at end of file + - test_chat_streaming_image diff --git a/tests/verifications/conf/openai.yaml b/tests/verifications/conf/openai.yaml index 89ae698f3..95a6259f7 100644 --- a/tests/verifications/conf/openai.yaml +++ b/tests/verifications/conf/openai.yaml @@ -6,4 +6,4 @@ models: model_display_names: gpt-4o: gpt-4o gpt-4o-mini: gpt-4o-mini -test_exclusions: {} \ No newline at end of file +test_exclusions: {} diff --git 
a/tests/verifications/conf/together.yaml b/tests/verifications/conf/together.yaml index 80e86fa77..258616662 100644 --- a/tests/verifications/conf/together.yaml +++ b/tests/verifications/conf/together.yaml @@ -11,4 +11,4 @@ model_display_names: test_exclusions: meta-llama/Llama-3.3-70B-Instruct-Turbo: - test_chat_non_streaming_image - - test_chat_streaming_image \ No newline at end of file + - test_chat_streaming_image From 79fc81f78f737057a4af3567fa533db20774513a Mon Sep 17 00:00:00 2001 From: Ilya Kolchinsky <58424190+ilya-kolchinsky@users.noreply.github.com> Date: Thu, 10 Apr 2025 22:38:31 +0200 Subject: [PATCH 05/10] fix: Playground RAG page errors (#1928) # What does this PR do? This PR fixes two issues with the RAG page of the Playground UI: 1. When the user modifies a configurable setting via a widget (e.g., system prompt, temperature, etc.), the agent is not recreated. Thus, the change has no effect and the user gets no indication of that. 2. After the first issue is fixed, it becomes possible to recreate the agent mid-conversation or even mid-generation. To mitigate this, widgets related to agent configuration are now disabled when a conversation is in progress (i.e., when the chat is non-empty). They are automatically enabled again when the user resets the chat history. ## Test Plan - Launch the Playground and go to the RAG page; - Select the vector DB ID; - Send a message to the agent via the chat; - The widgets in charge of the agent parameters will become disabled at this point; - Send a second message asking the model about the content of the first message; - The reply will indicate that the two messages were sent over the same session, that is, the agent was not recreated; - Click the 'Clear Chat' button; - All widgets will be enabled and a new agent will be created (which can be validated by sending another message). 
--- .../distribution/ui/page/playground/rag.py | 59 ++++++++++++++----- 1 file changed, 44 insertions(+), 15 deletions(-) diff --git a/llama_stack/distribution/ui/page/playground/rag.py b/llama_stack/distribution/ui/page/playground/rag.py index bb31bd2a7..be222f840 100644 --- a/llama_stack/distribution/ui/page/playground/rag.py +++ b/llama_stack/distribution/ui/page/playground/rag.py @@ -16,6 +16,13 @@ from llama_stack.distribution.ui.modules.utils import data_url_from_file def rag_chat_page(): st.title("🦙 RAG") + def reset_agent_and_chat(): + st.session_state.clear() + st.cache_resource.clear() + + def should_disable_input(): + return "messages" in st.session_state and len(st.session_state.messages) > 0 + with st.sidebar: # File/Directory Upload Section st.subheader("Upload Documents") @@ -69,21 +76,27 @@ def rag_chat_page(): vector_dbs = llama_stack_api.client.vector_dbs.list() vector_dbs = [vector_db.identifier for vector_db in vector_dbs] selected_vector_dbs = st.multiselect( - "Select Vector Databases", - vector_dbs, + label="Select Vector Databases", + options=vector_dbs, + on_change=reset_agent_and_chat, + disabled=should_disable_input(), ) available_models = llama_stack_api.client.models.list() available_models = [model.identifier for model in available_models if model.model_type == "llm"] selected_model = st.selectbox( - "Choose a model", - available_models, + label="Choose a model", + options=available_models, index=0, + on_change=reset_agent_and_chat, + disabled=should_disable_input(), ) system_prompt = st.text_area( "System Prompt", value="You are a helpful assistant. ", help="Initial instructions given to the AI to set its behavior and context", + on_change=reset_agent_and_chat, + disabled=should_disable_input(), ) temperature = st.slider( "Temperature", @@ -92,6 +105,8 @@ def rag_chat_page(): value=0.0, step=0.1, help="Controls the randomness of the response. 
Higher values make the output more creative and unexpected, lower values make it more conservative and predictable", + on_change=reset_agent_and_chat, + disabled=should_disable_input(), ) top_p = st.slider( @@ -100,12 +115,14 @@ def rag_chat_page(): max_value=1.0, value=0.95, step=0.1, + on_change=reset_agent_and_chat, + disabled=should_disable_input(), ) # Add clear chat button to sidebar if st.button("Clear Chat", use_container_width=True): - st.session_state.clear() - st.cache_resource.clear() + reset_agent_and_chat() + st.rerun() # Chat Interface if "messages" not in st.session_state: @@ -151,15 +168,8 @@ def rag_chat_page(): session_id = st.session_state["agent_session_id"] - # Chat input - if prompt := st.chat_input("Ask a question about your documents"): - # Add user message to chat history - st.session_state.messages.append({"role": "user", "content": prompt}) - - # Display user message - with st.chat_message("user"): - st.markdown(prompt) - + def process_prompt(prompt): + # Send the prompt to the agent response = agent.create_turn( messages=[ { @@ -188,5 +198,24 @@ def rag_chat_page(): st.session_state.messages.append({"role": "assistant", "content": full_response}) + # Chat input + if prompt := st.chat_input("Ask a question about your documents"): + # Add user message to chat history + st.session_state.messages.append({"role": "user", "content": prompt}) + + # Display user message + with st.chat_message("user"): + st.markdown(prompt) + + # store the prompt to process it after page refresh + st.session_state.prompt = prompt + + # force page refresh to disable the settings widgets + st.rerun() + + if "prompt" in st.session_state and st.session_state.prompt is not None: + process_prompt(st.session_state.prompt) + st.session_state.prompt = None + rag_chat_page() From edd9aaac3b22fe91e8f45e7c6bc6e3d9f97cb250 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Han?= Date: Thu, 10 Apr 2025 22:39:20 +0200 Subject: [PATCH 06/10] fix: use torchao 0.8.0 for 
inference (#1925) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # What does this PR do? While building the "experimental-post-training" distribution, we encountered a version conflict between torchao with inference requiring version 0.5.0 and training currently depending on version 0.8.0. Resolves this error: ``` × No solution found when resolving dependencies: ╰─▶ Because you require torchao==0.5.0 and torchao==0.8.0, we can conclude that your requirements are unsatisfiable. ERROR 2025-04-10 10:41:22,597 llama_stack.distribution.build:128 uncategorized: Failed to build target test with return code 1 ``` Signed-off-by: Sébastien Han --- llama_stack/providers/registry/inference.py | 2 +- llama_stack/templates/dependencies.json | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/llama_stack/providers/registry/inference.py b/llama_stack/providers/registry/inference.py index aabb3bbdf..3c54cabcf 100644 --- a/llama_stack/providers/registry/inference.py +++ b/llama_stack/providers/registry/inference.py @@ -24,7 +24,7 @@ META_REFERENCE_DEPS = [ "zmq", "lm-format-enforcer", "sentence-transformers", - "torchao==0.5.0", + "torchao==0.8.0", "fbgemm-gpu-genai==1.1.2", ] diff --git a/llama_stack/templates/dependencies.json b/llama_stack/templates/dependencies.json index 053d6ef8a..b96191752 100644 --- a/llama_stack/templates/dependencies.json +++ b/llama_stack/templates/dependencies.json @@ -381,7 +381,7 @@ "sentence-transformers", "sentencepiece", "torch", - "torchao==0.5.0", + "torchao==0.8.0", "torchvision", "tqdm", "transformers", From 49955a06b10814058de9cab85331dd76433a31bd Mon Sep 17 00:00:00 2001 From: Francisco Arceo Date: Thu, 10 Apr 2025 15:09:00 -0600 Subject: [PATCH 07/10] docs: Update quickstart page to structure things a little more for the novices (#1873) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # What does this PR do? 
Another doc enhancement for https://github.com/meta-llama/llama-stack/issues/1818 Summary of changes: - `docs/source/distributions/configuration.md` - Updated dropdown title to include a more user-friendly description. - `docs/_static/css/my_theme.css` - Added styling for `<h3>` elements to set a normal font weight. - `docs/source/distributions/starting_llama_stack_server.md` - Changed section headers from bold text to proper markdown headers (e.g., `##`). - Improved descriptions for starting Llama Stack server using different methods (library, container, conda, Kubernetes). - Enhanced clarity and structure by converting instructions into markdown headers and improved formatting. - `docs/source/getting_started/index.md` - Major restructuring of the "Quick Start" guide: - Added new introductory section for Llama Stack and its capabilities. - Reorganized steps into clearer subsections with proper markdown headers. - Replaced dropdowns with tabbed content for OS-specific instructions. - Added detailed steps for setting up and running the Llama Stack server and client. - Introduced new sections for running basic inference and building agents. - Enhanced readability and visual structure with emojis, admonitions, and examples. - `docs/source/providers/index.md` - Updated the list of LLM inference providers to include "Ollama." - Expanded the list of vector databases to include "SQLite-Vec." Let me know if you need further details! ## Test Plan Renders locally, included screenshot.
# Documentation For https://github.com/meta-llama/llama-stack/issues/1818 Screenshot 2025-04-09 at 11 07 12 AM --------- Signed-off-by: Francisco Javier Arceo --- docs/_static/css/my_theme.css | 3 + docs/source/distributions/configuration.md | 2 +- .../starting_llama_stack_server.md | 8 +- .../getting_started/detailed_tutorial.md | 545 ++++++++++++++++++ docs/source/getting_started/index.md | 497 +++------------- docs/source/index.md | 3 +- docs/source/providers/index.md | 4 +- 7 files changed, 633 insertions(+), 429 deletions(-) create mode 100644 docs/source/getting_started/detailed_tutorial.md diff --git a/docs/_static/css/my_theme.css b/docs/_static/css/my_theme.css index 470452661..6f82f6358 100644 --- a/docs/_static/css/my_theme.css +++ b/docs/_static/css/my_theme.css @@ -17,6 +17,9 @@ display: none; } +h3 { + font-weight: normal; +} html[data-theme="dark"] .rst-content div[class^="highlight"] { background-color: #0b0b0b; } diff --git a/docs/source/distributions/configuration.md b/docs/source/distributions/configuration.md index 6cd5e161f..c06632991 100644 --- a/docs/source/distributions/configuration.md +++ b/docs/source/distributions/configuration.md @@ -2,7 +2,7 @@ The Llama Stack runtime configuration is specified as a YAML file. Here is a simplified version of an example configuration file for the Ollama distribution: -```{dropdown} Sample Configuration File +```{dropdown} 👋 Click here for a Sample Configuration File ```yaml version: 2 diff --git a/docs/source/distributions/starting_llama_stack_server.md b/docs/source/distributions/starting_llama_stack_server.md index 9be2e9ec5..f74de6d48 100644 --- a/docs/source/distributions/starting_llama_stack_server.md +++ b/docs/source/distributions/starting_llama_stack_server.md @@ -2,22 +2,22 @@ You can run a Llama Stack server in one of the following ways: -**As a Library**: +## As a Library: This is the simplest way to get started. Using Llama Stack as a library means you do not need to start a server. 
This is especially useful when you are not running inference locally and relying on an external inference service (eg. fireworks, together, groq, etc.) See [Using Llama Stack as a Library](importing_as_library) -**Container**: +## Container: Another simple way to start interacting with Llama Stack is to just spin up a container (via Docker or Podman) which is pre-built with all the providers you need. We provide a number of pre-built images so you can start a Llama Stack server instantly. You can also build your own custom container. Which distribution to choose depends on the hardware you have. See [Selection of a Distribution](selection) for more details. -**Conda**: +## Conda: If you have a custom or an advanced setup or you are developing on Llama Stack you can also build a custom Llama Stack server. Using `llama stack build` and `llama stack run` you can build/run a custom Llama Stack server containing the exact combination of providers you wish. We have also provided various templates to make getting started easier. See [Building a Custom Distribution](building_distro) for more details. -**Kubernetes**: +## Kubernetes: If you have built a container image and want to deploy it in a Kubernetes cluster instead of starting the Llama Stack server locally. See [Kubernetes Deployment Guide](kubernetes_deployment) for more details. diff --git a/docs/source/getting_started/detailed_tutorial.md b/docs/source/getting_started/detailed_tutorial.md new file mode 100644 index 000000000..65582e8d8 --- /dev/null +++ b/docs/source/getting_started/detailed_tutorial.md @@ -0,0 +1,545 @@ +# Detailed Tutorial + +In this guide, we'll walk through how you can use the Llama Stack (server and client SDK) to test a simple agent. +A Llama Stack agent is a simple integrated system that can perform tasks by combining a Llama model for reasoning with +tools (e.g., RAG, web search, code execution, etc.) for taking actions. +In Llama Stack, we provide a server exposing multiple APIs. 
These APIs are backed by implementations from different providers. + +Llama Stack is a stateful service with REST APIs to support seamless transition of AI applications across different environments. The server can be run in a variety of ways, including as a standalone binary, Docker container, or hosted service. You can build and test using a local server first and deploy to a hosted endpoint for production. + +In this guide, we'll walk through how to build a RAG agent locally using Llama Stack with [Ollama](https://ollama.com/) +as the inference [provider](../providers/index.md#inference) for a Llama Model. + +## Step 1: Installation and Setup + +Install Ollama by following the instructions on the [Ollama website](https://ollama.com/download), then +download the Llama 3.2 3B model, and then start the Ollama service. +```bash +ollama pull llama3.2:3b +ollama run llama3.2:3b --keepalive 60m +``` + +Install [uv](https://docs.astral.sh/uv/) to set up your virtual environment. + +::::{tab-set} + +:::{tab-item} macOS and Linux +Use `curl` to download the script and execute it with `sh`: +```console +curl -LsSf https://astral.sh/uv/install.sh | sh +``` +::: + +:::{tab-item} Windows +Use `irm` to download the script and execute it with `iex`: + +```console +powershell -ExecutionPolicy ByPass -c "irm https://astral.sh/uv/install.ps1 | iex" +``` +::: +:::: + +Set up your virtual environment. + +```bash +uv venv --python 3.10 +source .venv/bin/activate +``` +## Step 2: Run Llama Stack +Llama Stack is a server that exposes multiple APIs; you connect to it using the Llama Stack client SDK. + +::::{tab-set} + +:::{tab-item} Using `venv` +You can use Python to build and run the Llama Stack server, which is useful for testing and development. + +Llama Stack uses a [YAML configuration file](../distributions/configuration.md) to specify the stack setup, +which defines the providers and their settings. +Now let's build and run the Llama Stack config for Ollama.
+ +```bash +INFERENCE_MODEL=llama3.2:3b llama stack build --template ollama --image-type venv --run +``` +::: +:::{tab-item} Using `conda` +You can use Python to build and run the Llama Stack server, which is useful for testing and development. + +Llama Stack uses a [YAML configuration file](../distributions/configuration.md) to specify the stack setup, +which defines the providers and their settings. +Now let's build and run the Llama Stack config for Ollama. + +```bash +INFERENCE_MODEL=llama3.2:3b llama stack build --template ollama --image-type conda --run +``` +::: +:::{tab-item} Using a Container +You can use a container image to run the Llama Stack server. We provide several container images for the server +component that works with different inference providers out of the box. For this guide, we will use +`llamastack/distribution-ollama` as the container image. If you'd like to build your own image or customize the +configurations, please check out [this guide](../references/index.md). + +First, let's set up some environment variables and create a local directory to mount into the container’s file system. +```bash +export INFERENCE_MODEL="meta-llama/Llama-3.2-3B-Instruct" +export LLAMA_STACK_PORT=8321 +mkdir -p ~/.llama +``` +Then start the server using the container tool of your choice. For example, if you are running Docker you can use the +following command: +```bash +docker run -it \ + --pull always \ + -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ + -v ~/.llama:/root/.llama \ + llamastack/distribution-ollama \ + --port $LLAMA_STACK_PORT \ + --env INFERENCE_MODEL=$INFERENCE_MODEL \ + --env OLLAMA_URL=http://host.docker.internal:11434 +``` +Note: to start the container with Podman, you can do the same but replace `docker` at the start of the command with +`podman`. If you are using `podman` older than `4.7.0`, please also replace `host.docker.internal` in the `OLLAMA_URL` +with `host.containers.internal`.
+ +The configuration YAML for the Ollama distribution is available at `distributions/ollama/run.yaml`. + +````{tip} + +Docker containers run in their own isolated network namespaces on Linux. To allow the container to communicate with services running on the host via `localhost`, you need `--network=host`. This makes the container use the host’s network directly so it can connect to Ollama running on `localhost:11434`. + +Linux users having issues running the above command should instead try the following: +```bash +docker run -it \ + --pull always \ + -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ + -v ~/.llama:/root/.llama \ + --network=host \ + llamastack/distribution-ollama \ + --port $LLAMA_STACK_PORT \ + --env INFERENCE_MODEL=$INFERENCE_MODEL \ + --env OLLAMA_URL=http://localhost:11434 +``` +```` +::: +:::: +You will see output like below: +``` +INFO: Application startup complete. +INFO: Uvicorn running on http://['::', '0.0.0.0']:8321 (Press CTRL+C to quit) +``` + +Now you can use the Llama Stack client to run inference and build agents! + +You can reuse the server setup or use the [Llama Stack Client](https://github.com/meta-llama/llama-stack-client-python/). +Note that the client package is already included in the `llama-stack` package. + +## Step 3: Run Client CLI + +Open a new terminal and navigate to the same directory you started the server from. Then set up a new or activate your +existing server virtual environment.
+ +::::{tab-set} + +:::{tab-item} Reuse Server `venv` +```bash +# The client is included in the llama-stack package so we just activate the server venv +source .venv/bin/activate +``` +::: + +:::{tab-item} Install with `venv` +```bash +uv venv client --python 3.10 +source client/bin/activate +pip install llama-stack-client +``` +::: + +:::{tab-item} Install with `conda` +```bash +yes | conda create -n stack-client python=3.10 +conda activate stack-client +pip install llama-stack-client +``` +::: +:::: + +Now let's use the `llama-stack-client` [CLI](../references/llama_stack_client_cli_reference.md) to check the +connectivity to the server. + +```bash +llama-stack-client configure --endpoint http://localhost:8321 --api-key none +``` +You will see the below: +``` +Done! You can now use the Llama Stack Client CLI with endpoint http://localhost:8321 +``` + +#### iii. List Available Models +List the models +``` +llama-stack-client models list +Available Models + +┏━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓ +┃ model_type ┃ identifier ┃ provider_resource_id ┃ metadata ┃ provider_id ┃ +┡━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩ +│ embedding │ all-MiniLM-L6-v2 │ all-minilm:latest │ {'embedding_dimension': 384.0} │ ollama │ +├─────────────────┼─────────────────────────────────────┼─────────────────────────────────────┼───────────────────────────────────────────┼─────────────────┤ +│ llm │ llama3.2:3b │ llama3.2:3b │ │ ollama │ +└─────────────────┴─────────────────────────────────────┴─────────────────────────────────────┴───────────────────────────────────────────┴─────────────────┘ + +Total models: 2 + +``` + +## Step 4: Run the Demos + +Note that these demos show the [Python Client SDK](../references/python_sdk_reference/index.md). 
+Other SDKs are also available; please refer to the [Client SDK](../index.md#client-sdks) list for the complete options. + +::::{tab-set} + +:::{tab-item} Basic Inference with the CLI +You can test basic Llama inference completion using the CLI. + +```bash +llama-stack-client inference chat-completion --message "tell me a joke" +``` +Sample output: +```python +ChatCompletionResponse( + completion_message=CompletionMessage( + content="Here's one:\n\nWhat do you call a fake noodle?\n\nAn impasta!", + role="assistant", + stop_reason="end_of_turn", + tool_calls=[], + ), + logprobs=None, + metrics=[ + Metric(metric="prompt_tokens", value=14.0, unit=None), + Metric(metric="completion_tokens", value=27.0, unit=None), + Metric(metric="total_tokens", value=41.0, unit=None), + ], +) +``` +::: + +:::{tab-item} Basic Inference with a Script +Alternatively, you can run inference using the Llama Stack client SDK. + +### i. Create the Script +Create a file `inference.py` and add the following code: +```python +from llama_stack_client import LlamaStackClient + +client = LlamaStackClient(base_url="http://localhost:8321") + +# List available models +models = client.models.list() + +# Select the first LLM +llm = next(m for m in models if m.model_type == "llm") +model_id = llm.identifier + +print("Model:", model_id) + +response = client.inference.chat_completion( + model_id=model_id, + messages=[ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "Write a haiku about coding"}, + ], +) +print(response.completion_message.content) +``` + +### ii. Run the Script +Let's run the script using `uv` +```bash +uv run python inference.py +``` +Which will output: +``` +Model: llama3.2:3b +Here is a haiku about coding: + +Lines of code unfold +Logic flows through digital night +Beauty in the bits +``` +::: + +:::{tab-item} Build a Simple Agent +Now we can move beyond simple inference and build an agent that can perform tasks using the Llama Stack server.
+### i. Create the Script +Create a file `agent.py` and add the following code: + +```python +from llama_stack_client import LlamaStackClient +from llama_stack_client import Agent, AgentEventLogger +from rich.pretty import pprint +import uuid + +client = LlamaStackClient(base_url=f"http://localhost:8321") + +models = client.models.list() +llm = next(m for m in models if m.model_type == "llm") +model_id = llm.identifier + +agent = Agent(client, model=model_id, instructions="You are a helpful assistant.") + +s_id = agent.create_session(session_name=f"s{uuid.uuid4().hex}") + +print("Non-streaming ...") +response = agent.create_turn( + messages=[{"role": "user", "content": "Who are you?"}], + session_id=s_id, + stream=False, +) +print("agent>", response.output_message.content) + +print("Streaming ...") +stream = agent.create_turn( + messages=[{"role": "user", "content": "Who are you?"}], session_id=s_id, stream=True +) +for event in stream: + pprint(event) + +print("Streaming with print helper...") +stream = agent.create_turn( + messages=[{"role": "user", "content": "Who are you?"}], session_id=s_id, stream=True +) +for event in AgentEventLogger().log(stream): + event.print() +``` +### ii. Run the Script +Let's run the script using `uv` +```bash +uv run python agent.py +``` + +```{dropdown} 👋 Click here to see the sample output + Non-streaming ... + agent> I'm an artificial intelligence designed to assist and communicate with users like you. I don't have a personal identity, but I'm here to provide information, answer questions, and help with tasks to the best of my abilities. 
+ + I can be used for a wide range of purposes, such as: + + * Providing definitions and explanations + * Offering suggestions and ideas + * Helping with language translation + * Assisting with writing and proofreading + * Generating text or responses to questions + * Playing simple games or chatting about topics of interest + + I'm constantly learning and improving my abilities, so feel free to ask me anything, and I'll do my best to help! + + Streaming ... + AgentTurnResponseStreamChunk( + │ event=TurnResponseEvent( + │ │ payload=AgentTurnResponseStepStartPayload( + │ │ │ event_type='step_start', + │ │ │ step_id='69831607-fa75-424a-949b-e2049e3129d1', + │ │ │ step_type='inference', + │ │ │ metadata={} + │ │ ) + │ ) + ) + AgentTurnResponseStreamChunk( + │ event=TurnResponseEvent( + │ │ payload=AgentTurnResponseStepProgressPayload( + │ │ │ delta=TextDelta(text='As', type='text'), + │ │ │ event_type='step_progress', + │ │ │ step_id='69831607-fa75-424a-949b-e2049e3129d1', + │ │ │ step_type='inference' + │ │ ) + │ ) + ) + AgentTurnResponseStreamChunk( + │ event=TurnResponseEvent( + │ │ payload=AgentTurnResponseStepProgressPayload( + │ │ │ delta=TextDelta(text=' a', type='text'), + │ │ │ event_type='step_progress', + │ │ │ step_id='69831607-fa75-424a-949b-e2049e3129d1', + │ │ │ step_type='inference' + │ │ ) + │ ) + ) + ... + AgentTurnResponseStreamChunk( + │ event=TurnResponseEvent( + │ │ payload=AgentTurnResponseStepCompletePayload( + │ │ │ event_type='step_complete', + │ │ │ step_details=InferenceStep( + │ │ │ │ api_model_response=CompletionMessage( + │ │ │ │ │ content='As a conversational AI, I don\'t have a personal identity in the classical sense. I exist as a program running on computer servers, designed to process and respond to text-based inputs.\n\nI\'m an instance of a type of artificial intelligence called a "language model," which is trained on vast amounts of text data to generate human-like responses. 
My primary function is to understand and respond to natural language inputs, like our conversation right now.\n\nThink of me as a virtual assistant, a chatbot, or a conversational interface – I\'m here to provide information, answer questions, and engage in conversation to the best of my abilities. I don\'t have feelings, emotions, or consciousness like humans do, but I\'m designed to simulate human-like interactions to make our conversations feel more natural and helpful.\n\nSo, that\'s me in a nutshell! What can I help you with today?', + │ │ │ │ │ role='assistant', + │ │ │ │ │ stop_reason='end_of_turn', + │ │ │ │ │ tool_calls=[] + │ │ │ │ ), + │ │ │ │ step_id='69831607-fa75-424a-949b-e2049e3129d1', + │ │ │ │ step_type='inference', + │ │ │ │ turn_id='8b360202-f7cb-4786-baa9-166a1b46e2ca', + │ │ │ │ completed_at=datetime.datetime(2025, 4, 3, 1, 15, 21, 716174, tzinfo=TzInfo(UTC)), + │ │ │ │ started_at=datetime.datetime(2025, 4, 3, 1, 15, 14, 28823, tzinfo=TzInfo(UTC)) + │ │ │ ), + │ │ │ step_id='69831607-fa75-424a-949b-e2049e3129d1', + │ │ │ step_type='inference' + │ │ ) + │ ) + ) + AgentTurnResponseStreamChunk( + │ event=TurnResponseEvent( + │ │ payload=AgentTurnResponseTurnCompletePayload( + │ │ │ event_type='turn_complete', + │ │ │ turn=Turn( + │ │ │ │ input_messages=[UserMessage(content='Who are you?', role='user', context=None)], + │ │ │ │ output_message=CompletionMessage( + │ │ │ │ │ content='As a conversational AI, I don\'t have a personal identity in the classical sense. I exist as a program running on computer servers, designed to process and respond to text-based inputs.\n\nI\'m an instance of a type of artificial intelligence called a "language model," which is trained on vast amounts of text data to generate human-like responses. 
My primary function is to understand and respond to natural language inputs, like our conversation right now.\n\nThink of me as a virtual assistant, a chatbot, or a conversational interface – I\'m here to provide information, answer questions, and engage in conversation to the best of my abilities. I don\'t have feelings, emotions, or consciousness like humans do, but I\'m designed to simulate human-like interactions to make our conversations feel more natural and helpful.\n\nSo, that\'s me in a nutshell! What can I help you with today?', + │ │ │ │ │ role='assistant', + │ │ │ │ │ stop_reason='end_of_turn', + │ │ │ │ │ tool_calls=[] + │ │ │ │ ), + │ │ │ │ session_id='abd4afea-4324-43f4-9513-cfe3970d92e8', + │ │ │ │ started_at=datetime.datetime(2025, 4, 3, 1, 15, 14, 28722, tzinfo=TzInfo(UTC)), + │ │ │ │ steps=[ + │ │ │ │ │ InferenceStep( + │ │ │ │ │ │ api_model_response=CompletionMessage( + │ │ │ │ │ │ │ content='As a conversational AI, I don\'t have a personal identity in the classical sense. I exist as a program running on computer servers, designed to process and respond to text-based inputs.\n\nI\'m an instance of a type of artificial intelligence called a "language model," which is trained on vast amounts of text data to generate human-like responses. My primary function is to understand and respond to natural language inputs, like our conversation right now.\n\nThink of me as a virtual assistant, a chatbot, or a conversational interface – I\'m here to provide information, answer questions, and engage in conversation to the best of my abilities. I don\'t have feelings, emotions, or consciousness like humans do, but I\'m designed to simulate human-like interactions to make our conversations feel more natural and helpful.\n\nSo, that\'s me in a nutshell! 
What can I help you with today?', + │ │ │ │ │ │ │ role='assistant', + │ │ │ │ │ │ │ stop_reason='end_of_turn', + │ │ │ │ │ │ │ tool_calls=[] + │ │ │ │ │ │ ), + │ │ │ │ │ │ step_id='69831607-fa75-424a-949b-e2049e3129d1', + │ │ │ │ │ │ step_type='inference', + │ │ │ │ │ │ turn_id='8b360202-f7cb-4786-baa9-166a1b46e2ca', + │ │ │ │ │ │ completed_at=datetime.datetime(2025, 4, 3, 1, 15, 21, 716174, tzinfo=TzInfo(UTC)), + │ │ │ │ │ │ started_at=datetime.datetime(2025, 4, 3, 1, 15, 14, 28823, tzinfo=TzInfo(UTC)) + │ │ │ │ │ ) + │ │ │ │ ], + │ │ │ │ turn_id='8b360202-f7cb-4786-baa9-166a1b46e2ca', + │ │ │ │ completed_at=datetime.datetime(2025, 4, 3, 1, 15, 21, 727364, tzinfo=TzInfo(UTC)), + │ │ │ │ output_attachments=[] + │ │ │ ) + │ │ ) + │ ) + ) + + + Streaming with print helper... + inference> Déjà vu! + + As I mentioned earlier, I'm an artificial intelligence language model. I don't have a personal identity or consciousness like humans do. I exist solely to process and respond to text-based inputs, providing information and assistance on a wide range of topics. + + I'm a computer program designed to simulate human-like conversations, using natural language processing (NLP) and machine learning algorithms to understand and generate responses. My purpose is to help users like you with their questions, provide information, and engage in conversation. + + Think of me as a virtual companion, a helpful tool designed to make your interactions more efficient and enjoyable. I don't have personal opinions, emotions, or biases, but I'm here to provide accurate and informative responses to the best of my abilities. + + So, who am I? I'm just a computer program designed to help you! +``` +::: + +:::{tab-item} Build a RAG Agent + +For our last demo, we can build a RAG agent that can answer questions about the Torchtune project using the documents +in a vector database. +### i. 
Create the Script +Create a file `rag_agent.py` and add the following code: + +```python +from llama_stack_client import LlamaStackClient +from llama_stack_client import Agent, AgentEventLogger +from llama_stack_client.types import Document +import uuid +from termcolor import cprint + +client = LlamaStackClient(base_url="http://localhost:8321") + +# Create a vector database instance +embed_lm = next(m for m in client.models.list() if m.model_type == "embedding") +embedding_model = embed_lm.identifier +vector_db_id = f"v{uuid.uuid4().hex}" +client.vector_dbs.register( + vector_db_id=vector_db_id, + embedding_model=embedding_model, +) + +# Create Documents +urls = [ + "memory_optimizations.rst", + "chat.rst", + "llama3.rst", + "datasets.rst", + "qat_finetune.rst", + "lora_finetune.rst", +] +documents = [ + Document( + document_id=f"num-{i}", + content=f"https://raw.githubusercontent.com/pytorch/torchtune/main/docs/source/tutorials/{url}", + mime_type="text/plain", + metadata={}, + ) + for i, url in enumerate(urls) +] + +# Insert documents +client.tool_runtime.rag_tool.insert( + documents=documents, + vector_db_id=vector_db_id, + chunk_size_in_tokens=512, +) + +# Get the model being served +llm = next(m for m in client.models.list() if m.model_type == "llm") +model = llm.identifier + +# Create the RAG agent +rag_agent = Agent( + client, + model=model, + instructions="You are a helpful assistant. Use the RAG tool to answer questions as needed.", + tools=[ + { + "name": "builtin::rag/knowledge_search", + "args": {"vector_db_ids": [vector_db_id]}, + } + ], +) + +session_id = rag_agent.create_session(session_name=f"s{uuid.uuid4().hex}") + +turns = ["what is torchtune", "tell me about dora"] + +for t in turns: + print("user>", t) + stream = rag_agent.create_turn( + messages=[{"role": "user", "content": t}], session_id=session_id, stream=True + ) + for event in AgentEventLogger().log(stream): + event.print() +``` +### ii. 
Run the Script +Let's run the script using `uv` +```bash +uv run python rag_agent.py +``` + +```{dropdown} 👋 Click here to see the sample output + user> what is torchtune + inference> [knowledge_search(query='TorchTune')] + tool_execution> Tool:knowledge_search Args:{'query': 'TorchTune'} + tool_execution> Tool:knowledge_search Response:[TextContentItem(text='knowledge_search tool found 5 chunks:\nBEGIN of knowledge_search tool results.\n', type='text'), TextContentItem(text='Result 1:\nDocument_id:num-1\nContent: conversational data, :func:`~torchtune.datasets.chat_dataset` seems to be a good fit. ..., type='text'), TextContentItem(text='END of knowledge_search tool results.\n', type='text')] + inference> Here is a high-level overview of the text: + + **LoRA Finetuning with PyTorch Tune** + + PyTorch Tune provides a recipe for LoRA (Low-Rank Adaptation) finetuning, which is a technique to adapt pre-trained models to new tasks. The recipe uses the `lora_finetune_distributed` command. + ... + Overall, DORA is a powerful reinforcement learning algorithm that can learn complex tasks from human demonstrations. However, it requires careful consideration of the challenges and limitations to achieve optimal results. +``` +::: + +:::: + +## You're Ready to Build Your Own Apps! + +Congrats! 🥳 Now you're ready to [build your own Llama Stack applications](../building_applications/index)! 🚀 diff --git a/docs/source/getting_started/index.md b/docs/source/getting_started/index.md index 82329e60e..63fa5ae6e 100644 --- a/docs/source/getting_started/index.md +++ b/docs/source/getting_started/index.md @@ -1,455 +1,110 @@ -# Quick Start +# Quickstart +Get started with Llama Stack in minutes! -Llama Stack is a stateful service with REST APIs to support seamless transition of AI applications across different environments. The server can be run in a variety of ways, including as a standalone binary, Docker container, or hosted service. 
You can build and test using a local server first and deploy to a hosted endpoint for production. +Llama Stack is a stateful service with REST APIs to support the seamless transition of AI applications across different +environments. You can build and test using a local server first and deploy to a hosted endpoint for production. -In this guide, we'll walk through how to build a RAG agent locally using Llama Stack with [Ollama](https://ollama.com/) to run inference on a Llama Model. - - -### 1. Download a Llama model with Ollama +In this guide, we'll walk through how to build a RAG application locally using Llama Stack with [Ollama](https://ollama.com/) +as the inference [provider](../providers/index.md#inference) for a Llama Model. +## Step 1: Install and Setup +Install [uv](https://docs.astral.sh/uv/), set up your virtual environment, and run inference on a Llama model with +[Ollama](https://ollama.com/download). ```bash -ollama pull llama3.2:3b -``` - -This will instruct the Ollama service to download the Llama 3.2 3B model, which we'll use in the rest of this guide. - -```{admonition} Note -:class: tip - -If you do not have ollama, you can install it from [here](https://ollama.com/download). -``` - -### 2. Run Llama Stack locally - -We use `uv` to setup a virtual environment and install the Llama Stack package. - -:::{dropdown} [Click to Open] Instructions to setup uv - -Install [uv](https://docs.astral.sh/uv/) to setup your virtual environment.
- - -#### For macOS and Linux: -```bash -curl -LsSf https://astral.sh/uv/install.sh | sh -``` -#### For Windows: -Use `irm` to download the script and execute it with `iex`: -```powershell -powershell -ExecutionPolicy ByPass -c "irm https://astral.sh/uv/install.ps1 | iex" -``` - -Setup venv -```bash -uv venv --python 3.10 +uv pip install llama-stack aiosqlite faiss-cpu ollama openai datasets opentelemetry-exporter-otlp-proto-http mcp autoevals source .venv/bin/activate +export INFERENCE_MODEL="llama3.2:3b" +ollama run llama3.2:3b --keepalive 60m ``` -::: - -**Install the Llama Stack package** -```bash -uv pip install -U llama-stack -``` - -**Build and Run the Llama Stack server for Ollama.** +## Step 2: Run the Llama Stack Server ```bash INFERENCE_MODEL=llama3.2:3b llama stack build --template ollama --image-type venv --run ``` - -You will see the output end like below: -``` -... -INFO: Application startup complete. -INFO: Uvicorn running on http://['::', '0.0.0.0']:8321 (Press CTRL+C to quit) -``` - -Now you can use the llama stack client to run inference and build agents! - -### 3. Client CLI - -Install the client package -```bash -pip install llama-stack-client -``` - -:::{dropdown} OR reuse server setup -Open a new terminal and navigate to the same directory you started the server from. - -Setup venv (llama-stack already includes the llama-stack-client package) -```bash -source .venv/bin/activate -``` -::: - -#### 3.1 Configure the client to point to the local server -```bash -llama-stack-client configure --endpoint http://localhost:8321 --api-key none -``` -You will see the below: -``` -Done! 
You can now use the Llama Stack Client CLI with endpoint http://localhost:8321 -``` - -#### 3.2 List available models -``` -llama-stack-client models list -``` - -``` -Available Models - -┏━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓ -┃ model_type ┃ identifier ┃ provider_resource_id ┃ metadata ┃ provider_id ┃ -┡━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩ -│ embedding │ all-MiniLM-L6-v2 │ all-minilm:latest │ {'embedding_dimension': 384.0} │ ollama │ -├─────────────────┼─────────────────────────────────────┼─────────────────────────────────────┼───────────────────────────────────────────┼─────────────────┤ -│ llm │ llama3.2:3b │ llama3.2:3b │ │ ollama │ -└─────────────────┴─────────────────────────────────────┴─────────────────────────────────────┴───────────────────────────────────────────┴─────────────────┘ - -Total models: 2 - -``` - -#### 3.3 Test basic inference -```bash -llama-stack-client inference chat-completion --message "tell me a joke" -``` -Sample output: +## Step 3: Run the Demo +Now open up a new terminal using the same virtual environment and you can run this demo as a script using `uv run demo_script.py` or in an interactive shell. ```python -ChatCompletionResponse( - completion_message=CompletionMessage( - content="Here's one:\n\nWhat do you call a fake noodle?\n\nAn impasta!", - role="assistant", - stop_reason="end_of_turn", - tool_calls=[], - ), - logprobs=None, - metrics=[ - Metric(metric="prompt_tokens", value=14.0, unit=None), - Metric(metric="completion_tokens", value=27.0, unit=None), - Metric(metric="total_tokens", value=41.0, unit=None), - ], -) -``` - -### 4. 
Python SDK -Install the python client -```bash -pip install llama-stack-client -``` -:::{dropdown} OR reuse server setup -Open a new terminal and navigate to the same directory you started the server from. - -Setup venv (llama-stack already includes the llama-stack-client package) -```bash -source .venv/bin/activate -``` -::: -#### 4.1 Basic Inference -Create a file `inference.py` and add the following code: -```python -from llama_stack_client import LlamaStackClient - -client = LlamaStackClient(base_url=f"http://localhost:8321") - -# List available models -models = client.models.list() - -# Select the first LLM -llm = next(m for m in models if m.model_type == "llm") -model_id = llm.identifier - -print("Model:", model_id) - -response = client.inference.chat_completion( - model_id=model_id, - messages=[ - {"role": "system", "content": "You are a helpful assistant."}, - {"role": "user", "content": "Write a haiku about coding"}, - ], -) -print(response.completion_message.content) -``` -Run the script -```bash -python inference.py -``` -Sample output: -``` -Model: llama3.2:3b -Here is a haiku about coding: - -Lines of code unfold -Logic flows through digital night -Beauty in the bits -``` - -#### 4.2. 
Basic Agent - -Create a file `agent.py` and add the following code: -```python -from llama_stack_client import LlamaStackClient -from llama_stack_client import Agent, AgentEventLogger -from rich.pretty import pprint -import uuid - -client = LlamaStackClient(base_url=f"http://localhost:8321") - -models = client.models.list() -llm = next(m for m in models if m.model_type == "llm") -model_id = llm.identifier - -agent = Agent(client, model=model_id, instructions="You are a helpful assistant.") - -s_id = agent.create_session(session_name=f"s{uuid.uuid4().hex}") - -print("Non-streaming ...") -response = agent.create_turn( - messages=[{"role": "user", "content": "Who are you?"}], - session_id=s_id, - stream=False, -) -print("agent>", response.output_message.content) - -print("Streaming ...") -stream = agent.create_turn( - messages=[{"role": "user", "content": "Who are you?"}], session_id=s_id, stream=True -) -for event in stream: - pprint(event) - -print("Streaming with print helper...") -stream = agent.create_turn( - messages=[{"role": "user", "content": "Who are you?"}], session_id=s_id, stream=True -) -for event in AgentEventLogger().log(stream): - event.print() -``` - -Run the script: -```bash -python agent.py -``` - -:::{dropdown} `Sample output` -``` -Non-streaming ... -agent> I'm an artificial intelligence designed to assist and communicate with users like you. I don't have a personal identity, but I'm here to provide information, answer questions, and help with tasks to the best of my abilities. - -I can be used for a wide range of purposes, such as: - -* Providing definitions and explanations -* Offering suggestions and ideas -* Helping with language translation -* Assisting with writing and proofreading -* Generating text or responses to questions -* Playing simple games or chatting about topics of interest - -I'm constantly learning and improving my abilities, so feel free to ask me anything, and I'll do my best to help! - -Streaming ... 
-AgentTurnResponseStreamChunk( -│ event=TurnResponseEvent( -│ │ payload=AgentTurnResponseStepStartPayload( -│ │ │ event_type='step_start', -│ │ │ step_id='69831607-fa75-424a-949b-e2049e3129d1', -│ │ │ step_type='inference', -│ │ │ metadata={} -│ │ ) -│ ) -) -AgentTurnResponseStreamChunk( -│ event=TurnResponseEvent( -│ │ payload=AgentTurnResponseStepProgressPayload( -│ │ │ delta=TextDelta(text='As', type='text'), -│ │ │ event_type='step_progress', -│ │ │ step_id='69831607-fa75-424a-949b-e2049e3129d1', -│ │ │ step_type='inference' -│ │ ) -│ ) -) -AgentTurnResponseStreamChunk( -│ event=TurnResponseEvent( -│ │ payload=AgentTurnResponseStepProgressPayload( -│ │ │ delta=TextDelta(text=' a', type='text'), -│ │ │ event_type='step_progress', -│ │ │ step_id='69831607-fa75-424a-949b-e2049e3129d1', -│ │ │ step_type='inference' -│ │ ) -│ ) -) -... -AgentTurnResponseStreamChunk( -│ event=TurnResponseEvent( -│ │ payload=AgentTurnResponseStepCompletePayload( -│ │ │ event_type='step_complete', -│ │ │ step_details=InferenceStep( -│ │ │ │ api_model_response=CompletionMessage( -│ │ │ │ │ content='As a conversational AI, I don\'t have a personal identity in the classical sense. I exist as a program running on computer servers, designed to process and respond to text-based inputs.\n\nI\'m an instance of a type of artificial intelligence called a "language model," which is trained on vast amounts of text data to generate human-like responses. My primary function is to understand and respond to natural language inputs, like our conversation right now.\n\nThink of me as a virtual assistant, a chatbot, or a conversational interface – I\'m here to provide information, answer questions, and engage in conversation to the best of my abilities. I don\'t have feelings, emotions, or consciousness like humans do, but I\'m designed to simulate human-like interactions to make our conversations feel more natural and helpful.\n\nSo, that\'s me in a nutshell! 
What can I help you with today?', -│ │ │ │ │ role='assistant', -│ │ │ │ │ stop_reason='end_of_turn', -│ │ │ │ │ tool_calls=[] -│ │ │ │ ), -│ │ │ │ step_id='69831607-fa75-424a-949b-e2049e3129d1', -│ │ │ │ step_type='inference', -│ │ │ │ turn_id='8b360202-f7cb-4786-baa9-166a1b46e2ca', -│ │ │ │ completed_at=datetime.datetime(2025, 4, 3, 1, 15, 21, 716174, tzinfo=TzInfo(UTC)), -│ │ │ │ started_at=datetime.datetime(2025, 4, 3, 1, 15, 14, 28823, tzinfo=TzInfo(UTC)) -│ │ │ ), -│ │ │ step_id='69831607-fa75-424a-949b-e2049e3129d1', -│ │ │ step_type='inference' -│ │ ) -│ ) -) -AgentTurnResponseStreamChunk( -│ event=TurnResponseEvent( -│ │ payload=AgentTurnResponseTurnCompletePayload( -│ │ │ event_type='turn_complete', -│ │ │ turn=Turn( -│ │ │ │ input_messages=[UserMessage(content='Who are you?', role='user', context=None)], -│ │ │ │ output_message=CompletionMessage( -│ │ │ │ │ content='As a conversational AI, I don\'t have a personal identity in the classical sense. I exist as a program running on computer servers, designed to process and respond to text-based inputs.\n\nI\'m an instance of a type of artificial intelligence called a "language model," which is trained on vast amounts of text data to generate human-like responses. My primary function is to understand and respond to natural language inputs, like our conversation right now.\n\nThink of me as a virtual assistant, a chatbot, or a conversational interface – I\'m here to provide information, answer questions, and engage in conversation to the best of my abilities. I don\'t have feelings, emotions, or consciousness like humans do, but I\'m designed to simulate human-like interactions to make our conversations feel more natural and helpful.\n\nSo, that\'s me in a nutshell! 
What can I help you with today?', -│ │ │ │ │ role='assistant', -│ │ │ │ │ stop_reason='end_of_turn', -│ │ │ │ │ tool_calls=[] -│ │ │ │ ), -│ │ │ │ session_id='abd4afea-4324-43f4-9513-cfe3970d92e8', -│ │ │ │ started_at=datetime.datetime(2025, 4, 3, 1, 15, 14, 28722, tzinfo=TzInfo(UTC)), -│ │ │ │ steps=[ -│ │ │ │ │ InferenceStep( -│ │ │ │ │ │ api_model_response=CompletionMessage( -│ │ │ │ │ │ │ content='As a conversational AI, I don\'t have a personal identity in the classical sense. I exist as a program running on computer servers, designed to process and respond to text-based inputs.\n\nI\'m an instance of a type of artificial intelligence called a "language model," which is trained on vast amounts of text data to generate human-like responses. My primary function is to understand and respond to natural language inputs, like our conversation right now.\n\nThink of me as a virtual assistant, a chatbot, or a conversational interface – I\'m here to provide information, answer questions, and engage in conversation to the best of my abilities. I don\'t have feelings, emotions, or consciousness like humans do, but I\'m designed to simulate human-like interactions to make our conversations feel more natural and helpful.\n\nSo, that\'s me in a nutshell! 
What can I help you with today?', -│ │ │ │ │ │ │ role='assistant', -│ │ │ │ │ │ │ stop_reason='end_of_turn', -│ │ │ │ │ │ │ tool_calls=[] -│ │ │ │ │ │ ), -│ │ │ │ │ │ step_id='69831607-fa75-424a-949b-e2049e3129d1', -│ │ │ │ │ │ step_type='inference', -│ │ │ │ │ │ turn_id='8b360202-f7cb-4786-baa9-166a1b46e2ca', -│ │ │ │ │ │ completed_at=datetime.datetime(2025, 4, 3, 1, 15, 21, 716174, tzinfo=TzInfo(UTC)), -│ │ │ │ │ │ started_at=datetime.datetime(2025, 4, 3, 1, 15, 14, 28823, tzinfo=TzInfo(UTC)) -│ │ │ │ │ ) -│ │ │ │ ], -│ │ │ │ turn_id='8b360202-f7cb-4786-baa9-166a1b46e2ca', -│ │ │ │ completed_at=datetime.datetime(2025, 4, 3, 1, 15, 21, 727364, tzinfo=TzInfo(UTC)), -│ │ │ │ output_attachments=[] -│ │ │ ) -│ │ ) -│ ) -) - - -Streaming with print helper... -inference> Déjà vu! - -As I mentioned earlier, I'm an artificial intelligence language model. I don't have a personal identity or consciousness like humans do. I exist solely to process and respond to text-based inputs, providing information and assistance on a wide range of topics. - -I'm a computer program designed to simulate human-like conversations, using natural language processing (NLP) and machine learning algorithms to understand and generate responses. My purpose is to help users like you with their questions, provide information, and engage in conversation. - -Think of me as a virtual companion, a helpful tool designed to make your interactions more efficient and enjoyable. I don't have personal opinions, emotions, or biases, but I'm here to provide accurate and informative responses to the best of my abilities. - -So, who am I? I'm just a computer program designed to help you! - -``` -::: - -#### 4.3. 
RAG agent - -Create a file `rag_agent.py` and add the following code: - -```python -from llama_stack_client import LlamaStackClient -from llama_stack_client import Agent, AgentEventLogger +from termcolor import cprint from llama_stack_client.types import Document -import uuid +from llama_stack_client import LlamaStackClient -client = LlamaStackClient(base_url=f"http://localhost:8321") -# Create a vector database instance -embedlm = next(m for m in client.models.list() if m.model_type == "embedding") -embedding_model = embedlm.identifier -vector_db_id = f"v{uuid.uuid4().hex}" -client.vector_dbs.register( - vector_db_id=vector_db_id, - embedding_model=embedding_model, -) - -# Create Documents -urls = [ - "memory_optimizations.rst", - "chat.rst", - "llama3.rst", - "datasets.rst", - "qat_finetune.rst", - "lora_finetune.rst", -] +vector_db = "faiss" +vector_db_id = "test-vector-db" +model_id = "llama3.2:3b" +query = "Can you give me the arxiv link for Lora Fine Tuning in Pytorch?" documents = [ Document( - document_id=f"num-{i}", - content=f"https://raw.githubusercontent.com/pytorch/torchtune/main/docs/source/tutorials/{url}", + document_id="document_1", + content=f"https://raw.githubusercontent.com/pytorch/torchtune/main/docs/source/tutorials/lora_finetune.rst", mime_type="text/plain", metadata={}, ) - for i, url in enumerate(urls) ] -# Insert documents +client = LlamaStackClient(base_url="http://localhost:8321") +client.vector_dbs.register( + provider_id=vector_db, + vector_db_id=vector_db_id, + embedding_model="all-MiniLM-L6-v2", + embedding_dimension=384, +) + client.tool_runtime.rag_tool.insert( documents=documents, vector_db_id=vector_db_id, - chunk_size_in_tokens=512, + chunk_size_in_tokens=50, ) -# Get the model being served -llm = next(m for m in client.models.list() if m.model_type == "llm") -model = llm.identifier - -# Create RAG agent -ragagent = Agent( - client, - model=model, - instructions="You are a helpful assistant.
Use the RAG tool to answer questions as needed.", - tools=[ - { - "name": "builtin::rag/knowledge_search", - "args": {"vector_db_ids": [vector_db_id]}, - } - ], +response = client.tool_runtime.rag_tool.query( + vector_db_ids=[vector_db_id], + content=query, ) -s_id = ragagent.create_session(session_name=f"s{uuid.uuid4().hex}") +cprint("" + "-" * 50, "yellow") +cprint(f"Query> {query}", "red") +cprint("" + "-" * 50, "yellow") +for chunk in response.content: + cprint(f"Chunk ID> {chunk.text}", "green") + cprint("" + "-" * 50, "yellow") +``` +And you should see output like below. +``` +-------------------------------------------------- +Query> Can you give me the arxiv link for Lora Fine Tuning in Pytorch? +-------------------------------------------------- +Chunk ID> knowledge_search tool found 5 chunks: +BEGIN of knowledge_search tool results. -turns = ["what is torchtune", "tell me about dora"] +-------------------------------------------------- +Chunk ID> Result 1: +Document_id:docum +Content: .. _lora_finetune_label: -for t in turns: - print("user>", t) - stream = ragagent.create_turn( - messages=[{"role": "user", "content": t}], session_id=s_id, stream=True - ) - for event in AgentEventLogger().log(stream): - event.print() -``` -Run the script: -``` -python rag_agent.py -``` -:::{dropdown} `Sample output` -``` -user> what is torchtune -inference> [knowledge_search(query='TorchTune')] -tool_execution> Tool:knowledge_search Args:{'query': 'TorchTune'} -tool_execution> Tool:knowledge_search Response:[TextContentItem(text='knowledge_search tool found 5 chunks:\nBEGIN of knowledge_search tool results.\n', type='text'), TextContentItem(text='Result 1:\nDocument_id:num-1\nContent: conversational data, :func:`~torchtune.datasets.chat_dataset` seems to be a good fit. 
..., type='text'), TextContentItem(text='END of knowledge_search tool results.\n', type='text')] -inference> Here is a high-level overview of the text: +============================ +Fine-Tuning Llama2 with LoRA +============================ -**LoRA Finetuning with PyTorch Tune** +This guide will teach you about `LoRA `_, a -PyTorch Tune provides a recipe for LoRA (Low-Rank Adaptation) finetuning, which is a technique to adapt pre-trained models to new tasks. The recipe uses the `lora_finetune_distributed` command. -... -Overall, DORA is a powerful reinforcement learning algorithm that can learn complex tasks from human demonstrations. However, it requires careful consideration of the challenges and limitations to achieve optimal results. +-------------------------------------------------- ``` -::: +Congratulations! You've successfully built your first RAG application using Llama Stack! 🎉🥳 + ## Next Steps -- Go through the [Getting Started Notebook](https://github.com/meta-llama/llama-stack/blob/main/docs/getting_started.ipynb) -- Checkout more [Notebooks on GitHub](https://github.com/meta-llama/llama-stack/tree/main/docs/notebooks) -- See [References](../references/index.md) for more details about the llama CLI and Python SDK -- For example applications and more detailed tutorials, visit our [llama-stack-apps](https://github.com/meta-llama/llama-stack-apps/tree/main/examples) repository. + +Now you're ready to dive deeper into Llama Stack! +- Explore the [Detailed Tutorial](./detailed_tutorial.md). +- Try the [Getting Started Notebook](https://github.com/meta-llama/llama-stack/blob/main/docs/getting_started.ipynb). +- Browse more [Notebooks on GitHub](https://github.com/meta-llama/llama-stack/tree/main/docs/notebooks). +- Learn about Llama Stack [Concepts](../concepts/index.md). +- Discover how to [Build Llama Stacks](../distributions/index.md). +- Refer to our [References](../references/index.md) for details on the Llama CLI and Python SDK. 
+- Check out the [llama-stack-apps](https://github.com/meta-llama/llama-stack-apps/tree/main/examples) repository for example applications and tutorials. + +```{toctree} +:maxdepth: 0 +:hidden: + +detailed_tutorial +``` diff --git a/docs/source/index.md b/docs/source/index.md index a0ac95957..99b0e1a3e 100644 --- a/docs/source/index.md +++ b/docs/source/index.md @@ -1,3 +1,5 @@ +# Llama Stack +Welcome to Llama Stack, the open-source framework for building generative AI applications. ```{admonition} Llama 4 is here! :class: tip @@ -9,7 +11,6 @@ Check out [Getting Started with Llama 4](https://colab.research.google.com/githu Llama Stack {{ llama_stack_version }} is now available! See the {{ llama_stack_version_link }} for more details. ``` -# Llama Stack ## What is Llama Stack? diff --git a/docs/source/providers/index.md b/docs/source/providers/index.md index 75faf7c00..1d1a6e081 100644 --- a/docs/source/providers/index.md +++ b/docs/source/providers/index.md @@ -1,8 +1,8 @@ # Providers Overview The goal of Llama Stack is to build an ecosystem where users can easily swap out different implementations for the same API. Examples for these include: -- LLM inference providers (e.g., Fireworks, Together, AWS Bedrock, Groq, Cerebras, SambaNova, vLLM, etc.), -- Vector databases (e.g., ChromaDB, Weaviate, Qdrant, Milvus, FAISS, PGVector, etc.), +- LLM inference providers (e.g., Ollama, Fireworks, Together, AWS Bedrock, Groq, Cerebras, SambaNova, vLLM, etc.), +- Vector databases (e.g., ChromaDB, Weaviate, Qdrant, Milvus, FAISS, PGVector, SQLite-Vec, etc.), - Safety providers (e.g., Meta's Llama Guard, AWS Bedrock Guardrails, etc.) Providers come in two flavors: From a4cc4b7e3160d4df2f97eb2ce6aa7325bf908c50 Mon Sep 17 00:00:00 2001 From: ehhuang Date: Thu, 10 Apr 2025 16:58:06 -0700 Subject: [PATCH 08/10] test(verification): add streaming tool calling test (#1933) # What does this PR do? 
## Test Plan --- [//]: # (BEGIN SAPLING FOOTER) Stack created with [Sapling](https://sapling-scm.com). Best reviewed with [ReviewStack](https://reviewstack.dev/meta-llama/llama-stack/pull/1933). * #1934 * __->__ #1933 --- .../openai_api/test_chat_completion.py | 55 +++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/tests/verifications/openai_api/test_chat_completion.py b/tests/verifications/openai_api/test_chat_completion.py index dc08ec944..6aee29c3a 100644 --- a/tests/verifications/openai_api/test_chat_completion.py +++ b/tests/verifications/openai_api/test_chat_completion.py @@ -4,6 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. +import json import re from typing import Any @@ -225,6 +226,60 @@ def test_chat_non_streaming_tool_calling(request, openai_client, model, provider # TODO: add detailed type validation +@pytest.mark.parametrize( + "case", + chat_completion_test_cases["test_tool_calling"]["test_params"]["case"], + ids=case_id_generator, +) +def test_chat_streaming_tool_calling(request, openai_client, model, provider, verification_config, case): + test_name_base = get_base_test_name(request) + if should_skip_test(verification_config, provider, model, test_name_base): + pytest.skip(f"Skipping {test_name_base} for model {model} on provider {provider} based on config.") + + stream = openai_client.chat.completions.create( + model=model, + messages=case["input"]["messages"], + tools=case["input"]["tools"], + stream=True, + ) + + # Accumulate partial tool_calls here + tool_calls_buffer = {} + current_id = None + # Process streaming chunks + for chunk in stream: + choice = chunk.choices[0] + delta = choice.delta + + if delta.tool_calls is None: + continue + + for tool_call_delta in delta.tool_calls: + if tool_call_delta.id: + current_id = tool_call_delta.id + call_id = current_id + func_delta = tool_call_delta.function + + if call_id not in tool_calls_buffer: + 
tool_calls_buffer[call_id] = { + "id": call_id, + "type": tool_call_delta.type, + "name": func_delta.name, + "arguments": "", + } + + if func_delta.arguments: + tool_calls_buffer[call_id]["arguments"] += func_delta.arguments + + assert len(tool_calls_buffer) == 1 + for call in tool_calls_buffer.values(): + assert len(call["id"]) > 0 + assert call["name"] == "get_weather" + + args_dict = json.loads(call["arguments"]) + assert "san francisco" in args_dict["location"].lower() + + # --- Helper functions (structured output validation) --- From 2fcb70b78921b89ef69bd868834958776a1e16aa Mon Sep 17 00:00:00 2001 From: ehhuang Date: Thu, 10 Apr 2025 16:59:28 -0700 Subject: [PATCH 09/10] test(verification): overwrite test result instead of creating new ones (#1934) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # What does this PR do? ## Test Plan (myenv) ➜ llama-stack python tests/verifications/generate_report.py --providers fireworks,together,openai --run-tests --- tests/verifications/REPORT.md | 17 +- tests/verifications/generate_report.py | 113 ++-- ...reworks_1744264202.json => fireworks.json} | 518 +++++++++++------ .../{openai_1744264304.json => openai.json} | 309 ++++++---- ...together_1744264258.json => together.json} | 549 +++++++++++------- 5 files changed, 926 insertions(+), 580 deletions(-) rename tests/verifications/test_results/{fireworks_1744264202.json => fireworks.json} (68%) rename tests/verifications/test_results/{openai_1744264304.json => openai.json} (77%) rename tests/verifications/test_results/{together_1744264258.json => together.json} (77%) diff --git a/tests/verifications/REPORT.md b/tests/verifications/REPORT.md index 449499382..2309c6404 100644 --- a/tests/verifications/REPORT.md +++ b/tests/verifications/REPORT.md @@ -1,6 +1,6 @@ # Test Results Report -*Generated on: 2025-04-09 22:52:19* +*Generated on: 2025-04-10 16:48:18* *This report was generated by running `python 
tests/verifications/generate_report.py`* @@ -15,15 +15,15 @@ | Provider | Pass Rate | Tests Passed | Total Tests | | --- | --- | --- | --- | -| Together | 67.7% | 21 | 31 | -| Fireworks | 90.3% | 28 | 31 | -| Openai | 100.0% | 22 | 22 | +| Together | 64.7% | 22 | 34 | +| Fireworks | 82.4% | 28 | 34 | +| Openai | 100.0% | 24 | 24 | ## Together -*Tests run on: 2025-04-09 22:50:58* +*Tests run on: 2025-04-10 16:46:35* ```bash # Run all tests for this provider: @@ -56,10 +56,11 @@ pytest tests/verifications/openai_api/test_chat_completion.py --provider=togethe | test_chat_streaming_image | ⚪ | ❌ | ❌ | | test_chat_streaming_structured_output (calendar) | ✅ | ❌ | ❌ | | test_chat_streaming_structured_output (math) | ✅ | ❌ | ❌ | +| test_chat_streaming_tool_calling | ✅ | ❌ | ❌ | ## Fireworks -*Tests run on: 2025-04-09 22:50:02* +*Tests run on: 2025-04-10 16:44:44* ```bash # Run all tests for this provider: @@ -92,10 +93,11 @@ pytest tests/verifications/openai_api/test_chat_completion.py --provider=firewor | test_chat_streaming_image | ⚪ | ✅ | ✅ | | test_chat_streaming_structured_output (calendar) | ✅ | ✅ | ✅ | | test_chat_streaming_structured_output (math) | ✅ | ✅ | ✅ | +| test_chat_streaming_tool_calling | ❌ | ❌ | ❌ | ## Openai -*Tests run on: 2025-04-09 22:51:44* +*Tests run on: 2025-04-10 16:47:28* ```bash # Run all tests for this provider: @@ -127,3 +129,4 @@ pytest tests/verifications/openai_api/test_chat_completion.py --provider=openai | test_chat_streaming_image | ✅ | ✅ | | test_chat_streaming_structured_output (calendar) | ✅ | ✅ | | test_chat_streaming_structured_output (math) | ✅ | ✅ | +| test_chat_streaming_tool_calling | ✅ | ✅ | diff --git a/tests/verifications/generate_report.py b/tests/verifications/generate_report.py index 1c760ca19..6a7c39ee2 100755 --- a/tests/verifications/generate_report.py +++ b/tests/verifications/generate_report.py @@ -77,8 +77,9 @@ def run_tests(provider, keyword=None): print(f"Running tests for provider: {provider}") timestamp = 
int(time.time()) - result_file = RESULTS_DIR / f"{provider}_{timestamp}.json" - temp_json_file = RESULTS_DIR / f"temp_{provider}_{timestamp}.json" + # Use a constant filename for the final result and temp file + result_file = RESULTS_DIR / f"{provider}.json" + temp_json_file = RESULTS_DIR / f"temp_{provider}.json" # Determine project root directory relative to this script project_root = Path(__file__).parent.parent.parent @@ -106,11 +107,12 @@ def run_tests(provider, keyword=None): # Check if the JSON file was created if temp_json_file.exists(): - # Read the JSON file and save it to our results format with open(temp_json_file, "r") as f: test_results = json.load(f) - # Save results to our own format with a trailing newline + test_results["run_timestamp"] = timestamp + + # Save results to the final (overwritten) file with open(result_file, "w") as f: json.dump(test_results, f, indent=2) f.write("\n") # Add a trailing newline for precommit @@ -132,7 +134,7 @@ def run_tests(provider, keyword=None): def parse_results( result_file, -) -> Tuple[DefaultDict[str, DefaultDict[str, Dict[str, bool]]], DefaultDict[str, Set[str]], Set[str]]: +) -> Tuple[DefaultDict[str, DefaultDict[str, Dict[str, bool]]], DefaultDict[str, Set[str]], Set[str], str]: """Parse a single test results file. Returns: @@ -140,11 +142,12 @@ def parse_results( - parsed_results: DefaultDict[provider, DefaultDict[model, Dict[test_name, pass_status]]] - providers_in_file: DefaultDict[provider, Set[model]] found in this file. - tests_in_file: Set[test_name] found in this file. 
+ - run_timestamp: Timestamp when the test was run """ if not os.path.exists(result_file): print(f"Results file does not exist: {result_file}") # Return empty defaultdicts/set matching the type hint - return defaultdict(lambda: defaultdict(dict)), defaultdict(set), set() + return defaultdict(lambda: defaultdict(dict)), defaultdict(set), set(), "" with open(result_file, "r") as f: results = json.load(f) @@ -153,7 +156,16 @@ def parse_results( parsed_results: DefaultDict[str, DefaultDict[str, Dict[str, bool]]] = defaultdict(lambda: defaultdict(dict)) providers_in_file: DefaultDict[str, Set[str]] = defaultdict(set) tests_in_file: Set[str] = set() - provider: str = os.path.basename(result_file).split("_")[0] + # Extract provider from filename (e.g., "openai.json" -> "openai") + provider: str = result_file.stem + + # Extract run timestamp from the JSON data + run_timestamp_unix = results.get("run_timestamp") + run_timestamp_str = ( + time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(run_timestamp_unix)) + if run_timestamp_unix is not None + else "Unknown" + ) # Debug: Print summary of test results print(f"Test results summary for {provider}:") @@ -167,7 +179,7 @@ def parse_results( if "tests" not in results or not results["tests"]: print(f"No test results found in {result_file}") # Return empty defaultdicts/set matching the type hint - return defaultdict(lambda: defaultdict(dict)), defaultdict(set), set() + return defaultdict(lambda: defaultdict(dict)), defaultdict(set), set(), "" # Process the tests for test in results["tests"]: @@ -225,59 +237,29 @@ def parse_results( if not parsed_results.get(provider): print(f"Warning: No valid test results parsed for provider {provider} from file {result_file}") - return parsed_results, providers_in_file, tests_in_file + return parsed_results, providers_in_file, tests_in_file, run_timestamp_str -def cleanup_old_results(providers_to_clean: Dict[str, Set[str]]): - """Clean up old test result files, keeping only the newest N per 
provider.""" - # Use the passed-in providers dictionary - for provider in providers_to_clean.keys(): - # Get all result files for this provider - provider_files = list(RESULTS_DIR.glob(f"{provider}_*.json")) - - # Sort by timestamp (newest first) - provider_files.sort(key=lambda x: int(x.stem.split("_")[1]), reverse=True) - - # Remove old files beyond the max to keep - if len(provider_files) > MAX_RESULTS_PER_PROVIDER: - for old_file in provider_files[MAX_RESULTS_PER_PROVIDER:]: - try: - old_file.unlink() - print(f"Removed old result file: {old_file}") - except Exception as e: - print(f"Error removing file {old_file}: {e}") - - -def get_latest_results_by_provider(): - """Get the latest test result file for each provider""" +def get_all_result_files_by_provider(): + """Get all test result files, keyed by provider.""" provider_results = {} - # Get all result files result_files = list(RESULTS_DIR.glob("*.json")) - # Extract all provider names from filenames - all_providers = set() for file in result_files: - # File format is provider_timestamp.json - parts = file.stem.split("_") - if len(parts) >= 2: - all_providers.add(parts[0]) - - # Group by provider - for provider in all_providers: - provider_files = [f for f in result_files if f.name.startswith(f"{provider}_")] - - # Sort by timestamp (newest first) - provider_files.sort(key=lambda x: int(x.stem.split("_")[1]), reverse=True) - - if provider_files: - provider_results[provider] = provider_files[0] + provider = file.stem + if provider: + provider_results[provider] = file return provider_results def generate_report( - results_dict: Dict[str, Any], providers: Dict[str, Set[str]], all_tests: Set[str], output_file=None + results_dict: Dict[str, Any], + providers: Dict[str, Set[str]], + all_tests: Set[str], + provider_timestamps: Dict[str, str], + output_file=None, ): """Generate the markdown report. @@ -285,6 +267,7 @@ def generate_report( results_dict: Aggregated results [provider][model][test_name] -> status. 
providers: Dict of all providers and their models {provider: {models}}. all_tests: Set of all test names found. + provider_timestamps: Dict of provider to timestamp when tests were run output_file: Optional path to save the report. """ if output_file is None: @@ -293,19 +276,6 @@ def generate_report( else: output_file = Path(output_file) - # Get the timestamp from result files - provider_timestamps = {} - provider_results_files = get_latest_results_by_provider() - for provider, result_file in provider_results_files.items(): - # Extract timestamp from filename (format: provider_timestamp.json) - try: - timestamp_str = result_file.stem.split("_")[1] - timestamp = int(timestamp_str) - formatted_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(timestamp)) - provider_timestamps[provider] = formatted_time - except (IndexError, ValueError): - provider_timestamps[provider] = "Unknown" - # Convert provider model sets to sorted lists (use passed-in providers dict) providers_sorted = {prov: sorted(models) for prov, models in providers.items()} @@ -416,7 +386,7 @@ def generate_report( else: example_base_test_name = first_test_name - base_name = base_test_name_map.get(test, test) # Get base name + base_name = base_test_name_map.get(first_test_name, first_test_name) # Get base name case_count = base_test_case_counts.get(base_name, 1) # Get count filter_str = f"{example_base_test_name} and {example_case_id}" if case_count > 1 else example_base_test_name @@ -491,6 +461,7 @@ def main(): # Initialize collections to aggregate results in main aggregated_providers = defaultdict(set) aggregated_tests = set() + provider_timestamps = {} if args.run_tests: # Get list of available providers from command line or use detected providers @@ -512,28 +483,28 @@ def main(): result_file = run_tests(provider, keyword=args.k) if result_file: # Parse and aggregate results - parsed_results, providers_in_file, tests_in_file = parse_results(result_file) + parsed_results, providers_in_file, 
tests_in_file, run_timestamp = parse_results(result_file) all_results.update(parsed_results) for prov, models in providers_in_file.items(): aggregated_providers[prov].update(models) + if run_timestamp: + provider_timestamps[prov] = run_timestamp aggregated_tests.update(tests_in_file) else: # Use existing results - provider_result_files = get_latest_results_by_provider() + provider_result_files = get_all_result_files_by_provider() for result_file in provider_result_files.values(): # Parse and aggregate results - parsed_results, providers_in_file, tests_in_file = parse_results(result_file) + parsed_results, providers_in_file, tests_in_file, run_timestamp = parse_results(result_file) all_results.update(parsed_results) for prov, models in providers_in_file.items(): aggregated_providers[prov].update(models) + if run_timestamp: + provider_timestamps[prov] = run_timestamp aggregated_tests.update(tests_in_file) - # Generate the report, passing aggregated data - generate_report(all_results, aggregated_providers, aggregated_tests, args.output) - - # Cleanup, passing aggregated providers - cleanup_old_results(aggregated_providers) + generate_report(all_results, aggregated_providers, aggregated_tests, provider_timestamps, args.output) if __name__ == "__main__": diff --git a/tests/verifications/test_results/fireworks_1744264202.json b/tests/verifications/test_results/fireworks.json similarity index 68% rename from tests/verifications/test_results/fireworks_1744264202.json rename to tests/verifications/test_results/fireworks.json index d14738be9..061e44c08 100644 --- a/tests/verifications/test_results/fireworks_1744264202.json +++ b/tests/verifications/test_results/fireworks.json @@ -1,15 +1,15 @@ { - "created": 1744264258.730061, - "duration": 53.86071586608887, + "created": 1744328795.171092, + "duration": 107.57908606529236, "exitcode": 1, "root": "/Users/erichuang/projects/llama-stack", "environment": {}, "summary": { "passed": 28, "skipped": 2, - "failed": 3, - "total": 33, 
- "collected": 33 + "failed": 6, + "total": 36, + "collected": 36 }, "collectors": [ { @@ -29,167 +29,182 @@ { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[accounts/fireworks/models/llama-v3p3-70b-instruct-earth]", "type": "Function", - "lineno": 72 + "lineno": 73 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[accounts/fireworks/models/llama-v3p3-70b-instruct-saturn]", "type": "Function", - "lineno": 72 + "lineno": 73 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[accounts/fireworks/models/llama4-scout-instruct-basic-earth]", "type": "Function", - "lineno": 72 + "lineno": 73 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[accounts/fireworks/models/llama4-scout-instruct-basic-saturn]", "type": "Function", - "lineno": 72 + "lineno": 73 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[accounts/fireworks/models/llama4-maverick-instruct-basic-earth]", "type": "Function", - "lineno": 72 + "lineno": 73 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[accounts/fireworks/models/llama4-maverick-instruct-basic-saturn]", "type": "Function", - "lineno": 72 + "lineno": 73 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[accounts/fireworks/models/llama-v3p3-70b-instruct-earth]", "type": "Function", - "lineno": 91 + "lineno": 92 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[accounts/fireworks/models/llama-v3p3-70b-instruct-saturn]", "type": "Function", - "lineno": 91 + "lineno": 92 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[accounts/fireworks/models/llama4-scout-instruct-basic-earth]", "type": "Function", - 
"lineno": 91 + "lineno": 92 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[accounts/fireworks/models/llama4-scout-instruct-basic-saturn]", "type": "Function", - "lineno": 91 + "lineno": 92 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[accounts/fireworks/models/llama4-maverick-instruct-basic-earth]", "type": "Function", - "lineno": 91 + "lineno": 92 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[accounts/fireworks/models/llama4-maverick-instruct-basic-saturn]", "type": "Function", - "lineno": 91 + "lineno": 92 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]", "type": "Function", - "lineno": 115 + "lineno": 116 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[accounts/fireworks/models/llama4-scout-instruct-basic-case0]", "type": "Function", - "lineno": 115 + "lineno": 116 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]", "type": "Function", - "lineno": 115 + "lineno": 116 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]", "type": "Function", - "lineno": 134 + "lineno": 135 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[accounts/fireworks/models/llama4-scout-instruct-basic-case0]", "type": "Function", - "lineno": 134 + "lineno": 135 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]", "type": "Function", - "lineno": 134 + "lineno": 135 }, { "nodeid": 
"tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[accounts/fireworks/models/llama-v3p3-70b-instruct-calendar]", "type": "Function", - "lineno": 158 + "lineno": 159 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[accounts/fireworks/models/llama-v3p3-70b-instruct-math]", "type": "Function", - "lineno": 158 + "lineno": 159 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[accounts/fireworks/models/llama4-scout-instruct-basic-calendar]", "type": "Function", - "lineno": 158 + "lineno": 159 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[accounts/fireworks/models/llama4-scout-instruct-basic-math]", "type": "Function", - "lineno": 158 + "lineno": 159 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[accounts/fireworks/models/llama4-maverick-instruct-basic-calendar]", "type": "Function", - "lineno": 158 + "lineno": 159 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[accounts/fireworks/models/llama4-maverick-instruct-basic-math]", "type": "Function", - "lineno": 158 + "lineno": 159 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[accounts/fireworks/models/llama-v3p3-70b-instruct-calendar]", "type": "Function", - "lineno": 181 + "lineno": 182 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[accounts/fireworks/models/llama-v3p3-70b-instruct-math]", "type": "Function", - "lineno": 181 + "lineno": 182 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[accounts/fireworks/models/llama4-scout-instruct-basic-calendar]", "type": 
"Function", - "lineno": 181 + "lineno": 182 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[accounts/fireworks/models/llama4-scout-instruct-basic-math]", "type": "Function", - "lineno": 181 + "lineno": 182 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[accounts/fireworks/models/llama4-maverick-instruct-basic-calendar]", "type": "Function", - "lineno": 181 + "lineno": 182 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[accounts/fireworks/models/llama4-maverick-instruct-basic-math]", "type": "Function", - "lineno": 181 + "lineno": 182 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]", "type": "Function", - "lineno": 203 + "lineno": 204 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-case0]", "type": "Function", - "lineno": 203 + "lineno": 204 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]", "type": "Function", - "lineno": 203 + "lineno": 204 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]", + "type": "Function", + "lineno": 228 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-case0]", + "type": "Function", + "lineno": 228 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]", + 
"type": "Function", + "lineno": 228 } ] } @@ -197,7 +212,7 @@ "tests": [ { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[accounts/fireworks/models/llama-v3p3-70b-instruct-earth]", - "lineno": 72, + "lineno": 73, "outcome": "passed", "keywords": [ "test_chat_non_streaming_basic[accounts/fireworks/models/llama-v3p3-70b-instruct-earth]", @@ -216,21 +231,21 @@ "case_id": "earth" }, "setup": { - "duration": 0.05236550001427531, + "duration": 0.2175025000469759, "outcome": "passed" }, "call": { - "duration": 0.5364967910572886, + "duration": 0.7433859170414507, "outcome": "passed" }, "teardown": { - "duration": 0.00015075004193931818, + "duration": 0.0001592918997630477, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[accounts/fireworks/models/llama-v3p3-70b-instruct-saturn]", - "lineno": 72, + "lineno": 73, "outcome": "passed", "keywords": [ "test_chat_non_streaming_basic[accounts/fireworks/models/llama-v3p3-70b-instruct-saturn]", @@ -249,21 +264,21 @@ "case_id": "saturn" }, "setup": { - "duration": 0.00699599995277822, + "duration": 0.007383499993011355, "outcome": "passed" }, "call": { - "duration": 0.5843954589217901, + "duration": 0.5949292909353971, "outcome": "passed" }, "teardown": { - "duration": 0.0003858329728245735, + "duration": 0.00015891704242676497, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[accounts/fireworks/models/llama4-scout-instruct-basic-earth]", - "lineno": 72, + "lineno": 73, "outcome": "passed", "keywords": [ "test_chat_non_streaming_basic[accounts/fireworks/models/llama4-scout-instruct-basic-earth]", @@ -282,21 +297,21 @@ "case_id": "earth" }, "setup": { - "duration": 0.009176500025205314, + "duration": 0.010730999987572432, "outcome": "passed" }, "call": { - "duration": 0.9258683329680935, + "duration": 0.8945954169612378, "outcome": 
"passed" }, "teardown": { - "duration": 0.00015787500888109207, + "duration": 0.0003751249751076102, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[accounts/fireworks/models/llama4-scout-instruct-basic-saturn]", - "lineno": 72, + "lineno": 73, "outcome": "passed", "keywords": [ "test_chat_non_streaming_basic[accounts/fireworks/models/llama4-scout-instruct-basic-saturn]", @@ -315,21 +330,21 @@ "case_id": "saturn" }, "setup": { - "duration": 0.011275375029072165, + "duration": 0.01665666699409485, "outcome": "passed" }, "call": { - "duration": 0.6890578339807689, + "duration": 0.907927209045738, "outcome": "passed" }, "teardown": { - "duration": 0.0004926669644191861, + "duration": 0.00024874997325241566, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[accounts/fireworks/models/llama4-maverick-instruct-basic-earth]", - "lineno": 72, + "lineno": 73, "outcome": "passed", "keywords": [ "test_chat_non_streaming_basic[accounts/fireworks/models/llama4-maverick-instruct-basic-earth]", @@ -348,21 +363,21 @@ "case_id": "earth" }, "setup": { - "duration": 0.007520624902099371, + "duration": 0.01039199996739626, "outcome": "passed" }, "call": { - "duration": 0.6675686669768766, + "duration": 0.5971567500382662, "outcome": "passed" }, "teardown": { - "duration": 0.00016137503553181887, + "duration": 0.0003488330403342843, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[accounts/fireworks/models/llama4-maverick-instruct-basic-saturn]", - "lineno": 72, + "lineno": 73, "outcome": "passed", "keywords": [ "test_chat_non_streaming_basic[accounts/fireworks/models/llama4-maverick-instruct-basic-saturn]", @@ -381,21 +396,21 @@ "case_id": "saturn" }, "setup": { - "duration": 0.0076431670458987355, + "duration": 0.018627874902449548, "outcome": "passed" }, "call": 
{ - "duration": 1.6813415409997106, + "duration": 2.0586736251134425, "outcome": "passed" }, "teardown": { - "duration": 0.0004928340204060078, + "duration": 0.00046974990982562304, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[accounts/fireworks/models/llama-v3p3-70b-instruct-earth]", - "lineno": 91, + "lineno": 92, "outcome": "passed", "keywords": [ "test_chat_streaming_basic[accounts/fireworks/models/llama-v3p3-70b-instruct-earth]", @@ -414,21 +429,21 @@ "case_id": "earth" }, "setup": { - "duration": 0.01302404107991606, + "duration": 0.01706262503284961, "outcome": "passed" }, "call": { - "duration": 1.3206909999717027, + "duration": 0.6679969580145553, "outcome": "passed" }, "teardown": { - "duration": 0.0002220839960500598, + "duration": 0.0004670419730246067, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[accounts/fireworks/models/llama-v3p3-70b-instruct-saturn]", - "lineno": 91, + "lineno": 92, "outcome": "passed", "keywords": [ "test_chat_streaming_basic[accounts/fireworks/models/llama-v3p3-70b-instruct-saturn]", @@ -447,21 +462,21 @@ "case_id": "saturn" }, "setup": { - "duration": 0.0071772499941289425, + "duration": 0.025956374942325056, "outcome": "passed" }, "call": { - "duration": 0.4109888339880854, + "duration": 2.052679874934256, "outcome": "passed" }, "teardown": { - "duration": 0.0005431669997051358, + "duration": 0.00026958296075463295, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[accounts/fireworks/models/llama4-scout-instruct-basic-earth]", - "lineno": 91, + "lineno": 92, "outcome": "passed", "keywords": [ "test_chat_streaming_basic[accounts/fireworks/models/llama4-scout-instruct-basic-earth]", @@ -480,21 +495,21 @@ "case_id": "earth" }, "setup": { - "duration": 0.012043708004057407, + "duration": 0.015856957994401455, 
"outcome": "passed" }, "call": { - "duration": 0.4509220840409398, + "duration": 0.3096678329166025, "outcome": "passed" }, "teardown": { - "duration": 0.00016408402007073164, + "duration": 0.0007620420074090362, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[accounts/fireworks/models/llama4-scout-instruct-basic-saturn]", - "lineno": 91, + "lineno": 92, "outcome": "passed", "keywords": [ "test_chat_streaming_basic[accounts/fireworks/models/llama4-scout-instruct-basic-saturn]", @@ -513,21 +528,21 @@ "case_id": "saturn" }, "setup": { - "duration": 0.007165874936617911, + "duration": 0.013509334065020084, "outcome": "passed" }, "call": { - "duration": 0.6527335830032825, + "duration": 0.5914681670255959, "outcome": "passed" }, "teardown": { - "duration": 0.0006419579731300473, + "duration": 0.0002906669396907091, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[accounts/fireworks/models/llama4-maverick-instruct-basic-earth]", - "lineno": 91, + "lineno": 92, "outcome": "passed", "keywords": [ "test_chat_streaming_basic[accounts/fireworks/models/llama4-maverick-instruct-basic-earth]", @@ -546,21 +561,21 @@ "case_id": "earth" }, "setup": { - "duration": 0.007546542095951736, + "duration": 0.013216375024057925, "outcome": "passed" }, "call": { - "duration": 0.9360042089829221, + "duration": 1.8804527079919353, "outcome": "passed" }, "teardown": { - "duration": 0.00020483299158513546, + "duration": 0.0002026669681072235, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[accounts/fireworks/models/llama4-maverick-instruct-basic-saturn]", - "lineno": 91, + "lineno": 92, "outcome": "passed", "keywords": [ "test_chat_streaming_basic[accounts/fireworks/models/llama4-maverick-instruct-basic-saturn]", @@ -579,21 +594,21 @@ "case_id": "saturn" }, "setup": { - "duration": 
0.046697250101715326, + "duration": 0.00827441702131182, "outcome": "passed" }, "call": { - "duration": 0.668349124956876, + "duration": 0.7407040420221165, "outcome": "passed" }, "teardown": { - "duration": 0.0005031249020248652, + "duration": 0.0005084159784018993, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]", - "lineno": 115, + "lineno": 116, "outcome": "skipped", "keywords": [ "test_chat_non_streaming_image[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]", @@ -612,22 +627,22 @@ "case_id": "case0" }, "setup": { - "duration": 0.012287458986975253, + "duration": 0.012424499960616231, "outcome": "passed" }, "call": { - "duration": 0.00015287497080862522, + "duration": 0.00032762496266514063, "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py', 124, 'Skipped: Skipping test_chat_non_streaming_image for model accounts/fireworks/models/llama-v3p3-70b-instruct on provider fireworks based on config.')" + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py', 125, 'Skipped: Skipping test_chat_non_streaming_image for model accounts/fireworks/models/llama-v3p3-70b-instruct on provider fireworks based on config.')" }, "teardown": { - "duration": 0.00012162502389401197, + "duration": 0.00032416603062301874, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[accounts/fireworks/models/llama4-scout-instruct-basic-case0]", - "lineno": 115, + "lineno": 116, "outcome": "passed", "keywords": [ "test_chat_non_streaming_image[accounts/fireworks/models/llama4-scout-instruct-basic-case0]", @@ -646,21 +661,21 @@ "case_id": "case0" }, "setup": { - "duration": 0.007204124936833978, + "duration": 0.02253958396613598, "outcome": "passed" }, 
"call": { - "duration": 1.8676417920505628, + "duration": 2.64042466704268, "outcome": "passed" }, "teardown": { - "duration": 0.0001557499635964632, + "duration": 0.0003636250039562583, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]", - "lineno": 115, + "lineno": 116, "outcome": "passed", "keywords": [ "test_chat_non_streaming_image[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]", @@ -679,21 +694,21 @@ "case_id": "case0" }, "setup": { - "duration": 0.008226625039242208, + "duration": 0.014634749968536198, "outcome": "passed" }, "call": { - "duration": 3.2724285409785807, + "duration": 5.126485540997237, "outcome": "passed" }, "teardown": { - "duration": 0.0002898330567404628, + "duration": 0.0002988330088555813, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]", - "lineno": 134, + "lineno": 135, "outcome": "skipped", "keywords": [ "test_chat_streaming_image[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]", @@ -712,22 +727,22 @@ "case_id": "case0" }, "setup": { - "duration": 0.011927249957807362, + "duration": 0.015854416065849364, "outcome": "passed" }, "call": { - "duration": 0.00017358292825520039, + "duration": 0.00038058299105614424, "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py', 143, 'Skipped: Skipping test_chat_streaming_image for model accounts/fireworks/models/llama-v3p3-70b-instruct on provider fireworks based on config.')" + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py', 144, 'Skipped: Skipping test_chat_streaming_image for model accounts/fireworks/models/llama-v3p3-70b-instruct on provider fireworks based on 
config.')" }, "teardown": { - "duration": 0.00014037499204277992, + "duration": 0.0002689170651137829, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[accounts/fireworks/models/llama4-scout-instruct-basic-case0]", - "lineno": 134, + "lineno": 135, "outcome": "passed", "keywords": [ "test_chat_streaming_image[accounts/fireworks/models/llama4-scout-instruct-basic-case0]", @@ -746,21 +761,21 @@ "case_id": "case0" }, "setup": { - "duration": 0.008731417008675635, + "duration": 0.011205915943719447, "outcome": "passed" }, "call": { - "duration": 2.8333610829431564, + "duration": 3.2596546669956297, "outcome": "passed" }, "teardown": { - "duration": 0.0005132080987095833, + "duration": 0.0006222500232979655, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]", - "lineno": 134, + "lineno": 135, "outcome": "passed", "keywords": [ "test_chat_streaming_image[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]", @@ -779,21 +794,21 @@ "case_id": "case0" }, "setup": { - "duration": 0.016569208004511893, + "duration": 0.016557667055167258, "outcome": "passed" }, "call": { - "duration": 2.302010750048794, + "duration": 4.930164708988741, "outcome": "passed" }, "teardown": { - "duration": 0.00016108399722725153, + "duration": 0.00048687495291233063, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[accounts/fireworks/models/llama-v3p3-70b-instruct-calendar]", - "lineno": 158, + "lineno": 159, "outcome": "passed", "keywords": [ "test_chat_non_streaming_structured_output[accounts/fireworks/models/llama-v3p3-70b-instruct-calendar]", @@ -812,21 +827,21 @@ "case_id": "calendar" }, "setup": { - "duration": 0.039960999973118305, + "duration": 0.00886166701093316, "outcome": "passed" }, 
"call": { - "duration": 7.661373125039972, + "duration": 0.8833738330285996, "outcome": "passed" }, "teardown": { - "duration": 0.00015833403449505568, + "duration": 0.00025583396200090647, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[accounts/fireworks/models/llama-v3p3-70b-instruct-math]", - "lineno": 158, + "lineno": 159, "outcome": "passed", "keywords": [ "test_chat_non_streaming_structured_output[accounts/fireworks/models/llama-v3p3-70b-instruct-math]", @@ -845,21 +860,21 @@ "case_id": "math" }, "setup": { - "duration": 0.006928625050932169, + "duration": 0.01297520799562335, "outcome": "passed" }, "call": { - "duration": 2.762534625013359, + "duration": 1.9960687910206616, "outcome": "passed" }, "teardown": { - "duration": 0.0006561250193044543, + "duration": 0.0005048330640420318, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[accounts/fireworks/models/llama4-scout-instruct-basic-calendar]", - "lineno": 158, + "lineno": 159, "outcome": "passed", "keywords": [ "test_chat_non_streaming_structured_output[accounts/fireworks/models/llama4-scout-instruct-basic-calendar]", @@ -878,21 +893,21 @@ "case_id": "calendar" }, "setup": { - "duration": 0.008602249901741743, + "duration": 0.007275875075720251, "outcome": "passed" }, "call": { - "duration": 0.8311484589939937, + "duration": 0.9094266659813002, "outcome": "passed" }, "teardown": { - "duration": 0.0005021670367568731, + "duration": 0.00028041598852723837, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[accounts/fireworks/models/llama4-scout-instruct-basic-math]", - "lineno": 158, + "lineno": 159, "outcome": "passed", "keywords": [ "test_chat_non_streaming_structured_output[accounts/fireworks/models/llama4-scout-instruct-basic-math]", @@ -911,21 
+926,21 @@ "case_id": "math" }, "setup": { - "duration": 0.015500334091484547, + "duration": 0.008899332955479622, "outcome": "passed" }, "call": { - "duration": 2.505719291046262, + "duration": 3.117967874975875, "outcome": "passed" }, "teardown": { - "duration": 0.0002619170118123293, + "duration": 0.00017600005958229303, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[accounts/fireworks/models/llama4-maverick-instruct-basic-calendar]", - "lineno": 158, + "lineno": 159, "outcome": "passed", "keywords": [ "test_chat_non_streaming_structured_output[accounts/fireworks/models/llama4-maverick-instruct-basic-calendar]", @@ -944,21 +959,21 @@ "case_id": "calendar" }, "setup": { - "duration": 0.01948041608557105, + "duration": 0.0073364999843761325, "outcome": "passed" }, "call": { - "duration": 0.6336237500654534, + "duration": 2.2714374579954892, "outcome": "passed" }, "teardown": { - "duration": 0.00016637507360428572, + "duration": 0.0001814159331843257, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[accounts/fireworks/models/llama4-maverick-instruct-basic-math]", - "lineno": 158, + "lineno": 159, "outcome": "passed", "keywords": [ "test_chat_non_streaming_structured_output[accounts/fireworks/models/llama4-maverick-instruct-basic-math]", @@ -977,21 +992,21 @@ "case_id": "math" }, "setup": { - "duration": 0.006810749997384846, + "duration": 0.010546459001488984, "outcome": "passed" }, "call": { - "duration": 1.9086956249084324, + "duration": 3.9954450000077486, "outcome": "passed" }, "teardown": { - "duration": 0.00018824997823685408, + "duration": 0.0002719159238040447, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[accounts/fireworks/models/llama-v3p3-70b-instruct-calendar]", - "lineno": 181, + "lineno": 182, 
"outcome": "passed", "keywords": [ "test_chat_streaming_structured_output[accounts/fireworks/models/llama-v3p3-70b-instruct-calendar]", @@ -1010,21 +1025,21 @@ "case_id": "calendar" }, "setup": { - "duration": 0.007881582947447896, + "duration": 0.012508000014349818, "outcome": "passed" }, "call": { - "duration": 0.7142562499502674, + "duration": 9.095425167004578, "outcome": "passed" }, "teardown": { - "duration": 0.0007035828894004226, + "duration": 0.00029200001154094934, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[accounts/fireworks/models/llama-v3p3-70b-instruct-math]", - "lineno": 181, + "lineno": 182, "outcome": "passed", "keywords": [ "test_chat_streaming_structured_output[accounts/fireworks/models/llama-v3p3-70b-instruct-math]", @@ -1043,21 +1058,21 @@ "case_id": "math" }, "setup": { - "duration": 0.00848070892971009, + "duration": 0.014769250061362982, "outcome": "passed" }, "call": { - "duration": 1.5210869159782305, + "duration": 1.9875252910424024, "outcome": "passed" }, "teardown": { - "duration": 0.00021216599270701408, + "duration": 0.0006288329605013132, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[accounts/fireworks/models/llama4-scout-instruct-basic-calendar]", - "lineno": 181, + "lineno": 182, "outcome": "passed", "keywords": [ "test_chat_streaming_structured_output[accounts/fireworks/models/llama4-scout-instruct-basic-calendar]", @@ -1076,21 +1091,21 @@ "case_id": "calendar" }, "setup": { - "duration": 0.009669666993431747, + "duration": 0.014440709026530385, "outcome": "passed" }, "call": { - "duration": 1.3105999580584466, + "duration": 1.2613736250204965, "outcome": "passed" }, "teardown": { - "duration": 0.000588166993111372, + "duration": 0.0001937919296324253, "outcome": "passed" } }, { "nodeid": 
"tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[accounts/fireworks/models/llama4-scout-instruct-basic-math]", - "lineno": 181, + "lineno": 182, "outcome": "passed", "keywords": [ "test_chat_streaming_structured_output[accounts/fireworks/models/llama4-scout-instruct-basic-math]", @@ -1109,21 +1124,21 @@ "case_id": "math" }, "setup": { - "duration": 0.007745541981421411, + "duration": 0.0071510839043185115, "outcome": "passed" }, "call": { - "duration": 3.250162083073519, + "duration": 2.2953888749470934, "outcome": "passed" }, "teardown": { - "duration": 0.0001455000601708889, + "duration": 0.00016245793085545301, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[accounts/fireworks/models/llama4-maverick-instruct-basic-calendar]", - "lineno": 181, + "lineno": 182, "outcome": "passed", "keywords": [ "test_chat_streaming_structured_output[accounts/fireworks/models/llama4-maverick-instruct-basic-calendar]", @@ -1142,21 +1157,21 @@ "case_id": "calendar" }, "setup": { - "duration": 0.009726207936182618, + "duration": 0.007294666953384876, "outcome": "passed" }, "call": { - "duration": 0.5564592910232022, + "duration": 2.194703874993138, "outcome": "passed" }, "teardown": { - "duration": 0.00019470800179988146, + "duration": 0.00017604196909815073, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[accounts/fireworks/models/llama4-maverick-instruct-basic-math]", - "lineno": 181, + "lineno": 182, "outcome": "passed", "keywords": [ "test_chat_streaming_structured_output[accounts/fireworks/models/llama4-maverick-instruct-basic-math]", @@ -1175,21 +1190,21 @@ "case_id": "math" }, "setup": { - "duration": 0.018431040924042463, + "duration": 0.019950625021010637, "outcome": "passed" }, "call": { - "duration": 3.8501765420660377, + "duration": 8.4994609169662, "outcome": 
"passed" }, "teardown": { - "duration": 0.00015279196668416262, + "duration": 0.00026404205709695816, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]", - "lineno": 203, + "lineno": 204, "outcome": "failed", "keywords": [ "test_chat_non_streaming_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]", @@ -1208,34 +1223,34 @@ "case_id": "case0" }, "setup": { - "duration": 0.007509749964810908, + "duration": 0.011928000021725893, "outcome": "passed" }, "call": { - "duration": 0.4906975000631064, + "duration": 0.5664792089955881, "outcome": "failed", "crash": { "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 222, + "lineno": 223, "message": "TypeError: object of type 'NoneType' has no len()" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 222, + "lineno": 223, "message": "TypeError" } ], - "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama-v3p3-70b-instruct'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_non_streaming_tool_calling(request, openai_client, model, provider, verification_config, case):\n test_name_base = 
get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n stream=False,\n )\n \n assert response.choices[0].message.role == \"assistant\"\n> assert len(response.choices[0].message.tool_calls) > 0\nE TypeError: object of type 'NoneType' has no len()\n\ntests/verifications/openai_api/test_chat_completion.py:222: TypeError" + "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama-v3p3-70b-instruct'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_non_streaming_tool_calling(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n stream=False,\n )\n \n assert response.choices[0].message.role == \"assistant\"\n> assert 
len(response.choices[0].message.tool_calls) > 0\nE TypeError: object of type 'NoneType' has no len()\n\ntests/verifications/openai_api/test_chat_completion.py:223: TypeError" }, "teardown": { - "duration": 0.00023995805531740189, + "duration": 0.00023799994960427284, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-case0]", - "lineno": 203, + "lineno": 204, "outcome": "failed", "keywords": [ "test_chat_non_streaming_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-case0]", @@ -1254,34 +1269,34 @@ "case_id": "case0" }, "setup": { - "duration": 0.007144959061406553, + "duration": 0.006813624990172684, "outcome": "passed" }, "call": { - "duration": 3.818257624981925, + "duration": 3.170418416033499, "outcome": "failed", "crash": { "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 222, + "lineno": 223, "message": "TypeError: object of type 'NoneType' has no len()" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 222, + "lineno": 223, "message": "TypeError" } ], - "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-scout-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n 
chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_non_streaming_tool_calling(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n stream=False,\n )\n \n assert response.choices[0].message.role == \"assistant\"\n> assert len(response.choices[0].message.tool_calls) > 0\nE TypeError: object of type 'NoneType' has no len()\n\ntests/verifications/openai_api/test_chat_completion.py:222: TypeError" + "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-scout-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_non_streaming_tool_calling(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on 
config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n stream=False,\n )\n \n assert response.choices[0].message.role == \"assistant\"\n> assert len(response.choices[0].message.tool_calls) > 0\nE TypeError: object of type 'NoneType' has no len()\n\ntests/verifications/openai_api/test_chat_completion.py:223: TypeError" }, "teardown": { - "duration": 0.0002668750239536166, + "duration": 0.0004129580920562148, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]", - "lineno": 203, + "lineno": 204, "outcome": "failed", "keywords": [ "test_chat_non_streaming_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]", @@ -1300,30 +1315,169 @@ "case_id": "case0" }, "setup": { - "duration": 0.015290249953977764, + "duration": 0.01656208303757012, "outcome": "passed" }, "call": { - "duration": 1.5883799999719486, + "duration": 22.76337137504015, "outcome": "failed", "crash": { "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 222, + "lineno": 223, "message": "TypeError: object of type 'NoneType' has no len()" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 222, + "lineno": 223, "message": "TypeError" } ], - "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-maverick-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get 
information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_non_streaming_tool_calling(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n stream=False,\n )\n \n assert response.choices[0].message.role == \"assistant\"\n> assert len(response.choices[0].message.tool_calls) > 0\nE TypeError: object of type 'NoneType' has no len()\n\ntests/verifications/openai_api/test_chat_completion.py:222: TypeError" + "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-maverick-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_non_streaming_tool_calling(request, openai_client, model, provider, verification_config, case):\n test_name_base = 
get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n stream=False,\n )\n \n assert response.choices[0].message.role == \"assistant\"\n> assert len(response.choices[0].message.tool_calls) > 0\nE TypeError: object of type 'NoneType' has no len()\n\ntests/verifications/openai_api/test_chat_completion.py:223: TypeError" }, "teardown": { - "duration": 0.0008049579337239265, + "duration": 0.00038704206235706806, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]", + "lineno": 228, + "outcome": "failed", + "keywords": [ + "test_chat_streaming_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]", + "parametrize", + "pytestmark", + "accounts/fireworks/models/llama-v3p3-70b-instruct-case0", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "accounts/fireworks/models/llama-v3p3-70b-instruct", + "case_id": "case0" + }, + "setup": { + "duration": 0.015727541991509497, + "outcome": "passed" + }, + "call": { + "duration": 0.5719050420448184, + "outcome": "failed", + "crash": { + "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", + "lineno": 274, + "message": "assert 0 == 1\n + where 0 = len({})" + }, + "traceback": [ + { + "path": "tests/verifications/openai_api/test_chat_completion.py", + "lineno": 274, + "message": "AssertionError" + } + ], + "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama-v3p3-70b-instruct'\nprovider = 'fireworks'\nverification_config = 
{'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_tool_calling(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n stream = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n stream=True,\n )\n \n # Accumulate partial tool_calls here\n tool_calls_buffer = {}\n current_id = None\n # Process streaming chunks\n for chunk in stream:\n choice = chunk.choices[0]\n delta = choice.delta\n \n if delta.tool_calls is None:\n continue\n \n for tool_call_delta in delta.tool_calls:\n if tool_call_delta.id:\n current_id = tool_call_delta.id\n call_id = current_id\n func_delta = tool_call_delta.function\n \n if call_id not in tool_calls_buffer:\n tool_calls_buffer[call_id] = {\n \"id\": call_id,\n \"type\": tool_call_delta.type,\n \"name\": func_delta.name,\n \"arguments\": \"\",\n }\n \n if func_delta.arguments:\n tool_calls_buffer[call_id][\"arguments\"] += func_delta.arguments\n \n> assert len(tool_calls_buffer) == 1\nE assert 0 == 1\nE + where 0 = len({})\n\ntests/verifications/openai_api/test_chat_completion.py:274: AssertionError" + }, + 
"teardown": { + "duration": 0.0003532909322530031, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-case0]", + "lineno": 228, + "outcome": "failed", + "keywords": [ + "test_chat_streaming_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-case0]", + "parametrize", + "pytestmark", + "accounts/fireworks/models/llama4-scout-instruct-basic-case0", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "accounts/fireworks/models/llama4-scout-instruct-basic", + "case_id": "case0" + }, + "setup": { + "duration": 0.011914041941054165, + "outcome": "passed" + }, + "call": { + "duration": 5.403063916950487, + "outcome": "failed", + "crash": { + "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", + "lineno": 274, + "message": "assert 0 == 1\n + where 0 = len({})" + }, + "traceback": [ + { + "path": "tests/verifications/openai_api/test_chat_completion.py", + "lineno": 274, + "message": "AssertionError" + } + ], + "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-scout-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n 
def test_chat_streaming_tool_calling(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n stream = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n stream=True,\n )\n \n # Accumulate partial tool_calls here\n tool_calls_buffer = {}\n current_id = None\n # Process streaming chunks\n for chunk in stream:\n choice = chunk.choices[0]\n delta = choice.delta\n \n if delta.tool_calls is None:\n continue\n \n for tool_call_delta in delta.tool_calls:\n if tool_call_delta.id:\n current_id = tool_call_delta.id\n call_id = current_id\n func_delta = tool_call_delta.function\n \n if call_id not in tool_calls_buffer:\n tool_calls_buffer[call_id] = {\n \"id\": call_id,\n \"type\": tool_call_delta.type,\n \"name\": func_delta.name,\n \"arguments\": \"\",\n }\n \n if func_delta.arguments:\n tool_calls_buffer[call_id][\"arguments\"] += func_delta.arguments\n \n> assert len(tool_calls_buffer) == 1\nE assert 0 == 1\nE + where 0 = len({})\n\ntests/verifications/openai_api/test_chat_completion.py:274: AssertionError" + }, + "teardown": { + "duration": 0.0005193749675527215, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]", + "lineno": 228, + "outcome": "failed", + "keywords": [ + "test_chat_streaming_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]", + "parametrize", + "pytestmark", + "accounts/fireworks/models/llama4-maverick-instruct-basic-case0", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": 
"accounts/fireworks/models/llama4-maverick-instruct-basic", + "case_id": "case0" + }, + "setup": { + "duration": 0.012608832912519574, + "outcome": "passed" + }, + "call": { + "duration": 7.587262416025624, + "outcome": "failed", + "crash": { + "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", + "lineno": 274, + "message": "assert 0 == 1\n + where 0 = len({})" + }, + "traceback": [ + { + "path": "tests/verifications/openai_api/test_chat_completion.py", + "lineno": 274, + "message": "AssertionError" + } + ], + "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-maverick-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_tool_calling(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n stream = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n stream=True,\n )\n \n # Accumulate partial tool_calls here\n tool_calls_buffer = {}\n current_id = None\n # Process streaming chunks\n for chunk in 
stream:\n choice = chunk.choices[0]\n delta = choice.delta\n \n if delta.tool_calls is None:\n continue\n \n for tool_call_delta in delta.tool_calls:\n if tool_call_delta.id:\n current_id = tool_call_delta.id\n call_id = current_id\n func_delta = tool_call_delta.function\n \n if call_id not in tool_calls_buffer:\n tool_calls_buffer[call_id] = {\n \"id\": call_id,\n \"type\": tool_call_delta.type,\n \"name\": func_delta.name,\n \"arguments\": \"\",\n }\n \n if func_delta.arguments:\n tool_calls_buffer[call_id][\"arguments\"] += func_delta.arguments\n \n> assert len(tool_calls_buffer) == 1\nE assert 0 == 1\nE + where 0 = len({})\n\ntests/verifications/openai_api/test_chat_completion.py:274: AssertionError" + }, + "teardown": { + "duration": 0.0008685829816386104, "outcome": "passed" } } - ] + ], + "run_timestamp": 1744328684 } diff --git a/tests/verifications/test_results/openai_1744264304.json b/tests/verifications/test_results/openai.json similarity index 77% rename from tests/verifications/test_results/openai_1744264304.json rename to tests/verifications/test_results/openai.json index fe9c2fcac..0c1892f7e 100644 --- a/tests/verifications/test_results/openai_1744264304.json +++ b/tests/verifications/test_results/openai.json @@ -1,13 +1,13 @@ { - "created": 1744264338.9923031, - "duration": 32.825536012649536, + "created": 1744328898.0248861, + "duration": 47.561042070388794, "exitcode": 0, "root": "/Users/erichuang/projects/llama-stack", "environment": {}, "summary": { - "passed": 22, - "total": 22, - "collected": 22 + "passed": 24, + "total": 24, + "collected": 24 }, "collectors": [ { @@ -27,112 +27,122 @@ { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[gpt-4o-earth]", "type": "Function", - "lineno": 72 + "lineno": 73 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[gpt-4o-saturn]", "type": "Function", - "lineno": 72 + "lineno": 73 }, { "nodeid": 
"tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[gpt-4o-mini-earth]", "type": "Function", - "lineno": 72 + "lineno": 73 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[gpt-4o-mini-saturn]", "type": "Function", - "lineno": 72 + "lineno": 73 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[gpt-4o-earth]", "type": "Function", - "lineno": 91 + "lineno": 92 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[gpt-4o-saturn]", "type": "Function", - "lineno": 91 + "lineno": 92 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[gpt-4o-mini-earth]", "type": "Function", - "lineno": 91 + "lineno": 92 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[gpt-4o-mini-saturn]", "type": "Function", - "lineno": 91 + "lineno": 92 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[gpt-4o-case0]", "type": "Function", - "lineno": 115 + "lineno": 116 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[gpt-4o-mini-case0]", "type": "Function", - "lineno": 115 + "lineno": 116 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[gpt-4o-case0]", "type": "Function", - "lineno": 134 + "lineno": 135 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[gpt-4o-mini-case0]", "type": "Function", - "lineno": 134 + "lineno": 135 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[gpt-4o-calendar]", "type": "Function", - "lineno": 158 + "lineno": 159 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[gpt-4o-math]", 
"type": "Function", - "lineno": 158 + "lineno": 159 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[gpt-4o-mini-calendar]", "type": "Function", - "lineno": 158 + "lineno": 159 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[gpt-4o-mini-math]", "type": "Function", - "lineno": 158 + "lineno": 159 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[gpt-4o-calendar]", "type": "Function", - "lineno": 181 + "lineno": 182 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[gpt-4o-math]", "type": "Function", - "lineno": 181 + "lineno": 182 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[gpt-4o-mini-calendar]", "type": "Function", - "lineno": 181 + "lineno": 182 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[gpt-4o-mini-math]", "type": "Function", - "lineno": 181 + "lineno": 182 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[gpt-4o-case0]", "type": "Function", - "lineno": 203 + "lineno": 204 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[gpt-4o-mini-case0]", "type": "Function", - "lineno": 203 + "lineno": 204 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[gpt-4o-case0]", + "type": "Function", + "lineno": 228 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[gpt-4o-mini-case0]", + "type": "Function", + "lineno": 228 } ] } @@ -140,7 +150,7 @@ "tests": [ { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[gpt-4o-earth]", 
- "lineno": 72, + "lineno": 73, "outcome": "passed", "keywords": [ "test_chat_non_streaming_basic[gpt-4o-earth]", @@ -159,21 +169,21 @@ "case_id": "earth" }, "setup": { - "duration": 0.05381445901002735, + "duration": 0.0694252080284059, "outcome": "passed" }, "call": { - "duration": 0.49848275003023446, + "duration": 0.5709165419684723, "outcome": "passed" }, "teardown": { - "duration": 0.00018287496641278267, + "duration": 0.0007626248989254236, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[gpt-4o-saturn]", - "lineno": 72, + "lineno": 73, "outcome": "passed", "keywords": [ "test_chat_non_streaming_basic[gpt-4o-saturn]", @@ -192,21 +202,21 @@ "case_id": "saturn" }, "setup": { - "duration": 0.007965500000864267, + "duration": 0.010281750001013279, "outcome": "passed" }, "call": { - "duration": 0.9293275829404593, + "duration": 0.6309260830748826, "outcome": "passed" }, "teardown": { - "duration": 0.00018229195848107338, + "duration": 0.0001824579667299986, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[gpt-4o-mini-earth]", - "lineno": 72, + "lineno": 73, "outcome": "passed", "keywords": [ "test_chat_non_streaming_basic[gpt-4o-mini-earth]", @@ -225,21 +235,21 @@ "case_id": "earth" }, "setup": { - "duration": 0.00875679193995893, + "duration": 0.007922374992631376, "outcome": "passed" }, "call": { - "duration": 0.5793640419142321, + "duration": 0.31756504194345325, "outcome": "passed" }, "teardown": { - "duration": 0.0005307920509949327, + "duration": 0.0005268750246614218, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[gpt-4o-mini-saturn]", - "lineno": 72, + "lineno": 73, "outcome": "passed", "keywords": [ "test_chat_non_streaming_basic[gpt-4o-mini-saturn]", @@ -258,21 +268,21 @@ "case_id": "saturn" }, "setup": { - "duration": 
0.01076845801435411, + "duration": 0.01643404201604426, "outcome": "passed" }, "call": { - "duration": 0.8752291660057381, + "duration": 0.7479908330133185, "outcome": "passed" }, "teardown": { - "duration": 0.0004834589781239629, + "duration": 0.0004037501057609916, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[gpt-4o-earth]", - "lineno": 91, + "lineno": 92, "outcome": "passed", "keywords": [ "test_chat_streaming_basic[gpt-4o-earth]", @@ -291,21 +301,21 @@ "case_id": "earth" }, "setup": { - "duration": 0.01662245800253004, + "duration": 0.021671707974746823, "outcome": "passed" }, "call": { - "duration": 0.8336971249664202, + "duration": 0.6701172919711098, "outcome": "passed" }, "teardown": { - "duration": 0.0024086670018732548, + "duration": 0.0005569590721279383, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[gpt-4o-saturn]", - "lineno": 91, + "lineno": 92, "outcome": "passed", "keywords": [ "test_chat_streaming_basic[gpt-4o-saturn]", @@ -324,21 +334,21 @@ "case_id": "saturn" }, "setup": { - "duration": 0.009416291955858469, + "duration": 0.015847125090658665, "outcome": "passed" }, "call": { - "duration": 0.43594495789147913, + "duration": 0.636536999954842, "outcome": "passed" }, "teardown": { - "duration": 0.0009131249971687794, + "duration": 0.00029395800083875656, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[gpt-4o-mini-earth]", - "lineno": 91, + "lineno": 92, "outcome": "passed", "keywords": [ "test_chat_streaming_basic[gpt-4o-mini-earth]", @@ -357,21 +367,21 @@ "case_id": "earth" }, "setup": { - "duration": 0.013155042077414691, + "duration": 0.011792832985520363, "outcome": "passed" }, "call": { - "duration": 0.6119836670113727, + "duration": 0.5610962919890881, "outcome": "passed" }, "teardown": { - "duration": 0.00023804197553545237, 
+ "duration": 0.0003578749019652605, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[gpt-4o-mini-saturn]", - "lineno": 91, + "lineno": 92, "outcome": "passed", "keywords": [ "test_chat_streaming_basic[gpt-4o-mini-saturn]", @@ -390,21 +400,21 @@ "case_id": "saturn" }, "setup": { - "duration": 0.009004916995763779, + "duration": 0.016500207944773138, "outcome": "passed" }, "call": { - "duration": 0.8327413749648258, + "duration": 0.8060244580265135, "outcome": "passed" }, "teardown": { - "duration": 0.00046841695439070463, + "duration": 0.0005296670133247972, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[gpt-4o-case0]", - "lineno": 115, + "lineno": 116, "outcome": "passed", "keywords": [ "test_chat_non_streaming_image[gpt-4o-case0]", @@ -423,21 +433,21 @@ "case_id": "case0" }, "setup": { - "duration": 0.009574208059348166, + "duration": 0.008338792016729712, "outcome": "passed" }, "call": { - "duration": 2.221839000005275, + "duration": 7.009252917021513, "outcome": "passed" }, "teardown": { - "duration": 0.00015945907216519117, + "duration": 0.0003042910248041153, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[gpt-4o-mini-case0]", - "lineno": 115, + "lineno": 116, "outcome": "passed", "keywords": [ "test_chat_non_streaming_image[gpt-4o-mini-case0]", @@ -456,21 +466,21 @@ "case_id": "case0" }, "setup": { - "duration": 0.0084402080392465, + "duration": 0.007238540914840996, "outcome": "passed" }, "call": { - "duration": 2.298736457945779, + "duration": 3.134693874977529, "outcome": "passed" }, "teardown": { - "duration": 0.0002423750702291727, + "duration": 0.0003104590578004718, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[gpt-4o-case0]", - "lineno": 134, + "lineno": 
135, "outcome": "passed", "keywords": [ "test_chat_streaming_image[gpt-4o-case0]", @@ -489,21 +499,21 @@ "case_id": "case0" }, "setup": { - "duration": 0.007330416003242135, + "duration": 0.0161851670127362, "outcome": "passed" }, "call": { - "duration": 4.062959833070636, + "duration": 3.0745719589758664, "outcome": "passed" }, "teardown": { - "duration": 0.00015470804646611214, + "duration": 0.00022620800882577896, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[gpt-4o-mini-case0]", - "lineno": 134, + "lineno": 135, "outcome": "passed", "keywords": [ "test_chat_streaming_image[gpt-4o-mini-case0]", @@ -522,21 +532,21 @@ "case_id": "case0" }, "setup": { - "duration": 0.019998832955025136, + "duration": 0.013220708002336323, "outcome": "passed" }, "call": { - "duration": 2.609432084020227, + "duration": 3.624867417034693, "outcome": "passed" }, "teardown": { - "duration": 0.005618917057290673, + "duration": 0.00020633300300687551, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[gpt-4o-calendar]", - "lineno": 158, + "lineno": 159, "outcome": "passed", "keywords": [ "test_chat_non_streaming_structured_output[gpt-4o-calendar]", @@ -555,21 +565,21 @@ "case_id": "calendar" }, "setup": { - "duration": 0.00867662497330457, + "duration": 0.017596833989955485, "outcome": "passed" }, "call": { - "duration": 0.6856697499752045, + "duration": 1.248568250099197, "outcome": "passed" }, "teardown": { - "duration": 0.00018445902969688177, + "duration": 0.0004248750628903508, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[gpt-4o-math]", - "lineno": 158, + "lineno": 159, "outcome": "passed", "keywords": [ "test_chat_non_streaming_structured_output[gpt-4o-math]", @@ -588,21 +598,21 @@ "case_id": "math" }, "setup": { - "duration": 
0.01139050000347197, + "duration": 0.01512012502644211, "outcome": "passed" }, "call": { - "duration": 2.764390083961189, + "duration": 8.170285542029887, "outcome": "passed" }, "teardown": { - "duration": 0.0003164170775562525, + "duration": 0.00043537491001188755, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[gpt-4o-mini-calendar]", - "lineno": 158, + "lineno": 159, "outcome": "passed", "keywords": [ "test_chat_non_streaming_structured_output[gpt-4o-mini-calendar]", @@ -621,21 +631,21 @@ "case_id": "calendar" }, "setup": { - "duration": 0.01321374997496605, + "duration": 0.010376665974035859, "outcome": "passed" }, "call": { - "duration": 0.8284227909753099, + "duration": 0.756480542011559, "outcome": "passed" }, "teardown": { - "duration": 0.00030170800164341927, + "duration": 0.00025695806834846735, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[gpt-4o-mini-math]", - "lineno": 158, + "lineno": 159, "outcome": "passed", "keywords": [ "test_chat_non_streaming_structured_output[gpt-4o-mini-math]", @@ -654,21 +664,21 @@ "case_id": "math" }, "setup": { - "duration": 0.013477458036504686, + "duration": 0.006846625008620322, "outcome": "passed" }, "call": { - "duration": 2.4146235829684883, + "duration": 2.6833953330060467, "outcome": "passed" }, "teardown": { - "duration": 0.00025754200760275126, + "duration": 0.00022558309137821198, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[gpt-4o-calendar]", - "lineno": 181, + "lineno": 182, "outcome": "passed", "keywords": [ "test_chat_streaming_structured_output[gpt-4o-calendar]", @@ -687,21 +697,21 @@ "case_id": "calendar" }, "setup": { - "duration": 0.006940583931282163, + "duration": 0.009646040969528258, "outcome": "passed" }, "call": { - "duration": 
0.5102092920569703, + "duration": 0.6117532079806551, "outcome": "passed" }, "teardown": { - "duration": 0.00023379107005894184, + "duration": 0.00015258300118148327, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[gpt-4o-math]", - "lineno": 181, + "lineno": 182, "outcome": "passed", "keywords": [ "test_chat_streaming_structured_output[gpt-4o-math]", @@ -720,21 +730,21 @@ "case_id": "math" }, "setup": { - "duration": 0.007166999974288046, + "duration": 0.012024458032101393, "outcome": "passed" }, "call": { - "duration": 3.5751801669830456, + "duration": 4.522625041077845, "outcome": "passed" }, "teardown": { - "duration": 0.00015041697770357132, + "duration": 0.0004230838967487216, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[gpt-4o-mini-calendar]", - "lineno": 181, + "lineno": 182, "outcome": "passed", "keywords": [ "test_chat_streaming_structured_output[gpt-4o-mini-calendar]", @@ -753,21 +763,21 @@ "case_id": "calendar" }, "setup": { - "duration": 0.010652625001966953, + "duration": 0.009566582972183824, "outcome": "passed" }, "call": { - "duration": 0.6648182499920949, + "duration": 2.5591942919418216, "outcome": "passed" }, "teardown": { - "duration": 0.0008647920330986381, + "duration": 0.0007555419579148293, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[gpt-4o-mini-math]", - "lineno": 181, + "lineno": 182, "outcome": "passed", "keywords": [ "test_chat_streaming_structured_output[gpt-4o-mini-math]", @@ -786,21 +796,21 @@ "case_id": "math" }, "setup": { - "duration": 0.007372208056040108, + "duration": 0.010828875005245209, "outcome": "passed" }, "call": { - "duration": 2.80747462506406, + "duration": 2.495122667052783, "outcome": "passed" }, "teardown": { - "duration": 0.00028124998789280653, + "duration": 
0.0002802090020850301, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[gpt-4o-case0]", - "lineno": 203, + "lineno": 204, "outcome": "passed", "keywords": [ "test_chat_non_streaming_tool_calling[gpt-4o-case0]", @@ -819,21 +829,21 @@ "case_id": "case0" }, "setup": { - "duration": 0.01625587500166148, + "duration": 0.012762792059220374, "outcome": "passed" }, "call": { - "duration": 0.6878769160248339, + "duration": 0.5655921660363674, "outcome": "passed" }, "teardown": { - "duration": 0.0002637499710544944, + "duration": 0.00022304197773337364, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[gpt-4o-mini-case0]", - "lineno": 203, + "lineno": 204, "outcome": "passed", "keywords": [ "test_chat_non_streaming_tool_calling[gpt-4o-mini-case0]", @@ -852,17 +862,84 @@ "case_id": "case0" }, "setup": { - "duration": 0.008817250025458634, + "duration": 0.03188708401285112, "outcome": "passed" }, "call": { - "duration": 0.7181202919455245, + "duration": 0.6159415419679135, "outcome": "passed" }, "teardown": { - "duration": 0.0017147079342976213, + "duration": 0.0005549580091610551, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[gpt-4o-case0]", + "lineno": 228, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_tool_calling[gpt-4o-case0]", + "parametrize", + "pytestmark", + "gpt-4o-case0", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "gpt-4o", + "case_id": "case0" + }, + "setup": { + "duration": 0.014768208027817309, + "outcome": "passed" + }, + "call": { + "duration": 0.47373537498060614, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0005811670562252402, + "outcome": "passed" + } + }, + { + "nodeid": 
"tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[gpt-4o-mini-case0]", + "lineno": 228, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_tool_calling[gpt-4o-mini-case0]", + "parametrize", + "pytestmark", + "gpt-4o-mini-case0", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "gpt-4o-mini", + "case_id": "case0" + }, + "setup": { + "duration": 0.010271625011228025, + "outcome": "passed" + }, + "call": { + "duration": 0.5656027499353513, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0025699170073494315, "outcome": "passed" } } - ] + ], + "run_timestamp": 1744328848 } diff --git a/tests/verifications/test_results/together_1744264258.json b/tests/verifications/test_results/together.json similarity index 77% rename from tests/verifications/test_results/together_1744264258.json rename to tests/verifications/test_results/together.json index c38dd52b5..2b23089e8 100644 --- a/tests/verifications/test_results/together_1744264258.json +++ b/tests/verifications/test_results/together.json @@ -1,15 +1,15 @@ { - "created": 1744264304.064288, - "duration": 42.470197916030884, + "created": 1744328847.853437, + "duration": 49.9419469833374, "exitcode": 1, "root": "/Users/erichuang/projects/llama-stack", "environment": {}, "summary": { - "passed": 21, - "failed": 10, + "passed": 22, + "failed": 12, "skipped": 2, - "total": 33, - "collected": 33 + "total": 36, + "collected": 36 }, "collectors": [ { @@ -29,167 +29,182 @@ { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-earth]", "type": "Function", - "lineno": 72 + "lineno": 73 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-saturn]", "type": "Function", - "lineno": 72 + "lineno": 73 }, { "nodeid": 
"tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-earth]", "type": "Function", - "lineno": 72 + "lineno": 73 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-saturn]", "type": "Function", - "lineno": 72 + "lineno": 73 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-earth]", "type": "Function", - "lineno": 72 + "lineno": 73 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-saturn]", "type": "Function", - "lineno": 72 + "lineno": 73 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-earth]", "type": "Function", - "lineno": 91 + "lineno": 92 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-saturn]", "type": "Function", - "lineno": 91 + "lineno": 92 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-earth]", "type": "Function", - "lineno": 91 + "lineno": 92 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-saturn]", "type": "Function", - "lineno": 91 + "lineno": 92 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-earth]", "type": "Function", - "lineno": 91 + "lineno": 92 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-saturn]", "type": "Function", - 
"lineno": 91 + "lineno": 92 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", "type": "Function", - "lineno": 115 + "lineno": 116 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", "type": "Function", - "lineno": 115 + "lineno": 116 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", "type": "Function", - "lineno": 115 + "lineno": 116 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", "type": "Function", - "lineno": 134 + "lineno": 135 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", "type": "Function", - "lineno": 134 + "lineno": 135 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", "type": "Function", - "lineno": 134 + "lineno": 135 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-calendar]", "type": "Function", - "lineno": 158 + "lineno": 159 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-math]", "type": "Function", - "lineno": 158 + "lineno": 159 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-calendar]", "type": "Function", - "lineno": 158 + "lineno": 159 }, { "nodeid": 
"tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-math]", "type": "Function", - "lineno": 158 + "lineno": 159 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-calendar]", "type": "Function", - "lineno": 158 + "lineno": 159 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-math]", "type": "Function", - "lineno": 158 + "lineno": 159 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-calendar]", "type": "Function", - "lineno": 181 + "lineno": 182 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-math]", "type": "Function", - "lineno": 181 + "lineno": 182 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-calendar]", "type": "Function", - "lineno": 181 + "lineno": 182 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-math]", "type": "Function", - "lineno": 181 + "lineno": 182 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-calendar]", "type": "Function", - "lineno": 181 + "lineno": 182 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-math]", "type": "Function", - "lineno": 181 + "lineno": 182 }, { "nodeid": 
"tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", "type": "Function", - "lineno": 203 + "lineno": 204 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", "type": "Function", - "lineno": 203 + "lineno": 204 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", "type": "Function", - "lineno": 203 + "lineno": 204 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", + "type": "Function", + "lineno": 228 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", + "type": "Function", + "lineno": 228 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", + "type": "Function", + "lineno": 228 } ] } @@ -197,7 +212,7 @@ "tests": [ { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-earth]", - "lineno": 72, + "lineno": 73, "outcome": "passed", "keywords": [ "test_chat_non_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-earth]", @@ -216,21 +231,21 @@ "case_id": "earth" }, "setup": { - "duration": 0.06113254197407514, + "duration": 0.15774220903404057, "outcome": "passed" }, "call": { - "duration": 1.0720349580515176, + "duration": 0.5396400419995189, "outcome": "passed" }, "teardown": { - "duration": 0.00015966698992997408, + "duration": 0.0002977499971166253, "outcome": "passed" } }, { "nodeid": 
"tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-saturn]", - "lineno": 72, + "lineno": 73, "outcome": "passed", "keywords": [ "test_chat_non_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-saturn]", @@ -249,21 +264,21 @@ "case_id": "saturn" }, "setup": { - "duration": 0.006908083101734519, + "duration": 0.015632833004929125, "outcome": "passed" }, "call": { - "duration": 0.5013210839824751, + "duration": 0.4675290420418605, "outcome": "passed" }, "teardown": { - "duration": 0.0005375830223783851, + "duration": 0.00029129208996891975, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-earth]", - "lineno": 72, + "lineno": 73, "outcome": "passed", "keywords": [ "test_chat_non_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-earth]", @@ -282,21 +297,21 @@ "case_id": "earth" }, "setup": { - "duration": 0.006910792086273432, + "duration": 0.01530187507160008, "outcome": "passed" }, "call": { - "duration": 0.5142245410243049, + "duration": 0.501894542016089, "outcome": "passed" }, "teardown": { - "duration": 0.0004069580463692546, + "duration": 0.0002060839906334877, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-saturn]", - "lineno": 72, + "lineno": 73, "outcome": "passed", "keywords": [ "test_chat_non_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-saturn]", @@ -315,21 +330,21 @@ "case_id": "saturn" }, "setup": { - "duration": 0.009730000048875809, + "duration": 0.014841833035461605, "outcome": "passed" }, "call": { - "duration": 0.40133179200347513, + "duration": 0.4202229160582647, "outcome": "passed" }, "teardown": { - "duration": 0.0004558749496936798, + "duration": 0.0005559159908443689, "outcome": "passed" } }, { "nodeid": 
"tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-earth]", - "lineno": 72, + "lineno": 73, "outcome": "passed", "keywords": [ "test_chat_non_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-earth]", @@ -348,21 +363,21 @@ "case_id": "earth" }, "setup": { - "duration": 0.008247417048551142, + "duration": 0.008204624988138676, "outcome": "passed" }, "call": { - "duration": 0.7914331250358373, + "duration": 1.991508833016269, "outcome": "passed" }, "teardown": { - "duration": 0.00020262505859136581, + "duration": 0.000539042055606842, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-saturn]", - "lineno": 72, + "lineno": 73, "outcome": "passed", "keywords": [ "test_chat_non_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-saturn]", @@ -381,21 +396,21 @@ "case_id": "saturn" }, "setup": { - "duration": 0.00922900007572025, + "duration": 0.022528667002916336, "outcome": "passed" }, "call": { - "duration": 1.2742049579974264, + "duration": 0.37111237505450845, "outcome": "passed" }, "teardown": { - "duration": 0.000688415952026844, + "duration": 0.0005334159359335899, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-earth]", - "lineno": 91, + "lineno": 92, "outcome": "passed", "keywords": [ "test_chat_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-earth]", @@ -414,21 +429,21 @@ "case_id": "earth" }, "setup": { - "duration": 0.006949124974198639, + "duration": 0.00922920904122293, "outcome": "passed" }, "call": { - "duration": 0.4681705000111833, + "duration": 1.1684916669037193, "outcome": "passed" }, "teardown": { - "duration": 0.00017795804888010025, + "duration": 0.0002740409690886736, "outcome": "passed" } }, { 
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-saturn]", - "lineno": 91, + "lineno": 92, "outcome": "passed", "keywords": [ "test_chat_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-saturn]", @@ -447,21 +462,21 @@ "case_id": "saturn" }, "setup": { - "duration": 0.008564374991692603, + "duration": 0.010883333045057952, "outcome": "passed" }, "call": { - "duration": 1.7430362500017509, + "duration": 0.4275277080014348, "outcome": "passed" }, "teardown": { - "duration": 0.00015312491450458765, + "duration": 0.00043112505227327347, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-earth]", - "lineno": 91, + "lineno": 92, "outcome": "failed", "keywords": [ "test_chat_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-earth]", @@ -480,34 +495,34 @@ "case_id": "earth" }, "setup": { - "duration": 0.007404124946333468, + "duration": 0.012945958063937724, "outcome": "passed" }, "call": { - "duration": 0.515926624997519, + "duration": 0.5551295839250088, "outcome": "failed", "crash": { "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 109, + "lineno": 110, "message": "IndexError: list index out of range" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 109, + "lineno": 110, "message": "IndexError" } ], - "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'case_id': 'earth', 'input': {'messages': [{'content': 'Which 
planet do humans live on?', 'role': 'user'}]}, 'output': 'Earth'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_basic\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_basic(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:109: IndexError" + "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'case_id': 'earth', 'input': {'messages': [{'content': 'Which planet do humans live on?', 'role': 'user'}]}, 'output': 'Earth'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_basic\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_basic(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n 
stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:110: IndexError" }, "teardown": { - "duration": 0.0002389999572187662, + "duration": 0.0002744169905781746, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-saturn]", - "lineno": 91, + "lineno": 92, "outcome": "failed", "keywords": [ "test_chat_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-saturn]", @@ -526,34 +541,34 @@ "case_id": "saturn" }, "setup": { - "duration": 0.0071305419551208615, + "duration": 0.017372542060911655, "outcome": "passed" }, "call": { - "duration": 0.37054662499576807, + "duration": 0.3579877089941874, "outcome": "failed", "crash": { "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 109, + "lineno": 110, "message": "IndexError: list index out of range" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 109, + "lineno": 110, "message": "IndexError" } ], - "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'case_id': 'saturn', 'input': {'messages': [{'content': 'Which planet has rings around it with a name starting with letter S?', 'role': 'user'}]}, 'output': 'Saturn'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_basic\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_basic(request, openai_client, 
model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:109: IndexError" + "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'case_id': 'saturn', 'input': {'messages': [{'content': 'Which planet has rings around it with a name starting with letter S?', 'role': 'user'}]}, 'output': 'Saturn'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_basic\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_basic(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:110: IndexError" }, 
"teardown": { - "duration": 0.0006014580139890313, + "duration": 0.0005445419810712337, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-earth]", - "lineno": 91, + "lineno": 92, "outcome": "failed", "keywords": [ "test_chat_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-earth]", @@ -572,34 +587,34 @@ "case_id": "earth" }, "setup": { - "duration": 0.007489709067158401, + "duration": 0.014297832967713475, "outcome": "passed" }, "call": { - "duration": 0.7767745839664713, + "duration": 0.8067362919682637, "outcome": "failed", "crash": { "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 109, + "lineno": 110, "message": "IndexError: list index out of range" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 109, + "lineno": 110, "message": "IndexError" } ], - "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'case_id': 'earth', 'input': {'messages': [{'content': 'Which planet do humans live on?', 'role': 'user'}]}, 'output': 'Earth'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_basic\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_basic(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider 
{provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:109: IndexError" + "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'case_id': 'earth', 'input': {'messages': [{'content': 'Which planet do humans live on?', 'role': 'user'}]}, 'output': 'Earth'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_basic\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_basic(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:110: IndexError" }, "teardown": { - "duration": 0.00025491707492619753, + "duration": 0.0003220830112695694, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-saturn]", - 
"lineno": 91, + "lineno": 92, "outcome": "failed", "keywords": [ "test_chat_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-saturn]", @@ -618,34 +633,34 @@ "case_id": "saturn" }, "setup": { - "duration": 0.006736499955877662, + "duration": 0.008816750021651387, "outcome": "passed" }, "call": { - "duration": 0.43948554201051593, + "duration": 0.5383605000097305, "outcome": "failed", "crash": { "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 109, + "lineno": 110, "message": "IndexError: list index out of range" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 109, + "lineno": 110, "message": "IndexError" } ], - "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'case_id': 'saturn', 'input': {'messages': [{'content': 'Which planet has rings around it with a name starting with letter S?', 'role': 'user'}]}, 'output': 'Saturn'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_basic\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_basic(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += 
chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:109: IndexError" + "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'case_id': 'saturn', 'input': {'messages': [{'content': 'Which planet has rings around it with a name starting with letter S?', 'role': 'user'}]}, 'output': 'Saturn'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_basic\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_basic(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:110: IndexError" }, "teardown": { - "duration": 0.0002264160430058837, + "duration": 0.00018316600471735, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", - "lineno": 115, + "lineno": 116, "outcome": "skipped", "keywords": [ "test_chat_non_streaming_image[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", @@ -664,22 +679,22 @@ "case_id": "case0" }, 
"setup": { - "duration": 0.007171708042733371, + "duration": 0.0074389580404385924, "outcome": "passed" }, "call": { - "duration": 0.00013554200995713472, + "duration": 0.00014933396596461535, "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py', 124, 'Skipped: Skipping test_chat_non_streaming_image for model meta-llama/Llama-3.3-70B-Instruct-Turbo on provider together based on config.')" + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py', 125, 'Skipped: Skipping test_chat_non_streaming_image for model meta-llama/Llama-3.3-70B-Instruct-Turbo on provider together based on config.')" }, "teardown": { - "duration": 0.0001235839445143938, + "duration": 0.00012462493032217026, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", - "lineno": 115, + "lineno": 116, "outcome": "passed", "keywords": [ "test_chat_non_streaming_image[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", @@ -698,21 +713,21 @@ "case_id": "case0" }, "setup": { - "duration": 0.008639499894343317, + "duration": 0.013580625061877072, "outcome": "passed" }, "call": { - "duration": 1.4001279999502003, + "duration": 2.89831429196056, "outcome": "passed" }, "teardown": { - "duration": 0.00014812499284744263, + "duration": 0.000491458922624588, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", - "lineno": 115, + "lineno": 116, "outcome": "passed", "keywords": [ "test_chat_non_streaming_image[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", @@ -731,21 +746,21 @@ "case_id": "case0" }, "setup": { - "duration": 0.015450250008143485, + "duration": 0.008266666904091835, "outcome": "passed" }, "call": { - "duration": 
3.3522649579681456, + "duration": 3.8873212080216035, "outcome": "passed" }, "teardown": { - "duration": 0.00041629199404269457, + "duration": 0.00016850000247359276, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", - "lineno": 134, + "lineno": 135, "outcome": "skipped", "keywords": [ "test_chat_streaming_image[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", @@ -764,22 +779,22 @@ "case_id": "case0" }, "setup": { - "duration": 0.007634000037796795, + "duration": 0.0080461660400033, "outcome": "passed" }, "call": { - "duration": 0.0001563339028507471, + "duration": 0.00014758307952433825, "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py', 143, 'Skipped: Skipping test_chat_streaming_image for model meta-llama/Llama-3.3-70B-Instruct-Turbo on provider together based on config.')" + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py', 144, 'Skipped: Skipping test_chat_streaming_image for model meta-llama/Llama-3.3-70B-Instruct-Turbo on provider together based on config.')" }, "teardown": { - "duration": 0.0001324999611824751, + "duration": 0.00012695800978690386, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", - "lineno": 134, + "lineno": 135, "outcome": "failed", "keywords": [ "test_chat_streaming_image[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", @@ -798,34 +813,34 @@ "case_id": "case0" }, "setup": { - "duration": 0.007050334010273218, + "duration": 0.00845700001809746, "outcome": "passed" }, "call": { - "duration": 1.7063317500287667, + "duration": 1.6604419159702957, "outcome": "failed", "crash": { "path": 
"/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 152, + "lineno": 153, "message": "IndexError: list index out of range" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 152, + "lineno": 153, "message": "IndexError" } ], - "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'input': {'messages': [{'content': [{'text': 'What is in this image?', 'type': 'text'}, {'image_url': {...}, 'type': 'image_url'}], 'role': 'user'}]}, 'output': 'llama'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_image\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_image(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:152: IndexError" + "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': 
{'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'input': {'messages': [{'content': [{'text': 'What is in this image?', 'type': 'text'}, {'image_url': {...}, 'type': 'image_url'}], 'role': 'user'}]}, 'output': 'llama'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_image\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_image(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:153: IndexError" }, "teardown": { - "duration": 0.0002109999768435955, + "duration": 0.00033458403777331114, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", - "lineno": 134, + "lineno": 135, "outcome": "failed", "keywords": [ "test_chat_streaming_image[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", @@ -844,34 +859,34 @@ "case_id": "case0" }, "setup": { - "duration": 0.006729208980686963, + "duration": 0.012580333976075053, "outcome": "passed" }, "call": { - "duration": 3.829621708020568, + "duration": 4.728511792025529, "outcome": "failed", "crash": { "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 152, + "lineno": 153, "message": "IndexError: list index out of range" }, "traceback": [ { "path": 
"tests/verifications/openai_api/test_chat_completion.py", - "lineno": 152, + "lineno": 153, "message": "IndexError" } ], - "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'input': {'messages': [{'content': [{'text': 'What is in this image?', 'type': 'text'}, {'image_url': {...}, 'type': 'image_url'}], 'role': 'user'}]}, 'output': 'llama'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_image\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_image(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:152: IndexError" + "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'input': {'messages': [{'content': [{'text': 'What is in this image?', 'type': 
'text'}, {'image_url': {...}, 'type': 'image_url'}], 'role': 'user'}]}, 'output': 'llama'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_image\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_image(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:153: IndexError" }, "teardown": { - "duration": 0.0002882500411942601, + "duration": 0.00023266696371138096, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-calendar]", - "lineno": 158, + "lineno": 159, "outcome": "passed", "keywords": [ "test_chat_non_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-calendar]", @@ -890,21 +905,21 @@ "case_id": "calendar" }, "setup": { - "duration": 0.007713916013017297, + "duration": 0.011554082971997559, "outcome": "passed" }, "call": { - "duration": 2.48285808309447, + "duration": 1.3857994999270886, "outcome": "passed" }, "teardown": { - "duration": 0.00020350003615021706, + "duration": 0.0003951250109821558, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-math]", - "lineno": 158, + "lineno": 159, "outcome": "passed", "keywords": [ 
"test_chat_non_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-math]", @@ -923,21 +938,21 @@ "case_id": "math" }, "setup": { - "duration": 0.010098082944750786, + "duration": 0.007673708954825997, "outcome": "passed" }, "call": { - "duration": 1.6994713749736547, + "duration": 3.082161583006382, "outcome": "passed" }, "teardown": { - "duration": 0.00014512497000396252, + "duration": 0.0002532500075176358, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-calendar]", - "lineno": 158, + "lineno": 159, "outcome": "passed", "keywords": [ "test_chat_non_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-calendar]", @@ -956,21 +971,21 @@ "case_id": "calendar" }, "setup": { - "duration": 0.006934792036190629, + "duration": 0.014791041961871088, "outcome": "passed" }, "call": { - "duration": 1.277176082949154, + "duration": 0.6918012499809265, "outcome": "passed" }, "teardown": { - "duration": 0.0004985419800505042, + "duration": 0.00027070799842476845, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-math]", - "lineno": 158, + "lineno": 159, "outcome": "passed", "keywords": [ "test_chat_non_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-math]", @@ -989,21 +1004,21 @@ "case_id": "math" }, "setup": { - "duration": 0.012558708898723125, + "duration": 0.014746625092811882, "outcome": "passed" }, "call": { - "duration": 2.442075416096486, + "duration": 3.5890139170223847, "outcome": "passed" }, "teardown": { - "duration": 0.0003505420172587037, + "duration": 0.00030137505382299423, "outcome": "passed" } }, { "nodeid": 
"tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-calendar]", - "lineno": 158, + "lineno": 159, "outcome": "passed", "keywords": [ "test_chat_non_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-calendar]", @@ -1022,21 +1037,21 @@ "case_id": "calendar" }, "setup": { - "duration": 0.012642999994568527, + "duration": 0.036798374960199, "outcome": "passed" }, "call": { - "duration": 0.9305703329155222, + "duration": 0.6914895409718156, "outcome": "passed" }, "teardown": { - "duration": 0.00016004196368157864, + "duration": 0.00023716699797660112, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-math]", - "lineno": 158, + "lineno": 159, "outcome": "passed", "keywords": [ "test_chat_non_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-math]", @@ -1055,21 +1070,21 @@ "case_id": "math" }, "setup": { - "duration": 0.008792415959760547, + "duration": 0.05965254199691117, "outcome": "passed" }, "call": { - "duration": 2.194098167004995, + "duration": 2.609581291093491, "outcome": "passed" }, "teardown": { - "duration": 0.0003667499404400587, + "duration": 0.0002674580318853259, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-calendar]", - "lineno": 181, + "lineno": 182, "outcome": "passed", "keywords": [ "test_chat_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-calendar]", @@ -1088,21 +1103,21 @@ "case_id": "calendar" }, "setup": { - "duration": 0.01219504198525101, + "duration": 0.014533916022628546, "outcome": "passed" }, "call": { - "duration": 2.045097667025402, + "duration": 0.6227063750848174, "outcome": "passed" }, "teardown": { - 
"duration": 0.00029958400409668684, + "duration": 0.00019699998665601015, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-math]", - "lineno": 181, + "lineno": 182, "outcome": "passed", "keywords": [ "test_chat_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-math]", @@ -1121,21 +1136,21 @@ "case_id": "math" }, "setup": { - "duration": 0.014203459024429321, + "duration": 0.009818125050514936, "outcome": "passed" }, "call": { - "duration": 1.3079068749211729, + "duration": 5.144610875053331, "outcome": "passed" }, "teardown": { - "duration": 0.0001914579188451171, + "duration": 0.00045220903120934963, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-calendar]", - "lineno": 181, + "lineno": 182, "outcome": "failed", "keywords": [ "test_chat_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-calendar]", @@ -1154,34 +1169,34 @@ "case_id": "calendar" }, "setup": { - "duration": 0.04714570892974734, + "duration": 0.012392290984280407, "outcome": "passed" }, "call": { - "duration": 0.44743770791683346, + "duration": 0.777625665999949, "outcome": "failed", "crash": { "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 200, + "lineno": 201, "message": "IndexError: list index out of range" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 200, + "lineno": 201, "message": "IndexError" } ], - "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 
'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'case_id': 'calendar', 'input': {'messages': [{'content': 'Extract the event information.', 'role': 'system'}, {'cont...articipants'], 'title': 'CalendarEvent', 'type': 'object'}}, 'type': 'json_schema'}}, 'output': 'valid_calendar_event'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_structured_output\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_structured_output(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n response_format=case[\"input\"][\"response_format\"],\n stream=True,\n )\n maybe_json_content = \"\"\n for chunk in response:\n> maybe_json_content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:200: IndexError" + "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'case_id': 'calendar', 'input': {'messages': [{'content': 'Extract the event information.', 'role': 'system'}, {'cont...articipants'], 'title': 'CalendarEvent', 'type': 'object'}}, 'type': 'json_schema'}}, 'output': 'valid_calendar_event'}\n\n @pytest.mark.parametrize(\n \"case\",\n 
chat_completion_test_cases[\"test_chat_structured_output\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_structured_output(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n response_format=case[\"input\"][\"response_format\"],\n stream=True,\n )\n maybe_json_content = \"\"\n for chunk in response:\n> maybe_json_content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:201: IndexError" }, "teardown": { - "duration": 0.00022199994418770075, + "duration": 0.000559916952624917, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-math]", - "lineno": 181, + "lineno": 182, "outcome": "failed", "keywords": [ "test_chat_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-math]", @@ -1200,34 +1215,34 @@ "case_id": "math" }, "setup": { - "duration": 0.012237709015607834, + "duration": 0.010390624986030161, "outcome": "passed" }, "call": { - "duration": 3.180020791012794, + "duration": 2.680094916955568, "outcome": "failed", "crash": { "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 200, + "lineno": 201, "message": "IndexError: list index out of range" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 200, + "lineno": 201, "message": "IndexError" } ], - "longrepr": "request = >\nopenai_client = \nmodel = 
'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'case_id': 'math', 'input': {'messages': [{'content': 'You are a helpful math tutor. Guide the user through the solut... ['steps', 'final_answer'], 'title': 'MathReasoning', ...}}, 'type': 'json_schema'}}, 'output': 'valid_math_reasoning'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_structured_output\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_structured_output(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n response_format=case[\"input\"][\"response_format\"],\n stream=True,\n )\n maybe_json_content = \"\"\n for chunk in response:\n> maybe_json_content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:200: IndexError" + "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'case_id': 'math', 'input': {'messages': [{'content': 'You are a helpful math tutor. 
Guide the user through the solut... ['steps', 'final_answer'], 'title': 'MathReasoning', ...}}, 'type': 'json_schema'}}, 'output': 'valid_math_reasoning'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_structured_output\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_structured_output(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n response_format=case[\"input\"][\"response_format\"],\n stream=True,\n )\n maybe_json_content = \"\"\n for chunk in response:\n> maybe_json_content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:201: IndexError" }, "teardown": { - "duration": 0.000273333047516644, + "duration": 0.00041987502481788397, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-calendar]", - "lineno": 181, + "lineno": 182, "outcome": "failed", "keywords": [ "test_chat_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-calendar]", @@ -1246,34 +1261,34 @@ "case_id": "calendar" }, "setup": { - "duration": 0.013312208000570536, + "duration": 0.01190529193263501, "outcome": "passed" }, "call": { - "duration": 0.4110311249969527, + "duration": 0.6690819580107927, "outcome": "failed", "crash": { "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 200, + "lineno": 201, "message": "IndexError: list index out of range" }, "traceback": [ 
{ "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 200, + "lineno": 201, "message": "IndexError" } ], - "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'case_id': 'calendar', 'input': {'messages': [{'content': 'Extract the event information.', 'role': 'system'}, {'cont...articipants'], 'title': 'CalendarEvent', 'type': 'object'}}, 'type': 'json_schema'}}, 'output': 'valid_calendar_event'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_structured_output\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_structured_output(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n response_format=case[\"input\"][\"response_format\"],\n stream=True,\n )\n maybe_json_content = \"\"\n for chunk in response:\n> maybe_json_content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:200: IndexError" + "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 
'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'case_id': 'calendar', 'input': {'messages': [{'content': 'Extract the event information.', 'role': 'system'}, {'cont...articipants'], 'title': 'CalendarEvent', 'type': 'object'}}, 'type': 'json_schema'}}, 'output': 'valid_calendar_event'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_structured_output\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_structured_output(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n response_format=case[\"input\"][\"response_format\"],\n stream=True,\n )\n maybe_json_content = \"\"\n for chunk in response:\n> maybe_json_content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:201: IndexError" }, "teardown": { - "duration": 0.00022975006140768528, + "duration": 0.000247166957706213, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-math]", - "lineno": 181, + "lineno": 182, "outcome": "failed", "keywords": [ "test_chat_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-math]", @@ -1292,34 +1307,34 @@ "case_id": "math" }, "setup": { - "duration": 0.006676917080767453, + "duration": 0.009588208980858326, "outcome": "passed" }, "call": { - "duration": 2.316411833046004, + "duration": 2.4867218340514228, "outcome": "failed", "crash": { "path": 
"/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 200, + "lineno": 201, "message": "IndexError: list index out of range" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 200, + "lineno": 201, "message": "IndexError" } ], - "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'case_id': 'math', 'input': {'messages': [{'content': 'You are a helpful math tutor. Guide the user through the solut... ['steps', 'final_answer'], 'title': 'MathReasoning', ...}}, 'type': 'json_schema'}}, 'output': 'valid_math_reasoning'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_structured_output\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_structured_output(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n response_format=case[\"input\"][\"response_format\"],\n stream=True,\n )\n maybe_json_content = \"\"\n for chunk in response:\n> maybe_json_content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:200: IndexError" + "longrepr": "request = >\nopenai_client = \nmodel = 
'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'case_id': 'math', 'input': {'messages': [{'content': 'You are a helpful math tutor. Guide the user through the solut... ['steps', 'final_answer'], 'title': 'MathReasoning', ...}}, 'type': 'json_schema'}}, 'output': 'valid_math_reasoning'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_structured_output\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_structured_output(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n response_format=case[\"input\"][\"response_format\"],\n stream=True,\n )\n maybe_json_content = \"\"\n for chunk in response:\n> maybe_json_content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:201: IndexError" }, "teardown": { - "duration": 0.000245374976657331, + "duration": 0.00022487505339086056, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", - "lineno": 203, + "lineno": 204, "outcome": "passed", "keywords": [ "test_chat_non_streaming_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", @@ -1338,21 +1353,21 @@ "case_id": "case0" }, "setup": { - 
"duration": 0.007064500008709729, + "duration": 0.008509417064487934, "outcome": "passed" }, "call": { - "duration": 0.606806542025879, + "duration": 0.45511841599363834, "outcome": "passed" }, "teardown": { - "duration": 0.00046320806723088026, + "duration": 0.00031033402774482965, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", - "lineno": 203, + "lineno": 204, "outcome": "passed", "keywords": [ "test_chat_non_streaming_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", @@ -1371,21 +1386,21 @@ "case_id": "case0" }, "setup": { - "duration": 0.009071375010535121, + "duration": 0.01352791697718203, "outcome": "passed" }, "call": { - "duration": 0.41908070899080485, + "duration": 0.7166531670372933, "outcome": "passed" }, "teardown": { - "duration": 0.00026074994821101427, + "duration": 0.00031470798421651125, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", - "lineno": 203, + "lineno": 204, "outcome": "passed", "keywords": [ "test_chat_non_streaming_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", @@ -1404,17 +1419,143 @@ "case_id": "case0" }, "setup": { - "duration": 0.0068333749659359455, + "duration": 0.01369225000962615, "outcome": "passed" }, "call": { - "duration": 0.8904451669659466, + "duration": 0.34134254103992134, "outcome": "passed" }, "teardown": { - "duration": 0.0005833340110257268, + "duration": 0.0002922919811680913, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", + "lineno": 228, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", + 
"parametrize", + "pytestmark", + "meta-llama/Llama-3.3-70B-Instruct-Turbo-case0", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo", + "case_id": "case0" + }, + "setup": { + "duration": 0.025748749962076545, + "outcome": "passed" + }, + "call": { + "duration": 0.7462511250050738, + "outcome": "passed" + }, + "teardown": { + "duration": 0.00030449999030679464, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", + "lineno": 228, + "outcome": "failed", + "keywords": [ + "test_chat_streaming_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", + "parametrize", + "pytestmark", + "meta-llama/Llama-4-Scout-17B-16E-Instruct-case0", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "case_id": "case0" + }, + "setup": { + "duration": 0.015131957945413888, + "outcome": "passed" + }, + "call": { + "duration": 0.4556894999695942, + "outcome": "failed", + "crash": { + "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", + "lineno": 251, + "message": "IndexError: list index out of range" + }, + "traceback": [ + { + "path": "tests/verifications/openai_api/test_chat_completion.py", + "lineno": 251, + "message": "IndexError" + } + ], + "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'input': {'messages': 
[{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_tool_calling(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n stream = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n stream=True,\n )\n \n # Accumulate partial tool_calls here\n tool_calls_buffer = {}\n current_id = None\n # Process streaming chunks\n for chunk in stream:\n> choice = chunk.choices[0]\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:251: IndexError" + }, + "teardown": { + "duration": 0.000539042055606842, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", + "lineno": 228, + "outcome": "failed", + "keywords": [ + "test_chat_streaming_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", + "parametrize", + "pytestmark", + "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", + "case_id": "case0" + }, + "setup": { + "duration": 0.016429082956165075, + "outcome": "passed" + }, + "call": { + "duration": 0.3677835420239717, + "outcome": 
"failed", + "crash": { + "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", + "lineno": 251, + "message": "IndexError: list index out of range" + }, + "traceback": [ + { + "path": "tests/verifications/openai_api/test_chat_completion.py", + "lineno": 251, + "message": "IndexError" + } + ], + "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_tool_calling(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n stream = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n stream=True,\n )\n \n # Accumulate partial tool_calls here\n tool_calls_buffer = {}\n current_id = None\n # Process streaming chunks\n for chunk in stream:\n> choice = chunk.choices[0]\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:251: IndexError" + }, + "teardown": { + "duration": 0.001610000035725534, "outcome": "passed" } } 
- ] + ], + "run_timestamp": 1744328795 } From 6aa459b00c55c31bcd265c6876bdb0f6f1d70123 Mon Sep 17 00:00:00 2001 From: Mark Campbell Date: Fri, 11 Apr 2025 12:04:13 +0100 Subject: [PATCH 10/10] docs: fix errors in kubernetes deployment guide (#1914) # What does this PR do? [Provide a short summary of what this PR does and why. Link to relevant issues if applicable.] Fixes a couple of errors in PVC/Secret setup and adds context for expected Hugging Face token [//]: # (If resolving an issue, uncomment and update the line below) [//]: # (Closes #[issue-number]) ## Test Plan [Describe the tests you ran to verify your changes with result summaries. *Provide clear instructions so the plan can be easily re-executed.*] [//]: # (## Documentation) --- docs/source/distributions/kubernetes_deployment.md | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/docs/source/distributions/kubernetes_deployment.md b/docs/source/distributions/kubernetes_deployment.md index 2daf9d785..21ec02012 100644 --- a/docs/source/distributions/kubernetes_deployment.md +++ b/docs/source/distributions/kubernetes_deployment.md @@ -11,7 +11,12 @@ First, create a local Kubernetes cluster via Kind: kind create cluster --image kindest/node:v1.32.0 --name llama-stack-test ``` -First, create a Kubernetes PVC and Secret for downloading and storing Hugging Face model: +First set your hugging face token as an environment variable. +``` +export HF_TOKEN=$(echo -n "your-hf-token" | base64) +``` + +Now create a Kubernetes PVC and Secret for downloading and storing Hugging Face model: ``` cat </tmp/test-vllm-llama-stack/Containerfile.llama-stack-run-k8s <$tmp_dir/Containerfile.llama-stack-run-k8s <