From 5c010e234a13b064803884e5e9c1fd9ce47f3741 Mon Sep 17 00:00:00 2001 From: Michael Clifford Date: Wed, 9 Apr 2025 09:56:41 -0400 Subject: [PATCH 01/11] fix: add tavily_search option to playground api (#1909) # What does this PR do? This PR adds the "TAVILY_SEARCH_API_KEY" option to the playground to enable the use of the websearch tool. [//]: # (If resolving an issue, uncomment and update the line below) [//]: # (Closes #[issue-number]) ## Test Plan ``` export TAVILY_SEARCH_API_KEY=*** streamlit run llama_stack/distribution/ui/app.py ``` Without this change the builtin websearch tool will fail due to missing API key. [//]: # (## Documentation) Related to #1902 Signed-off-by: Michael Clifford --- llama_stack/distribution/ui/modules/api.py | 1 + 1 file changed, 1 insertion(+) diff --git a/llama_stack/distribution/ui/modules/api.py b/llama_stack/distribution/ui/modules/api.py index 40caccda0..d5395c5b9 100644 --- a/llama_stack/distribution/ui/modules/api.py +++ b/llama_stack/distribution/ui/modules/api.py @@ -19,6 +19,7 @@ class LlamaStackApi: "together_api_key": os.environ.get("TOGETHER_API_KEY", ""), "sambanova_api_key": os.environ.get("SAMBANOVA_API_KEY", ""), "openai_api_key": os.environ.get("OPENAI_API_KEY", ""), + "tavily_search_api_key": os.environ.get("TAVILY_SEARCH_API_KEY", ""), }, ) From b93318e40bf8a6ad399f4fa1322456fe0e8797ef Mon Sep 17 00:00:00 2001 From: Francisco Arceo Date: Wed, 9 Apr 2025 10:40:56 -0600 Subject: [PATCH 02/11] chore: Detect browser setting for dark/light mode and set default to light mode (#1913) # What does this PR do? 1. Adding some lightweight JS to detect the default browser setting for dark/light mode 2. Setting default screen setting to light mode so as not to change default behavior. From the docs: https://github.com/MrDogeBro/sphinx_rtd_dark_mode >This lets you choose which theme the user sees when they load the docs for the first time ever. 
After the first time however, this setting has no effect as the users preference is stored in local storage within their browser. This option accepts a boolean for the value. If this option is true (the default option), users will start in dark mode when first visiting the site. If this option is false, users will start in light mode when they first visit the site. # Closes #1915 ## Test Plan Tested locally on my Mac on Safari and Chrome. --------- Signed-off-by: Francisco Javier Arceo --- docs/_static/js/detect_theme.js | 9 +++++++++ docs/source/conf.py | 3 +++ 2 files changed, 12 insertions(+) create mode 100644 docs/_static/js/detect_theme.js diff --git a/docs/_static/js/detect_theme.js b/docs/_static/js/detect_theme.js new file mode 100644 index 000000000..484b2bb8b --- /dev/null +++ b/docs/_static/js/detect_theme.js @@ -0,0 +1,9 @@ +document.addEventListener("DOMContentLoaded", function () { + const prefersDark = window.matchMedia("(prefers-color-scheme: dark)").matches; + const htmlElement = document.documentElement; + if (prefersDark) { + htmlElement.setAttribute("data-theme", "dark"); + } else { + htmlElement.setAttribute("data-theme", "light"); + } +}); diff --git a/docs/source/conf.py b/docs/source/conf.py index 33654fe67..55c6383b2 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -112,6 +112,8 @@ html_theme_options = { # "style_nav_header_background": "#c3c9d4", } +default_dark_mode = False + html_static_path = ["../_static"] # html_logo = "../_static/llama-stack-logo.png" # html_style = "../_static/css/my_theme.css" @@ -119,6 +121,7 @@ html_static_path = ["../_static"] def setup(app): app.add_css_file("css/my_theme.css") + app.add_js_file("js/detect_theme.js") def dockerhub_role(name, rawtext, text, lineno, inliner, options={}, content=[]): url = f"https://hub.docker.com/r/llamastack/{text}" From 770b38f8b5b6139dd4e684f78b39f1868635f05f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Han?= Date: Wed, 9 Apr 2025 20:22:29 +0200 
Subject: [PATCH 03/11] chore: simplify running the demo UI (#1907) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # What does this PR do? * Manage UI deps in pyproject * Use a new "ui" dep group to pull the deps with "uv" * Simplify the run command * Bump versions in requirements.txt Signed-off-by: Sébastien Han --- docs/source/playground/index.md | 4 +- llama_stack/distribution/ui/README.md | 4 +- llama_stack/distribution/ui/requirements.txt | 4 +- pyproject.toml | 6 + uv.lock | 178 +++++++++++++++++++ 5 files changed, 188 insertions(+), 8 deletions(-) diff --git a/docs/source/playground/index.md b/docs/source/playground/index.md index 9691609ab..ded2b5772 100644 --- a/docs/source/playground/index.md +++ b/docs/source/playground/index.md @@ -103,7 +103,5 @@ llama stack run together 2. Start Streamlit UI ```bash -cd llama_stack/distribution/ui -pip install -r requirements.txt -streamlit run app.py +uv run --with ".[ui]" streamlit run llama_stack/distribution/ui/app.py ``` diff --git a/llama_stack/distribution/ui/README.md b/llama_stack/distribution/ui/README.md index fe660544f..51c2d2bc2 100644 --- a/llama_stack/distribution/ui/README.md +++ b/llama_stack/distribution/ui/README.md @@ -36,9 +36,7 @@ llama-stack-client benchmarks register \ 3. 
Start Streamlit UI ```bash -cd llama_stack/distribution/ui -pip install -r requirements.txt -streamlit run app.py +uv run --with ".[ui]" streamlit run llama_stack/distribution/ui/app.py ``` ## Environment Variables diff --git a/llama_stack/distribution/ui/requirements.txt b/llama_stack/distribution/ui/requirements.txt index 1e0456267..61d42768d 100644 --- a/llama_stack/distribution/ui/requirements.txt +++ b/llama_stack/distribution/ui/requirements.txt @@ -1,5 +1,5 @@ streamlit pandas -llama-stack-client>=0.0.55 +llama-stack-client>=0.2.1 streamlit-option-menu -llama-stack>=0.1.9 +llama-stack>=0.2.1 diff --git a/pyproject.toml b/pyproject.toml index 8ae7ddbb6..83260b681 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -89,6 +89,12 @@ docs = [ "tomli", ] codegen = ["rich", "pydantic", "jinja2>=3.1.6"] +ui = [ + "streamlit", + "pandas", + "llama-stack-client>=0.2.1", + "streamlit-option-menu", +] [project.urls] Homepage = "https://github.com/meta-llama/llama-stack" diff --git a/uv.lock b/uv.lock index 5d7ce4076..1f7adea82 100644 --- a/uv.lock +++ b/uv.lock @@ -1,4 +1,5 @@ version = 1 +revision = 1 requires-python = ">=3.10" resolution-markers = [ "(python_full_version < '3.11' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version < '3.11' and sys_platform != 'darwin' and sys_platform != 'linux')", @@ -139,6 +140,22 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/7e/b3/6b4067be973ae96ba0d615946e314c5ae35f9f993eca561b356540bb0c2b/alabaster-1.0.0-py3-none-any.whl", hash = "sha256:fc6786402dc3fcb2de3cabd5fe455a2db534b371124f1f21de8731783dec828b", size = 13929 }, ] +[[package]] +name = "altair" +version = "5.5.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "jinja2" }, + { name = "jsonschema" }, + { name = "narwhals" }, + { name = "packaging" }, + { name = "typing-extensions", marker = "python_full_version < '3.14'" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/16/b1/f2969c7bdb8ad8bbdda031687defdce2c19afba2aa2c8e1d2a17f78376d8/altair-5.5.0.tar.gz", hash = "sha256:d960ebe6178c56de3855a68c47b516be38640b73fb3b5111c2a9ca90546dd73d", size = 705305 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/aa/f3/0b6ced594e51cc95d8c1fc1640d3623770d01e4969d29c0bd09945fafefa/altair-5.5.0-py3-none-any.whl", hash = "sha256:91a310b926508d560fe0148d02a194f38b824122641ef528113d029fcd129f8c", size = 731200 }, +] + [[package]] name = "annotated-types" version = "0.7.0" @@ -258,6 +275,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/09/71/54e999902aed72baf26bca0d50781b01838251a462612966e9fc4891eadd/black-25.1.0-py3-none-any.whl", hash = "sha256:95e8176dae143ba9097f351d174fdaf0ccd29efb414b362ae3fd72bf0f710717", size = 207646 }, ] +[[package]] +name = "blinker" +version = "1.9.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/21/28/9b3f50ce0e048515135495f198351908d99540d69bfdc8c1d15b73dc55ce/blinker-1.9.0.tar.gz", hash = "sha256:b4ce2265a7abece45e7cc896e98dbebe6cead56bcf805a3d23136d145f5445bf", size = 22460 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/10/cb/f2ad4230dc2eb1a74edf38f1a38b9b52277f75bef262d8908e60d957e13c/blinker-1.9.0-py3-none-any.whl", hash = "sha256:ba0efaa9080b619ff2f3459d1d500c57bddea4a6b424b60a91141db6fd2f08bc", size = 8458 }, +] + [[package]] name = "blobfile" version = "3.0.0" @@ -282,6 +308,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b3/58/a255894436f3eca4a20611785a30a43b85bc75adf1b77f227e1e6d0cce0a/braintrust_core-0.0.58-py3-none-any.whl", hash = "sha256:fa272b70376d2c6692acf00ebd9fb9bae057b0c53b2b6a59a64850bf79757311", size = 4438 }, ] +[[package]] +name = "cachetools" +version = "5.5.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/6c/81/3747dad6b14fa2cf53fcf10548cf5aea6913e96fab41a3c198676f8948a5/cachetools-5.5.2.tar.gz", hash = "sha256:1a661caa9175d26759571b2e19580f9d6393969e5dfca11fdb1f947a23e640d4", size = 28380 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/72/76/20fa66124dbe6be5cafeb312ece67de6b61dd91a0247d1ea13db4ebb33c2/cachetools-5.5.2-py3-none-any.whl", hash = "sha256:d26a22bcc62eb95c3beabd9f1ee5e820d3d2704fe2967cbe350e20c8ffcd3f0a", size = 10080 }, +] + [[package]] name = "certifi" version = "2025.1.31" @@ -783,6 +818,30 @@ http = [ { name = "aiohttp" }, ] +[[package]] +name = "gitdb" +version = "4.0.12" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "smmap" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/72/94/63b0fc47eb32792c7ba1fe1b694daec9a63620db1e313033d18140c2320a/gitdb-4.0.12.tar.gz", hash = "sha256:5ef71f855d191a3326fcfbc0d5da835f26b13fbcba60c32c21091c349ffdb571", size = 394684 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a0/61/5c78b91c3143ed5c14207f463aecfc8f9dbb5092fb2869baf37c273b2705/gitdb-4.0.12-py3-none-any.whl", hash = "sha256:67073e15955400952c6565cc3e707c554a4eea2e428946f7a4c162fab9bd9bcf", size = 62794 }, +] + +[[package]] +name = "gitpython" +version = "3.1.44" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "gitdb" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c0/89/37df0b71473153574a5cdef8f242de422a0f5d26d7a9e231e6f169b4ad14/gitpython-3.1.44.tar.gz", hash = "sha256:c87e30b26253bf5418b01b0660f818967f3c503193838337fe5e573331249269", size = 214196 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1d/9a/4114a9057db2f1462d5c8f8390ab7383925fe1ac012eaa42402ad65c2963/GitPython-3.1.44-py3-none-any.whl", hash = "sha256:9e0e10cda9bed1ee64bc9a6de50e7e38a9c9943241cd7f585f6df3ed28011110", size = 207599 }, +] + [[package]] name = "googleapis-common-protos" version = "1.67.0" @@ -1386,6 
+1445,12 @@ test = [ { name = "torchvision", version = "0.21.0", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or sys_platform == 'darwin'" }, { name = "torchvision", version = "0.21.0+cpu", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" }, ] +ui = [ + { name = "llama-stack-client" }, + { name = "pandas" }, + { name = "streamlit" }, + { name = "streamlit-option-menu" }, +] unit = [ { name = "aiohttp" }, { name = "aiosqlite" }, @@ -1416,6 +1481,7 @@ requires-dist = [ { name = "jinja2", marker = "extra == 'codegen'", specifier = ">=3.1.6" }, { name = "jsonschema" }, { name = "llama-stack-client", specifier = ">=0.2.1" }, + { name = "llama-stack-client", marker = "extra == 'ui'", specifier = ">=0.2.1" }, { name = "mcp", marker = "extra == 'test'" }, { name = "myst-parser", marker = "extra == 'docs'" }, { name = "nbval", marker = "extra == 'dev'" }, @@ -1423,6 +1489,7 @@ requires-dist = [ { name = "openai", marker = "extra == 'unit'" }, { name = "opentelemetry-exporter-otlp-proto-http", marker = "extra == 'test'" }, { name = "opentelemetry-sdk", marker = "extra == 'test'" }, + { name = "pandas", marker = "extra == 'ui'" }, { name = "pillow" }, { name = "pre-commit", marker = "extra == 'dev'" }, { name = "prompt-toolkit" }, @@ -1452,6 +1519,8 @@ requires-dist = [ { name = "sphinxcontrib-redoc", marker = "extra == 'docs'" }, { name = "sphinxcontrib-video", marker = "extra == 'docs'" }, { name = "sqlite-vec", marker = "extra == 'unit'" }, + { name = "streamlit", marker = "extra == 'ui'" }, + { name = "streamlit-option-menu", marker = "extra == 'ui'" }, { name = "termcolor" }, { name = "tiktoken" }, { name = "tomli", marker = "extra == 'docs'" }, @@ -1461,6 +1530,7 @@ requires-dist = [ { name = "types-setuptools", marker = "extra == 'dev'" }, 
{ name = "uvicorn", marker = "extra == 'dev'" }, ] +provides-extras = ["dev", "unit", "test", "docs", "codegen", "ui"] [[package]] name = "llama-stack-client" @@ -1815,6 +1885,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/5f/df/76d0321c3797b54b60fef9ec3bd6f4cfd124b9e422182156a1dd418722cf/myst_parser-4.0.1-py3-none-any.whl", hash = "sha256:9134e88959ec3b5780aedf8a99680ea242869d012e8821db3126d427edc9c95d", size = 84579 }, ] +[[package]] +name = "narwhals" +version = "1.34.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ec/1d/a21496389436e96394a6e3fb1a644d5bc382250baff76e867f0368a94068/narwhals-1.34.0.tar.gz", hash = "sha256:bdd3fa60bea1f1e8b698e483be18dd43af13290da12dba69ea16dc1f3edbb8f7", size = 265432 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1e/6d/875d5a7f8e14fc044ede74b94e739d7312c3c8d1a3878f649601b15fdd68/narwhals-1.34.0-py3-none-any.whl", hash = "sha256:9502b9aa5dfe125c090a3a0bbca95becfa1fac2cd67f8b80d12b1dc2ed751865", size = 325346 }, +] + [[package]] name = "nbformat" version = "5.10.4" @@ -2571,6 +2650,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/0b/53/a64f03044927dc47aafe029c42a5b7aabc38dfb813475e0e1bf71c4a59d0/pydantic_settings-2.8.1-py3-none-any.whl", hash = "sha256:81942d5ac3d905f7f3ee1a70df5dfb62d5569c12f51a5a647defc1c3d9ee2e9c", size = 30839 }, ] +[[package]] +name = "pydeck" +version = "0.9.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "jinja2" }, + { name = "numpy" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a1/ca/40e14e196864a0f61a92abb14d09b3d3da98f94ccb03b49cf51688140dab/pydeck-0.9.1.tar.gz", hash = "sha256:f74475ae637951d63f2ee58326757f8d4f9cd9f2a457cf42950715003e2cb605", size = 3832240 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ab/4c/b888e6cf58bd9db9c93f40d1c6be8283ff49d88919231afe93a6bcf61626/pydeck-0.9.1-py2.py3-none-any.whl", hash = 
"sha256:b3f75ba0d273fc917094fa61224f3f6076ca8752b93d46faf3bcfd9f9d59b038", size = 6900403 }, +] + [[package]] name = "pygments" version = "2.19.1" @@ -3220,6 +3312,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b7/ce/149a00dd41f10bc29e5921b496af8b574d8413afcd5e30dfa0ed46c2cc5e/six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274", size = 11050 }, ] +[[package]] +name = "smmap" +version = "5.0.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/44/cd/a040c4b3119bbe532e5b0732286f805445375489fceaec1f48306068ee3b/smmap-5.0.2.tar.gz", hash = "sha256:26ea65a03958fa0c8a1c7e8c7a58fdc77221b8910f6be2131affade476898ad5", size = 22329 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/04/be/d09147ad1ec7934636ad912901c5fd7667e1c858e19d355237db0d0cd5e4/smmap-5.0.2-py3-none-any.whl", hash = "sha256:b30115f0def7d7531d22a0fb6502488d879e75b260a9db4d0819cfb25403af5e", size = 24303 }, +] + [[package]] name = "sniffio" version = "1.3.1" @@ -3502,6 +3603,47 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d9/61/f2b52e107b1fc8944b33ef56bf6ac4ebbe16d91b94d2b87ce013bf63fb84/starlette-0.45.3-py3-none-any.whl", hash = "sha256:dfb6d332576f136ec740296c7e8bb8c8a7125044e7c6da30744718880cdd059d", size = 71507 }, ] +[[package]] +name = "streamlit" +version = "1.44.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "altair" }, + { name = "blinker" }, + { name = "cachetools" }, + { name = "click" }, + { name = "gitpython" }, + { name = "numpy" }, + { name = "packaging" }, + { name = "pandas" }, + { name = "pillow" }, + { name = "protobuf" }, + { name = "pyarrow" }, + { name = "pydeck" }, + { name = "requests" }, + { name = "tenacity" }, + { name = "toml" }, + { name = "tornado" }, + { name = "typing-extensions" }, + { name = "watchdog", marker = "sys_platform != 'darwin'" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/3e/c0/7286284567e5045f0c587c426d0c41aee5d10c0a2e360e627a83037e9f0c/streamlit-1.44.1.tar.gz", hash = "sha256:c6914ed6d5b76870b461510476806db370f36425ae0e6654d227c988288198d3", size = 9423685 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/eb/17/fc425e1d4d86e31b2aaf0812a2ef2163763a0670d671720c7c36e8679323/streamlit-1.44.1-py3-none-any.whl", hash = "sha256:9fe355f58b11f4eb71e74f115ce1f38c4c9eaff2733e6bcffb510ac1298a5990", size = 9812242 }, +] + +[[package]] +name = "streamlit-option-menu" +version = "0.4.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "streamlit" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/5e/27/72dc451cdaef1714fd0d75cc430e50a06c12c9046295fdf1f94af1b766eb/streamlit-option-menu-0.4.0.tar.gz", hash = "sha256:48ec69d59e547fa2fa4bfae001620df8af56a80de2f765ddbb9fcbfb84017129", size = 827290 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fd/52/2f525ad4262dc83d67297f69ec5afcee1438b9e9ae22aa318396725ddbed/streamlit_option_menu-0.4.0-py3-none-any.whl", hash = "sha256:a55fc7554047b6db371595af2182e435b8a2c715ee6124e8543685bd4670b07e", size = 829255 }, +] + [[package]] name = "sympy" version = "1.13.1" @@ -3514,6 +3656,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b2/fe/81695a1aa331a842b582453b605175f419fe8540355886031328089d840a/sympy-1.13.1-py3-none-any.whl", hash = "sha256:db36cdc64bf61b9b24578b6f7bab1ecdd2452cf008f34faa33776680c26d66f8", size = 6189177 }, ] +[[package]] +name = "tenacity" +version = "9.1.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/0a/d4/2b0cd0fe285e14b36db076e78c93766ff1d529d70408bd1d2a5a84f1d929/tenacity-9.1.2.tar.gz", hash = "sha256:1169d376c297e7de388d18b4481760d478b0e99a777cad3a9c86e556f4b697cb", size = 48036 } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/e5/30/643397144bfbfec6f6ef821f36f33e57d35946c44a2352d3c9f0ae847619/tenacity-9.1.2-py3-none-any.whl", hash = "sha256:f77bf36710d8b73a50b2dd155c97b870017ad21afe6ab300326b0371b3b05138", size = 28248 }, +] + [[package]] name = "termcolor" version = "2.5.0" @@ -3559,6 +3710,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/de/a8/8f499c179ec900783ffe133e9aab10044481679bb9aad78436d239eee716/tiktoken-0.9.0-cp313-cp313-win_amd64.whl", hash = "sha256:5ea0edb6f83dc56d794723286215918c1cde03712cbbafa0348b33448faf5b95", size = 894669 }, ] +[[package]] +name = "toml" +version = "0.10.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/be/ba/1f744cdc819428fc6b5084ec34d9b30660f6f9daaf70eead706e3203ec3c/toml-0.10.2.tar.gz", hash = "sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f", size = 22253 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/44/6f/7120676b6d73228c96e17f1f794d8ab046fc910d781c8d151120c3f1569e/toml-0.10.2-py2.py3-none-any.whl", hash = "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b", size = 16588 }, +] + [[package]] name = "tomli" version = "2.2.1" @@ -3836,6 +3996,24 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/93/fa/849483d56773ae29740ae70043ad88e068f98a6401aa819b5d6bee604683/virtualenv-20.29.2-py3-none-any.whl", hash = "sha256:febddfc3d1ea571bdb1dc0f98d7b45d24def7428214d4fb73cc486c9568cce6a", size = 4301478 }, ] +[[package]] +name = "watchdog" +version = "6.0.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/db/7d/7f3d619e951c88ed75c6037b246ddcf2d322812ee8ea189be89511721d54/watchdog-6.0.0.tar.gz", hash = "sha256:9ddf7c82fda3ae8e24decda1338ede66e1c99883db93711d8fb941eaa2d8c282", size = 131220 } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/a9/c7/ca4bf3e518cb57a686b2feb4f55a1892fd9a3dd13f470fca14e00f80ea36/watchdog-6.0.0-py3-none-manylinux2014_aarch64.whl", hash = "sha256:7607498efa04a3542ae3e05e64da8202e58159aa1fa4acddf7678d34a35d4f13", size = 79079 }, + { url = "https://files.pythonhosted.org/packages/5c/51/d46dc9332f9a647593c947b4b88e2381c8dfc0942d15b8edc0310fa4abb1/watchdog-6.0.0-py3-none-manylinux2014_armv7l.whl", hash = "sha256:9041567ee8953024c83343288ccc458fd0a2d811d6a0fd68c4c22609e3490379", size = 79078 }, + { url = "https://files.pythonhosted.org/packages/d4/57/04edbf5e169cd318d5f07b4766fee38e825d64b6913ca157ca32d1a42267/watchdog-6.0.0-py3-none-manylinux2014_i686.whl", hash = "sha256:82dc3e3143c7e38ec49d61af98d6558288c415eac98486a5c581726e0737c00e", size = 79076 }, + { url = "https://files.pythonhosted.org/packages/ab/cc/da8422b300e13cb187d2203f20b9253e91058aaf7db65b74142013478e66/watchdog-6.0.0-py3-none-manylinux2014_ppc64.whl", hash = "sha256:212ac9b8bf1161dc91bd09c048048a95ca3a4c4f5e5d4a7d1b1a7d5752a7f96f", size = 79077 }, + { url = "https://files.pythonhosted.org/packages/2c/3b/b8964e04ae1a025c44ba8e4291f86e97fac443bca31de8bd98d3263d2fcf/watchdog-6.0.0-py3-none-manylinux2014_ppc64le.whl", hash = "sha256:e3df4cbb9a450c6d49318f6d14f4bbc80d763fa587ba46ec86f99f9e6876bb26", size = 79078 }, + { url = "https://files.pythonhosted.org/packages/62/ae/a696eb424bedff7407801c257d4b1afda455fe40821a2be430e173660e81/watchdog-6.0.0-py3-none-manylinux2014_s390x.whl", hash = "sha256:2cce7cfc2008eb51feb6aab51251fd79b85d9894e98ba847408f662b3395ca3c", size = 79077 }, + { url = "https://files.pythonhosted.org/packages/b5/e8/dbf020b4d98251a9860752a094d09a65e1b436ad181faf929983f697048f/watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl", hash = "sha256:20ffe5b202af80ab4266dcd3e91aae72bf2da48c0d33bdb15c66658e685e94e2", size = 79078 }, + { url = 
"https://files.pythonhosted.org/packages/07/f6/d0e5b343768e8bcb4cda79f0f2f55051bf26177ecd5651f84c07567461cf/watchdog-6.0.0-py3-none-win32.whl", hash = "sha256:07df1fdd701c5d4c8e55ef6cf55b8f0120fe1aef7ef39a1c6fc6bc2e606d517a", size = 79065 }, + { url = "https://files.pythonhosted.org/packages/db/d9/c495884c6e548fce18a8f40568ff120bc3a4b7b99813081c8ac0c936fa64/watchdog-6.0.0-py3-none-win_amd64.whl", hash = "sha256:cbafb470cf848d93b5d013e2ecb245d4aa1c8fd0504e863ccefa32445359d680", size = 79070 }, + { url = "https://files.pythonhosted.org/packages/33/e8/e40370e6d74ddba47f002a32919d91310d6074130fe4e17dabcafc15cbf1/watchdog-6.0.0-py3-none-win_ia64.whl", hash = "sha256:a1914259fa9e1454315171103c6a30961236f508b9b623eae470268bbcc6a22f", size = 79067 }, +] + [[package]] name = "watchfiles" version = "1.0.4" From e2299291c42c4d1e29506bbdc366678c8ff4d987 Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Wed, 9 Apr 2025 11:28:45 -0700 Subject: [PATCH 04/11] fix: Mirror llama4 rope scaling fixes, small model simplify (#1917) See: - https://github.com/meta-llama/llama-models/pull/322 - https://github.com/meta-llama/llama-models/pull/320 --- llama_stack/models/llama/llama4/args.py | 13 ++++++ llama_stack/models/llama/llama4/model.py | 51 +++++++++++------------- 2 files changed, 36 insertions(+), 28 deletions(-) diff --git a/llama_stack/models/llama/llama4/args.py b/llama_stack/models/llama/llama4/args.py index 6d7c1d409..dd5f7cbde 100644 --- a/llama_stack/models/llama/llama4/args.py +++ b/llama_stack/models/llama/llama4/args.py @@ -70,6 +70,9 @@ class ModelArgs(BaseModel): attention_chunk_size: Optional[int] = None rope_theta: float = 500000 use_scaled_rope: bool = False + rope_scaling_factor: Optional[float] = None + rope_high_freq_factor: Optional[float] = None + nope_layer_interval: Optional[int] = None # No position encoding in every n layers use_qk_norm: bool = False # Set to True to enable inference-time temperature tuning (useful for very long context) @@ -92,4 +95,14 @@ 
class ModelArgs(BaseModel): f"n_heads ({self.n_heads}) must be divisible by n_kv_heads ({self.n_kv_heads})" ) assert self.dim % self.n_heads == 0, f"dim ({self.dim}) must be divisible by n_heads ({self.n_heads})" + + if self.use_scaled_rope: + # NOTE: ideally these values should have come from params.json. However, we have + # shipped the models everywhere. Only Llama-4-Scout uses scaled rope and needs these + # specific values. + if self.rope_scaling_factor is None: + self.rope_scaling_factor = 16 + if self.rope_high_freq_factor is None: + self.rope_high_freq_factor = 1 + return self diff --git a/llama_stack/models/llama/llama4/model.py b/llama_stack/models/llama/llama4/model.py index 08fac7714..2272b868d 100644 --- a/llama_stack/models/llama/llama4/model.py +++ b/llama_stack/models/llama/llama4/model.py @@ -23,37 +23,25 @@ from .ffn import FeedForward from .moe import MoE +def rmsnorm(x, eps): + def _norm(y): + return y * torch.rsqrt(y.pow(2).mean(-1, keepdim=True) + eps) + + return _norm(x.float()).type_as(x) + + class RMSNorm(torch.nn.Module): def __init__(self, dim: int, eps: float = 1e-6): super().__init__() self.eps = eps self.weight = nn.Parameter(torch.ones(dim)) - def _norm(self, x): - return x * torch.rsqrt(x.pow(2).mean(-1, keepdim=True) + self.eps) - def forward(self, x): - output = self._norm(x.float()).type_as(x) - return output * self.weight + return rmsnorm(x, self.eps) * self.weight -class L2Norm(torch.nn.Module): - def __init__(self, dim: int, eps: float = 1e-6): - super().__init__() - self.eps = eps - - def _norm(self, x): - return x * torch.rsqrt(x.pow(2).mean(-1, keepdim=True) + self.eps) - - def forward(self, x): - return self._norm(x.float()).type_as(x) - - -def apply_scaling(freqs: torch.Tensor): - # Values obtained from grid search - scale_factor = 8 +def apply_scaling(freqs: torch.Tensor, scale_factor: float, high_freq_factor: float): low_freq_factor = 1 - high_freq_factor = 4 old_context_len = 8192 # original llama3 length 
low_freq_wavelen = old_context_len / low_freq_factor @@ -72,11 +60,18 @@ def apply_scaling(freqs: torch.Tensor): return torch.tensor(new_freqs, dtype=freqs.dtype, device=freqs.device) -def precompute_freqs_cis(dim: int, end: int, theta: float = 10000.0, use_scaled: bool = False): +def precompute_freqs_cis( + dim: int, + end: int, + theta: float, + use_scaled: bool, + scale_factor: float, + high_freq_factor: float, +): freqs = 1.0 / (theta ** (torch.arange(0, dim, 2)[: (dim // 2)].float() / dim)) t = torch.arange(end, device=freqs.device, dtype=torch.float32) if use_scaled: - freqs = apply_scaling(freqs) + freqs = apply_scaling(freqs, scale_factor, high_freq_factor) freqs = torch.outer(t, freqs) freqs_cis = torch.polar(torch.ones_like(freqs), freqs) # complex64 return freqs_cis @@ -174,9 +169,7 @@ class Attention(nn.Module): self.head_dim, ) ).cuda() - self.qk_norm = None - if self.use_qk_norm: - self.qk_norm = L2Norm(args.norm_eps) + self.norm_eps = args.norm_eps self._register_load_state_dict_pre_hook(self.load_hook) def load_hook( @@ -220,8 +213,8 @@ class Attention(nn.Module): xq, xk = apply_rotary_emb(xq, xk, freqs_cis=freqs_cis) if self.use_qk_norm: - xq = self.qk_norm(xq) - xk = self.qk_norm(xk) + xq = rmsnorm(xq, self.norm_eps) + xk = rmsnorm(xk, self.norm_eps) # We are applying temperature tuning (https://arxiv.org/abs/2501.19399) to NoPE layers, where # the inference-time temperature tuning function is customized to not affect short context @@ -362,6 +355,8 @@ class Transformer(nn.Module): args.max_seq_len * 2, args.rope_theta, args.use_scaled_rope, + args.rope_scaling_factor, + args.rope_high_freq_factor, ) vision_args = self.args.vision_args if vision_args: From 36a31fe5dd3947a163d94fce7a68484beb35ded1 Mon Sep 17 00:00:00 2001 From: Jiawen Liu Date: Wed, 9 Apr 2025 15:00:12 -0700 Subject: [PATCH 05/11] fix: on-the-fly int4 quantize parameter (#1920) Mirror to https://github.com/meta-llama/llama-models/pull/324 with some clean up ``` with-proxy pip 
install -e . export INFERENCE_MODEL=meta-llama/Llama-4-Scout-17B-16E-Instruct export INFERENCE_CHECKPOINT_DIR=../checkpoints/Llama-4-Scout-17B-16E-Instruct export QUANTIZATION_TYPE=int4_mixed with-proxy llama stack build --run --template meta-reference-gpu ``` # What does this PR do? [Provide a short summary of what this PR does and why. Link to relevant issues if applicable.] [//]: # (If resolving an issue, uncomment and update the line below) [//]: # (Closes #[issue-number]) ## Test Plan [Describe the tests you ran to verify your changes with result summaries. *Provide clear instructions so the plan can be easily re-executed.*] [//]: # (## Documentation) --- .../models/llama/llama4/quantization/loader.py | 2 +- llama_stack/models/llama/quantize_impls.py | 18 +----------------- 2 files changed, 2 insertions(+), 18 deletions(-) diff --git a/llama_stack/models/llama/llama4/quantization/loader.py b/llama_stack/models/llama/llama4/quantization/loader.py index b50432896..f11d83c60 100644 --- a/llama_stack/models/llama/llama4/quantization/loader.py +++ b/llama_stack/models/llama/llama4/quantization/loader.py @@ -91,7 +91,7 @@ def convert_to_quantized_model( log_status(f"Rank {rank}: Quantizing int4 weights from bf16") def apply_quantization(_, weight): - return quantize_int4(weight, fp8_activation_scale_ub, output_device=torch.device("cuda")) + return quantize_int4(weight, output_device=torch.device("cuda")) else: fp8_scales_path = os.path.join(checkpoint_dir, f"fp8_scales_{rank}.pt") diff --git a/llama_stack/models/llama/quantize_impls.py b/llama_stack/models/llama/quantize_impls.py index 6e1d15cf6..a5da01588 100644 --- a/llama_stack/models/llama/quantize_impls.py +++ b/llama_stack/models/llama/quantize_impls.py @@ -65,7 +65,7 @@ class Int4Weights( Int4ScaledWeights, collections.namedtuple( "Int4Weights", - ["weight", "scale", "zero_point", "shape", "activation_scale_ub"], + ["weight", "scale", "zero_point", "shape"], ), ): pass @@ -184,20 +184,13 @@ def quantize_fp8( 
@torch.inference_mode() def quantize_int4( w: Tensor, - fp8_activation_scale_ub: float, output_device: Optional[torch.device] = None, ) -> Int4Weights: """Quantize [n, k/2] weight tensor. Args: w (Tensor): [n, k/2] input high precision tensor to quantize. - fp8_activation_scale_ub (float): Upper bound for activation max. """ - activation_scale_ub = torch.tensor( - [fp8_activation_scale_ub], - dtype=torch.float, - device=output_device, - ) if w.ndim >= 3: wq, scale, zero_point = zip(*[int4_row_quantize(i) for i in w], strict=False) wq = torch.stack([pack_int4(i) for i in wq], dim=0) @@ -212,7 +205,6 @@ def quantize_int4( scale=scale.to(output_device), zero_point=zero_point.to(output_device), shape=wq.shape, - activation_scale_ub=activation_scale_ub, ) @@ -247,26 +239,18 @@ def load_int4( w: Tensor, scale: Tensor, zero_point: Tensor, - fp8_activation_scale_ub: float, output_device: Optional[torch.device] = None, ) -> Int4Weights: """Load INT4 [n, k/2] weight tensor. Args: w (Tensor): [n, k/2] input INT4. - fp8_activation_scale_ub (float): Upper bound for activation max. 
""" - activation_scale_ub = torch.tensor( - [fp8_activation_scale_ub], - dtype=torch.float, - device=output_device, - ) return Int4Weights( weight=w.to(torch.int8).to(device=output_device), scale=scale.to(device=output_device), zero_point=zero_point.to(device=output_device), shape=w.shape, - activation_scale_ub=activation_scale_ub, ) From 712c6758c68b228c3b0e8ecb4ce7d53db38ea3e4 Mon Sep 17 00:00:00 2001 From: Yuan Tang Date: Wed, 9 Apr 2025 18:43:43 -0400 Subject: [PATCH 06/11] docs: Avoid bash script syntax highlighting for dark mode (#1918) See https://github.com/meta-llama/llama-stack/pull/1913#issuecomment-2790153778 Signed-off-by: Yuan Tang --- .../distributions/kubernetes_deployment.md | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/docs/source/distributions/kubernetes_deployment.md b/docs/source/distributions/kubernetes_deployment.md index 8ff3f0408..2daf9d785 100644 --- a/docs/source/distributions/kubernetes_deployment.md +++ b/docs/source/distributions/kubernetes_deployment.md @@ -7,13 +7,13 @@ In this guide, we'll use a local [Kind](https://kind.sigs.k8s.io/) cluster and a First, create a local Kubernetes cluster via Kind: -```bash +``` kind create cluster --image kindest/node:v1.32.0 --name llama-stack-test ``` First, create a Kubernetes PVC and Secret for downloading and storing Hugging Face model: -```bash +``` cat </tmp/test-vllm-llama-stack/Containerfile.llama-stack-run-k8s < Date: Thu, 10 Apr 2025 04:04:17 -0400 Subject: [PATCH 07/11] docs: Redirect instructions for additional hardware accelerators for remote vLLM provider (#1923) # What does this PR do? vLLM website just added a [new index page for installing for different hardware accelerators](https://docs.vllm.ai/en/latest/getting_started/installation.html). This PR adds a link to that page with additional edits to make sure readers are aware that the use of GPUs on this page are for demonstration purposes only. 
This closes https://github.com/meta-llama/llama-stack/issues/1813. Signed-off-by: Yuan Tang --- .../source/distributions/self_hosted_distro/remote-vllm.md | 7 +++++-- llama_stack/templates/remote-vllm/doc_template.md | 7 +++++-- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/docs/source/distributions/self_hosted_distro/remote-vllm.md b/docs/source/distributions/self_hosted_distro/remote-vllm.md index 457d703b3..e18b5bf40 100644 --- a/docs/source/distributions/self_hosted_distro/remote-vllm.md +++ b/docs/source/distributions/self_hosted_distro/remote-vllm.md @@ -25,7 +25,7 @@ The `llamastack/distribution-remote-vllm` distribution consists of the following | vector_io | `inline::faiss`, `remote::chromadb`, `remote::pgvector` | -You can use this distribution if you have GPUs and want to run an independent vLLM server container for running inference. +You can use this distribution if you want to run an independent vLLM server for inference. ### Environment Variables @@ -41,7 +41,10 @@ The following environment variables can be configured: ## Setting up vLLM server -Both AMD and NVIDIA GPUs can serve as accelerators for the vLLM server, which acts as both the LLM inference provider and the safety provider. +In the following sections, we'll use either AMD or NVIDIA GPUs to serve as hardware accelerators for the vLLM +server, which acts as both the LLM inference provider and the safety provider. Note that vLLM also +[supports many other hardware accelerators](https://docs.vllm.ai/en/latest/getting_started/installation.html) and +that we only use GPUs here for demonstration purposes.
### Setting up vLLM server on AMD GPU diff --git a/llama_stack/templates/remote-vllm/doc_template.md b/llama_stack/templates/remote-vllm/doc_template.md index 7543e8239..efcdb62c6 100644 --- a/llama_stack/templates/remote-vllm/doc_template.md +++ b/llama_stack/templates/remote-vllm/doc_template.md @@ -13,7 +13,7 @@ The `llamastack/distribution-{{ name }}` distribution consists of the following {{ providers_table }} -You can use this distribution if you have GPUs and want to run an independent vLLM server container for running inference. +You can use this distribution if you want to run an independent vLLM server for inference. {% if run_config_env_vars %} ### Environment Variables @@ -28,7 +28,10 @@ The following environment variables can be configured: ## Setting up vLLM server -Both AMD and NVIDIA GPUs can serve as accelerators for the vLLM server, which acts as both the LLM inference provider and the safety provider. +In the following sections, we'll use either AMD or NVIDIA GPUs to serve as hardware accelerators for the vLLM +server, which acts as both the LLM inference provider and the safety provider. Note that vLLM also +[supports many other hardware accelerators](https://docs.vllm.ai/en/latest/getting_started/installation.html) and +that we only use GPUs here for demonstration purposes. ### Setting up vLLM server on AMD GPU From 1f2df59ecee2070e49053173d57b1ee44a5f049e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Han?= Date: Thu, 10 Apr 2025 18:37:48 +0200 Subject: [PATCH 08/11] docs: fix model name (#1926) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # What does this PR do? Use llama3.2:3b for consistency.
Signed-off-by: Sébastien Han --- docs/source/getting_started/index.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/source/getting_started/index.md b/docs/source/getting_started/index.md index e9ad51961..82329e60e 100644 --- a/docs/source/getting_started/index.md +++ b/docs/source/getting_started/index.md @@ -9,10 +9,10 @@ In this guide, we'll walk through how to build a RAG agent locally using Llama S ### 1. Download a Llama model with Ollama ```bash -ollama pull llama3.2:3b-instruct-fp16 +ollama pull llama3.2:3b ``` -This will instruct the Ollama service to download the Llama 3.2 3B Instruct model, which we'll use in the rest of this guide. +This will instruct the Ollama service to download the Llama 3.2 3B model, which we'll use in the rest of this guide. ```{admonition} Note :class: tip @@ -176,7 +176,7 @@ python inference.py ``` Sample output: ``` -Model: llama3.2:3b-instruct-fp16 +Model: llama3.2:3b Here is a haiku about coding: Lines of code unfold From 09a83b1ec1767242b7949532b07f68ac5b1c97b5 Mon Sep 17 00:00:00 2001 From: Francisco Arceo Date: Thu, 10 Apr 2025 10:38:57 -0600 Subject: [PATCH 09/11] docs: Updating background color for code in darkmode (#1930) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # What does this PR do? A small quality of life adjustment to make the code background for darkmode black. Makes it much easier to differentiate between code and non-code text. 
From: Screenshot 2025-04-10 at 9 22 23 AM To: Screenshot 2025-04-10 at 9 22 43 AM The CSS was sourced from here: https://github.com/MrDogeBro/sphinx_rtd_dark_mode/blob/main/sphinx_rtd_dark_mode/static/dark_mode_css/dark.css Signed-off-by: Francisco Javier Arceo --- docs/_static/css/my_theme.css | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docs/_static/css/my_theme.css b/docs/_static/css/my_theme.css index ccd7d2060..470452661 100644 --- a/docs/_static/css/my_theme.css +++ b/docs/_static/css/my_theme.css @@ -16,3 +16,7 @@ .hide-title h1 { display: none; } + +html[data-theme="dark"] .rst-content div[class^="highlight"] { + background-color: #0b0b0b; +} From 14146e4b3f2757b03f449d74b3498d17353bdcb5 Mon Sep 17 00:00:00 2001 From: ehhuang Date: Thu, 10 Apr 2025 10:26:19 -0700 Subject: [PATCH 10/11] feat(verification): various improvements (#1921) # What does this PR do? - provider and their models now live in config.yaml - better distinguish different cases within a test - add model key to surface provider's model_id - include example command to rerun single test case ## Test Plan image --- tests/verifications/REPORT.md | 125 +- tests/verifications/conf/cerebras.yaml | 10 + tests/verifications/conf/fireworks.yaml | 14 + tests/verifications/conf/groq.yaml | 14 + tests/verifications/conf/openai.yaml | 9 + tests/verifications/conf/together.yaml | 14 + tests/verifications/conftest.py | 67 +- tests/verifications/generate_report.py | 415 +-- .../verifications/openai/fixtures/fixtures.py | 97 - .../openai/test_chat_completion.py | 202 -- .../{openai => openai_api}/__init__.py | 0 .../fixtures/__init__.py | 0 .../openai_api/fixtures/fixtures.py | 105 + .../{openai => openai_api}/fixtures/load.py | 0 .../fixtures/test_cases/chat_completion.yaml | 53 +- .../openai_api/test_chat_completion.py | 271 ++ .../test_results/fireworks_1744154308.json | 2744 ---------------- .../test_results/fireworks_1744264202.json | 1329 ++++++++ .../test_results/openai_1744154522.json | 2672 
---------------- .../test_results/openai_1744264304.json | 868 +++++ .../test_results/together_1744154399.json | 2830 ----------------- .../test_results/together_1744264258.json | 1420 +++++++++ 22 files changed, 4449 insertions(+), 8810 deletions(-) create mode 100644 tests/verifications/conf/cerebras.yaml create mode 100644 tests/verifications/conf/fireworks.yaml create mode 100644 tests/verifications/conf/groq.yaml create mode 100644 tests/verifications/conf/openai.yaml create mode 100644 tests/verifications/conf/together.yaml delete mode 100644 tests/verifications/openai/fixtures/fixtures.py delete mode 100644 tests/verifications/openai/test_chat_completion.py rename tests/verifications/{openai => openai_api}/__init__.py (100%) rename tests/verifications/{openai => openai_api}/fixtures/__init__.py (100%) create mode 100644 tests/verifications/openai_api/fixtures/fixtures.py rename tests/verifications/{openai => openai_api}/fixtures/load.py (100%) rename tests/verifications/{openai => openai_api}/fixtures/test_cases/chat_completion.yaml (78%) create mode 100644 tests/verifications/openai_api/test_chat_completion.py delete mode 100644 tests/verifications/test_results/fireworks_1744154308.json create mode 100644 tests/verifications/test_results/fireworks_1744264202.json delete mode 100644 tests/verifications/test_results/openai_1744154522.json create mode 100644 tests/verifications/test_results/openai_1744264304.json delete mode 100644 tests/verifications/test_results/together_1744154399.json create mode 100644 tests/verifications/test_results/together_1744264258.json diff --git a/tests/verifications/REPORT.md b/tests/verifications/REPORT.md index d5715ae21..449499382 100644 --- a/tests/verifications/REPORT.md +++ b/tests/verifications/REPORT.md @@ -1,6 +1,6 @@ # Test Results Report -*Generated on: 2025-04-08 21:14:02* +*Generated on: 2025-04-09 22:52:19* *This report was generated by running `python tests/verifications/generate_report.py`* @@ -23,66 +23,107 @@ ## 
Together -*Tests run on: 2025-04-08 16:19:59* +*Tests run on: 2025-04-09 22:50:58* ```bash -pytest tests/verifications/openai/test_chat_completion.py --provider=together -v +# Run all tests for this provider: +pytest tests/verifications/openai_api/test_chat_completion.py --provider=together -v + +# Example: Run only the 'earth' case of test_chat_non_streaming_basic: +pytest tests/verifications/openai_api/test_chat_completion.py --provider=together -k "test_chat_non_streaming_basic and earth" ``` -| Test | Llama-3.3-70B-Instruct | Llama-4-Maverick-17B-128E-Instruct | Llama-4-Scout-17B-16E-Instruct | + +**Model Key (Together)** + +| Display Name | Full Model ID | +| --- | --- | +| Llama-3.3-70B-Instruct | `meta-llama/Llama-3.3-70B-Instruct-Turbo` | +| Llama-4-Maverick-Instruct | `meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8` | +| Llama-4-Scout-Instruct | `meta-llama/Llama-4-Scout-17B-16E-Instruct` | + + +| Test | Llama-3.3-70B-Instruct | Llama-4-Maverick-Instruct | Llama-4-Scout-Instruct | | --- | --- | --- | --- | -| test_chat_non_streaming_basic (case 0) | ✅ | ✅ | ✅ | -| test_chat_non_streaming_basic (case 1) | ✅ | ✅ | ✅ | -| test_chat_non_streaming_image (case 0) | ⚪ | ✅ | ✅ | -| test_chat_non_streaming_structured_output (case 0) | ✅ | ✅ | ✅ | -| test_chat_non_streaming_structured_output (case 1) | ✅ | ✅ | ✅ | -| test_chat_non_streaming_tool_calling (case 0) | ✅ | ✅ | ✅ | -| test_chat_streaming_basic (case 0) | ✅ | ❌ | ❌ | -| test_chat_streaming_basic (case 1) | ✅ | ❌ | ❌ | -| test_chat_streaming_image (case 0) | ⚪ | ❌ | ❌ | -| test_chat_streaming_structured_output (case 0) | ✅ | ❌ | ❌ | -| test_chat_streaming_structured_output (case 1) | ✅ | ❌ | ❌ | +| test_chat_non_streaming_basic (earth) | ✅ | ✅ | ✅ | +| test_chat_non_streaming_basic (saturn) | ✅ | ✅ | ✅ | +| test_chat_non_streaming_image | ⚪ | ✅ | ✅ | +| test_chat_non_streaming_structured_output (calendar) | ✅ | ✅ | ✅ | +| test_chat_non_streaming_structured_output (math) | ✅ | ✅ | ✅ | +| 
test_chat_non_streaming_tool_calling | ✅ | ✅ | ✅ | +| test_chat_streaming_basic (earth) | ✅ | ❌ | ❌ | +| test_chat_streaming_basic (saturn) | ✅ | ❌ | ❌ | +| test_chat_streaming_image | ⚪ | ❌ | ❌ | +| test_chat_streaming_structured_output (calendar) | ✅ | ❌ | ❌ | +| test_chat_streaming_structured_output (math) | ✅ | ❌ | ❌ | ## Fireworks -*Tests run on: 2025-04-08 16:18:28* +*Tests run on: 2025-04-09 22:50:02* ```bash -pytest tests/verifications/openai/test_chat_completion.py --provider=fireworks -v +# Run all tests for this provider: +pytest tests/verifications/openai_api/test_chat_completion.py --provider=fireworks -v + +# Example: Run only the 'earth' case of test_chat_non_streaming_basic: +pytest tests/verifications/openai_api/test_chat_completion.py --provider=fireworks -k "test_chat_non_streaming_basic and earth" ``` -| Test | Llama-3.3-70B-Instruct | Llama-4-Maverick-17B-128E-Instruct | Llama-4-Scout-17B-16E-Instruct | + +**Model Key (Fireworks)** + +| Display Name | Full Model ID | +| --- | --- | +| Llama-3.3-70B-Instruct | `accounts/fireworks/models/llama-v3p3-70b-instruct` | +| Llama-4-Maverick-Instruct | `accounts/fireworks/models/llama4-maverick-instruct-basic` | +| Llama-4-Scout-Instruct | `accounts/fireworks/models/llama4-scout-instruct-basic` | + + +| Test | Llama-3.3-70B-Instruct | Llama-4-Maverick-Instruct | Llama-4-Scout-Instruct | | --- | --- | --- | --- | -| test_chat_non_streaming_basic (case 0) | ✅ | ✅ | ✅ | -| test_chat_non_streaming_basic (case 1) | ✅ | ✅ | ✅ | -| test_chat_non_streaming_image (case 0) | ⚪ | ✅ | ✅ | -| test_chat_non_streaming_structured_output (case 0) | ✅ | ✅ | ✅ | -| test_chat_non_streaming_structured_output (case 1) | ✅ | ✅ | ✅ | -| test_chat_non_streaming_tool_calling (case 0) | ✅ | ❌ | ❌ | -| test_chat_streaming_basic (case 0) | ✅ | ✅ | ✅ | -| test_chat_streaming_basic (case 1) | ✅ | ✅ | ✅ | -| test_chat_streaming_image (case 0) | ⚪ | ✅ | ✅ | -| test_chat_streaming_structured_output (case 0) | ✅ | ✅ | ✅ | -| 
test_chat_streaming_structured_output (case 1) | ❌ | ✅ | ✅ | +| test_chat_non_streaming_basic (earth) | ✅ | ✅ | ✅ | +| test_chat_non_streaming_basic (saturn) | ✅ | ✅ | ✅ | +| test_chat_non_streaming_image | ⚪ | ✅ | ✅ | +| test_chat_non_streaming_structured_output (calendar) | ✅ | ✅ | ✅ | +| test_chat_non_streaming_structured_output (math) | ✅ | ✅ | ✅ | +| test_chat_non_streaming_tool_calling | ❌ | ❌ | ❌ | +| test_chat_streaming_basic (earth) | ✅ | ✅ | ✅ | +| test_chat_streaming_basic (saturn) | ✅ | ✅ | ✅ | +| test_chat_streaming_image | ⚪ | ✅ | ✅ | +| test_chat_streaming_structured_output (calendar) | ✅ | ✅ | ✅ | +| test_chat_streaming_structured_output (math) | ✅ | ✅ | ✅ | ## Openai -*Tests run on: 2025-04-08 16:22:02* +*Tests run on: 2025-04-09 22:51:44* ```bash -pytest tests/verifications/openai/test_chat_completion.py --provider=openai -v +# Run all tests for this provider: +pytest tests/verifications/openai_api/test_chat_completion.py --provider=openai -v + +# Example: Run only the 'earth' case of test_chat_non_streaming_basic: +pytest tests/verifications/openai_api/test_chat_completion.py --provider=openai -k "test_chat_non_streaming_basic and earth" ``` + +**Model Key (Openai)** + +| Display Name | Full Model ID | +| --- | --- | +| gpt-4o | `gpt-4o` | +| gpt-4o-mini | `gpt-4o-mini` | + + | Test | gpt-4o | gpt-4o-mini | | --- | --- | --- | -| test_chat_non_streaming_basic (case 0) | ✅ | ✅ | -| test_chat_non_streaming_basic (case 1) | ✅ | ✅ | -| test_chat_non_streaming_image (case 0) | ✅ | ✅ | -| test_chat_non_streaming_structured_output (case 0) | ✅ | ✅ | -| test_chat_non_streaming_structured_output (case 1) | ✅ | ✅ | -| test_chat_non_streaming_tool_calling (case 0) | ✅ | ✅ | -| test_chat_streaming_basic (case 0) | ✅ | ✅ | -| test_chat_streaming_basic (case 1) | ✅ | ✅ | -| test_chat_streaming_image (case 0) | ✅ | ✅ | -| test_chat_streaming_structured_output (case 0) | ✅ | ✅ | -| test_chat_streaming_structured_output (case 1) | ✅ | ✅ | +| 
test_chat_non_streaming_basic (earth) | ✅ | ✅ | +| test_chat_non_streaming_basic (saturn) | ✅ | ✅ | +| test_chat_non_streaming_image | ✅ | ✅ | +| test_chat_non_streaming_structured_output (calendar) | ✅ | ✅ | +| test_chat_non_streaming_structured_output (math) | ✅ | ✅ | +| test_chat_non_streaming_tool_calling | ✅ | ✅ | +| test_chat_streaming_basic (earth) | ✅ | ✅ | +| test_chat_streaming_basic (saturn) | ✅ | ✅ | +| test_chat_streaming_image | ✅ | ✅ | +| test_chat_streaming_structured_output (calendar) | ✅ | ✅ | +| test_chat_streaming_structured_output (math) | ✅ | ✅ | diff --git a/tests/verifications/conf/cerebras.yaml b/tests/verifications/conf/cerebras.yaml new file mode 100644 index 000000000..32a60e766 --- /dev/null +++ b/tests/verifications/conf/cerebras.yaml @@ -0,0 +1,10 @@ +base_url: https://api.cerebras.ai/v1 +api_key_var: CEREBRAS_API_KEY +models: +- llama-3.3-70b +model_display_names: + llama-3.3-70b: Llama-3.3-70B-Instruct +test_exclusions: + llama-3.3-70b: + - test_chat_non_streaming_image + - test_chat_streaming_image \ No newline at end of file diff --git a/tests/verifications/conf/fireworks.yaml b/tests/verifications/conf/fireworks.yaml new file mode 100644 index 000000000..30d6e4d75 --- /dev/null +++ b/tests/verifications/conf/fireworks.yaml @@ -0,0 +1,14 @@ +base_url: https://api.fireworks.ai/inference/v1 +api_key_var: FIREWORKS_API_KEY +models: +- accounts/fireworks/models/llama-v3p3-70b-instruct +- accounts/fireworks/models/llama4-scout-instruct-basic +- accounts/fireworks/models/llama4-maverick-instruct-basic +model_display_names: + accounts/fireworks/models/llama-v3p3-70b-instruct: Llama-3.3-70B-Instruct + accounts/fireworks/models/llama4-scout-instruct-basic: Llama-4-Scout-Instruct + accounts/fireworks/models/llama4-maverick-instruct-basic: Llama-4-Maverick-Instruct +test_exclusions: + accounts/fireworks/models/llama-v3p3-70b-instruct: + - test_chat_non_streaming_image + - test_chat_streaming_image \ No newline at end of file diff --git 
a/tests/verifications/conf/groq.yaml b/tests/verifications/conf/groq.yaml new file mode 100644 index 000000000..ef31a66e5 --- /dev/null +++ b/tests/verifications/conf/groq.yaml @@ -0,0 +1,14 @@ +base_url: https://api.groq.com/openai/v1 +api_key_var: GROQ_API_KEY +models: +- llama-3.3-70b-versatile +- llama-4-scout-17b-16e-instruct +- llama-4-maverick-17b-128e-instruct +model_display_names: + llama-3.3-70b-versatile: Llama-3.3-70B-Instruct + llama-4-scout-17b-16e-instruct: Llama-4-Scout-Instruct + llama-4-maverick-17b-128e-instruct: Llama-4-Maverick-Instruct +test_exclusions: + llama-3.3-70b-versatile: + - test_chat_non_streaming_image + - test_chat_streaming_image \ No newline at end of file diff --git a/tests/verifications/conf/openai.yaml b/tests/verifications/conf/openai.yaml new file mode 100644 index 000000000..89ae698f3 --- /dev/null +++ b/tests/verifications/conf/openai.yaml @@ -0,0 +1,9 @@ +base_url: https://api.openai.com/v1 +api_key_var: OPENAI_API_KEY +models: +- gpt-4o +- gpt-4o-mini +model_display_names: + gpt-4o: gpt-4o + gpt-4o-mini: gpt-4o-mini +test_exclusions: {} \ No newline at end of file diff --git a/tests/verifications/conf/together.yaml b/tests/verifications/conf/together.yaml new file mode 100644 index 000000000..80e86fa77 --- /dev/null +++ b/tests/verifications/conf/together.yaml @@ -0,0 +1,14 @@ +base_url: https://api.together.xyz/v1 +api_key_var: TOGETHER_API_KEY +models: +- meta-llama/Llama-3.3-70B-Instruct-Turbo +- meta-llama/Llama-4-Scout-17B-16E-Instruct +- meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8 +model_display_names: + meta-llama/Llama-3.3-70B-Instruct-Turbo: Llama-3.3-70B-Instruct + meta-llama/Llama-4-Scout-17B-16E-Instruct: Llama-4-Scout-Instruct + meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8: Llama-4-Maverick-Instruct +test_exclusions: + meta-llama/Llama-3.3-70B-Instruct-Turbo: + - test_chat_non_streaming_image + - test_chat_streaming_image \ No newline at end of file diff --git a/tests/verifications/conftest.py 
b/tests/verifications/conftest.py index 08967e834..0b4a6feb7 100644 --- a/tests/verifications/conftest.py +++ b/tests/verifications/conftest.py @@ -4,6 +4,10 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. +import re + +import pytest + def pytest_addoption(parser): parser.addoption( @@ -14,7 +18,7 @@ def pytest_addoption(parser): parser.addoption( "--api-key", action="store", - help="API key", + help="API key to use for the provider", ) parser.addoption( "--provider", @@ -24,5 +28,64 @@ def pytest_addoption(parser): pytest_plugins = [ - "tests.verifications.openai.fixtures.fixtures", + "pytest_jsonreport", + "tests.verifications.openai_api.fixtures.fixtures", + "tests.verifications.openai_api.fixtures.load", ] + + +@pytest.hookimpl(optionalhook=True) +def pytest_json_runtest_metadata(item, call): + """Add model and case_id to pytest-json report metadata.""" + metadata = {} + nodeid = item.nodeid + + # 1. Extract model from callspec if available + model = item.callspec.params.get("model") if hasattr(item, "callspec") else None + if model: + metadata["model"] = model + else: + # Fallback: Try parsing from nodeid (less reliable) + match_model = re.search(r"\[(.*?)-", nodeid) + if match_model: + model = match_model.group(1) # Store model even if found via fallback + metadata["model"] = model + else: + print(f"Warning: Could not determine model for test {nodeid}") + model = None # Ensure model is None if not found + + # 2. Extract case_id using the known model string if possible + if model: + # Construct a regex pattern to find the case_id *after* the model name and a hyphen. + # Escape the model name in case it contains regex special characters. 
+ pattern = re.escape(model) + r"-(.*?)\]$" + match_case = re.search(pattern, nodeid) + if match_case: + case_id = match_case.group(1) + metadata["case_id"] = case_id + else: + # Fallback if the pattern didn't match (e.g., nodeid format unexpected) + # Try the old less specific regex as a last resort. + match_case_fallback = re.search(r"-(.*?)\]$", nodeid) + if match_case_fallback: + case_id = match_case_fallback.group(1) + metadata["case_id"] = case_id + print(f"Warning: Used fallback regex to parse case_id from nodeid {nodeid}") + else: + print(f"Warning: Could not parse case_id from nodeid {nodeid} even with fallback.") + if "case" in (item.callspec.params if hasattr(item, "callspec") else {}): + metadata["case_id"] = "parsing_failed" + elif "case" in (item.callspec.params if hasattr(item, "callspec") else {}): + # Cannot reliably parse case_id without model, but we know it's a case test. + # Try the generic fallback regex. + match_case_fallback = re.search(r"-(.*?)\]$", nodeid) + if match_case_fallback: + case_id = match_case_fallback.group(1) + metadata["case_id"] = case_id + print(f"Warning: Used fallback regex to parse case_id from nodeid {nodeid} (model unknown)") + else: + print(f"Warning: Could not parse case_id from nodeid {nodeid} (model unknown)") + metadata["case_id"] = "parsing_failed_no_model" + # else: Not a test with a model or case param we need to handle. + + return metadata diff --git a/tests/verifications/generate_report.py b/tests/verifications/generate_report.py index 98a5930da..1c760ca19 100755 --- a/tests/verifications/generate_report.py +++ b/tests/verifications/generate_report.py @@ -4,27 +4,48 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
+# /// script +# requires-python = ">=3.10" +# dependencies = [ +# "pytest-json-report", +# "pyyaml", +# ] +# /// """ Test Report Generator -Requirements: - pip install pytest-json-report +Description: + This script runs pytest tests (specifically designed for OpenAI API compatibility checks) + for different providers, aggregates the results from JSON reports, and generates + a markdown summary report (REPORT.md). + + It automatically cleans up old test result files, keeping only the latest + per provider. + + +Configuration: + - Provider details (models, display names) are loaded from the per-provider YAML files in `tests/verifications/conf/`. + - Test cases are defined in YAML files within `tests/verifications/openai_api/fixtures/test_cases/`. + - Test results are stored in `tests/verifications/test_results/`. Usage: - # Generate a report using existing test results + # Generate a report using the latest existing test results python tests/verifications/generate_report.py - # Run tests and generate a report + # Run tests for all configured providers and generate a report python tests/verifications/generate_report.py --run-tests - # Run tests for specific providers + # Run tests only for specific providers (space-separated) python tests/verifications/generate_report.py --run-tests --providers fireworks openai + # Run tests matching a keyword expression (uses pytest -k) + python tests/verifications/generate_report.py --run-tests --providers fireworks --k "streaming" + + # Run a specific test case for a provider + python tests/verifications/generate_report.py --run-tests --providers fireworks --k "test_chat_streaming_basic and earth" + # Save the report to a custom location python tests/verifications/generate_report.py --output custom_report.md - - # Clean up old test result files - python tests/verifications/generate_report.py --cleanup """ import argparse @@ -35,6 +56,9 @@ import subprocess import time from collections import defaultdict from pathlib import Path +from typing import
Any, DefaultDict, Dict, Set, Tuple + +from tests.verifications.openai_api.fixtures.fixtures import _load_all_verification_configs # Define the root directory for test results RESULTS_DIR = Path(__file__).parent / "test_results" @@ -43,17 +67,12 @@ RESULTS_DIR.mkdir(exist_ok=True) # Maximum number of test result files to keep per provider MAX_RESULTS_PER_PROVIDER = 1 -# Custom order of providers PROVIDER_ORDER = ["together", "fireworks", "groq", "cerebras", "openai"] -# Dictionary to store providers and their models (will be populated dynamically) -PROVIDERS = defaultdict(set) - -# Tests will be dynamically extracted from results -ALL_TESTS = set() +VERIFICATION_CONFIG = _load_all_verification_configs() -def run_tests(provider): +def run_tests(provider, keyword=None): """Run pytest for a specific provider and save results""" print(f"Running tests for provider: {provider}") @@ -61,20 +80,28 @@ def run_tests(provider): result_file = RESULTS_DIR / f"{provider}_{timestamp}.json" temp_json_file = RESULTS_DIR / f"temp_{provider}_{timestamp}.json" + # Determine project root directory relative to this script + project_root = Path(__file__).parent.parent.parent + # Run pytest with JSON output cmd = [ "python", "-m", "pytest", - "tests/verifications/openai/test_chat_completion.py", + "tests/verifications/openai_api/test_chat_completion.py", f"--provider={provider}", "-v", "--json-report", f"--json-report-file={temp_json_file}", ] + # Append -k argument if provided + if keyword: + cmd.extend(["-k", keyword]) + try: - result = subprocess.run(cmd, capture_output=True, text=True) + # Run subprocess with cwd set to project root + result = subprocess.run(cmd, capture_output=True, text=True, cwd=project_root) print(f"Pytest exit code: {result.returncode}") # Check if the JSON file was created @@ -103,18 +130,30 @@ def run_tests(provider): return None -def parse_results(result_file): - """Parse the test results file and extract pass/fail by model and test""" +def parse_results( + 
result_file, +) -> Tuple[DefaultDict[str, DefaultDict[str, Dict[str, bool]]], DefaultDict[str, Set[str]], Set[str]]: + """Parse a single test results file. + + Returns: + Tuple containing: + - parsed_results: DefaultDict[provider, DefaultDict[model, Dict[test_name, pass_status]]] + - providers_in_file: DefaultDict[provider, Set[model]] found in this file. + - tests_in_file: Set[test_name] found in this file. + """ if not os.path.exists(result_file): print(f"Results file does not exist: {result_file}") - return {} + # Return empty defaultdicts/set matching the type hint + return defaultdict(lambda: defaultdict(dict)), defaultdict(set), set() with open(result_file, "r") as f: results = json.load(f) - # Initialize results dictionary - parsed_results = defaultdict(lambda: defaultdict(dict)) - provider = os.path.basename(result_file).split("_")[0] + # Initialize results dictionary with specific types + parsed_results: DefaultDict[str, DefaultDict[str, Dict[str, bool]]] = defaultdict(lambda: defaultdict(dict)) + providers_in_file: DefaultDict[str, Set[str]] = defaultdict(set) + tests_in_file: Set[str] = set() + provider: str = os.path.basename(result_file).split("_")[0] # Debug: Print summary of test results print(f"Test results summary for {provider}:") @@ -127,124 +166,72 @@ def parse_results(result_file): # Extract test results if "tests" not in results or not results["tests"]: print(f"No test results found in {result_file}") - return parsed_results + # Return empty defaultdicts/set matching the type hint + return defaultdict(lambda: defaultdict(dict)), defaultdict(set), set() - # Map for normalizing model names - model_name_map = { - "Llama-3.3-8B-Instruct": "Llama-3.3-8B-Instruct", - "Llama-3.3-70B-Instruct": "Llama-3.3-70B-Instruct", - "Llama-3.2-11B-Vision-Instruct": "Llama-3.2-11B-Vision-Instruct", - "Llama-4-Scout-17B-16E": "Llama-4-Scout-17B-16E-Instruct", - "Llama-4-Scout-17B-16E-Instruct": "Llama-4-Scout-17B-16E-Instruct", - "Llama-4-Maverick-17B-128E": 
"Llama-4-Maverick-17B-128E-Instruct", - "Llama-4-Maverick-17B-128E-Instruct": "Llama-4-Maverick-17B-128E-Instruct", - "gpt-4o": "gpt-4o", - "gpt-4o-mini": "gpt-4o-mini", - } - - # Keep track of all models found for this provider - provider_models = set() - - # Track all unique test cases for each base test - test_case_counts = defaultdict(int) - - # First pass: count the number of cases for each test + # Process the tests for test in results["tests"]: test_id = test.get("nodeid", "") - if "call" in test: - test_name = test_id.split("::")[1].split("[")[0] - input_output_match = re.search(r"\[input_output(\d+)-", test_id) - if input_output_match: - test_case_counts[test_name] += 1 + if not (call_phase := test.get("call")): + continue + call_outcome = call_phase.get("outcome") + if call_outcome not in ("passed", "failed"): + continue - # Second pass: process the tests with case numbers only for tests with multiple cases - for test in results["tests"]: - test_id = test.get("nodeid", "") - outcome = test.get("outcome", "") + # --- Extract data from metadata --- + metadata = test.get("metadata", {}) + model = metadata.get("model") + case_id = metadata.get("case_id") # String ID (if provided) + case_index = metadata.get("case_index") # Integer index (if no ID provided) - # Only process tests that have been executed (not setup errors) - if "call" in test: - # Regular test that actually ran - test_name = test_id.split("::")[1].split("[")[0] + # Check if we have a model and at least one case identifier + if not model or (case_id is None and case_index is None): + print( + f"Warning: Missing 'model' or case identifier ('case_id'/'case_index') metadata for test: {test_id}. Skipping." 
+ ) + continue - # Extract input_output parameter to differentiate between test cases - input_output_match = re.search(r"\[input_output(\d+)-", test_id) - input_output_index = input_output_match.group(1) if input_output_match else "" + try: + test_name_base = test_id.split("::")[1].split("[")[0] + except (IndexError, ValueError) as e: + print(f"Warning: Could not parse base test name for {test_id}. Error: {e}. Skipping.") + continue - # Create a more detailed test name with case number only if there are multiple cases - detailed_test_name = test_name - if input_output_index and test_case_counts[test_name] > 1: - detailed_test_name = f"{test_name} (case {input_output_index})" + # Construct detailed test name using ID or index + if case_id is not None: + detailed_test_name = f"{test_name_base} ({case_id})" + elif case_index == 0: + # If case_id is missing and index is 0, assume single case, use base name only + detailed_test_name = test_name_base + elif case_index is not None: # case_index > 0 + # Use case_index for naming if case_id wasn't provided and index > 0 + detailed_test_name = f"{test_name_base} (case{case_index})" + else: + # This case should be prevented by the earlier check, but handle defensively + print(f"Error: No case identifier found for test {test_id} after initial check. 
Skipping.") + continue - # Track all unique test names - ALL_TESTS.add(detailed_test_name) + # Populate collections for this file + tests_in_file.add(detailed_test_name) + providers_in_file[provider].add(model) - # Extract model name from test_id using a more robust pattern - model_match = re.search(r"\[input_output\d+-([^\]]+)\]", test_id) - if model_match: - raw_model = model_match.group(1) - model = model_name_map.get(raw_model, raw_model) + if call_outcome == "passed": + parsed_results[provider][model][detailed_test_name] = True + elif call_outcome == "failed": + parsed_results[provider][model][detailed_test_name] = False - # Add to set of known models for this provider - provider_models.add(model) + # Final Summary Warning (Optional) + if not parsed_results.get(provider): + print(f"Warning: No valid test results parsed for provider {provider} from file {result_file}") - # Also update the global PROVIDERS dictionary - PROVIDERS[provider].add(model) - - # Store the result - if outcome == "passed": - parsed_results[provider][model][detailed_test_name] = True - else: - parsed_results[provider][model][detailed_test_name] = False - - print(f"Parsed test result: {detailed_test_name} for model {model}: {outcome}") - elif outcome == "error" and "setup" in test and test.get("setup", {}).get("outcome") == "failed": - # This is a setup failure, which likely means a configuration issue - # Extract the base test name and model name - parts = test_id.split("::") - if len(parts) > 1: - test_name = parts[1].split("[")[0] - - # Extract input_output parameter to differentiate between test cases - input_output_match = re.search(r"\[input_output(\d+)-", test_id) - input_output_index = input_output_match.group(1) if input_output_match else "" - - # Create a more detailed test name with case number only if there are multiple cases - detailed_test_name = test_name - if input_output_index and test_case_counts[test_name] > 1: - detailed_test_name = f"{test_name} (case 
{input_output_index})" - - if detailed_test_name in ALL_TESTS: - # Use a more robust pattern for model extraction - model_match = re.search(r"\[input_output\d+-([^\]]+)\]", test_id) - if model_match: - raw_model = model_match.group(1) - model = model_name_map.get(raw_model, raw_model) - - # Add to set of known models for this provider - provider_models.add(model) - - # Also update the global PROVIDERS dictionary - PROVIDERS[provider].add(model) - - # Mark setup failures as false (failed) - parsed_results[provider][model][detailed_test_name] = False - print(f"Parsed setup failure: {detailed_test_name} for model {model}") - - # Debug: Print parsed results - if not parsed_results[provider]: - print(f"Warning: No test results parsed for provider {provider}") - else: - for model, tests in parsed_results[provider].items(): - print(f"Model {model}: {len(tests)} test results") - - return parsed_results + return parsed_results, providers_in_file, tests_in_file -def cleanup_old_results(): - """Clean up old test result files, keeping only the newest N per provider""" - for provider in PROVIDERS.keys(): +def cleanup_old_results(providers_to_clean: Dict[str, Set[str]]): + """Clean up old test result files, keeping only the newest N per provider.""" + # Use the passed-in providers dictionary + for provider in providers_to_clean.keys(): # Get all result files for this provider provider_files = list(RESULTS_DIR.glob(f"{provider}_*.json")) @@ -289,8 +276,17 @@ def get_latest_results_by_provider(): return provider_results -def generate_report(results_dict, output_file=None): - """Generate the markdown report""" +def generate_report( + results_dict: Dict[str, Any], providers: Dict[str, Set[str]], all_tests: Set[str], output_file=None +): + """Generate the markdown report. + + Args: + results_dict: Aggregated results [provider][model][test_name] -> status. + providers: Dict of all providers and their models {provider: {models}}. + all_tests: Set of all test names found. 
+ output_file: Optional path to save the report. + """ if output_file is None: # Default to creating the report in the same directory as this script output_file = Path(__file__).parent / "REPORT.md" @@ -299,8 +295,8 @@ def generate_report(results_dict, output_file=None): # Get the timestamp from result files provider_timestamps = {} - provider_results = get_latest_results_by_provider() - for provider, result_file in provider_results.items(): + provider_results_files = get_latest_results_by_provider() + for provider, result_file in provider_results_files.items(): # Extract timestamp from filename (format: provider_timestamp.json) try: timestamp_str = result_file.stem.split("_")[1] @@ -310,12 +306,33 @@ def generate_report(results_dict, output_file=None): except (IndexError, ValueError): provider_timestamps[provider] = "Unknown" - # Convert provider model sets to sorted lists - for provider in PROVIDERS: - PROVIDERS[provider] = sorted(PROVIDERS[provider]) + # Convert provider model sets to sorted lists (use passed-in providers dict) + providers_sorted = {prov: sorted(models) for prov, models in providers.items()} - # Sort tests alphabetically - sorted_tests = sorted(ALL_TESTS) + # Sort tests alphabetically (use passed-in all_tests set) + sorted_tests = sorted(all_tests) + + # Calculate counts for each base test name + base_test_case_counts: DefaultDict[str, int] = defaultdict(int) + base_test_name_map: Dict[str, str] = {} + for test_name in sorted_tests: + match = re.match(r"^(.*?)( \([^)]+\))?$", test_name) + if match: + base_name = match.group(1).strip() + base_test_case_counts[base_name] += 1 + base_test_name_map[test_name] = base_name + else: + # Should not happen with current naming, but handle defensively + base_test_case_counts[test_name] += 1 + base_test_name_map[test_name] = test_name + + if not sorted_tests: + print("Warning: No test results found to generate a report.") + # Optionally create an empty report or return early + with open(output_file, "w") as 
f: + f.write("# Test Results Report\n\nNo test results found.\n") + print(f"Generated empty report: {output_file}") + return report = ["# Test Results Report\n"] report.append(f"*Generated on: {time.strftime('%Y-%m-%d %H:%M:%S')}*\n") @@ -336,19 +353,15 @@ def generate_report(results_dict, output_file=None): # Add a summary section report.append("## Summary\n") - # Count total tests and passes + # Count total tests and passes (use passed-in providers and all_tests) total_tests = 0 passed_tests = 0 provider_totals = {} - - # Prepare summary data - for provider in PROVIDERS.keys(): + for provider, models in providers_sorted.items(): provider_passed = 0 provider_total = 0 - if provider in results_dict: - provider_models = PROVIDERS[provider] - for model in provider_models: + for model in models: if model in results_dict[provider]: model_results = results_dict[provider][model] for test in sorted_tests: @@ -358,33 +371,26 @@ def generate_report(results_dict, output_file=None): if model_results[test]: provider_passed += 1 passed_tests += 1 - provider_totals[provider] = (provider_passed, provider_total) - # Add summary table + # Add summary table (use passed-in providers dict) report.append("| Provider | Pass Rate | Tests Passed | Total Tests |") report.append("| --- | --- | --- | --- |") - - # Use the custom order for summary table - for provider in [p for p in PROVIDER_ORDER if p in PROVIDERS]: + for provider in [p for p in PROVIDER_ORDER if p in providers]: # Check against keys of passed-in dict passed, total = provider_totals.get(provider, (0, 0)) pass_rate = f"{(passed / total * 100):.1f}%" if total > 0 else "N/A" report.append(f"| {provider.capitalize()} | {pass_rate} | {passed} | {total} |") - - # Add providers not in the custom order - for provider in [p for p in PROVIDERS if p not in PROVIDER_ORDER]: + for provider in [p for p in providers if p not in PROVIDER_ORDER]: # Check against keys of passed-in dict passed, total = provider_totals.get(provider, (0, 0)) 
pass_rate = f"{(passed / total * 100):.1f}%" if total > 0 else "N/A" report.append(f"| {provider.capitalize()} | {pass_rate} | {passed} | {total} |") - report.append("\n") - # Process each provider in the custom order, then any additional providers for provider in sorted( - PROVIDERS.keys(), key=lambda p: (PROVIDER_ORDER.index(p) if p in PROVIDER_ORDER else float("inf"), p) + providers_sorted.keys(), key=lambda p: (PROVIDER_ORDER.index(p) if p in PROVIDER_ORDER else float("inf"), p) ): - if not PROVIDERS[provider]: - # Skip providers with no models + provider_models = providers_sorted[provider] # Use sorted models + if not provider_models: continue report.append(f"\n## {provider.capitalize()}\n") @@ -394,34 +400,70 @@ def generate_report(results_dict, output_file=None): report.append(f"*Tests run on: {provider_timestamps[provider]}*\n") # Add test command for reproducing results - test_cmd = f"pytest tests/verifications/openai/test_chat_completion.py --provider={provider} -v" - report.append(f"```bash\n{test_cmd}\n```\n") + test_cmd_all = f"pytest tests/verifications/openai_api/test_chat_completion.py --provider={provider} -v" + report.append(f"```bash\n# Run all tests for this provider:\n{test_cmd_all}\n") - # Get the relevant models for this provider - provider_models = PROVIDERS[provider] + # Find an example test with a case ID + example_base_test_name = None + example_case_id = None + # Get first test as fallback base, handle empty list + first_test_name = sorted_tests[0] if sorted_tests else "unknown_test" - # Create table header with models as columns - header = "| Test | " + " | ".join(provider_models) + " |" + match = re.match(r"^(.*?) 
\((.*?)\)$", first_test_name) + if match: + example_base_test_name = match.group(1).strip() + example_case_id = match.group(2).strip() + else: + example_base_test_name = first_test_name + + base_name = base_test_name_map.get(test, test) # Get base name + case_count = base_test_case_counts.get(base_name, 1) # Get count + filter_str = f"{example_base_test_name} and {example_case_id}" if case_count > 1 else example_base_test_name + + test_cmd_specific_case = ( + f'pytest tests/verifications/openai_api/test_chat_completion.py --provider={provider} -k "{filter_str}"' + ) + report.append( + f"# Example: Run only the '{example_case_id}' case of {example_base_test_name}:\n{test_cmd_specific_case}\n```\n" + ) + + # Get display names (use passed-in providers dict) + provider_config = VERIFICATION_CONFIG.get("providers", {}).get(provider, {}) + display_name_map = provider_config.get("model_display_names", {}) + + # Add Model Key Table (use provider_models) + report.append(f"\n**Model Key ({provider.capitalize()})**\n") + provider_key_lines = ["| Display Name | Full Model ID |", "| --- | --- |"] + for model_id in provider_models: + display_name = display_name_map.get(model_id, model_id) + provider_key_lines.append(f"| {display_name} | `{model_id}` |") + report.extend(provider_key_lines) + report.append("\n") + + # Create results table header (use provider_models) + display_names = [display_name_map.get(m, m) for m in provider_models] + header = "| Test | " + " | ".join(display_names) + " |" separator = "| --- | " + " | ".join(["---"] * len(provider_models)) + " |" - report.append(header) report.append(separator) - # Get results for this provider - provider_results = results_dict.get(provider, {}) + # Get results for this provider from results_dict + provider_results_data = results_dict.get(provider, {}) - # Add rows for each test + # Add rows for each test (use sorted_tests) for test in sorted_tests: - row = f"| {test} |" + # Determine display name based on case count + 
base_name = base_test_name_map.get(test, test) # Get base name + case_count = base_test_case_counts.get(base_name, 1) # Get count + display_test_name = base_name if case_count == 1 else test # Choose display name + row = f"| {display_test_name} |" # Use display name - # Add results for each model in this test - for model in provider_models: - if model in provider_results and test in provider_results[model]: - result = pass_icon if provider_results[model][test] else fail_icon + for model_id in provider_models: + if model_id in provider_results_data and test in provider_results_data[model_id]: + result = pass_icon if provider_results_data[model_id][test] else fail_icon else: result = na_icon row += f" {result} |" - report.append(row) # Write to file @@ -442,9 +484,13 @@ def main(): help="Specify providers to test (comma-separated or space-separated, default: all)", ) parser.add_argument("--output", type=str, help="Output file location (default: tests/verifications/REPORT.md)") + parser.add_argument("--k", type=str, help="Keyword expression to filter tests (passed to pytest -k)") args = parser.parse_args() all_results = {} + # Initialize collections to aggregate results in main + aggregated_providers = defaultdict(set) + aggregated_tests = set() if args.run_tests: # Get list of available providers from command line or use detected providers @@ -463,22 +509,31 @@ def main(): for provider in test_providers: provider = provider.strip() # Remove any whitespace - result_file = run_tests(provider) + result_file = run_tests(provider, keyword=args.k) if result_file: - provider_results = parse_results(result_file) - all_results.update(provider_results) + # Parse and aggregate results + parsed_results, providers_in_file, tests_in_file = parse_results(result_file) + all_results.update(parsed_results) + for prov, models in providers_in_file.items(): + aggregated_providers[prov].update(models) + aggregated_tests.update(tests_in_file) else: # Use existing results 
provider_result_files = get_latest_results_by_provider() for result_file in provider_result_files.values(): - provider_results = parse_results(result_file) - all_results.update(provider_results) + # Parse and aggregate results + parsed_results, providers_in_file, tests_in_file = parse_results(result_file) + all_results.update(parsed_results) + for prov, models in providers_in_file.items(): + aggregated_providers[prov].update(models) + aggregated_tests.update(tests_in_file) - # Generate the report - generate_report(all_results, args.output) + # Generate the report, passing aggregated data + generate_report(all_results, aggregated_providers, aggregated_tests, args.output) - cleanup_old_results() + # Cleanup, passing aggregated providers + cleanup_old_results(aggregated_providers) if __name__ == "__main__": diff --git a/tests/verifications/openai/fixtures/fixtures.py b/tests/verifications/openai/fixtures/fixtures.py deleted file mode 100644 index b86de3662..000000000 --- a/tests/verifications/openai/fixtures/fixtures.py +++ /dev/null @@ -1,97 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -import os - -import pytest -from openai import OpenAI - - -@pytest.fixture -def providers_model_mapping(): - """ - Mapping from model names used in test cases to provider's model names. 
- """ - return { - "fireworks": { - "Llama-3.3-70B-Instruct": "accounts/fireworks/models/llama-v3p1-70b-instruct", - "Llama-3.2-11B-Vision-Instruct": "accounts/fireworks/models/llama-v3p2-11b-vision-instruct", - "Llama-4-Scout-17B-16E-Instruct": "accounts/fireworks/models/llama4-scout-instruct-basic", - "Llama-4-Maverick-17B-128E-Instruct": "accounts/fireworks/models/llama4-maverick-instruct-basic", - }, - "together": { - "Llama-3.3-70B-Instruct": "meta-llama/Llama-3.3-70B-Instruct-Turbo", - "Llama-3.2-11B-Vision-Instruct": "meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo", - "Llama-4-Scout-17B-16E-Instruct": "meta-llama/Llama-4-Scout-17B-16E-Instruct", - "Llama-4-Maverick-17B-128E-Instruct": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", - }, - "groq": { - "Llama-3.3-70B-Instruct": "llama-3.3-70b-versatile", - "Llama-3.2-11B-Vision-Instruct": "llama-3.2-11b-vision-preview", - "Llama-4-Scout-17B-16E-Instruct": "llama-4-scout-17b-16e-instruct", - "Llama-4-Maverick-17B-128E-Instruct": "llama-4-maverick-17b-128e-instruct", - }, - "cerebras": { - "Llama-3.3-70B-Instruct": "llama-3.3-70b", - }, - "openai": { - "gpt-4o": "gpt-4o", - "gpt-4o-mini": "gpt-4o-mini", - }, - } - - -@pytest.fixture -def provider_metadata(): - return { - "fireworks": ("https://api.fireworks.ai/inference/v1", "FIREWORKS_API_KEY"), - "together": ("https://api.together.xyz/v1", "TOGETHER_API_KEY"), - "groq": ("https://api.groq.com/openai/v1", "GROQ_API_KEY"), - "cerebras": ("https://api.cerebras.ai/v1", "CEREBRAS_API_KEY"), - "openai": ("https://api.openai.com/v1", "OPENAI_API_KEY"), - } - - -@pytest.fixture -def provider(request, provider_metadata): - provider = request.config.getoption("--provider") - base_url = request.config.getoption("--base-url") - - if provider and base_url and provider_metadata[provider][0] != base_url: - raise ValueError(f"Provider {provider} is not supported for base URL {base_url}") - - if not provider: - if not base_url: - raise ValueError("Provider and base URL 
are not provided") - for provider, metadata in provider_metadata.items(): - if metadata[0] == base_url: - provider = provider - break - - return provider - - -@pytest.fixture -def base_url(request, provider, provider_metadata): - return request.config.getoption("--base-url") or provider_metadata[provider][0] - - -@pytest.fixture -def api_key(request, provider, provider_metadata): - return request.config.getoption("--api-key") or os.getenv(provider_metadata[provider][1]) - - -@pytest.fixture -def model_mapping(provider, providers_model_mapping): - return providers_model_mapping[provider] - - -@pytest.fixture -def openai_client(base_url, api_key): - return OpenAI( - base_url=base_url, - api_key=api_key, - ) diff --git a/tests/verifications/openai/test_chat_completion.py b/tests/verifications/openai/test_chat_completion.py deleted file mode 100644 index c6a10de7b..000000000 --- a/tests/verifications/openai/test_chat_completion.py +++ /dev/null @@ -1,202 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
- -from typing import Any - -import pytest -from pydantic import BaseModel - -from tests.verifications.openai.fixtures.load import load_test_cases - -chat_completion_test_cases = load_test_cases("chat_completion") - - -@pytest.fixture -def correct_model_name(model, provider, providers_model_mapping): - """Return the provider-specific model name based on the generic model name.""" - mapping = providers_model_mapping[provider] - if model not in mapping: - pytest.skip(f"Provider {provider} does not support model {model}") - return mapping[model] - - -@pytest.mark.parametrize("model", chat_completion_test_cases["test_chat_basic"]["test_params"]["model"]) -@pytest.mark.parametrize( - "input_output", - chat_completion_test_cases["test_chat_basic"]["test_params"]["input_output"], -) -def test_chat_non_streaming_basic(openai_client, input_output, correct_model_name): - response = openai_client.chat.completions.create( - model=correct_model_name, - messages=input_output["input"]["messages"], - stream=False, - ) - assert response.choices[0].message.role == "assistant" - assert input_output["output"].lower() in response.choices[0].message.content.lower() - - -@pytest.mark.parametrize("model", chat_completion_test_cases["test_chat_basic"]["test_params"]["model"]) -@pytest.mark.parametrize( - "input_output", - chat_completion_test_cases["test_chat_basic"]["test_params"]["input_output"], -) -def test_chat_streaming_basic(openai_client, input_output, correct_model_name): - response = openai_client.chat.completions.create( - model=correct_model_name, - messages=input_output["input"]["messages"], - stream=True, - ) - content = "" - for chunk in response: - content += chunk.choices[0].delta.content or "" - - # TODO: add detailed type validation - - assert input_output["output"].lower() in content.lower() - - -@pytest.mark.parametrize("model", chat_completion_test_cases["test_chat_image"]["test_params"]["model"]) -@pytest.mark.parametrize( - "input_output", - 
chat_completion_test_cases["test_chat_image"]["test_params"]["input_output"], -) -def test_chat_non_streaming_image(openai_client, input_output, correct_model_name): - response = openai_client.chat.completions.create( - model=correct_model_name, - messages=input_output["input"]["messages"], - stream=False, - ) - assert response.choices[0].message.role == "assistant" - assert input_output["output"].lower() in response.choices[0].message.content.lower() - - -@pytest.mark.parametrize("model", chat_completion_test_cases["test_chat_image"]["test_params"]["model"]) -@pytest.mark.parametrize( - "input_output", - chat_completion_test_cases["test_chat_image"]["test_params"]["input_output"], -) -def test_chat_streaming_image(openai_client, input_output, correct_model_name): - response = openai_client.chat.completions.create( - model=correct_model_name, - messages=input_output["input"]["messages"], - stream=True, - ) - content = "" - for chunk in response: - content += chunk.choices[0].delta.content or "" - - # TODO: add detailed type validation - - assert input_output["output"].lower() in content.lower() - - -@pytest.mark.parametrize( - "model", - chat_completion_test_cases["test_chat_structured_output"]["test_params"]["model"], -) -@pytest.mark.parametrize( - "input_output", - chat_completion_test_cases["test_chat_structured_output"]["test_params"]["input_output"], -) -def test_chat_non_streaming_structured_output(openai_client, input_output, correct_model_name): - response = openai_client.chat.completions.create( - model=correct_model_name, - messages=input_output["input"]["messages"], - response_format=input_output["input"]["response_format"], - stream=False, - ) - - assert response.choices[0].message.role == "assistant" - maybe_json_content = response.choices[0].message.content - - validate_structured_output(maybe_json_content, input_output["output"]) - - -@pytest.mark.parametrize( - "model", - 
chat_completion_test_cases["test_chat_structured_output"]["test_params"]["model"], -) -@pytest.mark.parametrize( - "input_output", - chat_completion_test_cases["test_chat_structured_output"]["test_params"]["input_output"], -) -def test_chat_streaming_structured_output(openai_client, input_output, correct_model_name): - response = openai_client.chat.completions.create( - model=correct_model_name, - messages=input_output["input"]["messages"], - response_format=input_output["input"]["response_format"], - stream=True, - ) - maybe_json_content = "" - for chunk in response: - maybe_json_content += chunk.choices[0].delta.content or "" - validate_structured_output(maybe_json_content, input_output["output"]) - - -@pytest.mark.parametrize( - "model", - chat_completion_test_cases["test_tool_calling"]["test_params"]["model"], -) -@pytest.mark.parametrize( - "input_output", - chat_completion_test_cases["test_tool_calling"]["test_params"]["input_output"], -) -def test_chat_non_streaming_tool_calling(openai_client, input_output, correct_model_name): - response = openai_client.chat.completions.create( - model=correct_model_name, - messages=input_output["input"]["messages"], - tools=input_output["input"]["tools"], - stream=False, - ) - - assert response.choices[0].message.role == "assistant" - assert len(response.choices[0].message.tool_calls) > 0 - assert input_output["output"] == "get_weather_tool_call" - assert response.choices[0].message.tool_calls[0].function.name == "get_weather" - # TODO: add detailed type validation - - -def get_structured_output(maybe_json_content: str, schema_name: str) -> Any | None: - if schema_name == "valid_calendar_event": - - class CalendarEvent(BaseModel): - name: str - date: str - participants: list[str] - - try: - calendar_event = CalendarEvent.model_validate_json(maybe_json_content) - return calendar_event - except Exception: - return None - elif schema_name == "valid_math_reasoning": - - class Step(BaseModel): - explanation: str - output: str - 
- class MathReasoning(BaseModel): - steps: list[Step] - final_answer: str - - try: - math_reasoning = MathReasoning.model_validate_json(maybe_json_content) - return math_reasoning - except Exception: - return None - - return None - - -def validate_structured_output(maybe_json_content: str, schema_name: str) -> None: - structured_output = get_structured_output(maybe_json_content, schema_name) - assert structured_output is not None - if schema_name == "valid_calendar_event": - assert structured_output.name is not None - assert structured_output.date is not None - assert len(structured_output.participants) == 2 - elif schema_name == "valid_math_reasoning": - assert len(structured_output.final_answer) > 0 diff --git a/tests/verifications/openai/__init__.py b/tests/verifications/openai_api/__init__.py similarity index 100% rename from tests/verifications/openai/__init__.py rename to tests/verifications/openai_api/__init__.py diff --git a/tests/verifications/openai/fixtures/__init__.py b/tests/verifications/openai_api/fixtures/__init__.py similarity index 100% rename from tests/verifications/openai/fixtures/__init__.py rename to tests/verifications/openai_api/fixtures/__init__.py diff --git a/tests/verifications/openai_api/fixtures/fixtures.py b/tests/verifications/openai_api/fixtures/fixtures.py new file mode 100644 index 000000000..4f8c2e017 --- /dev/null +++ b/tests/verifications/openai_api/fixtures/fixtures.py @@ -0,0 +1,105 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
+ +import os +from pathlib import Path + +import pytest +import yaml +from openai import OpenAI + + +# --- Helper Function to Load Config --- +def _load_all_verification_configs(): + """Load and aggregate verification configs from the conf/ directory.""" + # Note: Path is relative to *this* file (fixtures.py) + conf_dir = Path(__file__).parent.parent.parent / "conf" + if not conf_dir.is_dir(): + # Use pytest.fail if called during test collection, otherwise raise error + # For simplicity here, we'll raise an error, assuming direct calls + # are less likely or can handle it. + raise FileNotFoundError(f"Verification config directory not found at {conf_dir}") + + all_provider_configs = {} + yaml_files = list(conf_dir.glob("*.yaml")) + if not yaml_files: + raise FileNotFoundError(f"No YAML configuration files found in {conf_dir}") + + for config_path in yaml_files: + provider_name = config_path.stem + try: + with open(config_path, "r") as f: + provider_config = yaml.safe_load(f) + if provider_config: + all_provider_configs[provider_name] = provider_config + else: + # Log warning if possible, or just skip empty files silently + print(f"Warning: Config file {config_path} is empty or invalid.") + except Exception as e: + raise IOError(f"Error loading config file {config_path}: {e}") from e + + return {"providers": all_provider_configs} + + +# --- End Helper Function --- + + +@pytest.fixture(scope="session") +def verification_config(): + """Pytest fixture to provide the loaded verification config.""" + try: + return _load_all_verification_configs() + except (FileNotFoundError, IOError) as e: + pytest.fail(str(e)) # Fail test collection if config loading fails + + +@pytest.fixture +def provider(request, verification_config): + provider = request.config.getoption("--provider") + base_url = request.config.getoption("--base-url") + + if provider and base_url and verification_config["providers"][provider]["base_url"] != base_url: + raise ValueError(f"Provider {provider} is not 
supported for base URL {base_url}") + + if not provider: + if not base_url: + raise ValueError("Provider and base URL are not provided") + for provider, metadata in verification_config["providers"].items(): + if metadata["base_url"] == base_url: + provider = provider + break + + return provider + + +@pytest.fixture +def base_url(request, provider, verification_config): + return request.config.getoption("--base-url") or verification_config["providers"][provider]["base_url"] + + +@pytest.fixture +def api_key(request, provider, verification_config): + provider_conf = verification_config.get("providers", {}).get(provider, {}) + api_key_env_var = provider_conf.get("api_key_var") + + key_from_option = request.config.getoption("--api-key") + key_from_env = os.getenv(api_key_env_var) if api_key_env_var else None + + final_key = key_from_option or key_from_env + return final_key + + +@pytest.fixture +def model_mapping(provider, providers_model_mapping): + return providers_model_mapping[provider] + + +@pytest.fixture +def openai_client(base_url, api_key): + return OpenAI( + base_url=base_url, + api_key=api_key, + ) diff --git a/tests/verifications/openai/fixtures/load.py b/tests/verifications/openai_api/fixtures/load.py similarity index 100% rename from tests/verifications/openai/fixtures/load.py rename to tests/verifications/openai_api/fixtures/load.py diff --git a/tests/verifications/openai/fixtures/test_cases/chat_completion.yaml b/tests/verifications/openai_api/fixtures/test_cases/chat_completion.yaml similarity index 78% rename from tests/verifications/openai/fixtures/test_cases/chat_completion.yaml rename to tests/verifications/openai_api/fixtures/test_cases/chat_completion.yaml index 2c302a704..78ea8245d 100644 --- a/tests/verifications/openai/fixtures/test_cases/chat_completion.yaml +++ b/tests/verifications/openai_api/fixtures/test_cases/chat_completion.yaml @@ -1,31 +1,24 @@ test_chat_basic: test_name: test_chat_basic test_params: - input_output: - - input: + case: 
+ - case_id: "earth" + input: messages: - content: Which planet do humans live on? role: user output: Earth - - input: + - case_id: "saturn" + input: messages: - content: Which planet has rings around it with a name starting with letter S? role: user output: Saturn - model: - - Llama-3.3-8B-Instruct - - Llama-3.3-70B-Instruct - - Llama-4-Scout-17B-16E - - Llama-4-Scout-17B-16E-Instruct - - Llama-4-Maverick-17B-128E - - Llama-4-Maverick-17B-128E-Instruct - - gpt-4o - - gpt-4o-mini test_chat_image: test_name: test_chat_image test_params: - input_output: + case: - input: messages: - content: @@ -36,18 +29,12 @@ test_chat_image: type: image_url role: user output: llama - model: - - Llama-4-Scout-17B-16E - - Llama-4-Scout-17B-16E-Instruct - - Llama-4-Maverick-17B-128E - - Llama-4-Maverick-17B-128E-Instruct - - gpt-4o - - gpt-4o-mini test_chat_structured_output: test_name: test_chat_structured_output test_params: - input_output: - - input: + case: + - case_id: "calendar" + input: messages: - content: Extract the event information. role: system @@ -77,7 +64,8 @@ test_chat_structured_output: type: object type: json_schema output: valid_calendar_event - - input: + - case_id: "math" + input: messages: - content: You are a helpful math tutor. Guide the user through the solution step by step. @@ -118,19 +106,10 @@ test_chat_structured_output: type: object type: json_schema output: valid_math_reasoning - model: - - Llama-3.3-8B-Instruct - - Llama-3.3-70B-Instruct - - Llama-4-Scout-17B-16E - - Llama-4-Scout-17B-16E-Instruct - - Llama-4-Maverick-17B-128E - - Llama-4-Maverick-17B-128E-Instruct - - gpt-4o - - gpt-4o-mini test_tool_calling: test_name: test_tool_calling test_params: - input_output: + case: - input: messages: - content: You are a helpful assistant that can use tools to get information. 
@@ -152,11 +131,3 @@ test_tool_calling: type: object type: function output: get_weather_tool_call - model: - - Llama-3.3-70B-Instruct - - Llama-4-Scout-17B-16E - - Llama-4-Scout-17B-16E-Instruct - - Llama-4-Maverick-17B-128E - - Llama-4-Maverick-17B-128E-Instruct - - gpt-4o - - gpt-4o-mini diff --git a/tests/verifications/openai_api/test_chat_completion.py b/tests/verifications/openai_api/test_chat_completion.py new file mode 100644 index 000000000..dc08ec944 --- /dev/null +++ b/tests/verifications/openai_api/test_chat_completion.py @@ -0,0 +1,271 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +import re +from typing import Any + +import pytest +from pydantic import BaseModel + +from tests.verifications.openai_api.fixtures.fixtures import _load_all_verification_configs +from tests.verifications.openai_api.fixtures.load import load_test_cases + +chat_completion_test_cases = load_test_cases("chat_completion") + + +def case_id_generator(case): + """Generate a test ID from the case's 'case_id' field, or use a default.""" + case_id = case.get("case_id") + if isinstance(case_id, (str, int)): + return re.sub(r"\\W|^(?=\\d)", "_", str(case_id)) + return None + + +def pytest_generate_tests(metafunc): + """Dynamically parametrize tests based on the selected provider and config.""" + if "model" in metafunc.fixturenames: + provider = metafunc.config.getoption("provider") + if not provider: + print("Warning: --provider not specified. 
Skipping model parametrization.") + metafunc.parametrize("model", []) + return + + try: + config_data = _load_all_verification_configs() + except (FileNotFoundError, IOError) as e: + print(f"ERROR loading verification configs: {e}") + config_data = {"providers": {}} + + provider_config = config_data.get("providers", {}).get(provider) + if provider_config: + models = provider_config.get("models", []) + if models: + metafunc.parametrize("model", models) + else: + print(f"Warning: No models found for provider '{provider}' in config.") + metafunc.parametrize("model", []) # Parametrize empty if no models found + else: + print(f"Warning: Provider '{provider}' not found in config. No models parametrized.") + metafunc.parametrize("model", []) # Parametrize empty if provider not found + + +def should_skip_test(verification_config, provider, model, test_name_base): + """Check if a test should be skipped based on config exclusions.""" + provider_config = verification_config.get("providers", {}).get(provider) + if not provider_config: + return False # No config for provider, don't skip + + exclusions = provider_config.get("test_exclusions", {}).get(model, []) + return test_name_base in exclusions + + +# Helper to get the base test name from the request object +def get_base_test_name(request): + return request.node.originalname + + +# --- Test Functions --- + + +@pytest.mark.parametrize( + "case", + chat_completion_test_cases["test_chat_basic"]["test_params"]["case"], + ids=case_id_generator, +) +def test_chat_non_streaming_basic(request, openai_client, model, provider, verification_config, case): + test_name_base = get_base_test_name(request) + if should_skip_test(verification_config, provider, model, test_name_base): + pytest.skip(f"Skipping {test_name_base} for model {model} on provider {provider} based on config.") + + response = openai_client.chat.completions.create( + model=model, + messages=case["input"]["messages"], + stream=False, + ) + assert 
response.choices[0].message.role == "assistant" + assert case["output"].lower() in response.choices[0].message.content.lower() + + +@pytest.mark.parametrize( + "case", + chat_completion_test_cases["test_chat_basic"]["test_params"]["case"], + ids=case_id_generator, +) +def test_chat_streaming_basic(request, openai_client, model, provider, verification_config, case): + test_name_base = get_base_test_name(request) + if should_skip_test(verification_config, provider, model, test_name_base): + pytest.skip(f"Skipping {test_name_base} for model {model} on provider {provider} based on config.") + + response = openai_client.chat.completions.create( + model=model, + messages=case["input"]["messages"], + stream=True, + ) + content = "" + for chunk in response: + content += chunk.choices[0].delta.content or "" + + # TODO: add detailed type validation + + assert case["output"].lower() in content.lower() + + +@pytest.mark.parametrize( + "case", + chat_completion_test_cases["test_chat_image"]["test_params"]["case"], + ids=case_id_generator, +) +def test_chat_non_streaming_image(request, openai_client, model, provider, verification_config, case): + test_name_base = get_base_test_name(request) + if should_skip_test(verification_config, provider, model, test_name_base): + pytest.skip(f"Skipping {test_name_base} for model {model} on provider {provider} based on config.") + + response = openai_client.chat.completions.create( + model=model, + messages=case["input"]["messages"], + stream=False, + ) + assert response.choices[0].message.role == "assistant" + assert case["output"].lower() in response.choices[0].message.content.lower() + + +@pytest.mark.parametrize( + "case", + chat_completion_test_cases["test_chat_image"]["test_params"]["case"], + ids=case_id_generator, +) +def test_chat_streaming_image(request, openai_client, model, provider, verification_config, case): + test_name_base = get_base_test_name(request) + if should_skip_test(verification_config, provider, model, 
test_name_base): + pytest.skip(f"Skipping {test_name_base} for model {model} on provider {provider} based on config.") + + response = openai_client.chat.completions.create( + model=model, + messages=case["input"]["messages"], + stream=True, + ) + content = "" + for chunk in response: + content += chunk.choices[0].delta.content or "" + + # TODO: add detailed type validation + + assert case["output"].lower() in content.lower() + + +@pytest.mark.parametrize( + "case", + chat_completion_test_cases["test_chat_structured_output"]["test_params"]["case"], + ids=case_id_generator, +) +def test_chat_non_streaming_structured_output(request, openai_client, model, provider, verification_config, case): + test_name_base = get_base_test_name(request) + if should_skip_test(verification_config, provider, model, test_name_base): + pytest.skip(f"Skipping {test_name_base} for model {model} on provider {provider} based on config.") + + response = openai_client.chat.completions.create( + model=model, + messages=case["input"]["messages"], + response_format=case["input"]["response_format"], + stream=False, + ) + + assert response.choices[0].message.role == "assistant" + maybe_json_content = response.choices[0].message.content + + validate_structured_output(maybe_json_content, case["output"]) + + +@pytest.mark.parametrize( + "case", + chat_completion_test_cases["test_chat_structured_output"]["test_params"]["case"], + ids=case_id_generator, +) +def test_chat_streaming_structured_output(request, openai_client, model, provider, verification_config, case): + test_name_base = get_base_test_name(request) + if should_skip_test(verification_config, provider, model, test_name_base): + pytest.skip(f"Skipping {test_name_base} for model {model} on provider {provider} based on config.") + + response = openai_client.chat.completions.create( + model=model, + messages=case["input"]["messages"], + response_format=case["input"]["response_format"], + stream=True, + ) + maybe_json_content = "" + for chunk in 
response: + maybe_json_content += chunk.choices[0].delta.content or "" + validate_structured_output(maybe_json_content, case["output"]) + + +@pytest.mark.parametrize( + "case", + chat_completion_test_cases["test_tool_calling"]["test_params"]["case"], + ids=case_id_generator, +) +def test_chat_non_streaming_tool_calling(request, openai_client, model, provider, verification_config, case): + test_name_base = get_base_test_name(request) + if should_skip_test(verification_config, provider, model, test_name_base): + pytest.skip(f"Skipping {test_name_base} for model {model} on provider {provider} based on config.") + + response = openai_client.chat.completions.create( + model=model, + messages=case["input"]["messages"], + tools=case["input"]["tools"], + stream=False, + ) + + assert response.choices[0].message.role == "assistant" + assert len(response.choices[0].message.tool_calls) > 0 + assert case["output"] == "get_weather_tool_call" + assert response.choices[0].message.tool_calls[0].function.name == "get_weather" + # TODO: add detailed type validation + + +# --- Helper functions (structured output validation) --- + + +def get_structured_output(maybe_json_content: str, schema_name: str) -> Any | None: + if schema_name == "valid_calendar_event": + + class CalendarEvent(BaseModel): + name: str + date: str + participants: list[str] + + try: + calendar_event = CalendarEvent.model_validate_json(maybe_json_content) + return calendar_event + except Exception: + return None + elif schema_name == "valid_math_reasoning": + + class Step(BaseModel): + explanation: str + output: str + + class MathReasoning(BaseModel): + steps: list[Step] + final_answer: str + + try: + math_reasoning = MathReasoning.model_validate_json(maybe_json_content) + return math_reasoning + except Exception: + return None + + return None + + +def validate_structured_output(maybe_json_content: str, schema_name: str) -> None: + structured_output = get_structured_output(maybe_json_content, schema_name) + assert 
structured_output is not None + if schema_name == "valid_calendar_event": + assert structured_output.name is not None + assert structured_output.date is not None + assert len(structured_output.participants) == 2 + elif schema_name == "valid_math_reasoning": + assert len(structured_output.final_answer) > 0 diff --git a/tests/verifications/test_results/fireworks_1744154308.json b/tests/verifications/test_results/fireworks_1744154308.json deleted file mode 100644 index 691f6e474..000000000 --- a/tests/verifications/test_results/fireworks_1744154308.json +++ /dev/null @@ -1,2744 +0,0 @@ -{ - "created": 1744154399.039055, - "duration": 87.73799800872803, - "exitcode": 1, - "root": "/Users/erichuang/projects/llama-stack", - "environment": {}, - "summary": { - "skipped": 52, - "passed": 28, - "failed": 3, - "total": 83, - "collected": 83 - }, - "collectors": [ - { - "nodeid": "", - "outcome": "passed", - "result": [ - { - "nodeid": "tests/verifications/openai/test_chat_completion.py", - "type": "Module" - } - ] - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py", - "outcome": "passed", - "result": [ - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-3.3-8B-Instruct]", - "type": "Function", - "lineno": 25 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-3.3-70B-Instruct]", - "type": "Function", - "lineno": 25 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-4-Scout-17B-16E]", - "type": "Function", - "lineno": 25 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "type": "Function", - "lineno": 25 - }, - { - "nodeid": 
"tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-4-Maverick-17B-128E]", - "type": "Function", - "lineno": 25 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "type": "Function", - "lineno": 25 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-gpt-4o]", - "type": "Function", - "lineno": 25 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-gpt-4o-mini]", - "type": "Function", - "lineno": 25 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-3.3-8B-Instruct]", - "type": "Function", - "lineno": 25 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-3.3-70B-Instruct]", - "type": "Function", - "lineno": 25 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-4-Scout-17B-16E]", - "type": "Function", - "lineno": 25 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-4-Scout-17B-16E-Instruct]", - "type": "Function", - "lineno": 25 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-4-Maverick-17B-128E]", - "type": "Function", - "lineno": 25 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-4-Maverick-17B-128E-Instruct]", - "type": "Function", - "lineno": 25 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-gpt-4o]", - "type": "Function", - "lineno": 25 - }, - { - "nodeid": 
"tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-gpt-4o-mini]", - "type": "Function", - "lineno": 25 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-3.3-8B-Instruct]", - "type": "Function", - "lineno": 40 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-3.3-70B-Instruct]", - "type": "Function", - "lineno": 40 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-4-Scout-17B-16E]", - "type": "Function", - "lineno": 40 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "type": "Function", - "lineno": 40 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-4-Maverick-17B-128E]", - "type": "Function", - "lineno": 40 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "type": "Function", - "lineno": 40 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-gpt-4o]", - "type": "Function", - "lineno": 40 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-gpt-4o-mini]", - "type": "Function", - "lineno": 40 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-3.3-8B-Instruct]", - "type": "Function", - "lineno": 40 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-3.3-70B-Instruct]", - "type": "Function", - "lineno": 40 - }, - { - "nodeid": 
"tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-4-Scout-17B-16E]", - "type": "Function", - "lineno": 40 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-4-Scout-17B-16E-Instruct]", - "type": "Function", - "lineno": 40 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-4-Maverick-17B-128E]", - "type": "Function", - "lineno": 40 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-4-Maverick-17B-128E-Instruct]", - "type": "Function", - "lineno": 40 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-gpt-4o]", - "type": "Function", - "lineno": 40 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-gpt-4o-mini]", - "type": "Function", - "lineno": 40 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-Llama-4-Scout-17B-16E]", - "type": "Function", - "lineno": 60 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "type": "Function", - "lineno": 60 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-Llama-4-Maverick-17B-128E]", - "type": "Function", - "lineno": 60 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "type": "Function", - "lineno": 60 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-gpt-4o]", - "type": "Function", - "lineno": 60 - }, - { - "nodeid": 
"tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-gpt-4o-mini]", - "type": "Function", - "lineno": 60 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-Llama-4-Scout-17B-16E]", - "type": "Function", - "lineno": 75 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "type": "Function", - "lineno": 75 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-Llama-4-Maverick-17B-128E]", - "type": "Function", - "lineno": 75 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "type": "Function", - "lineno": 75 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-gpt-4o]", - "type": "Function", - "lineno": 75 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-gpt-4o-mini]", - "type": "Function", - "lineno": 75 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-3.3-8B-Instruct]", - "type": "Function", - "lineno": 95 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-3.3-70B-Instruct]", - "type": "Function", - "lineno": 95 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E]", - "type": "Function", - "lineno": 95 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "type": "Function", - "lineno": 95 - }, - { - 
"nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E]", - "type": "Function", - "lineno": 95 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "type": "Function", - "lineno": 95 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-gpt-4o]", - "type": "Function", - "lineno": 95 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-gpt-4o-mini]", - "type": "Function", - "lineno": 95 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-3.3-8B-Instruct]", - "type": "Function", - "lineno": 95 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-3.3-70B-Instruct]", - "type": "Function", - "lineno": 95 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E]", - "type": "Function", - "lineno": 95 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E-Instruct]", - "type": "Function", - "lineno": 95 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E]", - "type": "Function", - "lineno": 95 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E-Instruct]", - "type": "Function", - "lineno": 95 - }, - { - "nodeid": 
"tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-gpt-4o]", - "type": "Function", - "lineno": 95 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-gpt-4o-mini]", - "type": "Function", - "lineno": 95 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-3.3-8B-Instruct]", - "type": "Function", - "lineno": 117 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-3.3-70B-Instruct]", - "type": "Function", - "lineno": 117 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E]", - "type": "Function", - "lineno": 117 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "type": "Function", - "lineno": 117 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E]", - "type": "Function", - "lineno": 117 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "type": "Function", - "lineno": 117 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-gpt-4o]", - "type": "Function", - "lineno": 117 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-gpt-4o-mini]", - "type": "Function", - "lineno": 117 - }, - { - "nodeid": 
"tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-3.3-8B-Instruct]", - "type": "Function", - "lineno": 117 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-3.3-70B-Instruct]", - "type": "Function", - "lineno": 117 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E]", - "type": "Function", - "lineno": 117 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E-Instruct]", - "type": "Function", - "lineno": 117 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E]", - "type": "Function", - "lineno": 117 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E-Instruct]", - "type": "Function", - "lineno": 117 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-gpt-4o]", - "type": "Function", - "lineno": 117 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-gpt-4o-mini]", - "type": "Function", - "lineno": 117 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-Llama-3.3-70B-Instruct]", - "type": "Function", - "lineno": 138 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-Llama-4-Scout-17B-16E]", - "type": "Function", - "lineno": 138 - }, - { - "nodeid": 
"tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "type": "Function", - "lineno": 138 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-Llama-4-Maverick-17B-128E]", - "type": "Function", - "lineno": 138 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "type": "Function", - "lineno": 138 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-gpt-4o]", - "type": "Function", - "lineno": 138 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-gpt-4o-mini]", - "type": "Function", - "lineno": 138 - } - ] - } - ], - "tests": [ - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-3.3-8B-Instruct]", - "lineno": 25, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_basic[input_output0-Llama-3.3-8B-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-3.3-8B-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.17320987500716, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider fireworks does not support model Llama-3.3-8B-Instruct')" - }, - "teardown": { - "duration": 0.000177707988768816, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-3.3-70B-Instruct]", - "lineno": 25, - "outcome": "passed", - "keywords": [ - 
"test_chat_non_streaming_basic[input_output0-Llama-3.3-70B-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-3.3-70B-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.009193749981932342, - "outcome": "passed" - }, - "call": { - "duration": 1.1473859580000862, - "outcome": "passed" - }, - "teardown": { - "duration": 0.00043337501119822264, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-4-Scout-17B-16E]", - "lineno": 25, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_basic[input_output0-Llama-4-Scout-17B-16E]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Scout-17B-16E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.01645291701424867, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider fireworks does not support model Llama-4-Scout-17B-16E')" - }, - "teardown": { - "duration": 0.0002898749662563205, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "lineno": 25, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_basic[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Scout-17B-16E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.01562033302616328, - "outcome": "passed" - }, - "call": { - "duration": 0.8782661251025274, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0002795408945530653, - "outcome": "passed" - } - }, - { - "nodeid": 
"tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-4-Maverick-17B-128E]", - "lineno": 25, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_basic[input_output0-Llama-4-Maverick-17B-128E]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Maverick-17B-128E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.008571124984882772, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider fireworks does not support model Llama-4-Maverick-17B-128E')" - }, - "teardown": { - "duration": 0.0003043749602511525, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "lineno": 25, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_basic[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Maverick-17B-128E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.00842841702979058, - "outcome": "passed" - }, - "call": { - "duration": 1.3863223339430988, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0009970410028472543, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-gpt-4o]", - "lineno": 25, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_basic[input_output0-gpt-4o]", - "parametrize", - "pytestmark", - "input_output0-gpt-4o", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.007089875056408346, - "outcome": "skipped", - "longrepr": 
"('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider fireworks does not support model gpt-4o')" - }, - "teardown": { - "duration": 0.00017958390526473522, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-gpt-4o-mini]", - "lineno": 25, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_basic[input_output0-gpt-4o-mini]", - "parametrize", - "pytestmark", - "input_output0-gpt-4o-mini", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.005809499998576939, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider fireworks does not support model gpt-4o-mini')" - }, - "teardown": { - "duration": 0.00016495899762958288, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-3.3-8B-Instruct]", - "lineno": 25, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_basic[input_output1-Llama-3.3-8B-Instruct]", - "parametrize", - "pytestmark", - "input_output1-Llama-3.3-8B-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.0119722920935601, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider fireworks does not support model Llama-3.3-8B-Instruct')" - }, - "teardown": { - "duration": 0.00016962504014372826, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-3.3-70B-Instruct]", - "lineno": 25, - "outcome": "passed", - "keywords": [ - 
"test_chat_non_streaming_basic[input_output1-Llama-3.3-70B-Instruct]", - "parametrize", - "pytestmark", - "input_output1-Llama-3.3-70B-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.005716291954740882, - "outcome": "passed" - }, - "call": { - "duration": 0.6822018750244752, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0005292498972266912, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-4-Scout-17B-16E]", - "lineno": 25, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_basic[input_output1-Llama-4-Scout-17B-16E]", - "parametrize", - "pytestmark", - "input_output1-Llama-4-Scout-17B-16E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.025827708072029054, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider fireworks does not support model Llama-4-Scout-17B-16E')" - }, - "teardown": { - "duration": 0.000295999925583601, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-4-Scout-17B-16E-Instruct]", - "lineno": 25, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_basic[input_output1-Llama-4-Scout-17B-16E-Instruct]", - "parametrize", - "pytestmark", - "input_output1-Llama-4-Scout-17B-16E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.010980832972563803, - "outcome": "passed" - }, - "call": { - "duration": 0.7537062909686938, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0008091670460999012, - "outcome": "passed" - } - }, - { - "nodeid": 
"tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-4-Maverick-17B-128E]", - "lineno": 25, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_basic[input_output1-Llama-4-Maverick-17B-128E]", - "parametrize", - "pytestmark", - "input_output1-Llama-4-Maverick-17B-128E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.006567832897417247, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider fireworks does not support model Llama-4-Maverick-17B-128E')" - }, - "teardown": { - "duration": 0.0001545000122860074, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-4-Maverick-17B-128E-Instruct]", - "lineno": 25, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_basic[input_output1-Llama-4-Maverick-17B-128E-Instruct]", - "parametrize", - "pytestmark", - "input_output1-Llama-4-Maverick-17B-128E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.005985083989799023, - "outcome": "passed" - }, - "call": { - "duration": 0.7263387079583481, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0006324589485302567, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-gpt-4o]", - "lineno": 25, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_basic[input_output1-gpt-4o]", - "parametrize", - "pytestmark", - "input_output1-gpt-4o", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.0171962499152869, - "outcome": "skipped", - "longrepr": 
"('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider fireworks does not support model gpt-4o')" - }, - "teardown": { - "duration": 0.000780042028054595, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-gpt-4o-mini]", - "lineno": 25, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_basic[input_output1-gpt-4o-mini]", - "parametrize", - "pytestmark", - "input_output1-gpt-4o-mini", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.01365620899014175, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider fireworks does not support model gpt-4o-mini')" - }, - "teardown": { - "duration": 0.00016758404672145844, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-3.3-8B-Instruct]", - "lineno": 40, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_basic[input_output0-Llama-3.3-8B-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-3.3-8B-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.0064070840599015355, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider fireworks does not support model Llama-3.3-8B-Instruct')" - }, - "teardown": { - "duration": 0.0002031669719144702, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-3.3-70B-Instruct]", - "lineno": 40, - "outcome": "passed", - "keywords": [ - 
"test_chat_streaming_basic[input_output0-Llama-3.3-70B-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-3.3-70B-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.010951624950394034, - "outcome": "passed" - }, - "call": { - "duration": 0.5433399169705808, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0013178749941289425, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-4-Scout-17B-16E]", - "lineno": 40, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_basic[input_output0-Llama-4-Scout-17B-16E]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Scout-17B-16E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.022056750021874905, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider fireworks does not support model Llama-4-Scout-17B-16E')" - }, - "teardown": { - "duration": 0.0006570409750565886, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "lineno": 40, - "outcome": "passed", - "keywords": [ - "test_chat_streaming_basic[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Scout-17B-16E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.008314333041198552, - "outcome": "passed" - }, - "call": { - "duration": 0.7779882500180975, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0006799160037189722, - "outcome": "passed" - } - }, - { - "nodeid": 
"tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-4-Maverick-17B-128E]", - "lineno": 40, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_basic[input_output0-Llama-4-Maverick-17B-128E]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Maverick-17B-128E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.03601404093205929, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider fireworks does not support model Llama-4-Maverick-17B-128E')" - }, - "teardown": { - "duration": 0.000610582996159792, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "lineno": 40, - "outcome": "passed", - "keywords": [ - "test_chat_streaming_basic[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Maverick-17B-128E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.014321292052045465, - "outcome": "passed" - }, - "call": { - "duration": 1.0243758750148118, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0010485410457476974, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-gpt-4o]", - "lineno": 40, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_basic[input_output0-gpt-4o]", - "parametrize", - "pytestmark", - "input_output0-gpt-4o", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.021133000031113625, - "outcome": "skipped", - "longrepr": 
"('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider fireworks does not support model gpt-4o')" - }, - "teardown": { - "duration": 0.0005400830414146185, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-gpt-4o-mini]", - "lineno": 40, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_basic[input_output0-gpt-4o-mini]", - "parametrize", - "pytestmark", - "input_output0-gpt-4o-mini", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.007212458993308246, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider fireworks does not support model gpt-4o-mini')" - }, - "teardown": { - "duration": 0.00026770797558128834, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-3.3-8B-Instruct]", - "lineno": 40, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_basic[input_output1-Llama-3.3-8B-Instruct]", - "parametrize", - "pytestmark", - "input_output1-Llama-3.3-8B-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.012334750033915043, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider fireworks does not support model Llama-3.3-8B-Instruct')" - }, - "teardown": { - "duration": 0.00042683398351073265, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-3.3-70B-Instruct]", - "lineno": 40, - "outcome": "passed", - "keywords": [ - 
"test_chat_streaming_basic[input_output1-Llama-3.3-70B-Instruct]", - "parametrize", - "pytestmark", - "input_output1-Llama-3.3-70B-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.011477917083539069, - "outcome": "passed" - }, - "call": { - "duration": 1.670572166913189, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0005759169580414891, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-4-Scout-17B-16E]", - "lineno": 40, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_basic[input_output1-Llama-4-Scout-17B-16E]", - "parametrize", - "pytestmark", - "input_output1-Llama-4-Scout-17B-16E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.024620208074338734, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider fireworks does not support model Llama-4-Scout-17B-16E')" - }, - "teardown": { - "duration": 0.0005166250048205256, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-4-Scout-17B-16E-Instruct]", - "lineno": 40, - "outcome": "passed", - "keywords": [ - "test_chat_streaming_basic[input_output1-Llama-4-Scout-17B-16E-Instruct]", - "parametrize", - "pytestmark", - "input_output1-Llama-4-Scout-17B-16E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.008708957931958139, - "outcome": "passed" - }, - "call": { - "duration": 0.6654335829662159, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0002927089808508754, - "outcome": "passed" - } - }, - { - "nodeid": 
"tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-4-Maverick-17B-128E]", - "lineno": 40, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_basic[input_output1-Llama-4-Maverick-17B-128E]", - "parametrize", - "pytestmark", - "input_output1-Llama-4-Maverick-17B-128E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.018128167022950947, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider fireworks does not support model Llama-4-Maverick-17B-128E')" - }, - "teardown": { - "duration": 0.0001929170684888959, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-4-Maverick-17B-128E-Instruct]", - "lineno": 40, - "outcome": "passed", - "keywords": [ - "test_chat_streaming_basic[input_output1-Llama-4-Maverick-17B-128E-Instruct]", - "parametrize", - "pytestmark", - "input_output1-Llama-4-Maverick-17B-128E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.0063874589977785945, - "outcome": "passed" - }, - "call": { - "duration": 0.8047525839647278, - "outcome": "passed" - }, - "teardown": { - "duration": 0.00039245898369699717, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-gpt-4o]", - "lineno": 40, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_basic[input_output1-gpt-4o]", - "parametrize", - "pytestmark", - "input_output1-gpt-4o", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.01366533397231251, - "outcome": "skipped", - "longrepr": 
"('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider fireworks does not support model gpt-4o')" - }, - "teardown": { - "duration": 0.00028241705149412155, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-gpt-4o-mini]", - "lineno": 40, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_basic[input_output1-gpt-4o-mini]", - "parametrize", - "pytestmark", - "input_output1-gpt-4o-mini", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.010844790958799422, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider fireworks does not support model gpt-4o-mini')" - }, - "teardown": { - "duration": 0.000258082989603281, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-Llama-4-Scout-17B-16E]", - "lineno": 60, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_image[input_output0-Llama-4-Scout-17B-16E]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Scout-17B-16E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.00936354196164757, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 61, 'Skipped: Provider fireworks does not support model Llama-4-Scout-17B-16E')" - }, - "teardown": { - "duration": 0.00020533299539238214, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "lineno": 60, - "outcome": "passed", - "keywords": [ - 
"test_chat_non_streaming_image[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Scout-17B-16E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.008578249951824546, - "outcome": "passed" - }, - "call": { - "duration": 2.6288582499837503, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0006052498938515782, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-Llama-4-Maverick-17B-128E]", - "lineno": 60, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_image[input_output0-Llama-4-Maverick-17B-128E]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Maverick-17B-128E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.02061279199551791, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 61, 'Skipped: Provider fireworks does not support model Llama-4-Maverick-17B-128E')" - }, - "teardown": { - "duration": 0.00029320805333554745, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "lineno": 60, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_image[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Maverick-17B-128E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.00995812495239079, - "outcome": "passed" - }, - "call": { - "duration": 3.0904540000483394, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0003214169992133975, - "outcome": "passed" - 
} - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-gpt-4o]", - "lineno": 60, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_image[input_output0-gpt-4o]", - "parametrize", - "pytestmark", - "input_output0-gpt-4o", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.0261635419446975, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 61, 'Skipped: Provider fireworks does not support model gpt-4o')" - }, - "teardown": { - "duration": 0.00032716698478907347, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-gpt-4o-mini]", - "lineno": 60, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_image[input_output0-gpt-4o-mini]", - "parametrize", - "pytestmark", - "input_output0-gpt-4o-mini", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.027220541960559785, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 61, 'Skipped: Provider fireworks does not support model gpt-4o-mini')" - }, - "teardown": { - "duration": 0.0003192499279975891, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-Llama-4-Scout-17B-16E]", - "lineno": 75, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_image[input_output0-Llama-4-Scout-17B-16E]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Scout-17B-16E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.010883458075113595, - "outcome": "skipped", - 
"longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 76, 'Skipped: Provider fireworks does not support model Llama-4-Scout-17B-16E')" - }, - "teardown": { - "duration": 0.0002687909873202443, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "lineno": 75, - "outcome": "passed", - "keywords": [ - "test_chat_streaming_image[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Scout-17B-16E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.0171177500160411, - "outcome": "passed" - }, - "call": { - "duration": 1.6752691670553759, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0004877089522778988, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-Llama-4-Maverick-17B-128E]", - "lineno": 75, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_image[input_output0-Llama-4-Maverick-17B-128E]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Maverick-17B-128E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.011608208995312452, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 76, 'Skipped: Provider fireworks does not support model Llama-4-Maverick-17B-128E')" - }, - "teardown": { - "duration": 0.00017137499526143074, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "lineno": 75, - "outcome": "passed", - "keywords": [ - 
"test_chat_streaming_image[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Maverick-17B-128E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.009284624946303666, - "outcome": "passed" - }, - "call": { - "duration": 3.537356249988079, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0005068340105935931, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-gpt-4o]", - "lineno": 75, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_image[input_output0-gpt-4o]", - "parametrize", - "pytestmark", - "input_output0-gpt-4o", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.016660499968566, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 76, 'Skipped: Provider fireworks does not support model gpt-4o')" - }, - "teardown": { - "duration": 0.00029341597110033035, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-gpt-4o-mini]", - "lineno": 75, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_image[input_output0-gpt-4o-mini]", - "parametrize", - "pytestmark", - "input_output0-gpt-4o-mini", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.01374066702555865, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 76, 'Skipped: Provider fireworks does not support model gpt-4o-mini')" - }, - "teardown": { - "duration": 0.0002625000197440386, - "outcome": "passed" - } - }, - { - "nodeid": 
"tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-3.3-8B-Instruct]", - "lineno": 95, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_structured_output[input_output0-Llama-3.3-8B-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-3.3-8B-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.013120374991558492, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider fireworks does not support model Llama-3.3-8B-Instruct')" - }, - "teardown": { - "duration": 0.00021954195108264685, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-3.3-70B-Instruct]", - "lineno": 95, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_structured_output[input_output0-Llama-3.3-70B-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-3.3-70B-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.015080374898388982, - "outcome": "passed" - }, - "call": { - "duration": 1.157175041968003, - "outcome": "passed" - }, - "teardown": { - "duration": 0.000495875021442771, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E]", - "lineno": 95, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Scout-17B-16E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 
0.013946042046882212, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider fireworks does not support model Llama-4-Scout-17B-16E')" - }, - "teardown": { - "duration": 0.0002954580122604966, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "lineno": 95, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Scout-17B-16E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.011617792071774602, - "outcome": "passed" - }, - "call": { - "duration": 0.9537639999762177, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0004819999448955059, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E]", - "lineno": 95, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Maverick-17B-128E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.027436082949861884, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider fireworks does not support model Llama-4-Maverick-17B-128E')" - }, - "teardown": { - "duration": 0.00030274991877377033, - "outcome": "passed" - } - }, - { - "nodeid": 
"tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "lineno": 95, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Maverick-17B-128E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.016110333963297307, - "outcome": "passed" - }, - "call": { - "duration": 0.8493227910948917, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0004883749643340707, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-gpt-4o]", - "lineno": 95, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_structured_output[input_output0-gpt-4o]", - "parametrize", - "pytestmark", - "input_output0-gpt-4o", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.017850833013653755, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider fireworks does not support model gpt-4o')" - }, - "teardown": { - "duration": 0.0003287500003352761, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-gpt-4o-mini]", - "lineno": 95, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_structured_output[input_output0-gpt-4o-mini]", - "parametrize", - "pytestmark", - "input_output0-gpt-4o-mini", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.012523208046332002, - "outcome": "skipped", - "longrepr": 
"('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider fireworks does not support model gpt-4o-mini')" - }, - "teardown": { - "duration": 0.00023500004317611456, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-3.3-8B-Instruct]", - "lineno": 95, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_structured_output[input_output1-Llama-3.3-8B-Instruct]", - "parametrize", - "pytestmark", - "input_output1-Llama-3.3-8B-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.007516667013987899, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider fireworks does not support model Llama-3.3-8B-Instruct')" - }, - "teardown": { - "duration": 0.00018912507221102715, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-3.3-70B-Instruct]", - "lineno": 95, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_structured_output[input_output1-Llama-3.3-70B-Instruct]", - "parametrize", - "pytestmark", - "input_output1-Llama-3.3-70B-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.007337165996432304, - "outcome": "passed" - }, - "call": { - "duration": 3.124099582899362, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0006703329272568226, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E]", - "lineno": 95, - "outcome": "skipped", - "keywords": [ - 
"test_chat_non_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E]", - "parametrize", - "pytestmark", - "input_output1-Llama-4-Scout-17B-16E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.014259999967180192, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider fireworks does not support model Llama-4-Scout-17B-16E')" - }, - "teardown": { - "duration": 0.00030262500513345003, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E-Instruct]", - "lineno": 95, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E-Instruct]", - "parametrize", - "pytestmark", - "input_output1-Llama-4-Scout-17B-16E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.010863124975003302, - "outcome": "passed" - }, - "call": { - "duration": 1.3330956250429153, - "outcome": "passed" - }, - "teardown": { - "duration": 0.00018679199274629354, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E]", - "lineno": 95, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E]", - "parametrize", - "pytestmark", - "input_output1-Llama-4-Maverick-17B-128E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.005797958001494408, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: 
Provider fireworks does not support model Llama-4-Maverick-17B-128E')" - }, - "teardown": { - "duration": 0.00017529097385704517, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E-Instruct]", - "lineno": 95, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E-Instruct]", - "parametrize", - "pytestmark", - "input_output1-Llama-4-Maverick-17B-128E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.005647709011100233, - "outcome": "passed" - }, - "call": { - "duration": 3.2295467499643564, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0005654999986290932, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-gpt-4o]", - "lineno": 95, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_structured_output[input_output1-gpt-4o]", - "parametrize", - "pytestmark", - "input_output1-gpt-4o", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.007151791942305863, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider fireworks does not support model gpt-4o')" - }, - "teardown": { - "duration": 0.00015316694043576717, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-gpt-4o-mini]", - "lineno": 95, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_structured_output[input_output1-gpt-4o-mini]", - "parametrize", - "pytestmark", - "input_output1-gpt-4o-mini", - 
"test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.006435790914110839, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider fireworks does not support model gpt-4o-mini')" - }, - "teardown": { - "duration": 0.00015954102855175734, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-3.3-8B-Instruct]", - "lineno": 117, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_structured_output[input_output0-Llama-3.3-8B-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-3.3-8B-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.006164791993796825, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider fireworks does not support model Llama-3.3-8B-Instruct')" - }, - "teardown": { - "duration": 0.00014074996579438448, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-3.3-70B-Instruct]", - "lineno": 117, - "outcome": "passed", - "keywords": [ - "test_chat_streaming_structured_output[input_output0-Llama-3.3-70B-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-3.3-70B-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.010064583038911223, - "outcome": "passed" - }, - "call": { - "duration": 1.1676458748988807, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0002513329964131117, - "outcome": "passed" - } - }, - { - "nodeid": 
"tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E]", - "lineno": 117, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Scout-17B-16E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.011011417023837566, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider fireworks does not support model Llama-4-Scout-17B-16E')" - }, - "teardown": { - "duration": 0.00020608294289559126, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "lineno": 117, - "outcome": "passed", - "keywords": [ - "test_chat_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Scout-17B-16E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.011654542060568929, - "outcome": "passed" - }, - "call": { - "duration": 0.7950789160095155, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0002690000692382455, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E]", - "lineno": 117, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Maverick-17B-128E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 
0.0066834589233621955, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider fireworks does not support model Llama-4-Maverick-17B-128E')" - }, - "teardown": { - "duration": 0.00017270795069634914, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "lineno": 117, - "outcome": "passed", - "keywords": [ - "test_chat_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Maverick-17B-128E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.011390416999347508, - "outcome": "passed" - }, - "call": { - "duration": 0.7844940840732306, - "outcome": "passed" - }, - "teardown": { - "duration": 0.000511458027176559, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-gpt-4o]", - "lineno": 117, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_structured_output[input_output0-gpt-4o]", - "parametrize", - "pytestmark", - "input_output0-gpt-4o", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.005813500029034913, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider fireworks does not support model gpt-4o')" - }, - "teardown": { - "duration": 0.00015495799016207457, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-gpt-4o-mini]", - "lineno": 117, - "outcome": "skipped", - "keywords": [ - 
"test_chat_streaming_structured_output[input_output0-gpt-4o-mini]", - "parametrize", - "pytestmark", - "input_output0-gpt-4o-mini", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.0075639160349965096, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider fireworks does not support model gpt-4o-mini')" - }, - "teardown": { - "duration": 0.00014358304906636477, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-3.3-8B-Instruct]", - "lineno": 117, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_structured_output[input_output1-Llama-3.3-8B-Instruct]", - "parametrize", - "pytestmark", - "input_output1-Llama-3.3-8B-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.008526541059836745, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider fireworks does not support model Llama-3.3-8B-Instruct')" - }, - "teardown": { - "duration": 0.00015841599088162184, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-3.3-70B-Instruct]", - "lineno": 117, - "outcome": "failed", - "keywords": [ - "test_chat_streaming_structured_output[input_output1-Llama-3.3-70B-Instruct]", - "parametrize", - "pytestmark", - "input_output1-Llama-3.3-70B-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.007805416011251509, - "outcome": "passed" - }, - "call": { - "duration": 13.25898533302825, - "outcome": "failed", - "crash": { - 
"path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py", - "lineno": 196, - "message": "assert None is not None" - }, - "traceback": [ - { - "path": "tests/verifications/openai/test_chat_completion.py", - "lineno": 136, - "message": "" - }, - { - "path": "tests/verifications/openai/test_chat_completion.py", - "lineno": 196, - "message": "AssertionError" - } - ], - "longrepr": "openai_client = \ninput_output = {'input': {'messages': [{'content': 'You are a helpful math tutor. Guide the user through the solution step by step.',... ['steps', 'final_answer'], 'title': 'MathReasoning', ...}}, 'type': 'json_schema'}}, 'output': 'valid_math_reasoning'}\ncorrect_model_name = 'accounts/fireworks/models/llama-v3p1-70b-instruct'\n\n @pytest.mark.parametrize(\n \"model\",\n chat_completion_test_cases[\"test_chat_structured_output\"][\"test_params\"][\"model\"],\n )\n @pytest.mark.parametrize(\n \"input_output\",\n chat_completion_test_cases[\"test_chat_structured_output\"][\"test_params\"][\"input_output\"],\n )\n def test_chat_streaming_structured_output(openai_client, input_output, correct_model_name):\n response = openai_client.chat.completions.create(\n model=correct_model_name,\n messages=input_output[\"input\"][\"messages\"],\n response_format=input_output[\"input\"][\"response_format\"],\n stream=True,\n )\n maybe_json_content = \"\"\n for chunk in response:\n maybe_json_content += chunk.choices[0].delta.content or \"\"\n> validate_structured_output(maybe_json_content, input_output[\"output\"])\n\ntests/verifications/openai/test_chat_completion.py:136: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\nmaybe_json_content = '{ \"final_answer\": \"}To solve the equation 8x + 7 = -23, we need to isolate the variable x. 
We can do this by followin...tassistantassistantassistantassistantassistantassistantassistantassistantassistantassistantassistantassistantassistant'\nschema_name = 'valid_math_reasoning'\n\n def validate_structured_output(maybe_json_content: str, schema_name: str) -> None:\n structured_output = get_structured_output(maybe_json_content, schema_name)\n> assert structured_output is not None\nE assert None is not None\n\ntests/verifications/openai/test_chat_completion.py:196: AssertionError" - }, - "teardown": { - "duration": 0.00022583396639674902, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E]", - "lineno": 117, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E]", - "parametrize", - "pytestmark", - "input_output1-Llama-4-Scout-17B-16E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.006412541959434748, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider fireworks does not support model Llama-4-Scout-17B-16E')" - }, - "teardown": { - "duration": 0.0001449589617550373, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E-Instruct]", - "lineno": 117, - "outcome": "passed", - "keywords": [ - "test_chat_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E-Instruct]", - "parametrize", - "pytestmark", - "input_output1-Llama-4-Scout-17B-16E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.010353000019676983, - "outcome": "passed" - }, - "call": { - "duration": 4.559281209018081, - 
"outcome": "passed" - }, - "teardown": { - "duration": 0.00021179206669330597, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E]", - "lineno": 117, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E]", - "parametrize", - "pytestmark", - "input_output1-Llama-4-Maverick-17B-128E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.011320417048409581, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider fireworks does not support model Llama-4-Maverick-17B-128E')" - }, - "teardown": { - "duration": 0.0001623749267309904, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E-Instruct]", - "lineno": 117, - "outcome": "passed", - "keywords": [ - "test_chat_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E-Instruct]", - "parametrize", - "pytestmark", - "input_output1-Llama-4-Maverick-17B-128E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.005637791007757187, - "outcome": "passed" - }, - "call": { - "duration": 2.9282109580235556, - "outcome": "passed" - }, - "teardown": { - "duration": 0.00019149994477629662, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-gpt-4o]", - "lineno": 117, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_structured_output[input_output1-gpt-4o]", - "parametrize", - "pytestmark", - "input_output1-gpt-4o", - 
"test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.021475916961207986, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider fireworks does not support model gpt-4o')" - }, - "teardown": { - "duration": 0.0002605828922241926, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-gpt-4o-mini]", - "lineno": 117, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_structured_output[input_output1-gpt-4o-mini]", - "parametrize", - "pytestmark", - "input_output1-gpt-4o-mini", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.012046082993037999, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider fireworks does not support model gpt-4o-mini')" - }, - "teardown": { - "duration": 0.00016966694965958595, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-Llama-3.3-70B-Instruct]", - "lineno": 138, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_tool_calling[input_output0-Llama-3.3-70B-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-3.3-70B-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.00782629195600748, - "outcome": "passed" - }, - "call": { - "duration": 0.9290615000063553, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0004110001027584076, - "outcome": "passed" - } - }, - { - "nodeid": 
"tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-Llama-4-Scout-17B-16E]", - "lineno": 138, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_tool_calling[input_output0-Llama-4-Scout-17B-16E]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Scout-17B-16E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.00842183397617191, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 139, 'Skipped: Provider fireworks does not support model Llama-4-Scout-17B-16E')" - }, - "teardown": { - "duration": 0.00023745803628116846, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "lineno": 138, - "outcome": "failed", - "keywords": [ - "test_chat_non_streaming_tool_calling[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Scout-17B-16E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.010762874968349934, - "outcome": "passed" - }, - "call": { - "duration": 23.62101216695737, - "outcome": "failed", - "crash": { - "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py", - "lineno": 156, - "message": "TypeError: object of type 'NoneType' has no len()" - }, - "traceback": [ - { - "path": "tests/verifications/openai/test_chat_completion.py", - "lineno": 156, - "message": "TypeError" - } - ], - "longrepr": "openai_client = \ninput_output = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 
'function'}]}, 'output': 'get_weather_tool_call'}\ncorrect_model_name = 'accounts/fireworks/models/llama4-scout-instruct-basic'\n\n @pytest.mark.parametrize(\n \"model\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"model\"],\n )\n @pytest.mark.parametrize(\n \"input_output\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"input_output\"],\n )\n def test_chat_non_streaming_tool_calling(openai_client, input_output, correct_model_name):\n response = openai_client.chat.completions.create(\n model=correct_model_name,\n messages=input_output[\"input\"][\"messages\"],\n tools=input_output[\"input\"][\"tools\"],\n stream=False,\n )\n \n assert response.choices[0].message.role == \"assistant\"\n> assert len(response.choices[0].message.tool_calls) > 0\nE TypeError: object of type 'NoneType' has no len()\n\ntests/verifications/openai/test_chat_completion.py:156: TypeError" - }, - "teardown": { - "duration": 0.0004520840011537075, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-Llama-4-Maverick-17B-128E]", - "lineno": 138, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_tool_calling[input_output0-Llama-4-Maverick-17B-128E]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Maverick-17B-128E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.00953104195650667, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 139, 'Skipped: Provider fireworks does not support model Llama-4-Maverick-17B-128E')" - }, - "teardown": { - "duration": 0.00017912499606609344, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - 
"lineno": 138, - "outcome": "failed", - "keywords": [ - "test_chat_non_streaming_tool_calling[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Maverick-17B-128E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.010302042006514966, - "outcome": "passed" - }, - "call": { - "duration": 5.55651158397086, - "outcome": "failed", - "crash": { - "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py", - "lineno": 156, - "message": "TypeError: object of type 'NoneType' has no len()" - }, - "traceback": [ - { - "path": "tests/verifications/openai/test_chat_completion.py", - "lineno": 156, - "message": "TypeError" - } - ], - "longrepr": "openai_client = \ninput_output = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\ncorrect_model_name = 'accounts/fireworks/models/llama4-maverick-instruct-basic'\n\n @pytest.mark.parametrize(\n \"model\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"model\"],\n )\n @pytest.mark.parametrize(\n \"input_output\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"input_output\"],\n )\n def test_chat_non_streaming_tool_calling(openai_client, input_output, correct_model_name):\n response = openai_client.chat.completions.create(\n model=correct_model_name,\n messages=input_output[\"input\"][\"messages\"],\n tools=input_output[\"input\"][\"tools\"],\n stream=False,\n )\n \n assert response.choices[0].message.role == \"assistant\"\n> assert len(response.choices[0].message.tool_calls) > 0\nE TypeError: object of type 'NoneType' has no len()\n\ntests/verifications/openai/test_chat_completion.py:156: TypeError" - }, - "teardown": 
{ - "duration": 0.0003929579397663474, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-gpt-4o]", - "lineno": 138, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_tool_calling[input_output0-gpt-4o]", - "parametrize", - "pytestmark", - "input_output0-gpt-4o", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.01593891705852002, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 139, 'Skipped: Provider fireworks does not support model gpt-4o')" - }, - "teardown": { - "duration": 0.0003579579060897231, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-gpt-4o-mini]", - "lineno": 138, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_tool_calling[input_output0-gpt-4o-mini]", - "parametrize", - "pytestmark", - "input_output0-gpt-4o-mini", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.01874550001230091, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 139, 'Skipped: Provider fireworks does not support model gpt-4o-mini')" - }, - "teardown": { - "duration": 0.00031995808240026236, - "outcome": "passed" - } - } - ] -} diff --git a/tests/verifications/test_results/fireworks_1744264202.json b/tests/verifications/test_results/fireworks_1744264202.json new file mode 100644 index 000000000..d14738be9 --- /dev/null +++ b/tests/verifications/test_results/fireworks_1744264202.json @@ -0,0 +1,1329 @@ +{ + "created": 1744264258.730061, + "duration": 53.86071586608887, + "exitcode": 1, + "root": "/Users/erichuang/projects/llama-stack", 
+ "environment": {}, + "summary": { + "passed": 28, + "skipped": 2, + "failed": 3, + "total": 33, + "collected": 33 + }, + "collectors": [ + { + "nodeid": "", + "outcome": "passed", + "result": [ + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py", + "type": "Module" + } + ] + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py", + "outcome": "passed", + "result": [ + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[accounts/fireworks/models/llama-v3p3-70b-instruct-earth]", + "type": "Function", + "lineno": 72 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[accounts/fireworks/models/llama-v3p3-70b-instruct-saturn]", + "type": "Function", + "lineno": 72 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[accounts/fireworks/models/llama4-scout-instruct-basic-earth]", + "type": "Function", + "lineno": 72 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[accounts/fireworks/models/llama4-scout-instruct-basic-saturn]", + "type": "Function", + "lineno": 72 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[accounts/fireworks/models/llama4-maverick-instruct-basic-earth]", + "type": "Function", + "lineno": 72 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[accounts/fireworks/models/llama4-maverick-instruct-basic-saturn]", + "type": "Function", + "lineno": 72 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[accounts/fireworks/models/llama-v3p3-70b-instruct-earth]", + "type": "Function", + "lineno": 91 + }, + { + "nodeid": 
"tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[accounts/fireworks/models/llama-v3p3-70b-instruct-saturn]", + "type": "Function", + "lineno": 91 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[accounts/fireworks/models/llama4-scout-instruct-basic-earth]", + "type": "Function", + "lineno": 91 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[accounts/fireworks/models/llama4-scout-instruct-basic-saturn]", + "type": "Function", + "lineno": 91 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[accounts/fireworks/models/llama4-maverick-instruct-basic-earth]", + "type": "Function", + "lineno": 91 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[accounts/fireworks/models/llama4-maverick-instruct-basic-saturn]", + "type": "Function", + "lineno": 91 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]", + "type": "Function", + "lineno": 115 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[accounts/fireworks/models/llama4-scout-instruct-basic-case0]", + "type": "Function", + "lineno": 115 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]", + "type": "Function", + "lineno": 115 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]", + "type": "Function", + "lineno": 134 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[accounts/fireworks/models/llama4-scout-instruct-basic-case0]", + 
"type": "Function", + "lineno": 134 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]", + "type": "Function", + "lineno": 134 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[accounts/fireworks/models/llama-v3p3-70b-instruct-calendar]", + "type": "Function", + "lineno": 158 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[accounts/fireworks/models/llama-v3p3-70b-instruct-math]", + "type": "Function", + "lineno": 158 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[accounts/fireworks/models/llama4-scout-instruct-basic-calendar]", + "type": "Function", + "lineno": 158 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[accounts/fireworks/models/llama4-scout-instruct-basic-math]", + "type": "Function", + "lineno": 158 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[accounts/fireworks/models/llama4-maverick-instruct-basic-calendar]", + "type": "Function", + "lineno": 158 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[accounts/fireworks/models/llama4-maverick-instruct-basic-math]", + "type": "Function", + "lineno": 158 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[accounts/fireworks/models/llama-v3p3-70b-instruct-calendar]", + "type": "Function", + "lineno": 181 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[accounts/fireworks/models/llama-v3p3-70b-instruct-math]", + "type": "Function", + "lineno": 
181 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[accounts/fireworks/models/llama4-scout-instruct-basic-calendar]", + "type": "Function", + "lineno": 181 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[accounts/fireworks/models/llama4-scout-instruct-basic-math]", + "type": "Function", + "lineno": 181 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[accounts/fireworks/models/llama4-maverick-instruct-basic-calendar]", + "type": "Function", + "lineno": 181 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[accounts/fireworks/models/llama4-maverick-instruct-basic-math]", + "type": "Function", + "lineno": 181 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]", + "type": "Function", + "lineno": 203 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-case0]", + "type": "Function", + "lineno": 203 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]", + "type": "Function", + "lineno": 203 + } + ] + } + ], + "tests": [ + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[accounts/fireworks/models/llama-v3p3-70b-instruct-earth]", + "lineno": 72, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_basic[accounts/fireworks/models/llama-v3p3-70b-instruct-earth]", + "parametrize", + "pytestmark", + "accounts/fireworks/models/llama-v3p3-70b-instruct-earth", + "test_chat_completion.py", + 
"openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "accounts/fireworks/models/llama-v3p3-70b-instruct", + "case_id": "earth" + }, + "setup": { + "duration": 0.05236550001427531, + "outcome": "passed" + }, + "call": { + "duration": 0.5364967910572886, + "outcome": "passed" + }, + "teardown": { + "duration": 0.00015075004193931818, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[accounts/fireworks/models/llama-v3p3-70b-instruct-saturn]", + "lineno": 72, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_basic[accounts/fireworks/models/llama-v3p3-70b-instruct-saturn]", + "parametrize", + "pytestmark", + "accounts/fireworks/models/llama-v3p3-70b-instruct-saturn", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "accounts/fireworks/models/llama-v3p3-70b-instruct", + "case_id": "saturn" + }, + "setup": { + "duration": 0.00699599995277822, + "outcome": "passed" + }, + "call": { + "duration": 0.5843954589217901, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0003858329728245735, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[accounts/fireworks/models/llama4-scout-instruct-basic-earth]", + "lineno": 72, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_basic[accounts/fireworks/models/llama4-scout-instruct-basic-earth]", + "parametrize", + "pytestmark", + "accounts/fireworks/models/llama4-scout-instruct-basic-earth", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "accounts/fireworks/models/llama4-scout-instruct-basic", + "case_id": "earth" + }, + "setup": { + "duration": 0.009176500025205314, + "outcome": "passed" + }, + "call": { + "duration": 
0.9258683329680935, + "outcome": "passed" + }, + "teardown": { + "duration": 0.00015787500888109207, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[accounts/fireworks/models/llama4-scout-instruct-basic-saturn]", + "lineno": 72, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_basic[accounts/fireworks/models/llama4-scout-instruct-basic-saturn]", + "parametrize", + "pytestmark", + "accounts/fireworks/models/llama4-scout-instruct-basic-saturn", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "accounts/fireworks/models/llama4-scout-instruct-basic", + "case_id": "saturn" + }, + "setup": { + "duration": 0.011275375029072165, + "outcome": "passed" + }, + "call": { + "duration": 0.6890578339807689, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0004926669644191861, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[accounts/fireworks/models/llama4-maverick-instruct-basic-earth]", + "lineno": 72, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_basic[accounts/fireworks/models/llama4-maverick-instruct-basic-earth]", + "parametrize", + "pytestmark", + "accounts/fireworks/models/llama4-maverick-instruct-basic-earth", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "accounts/fireworks/models/llama4-maverick-instruct-basic", + "case_id": "earth" + }, + "setup": { + "duration": 0.007520624902099371, + "outcome": "passed" + }, + "call": { + "duration": 0.6675686669768766, + "outcome": "passed" + }, + "teardown": { + "duration": 0.00016137503553181887, + "outcome": "passed" + } + }, + { + "nodeid": 
"tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[accounts/fireworks/models/llama4-maverick-instruct-basic-saturn]", + "lineno": 72, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_basic[accounts/fireworks/models/llama4-maverick-instruct-basic-saturn]", + "parametrize", + "pytestmark", + "accounts/fireworks/models/llama4-maverick-instruct-basic-saturn", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "accounts/fireworks/models/llama4-maverick-instruct-basic", + "case_id": "saturn" + }, + "setup": { + "duration": 0.0076431670458987355, + "outcome": "passed" + }, + "call": { + "duration": 1.6813415409997106, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0004928340204060078, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[accounts/fireworks/models/llama-v3p3-70b-instruct-earth]", + "lineno": 91, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_basic[accounts/fireworks/models/llama-v3p3-70b-instruct-earth]", + "parametrize", + "pytestmark", + "accounts/fireworks/models/llama-v3p3-70b-instruct-earth", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "accounts/fireworks/models/llama-v3p3-70b-instruct", + "case_id": "earth" + }, + "setup": { + "duration": 0.01302404107991606, + "outcome": "passed" + }, + "call": { + "duration": 1.3206909999717027, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0002220839960500598, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[accounts/fireworks/models/llama-v3p3-70b-instruct-saturn]", + "lineno": 91, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_basic[accounts/fireworks/models/llama-v3p3-70b-instruct-saturn]", 
+ "parametrize", + "pytestmark", + "accounts/fireworks/models/llama-v3p3-70b-instruct-saturn", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "accounts/fireworks/models/llama-v3p3-70b-instruct", + "case_id": "saturn" + }, + "setup": { + "duration": 0.0071772499941289425, + "outcome": "passed" + }, + "call": { + "duration": 0.4109888339880854, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0005431669997051358, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[accounts/fireworks/models/llama4-scout-instruct-basic-earth]", + "lineno": 91, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_basic[accounts/fireworks/models/llama4-scout-instruct-basic-earth]", + "parametrize", + "pytestmark", + "accounts/fireworks/models/llama4-scout-instruct-basic-earth", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "accounts/fireworks/models/llama4-scout-instruct-basic", + "case_id": "earth" + }, + "setup": { + "duration": 0.012043708004057407, + "outcome": "passed" + }, + "call": { + "duration": 0.4509220840409398, + "outcome": "passed" + }, + "teardown": { + "duration": 0.00016408402007073164, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[accounts/fireworks/models/llama4-scout-instruct-basic-saturn]", + "lineno": 91, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_basic[accounts/fireworks/models/llama4-scout-instruct-basic-saturn]", + "parametrize", + "pytestmark", + "accounts/fireworks/models/llama4-scout-instruct-basic-saturn", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "accounts/fireworks/models/llama4-scout-instruct-basic", + "case_id": 
"saturn" + }, + "setup": { + "duration": 0.007165874936617911, + "outcome": "passed" + }, + "call": { + "duration": 0.6527335830032825, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0006419579731300473, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[accounts/fireworks/models/llama4-maverick-instruct-basic-earth]", + "lineno": 91, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_basic[accounts/fireworks/models/llama4-maverick-instruct-basic-earth]", + "parametrize", + "pytestmark", + "accounts/fireworks/models/llama4-maverick-instruct-basic-earth", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "accounts/fireworks/models/llama4-maverick-instruct-basic", + "case_id": "earth" + }, + "setup": { + "duration": 0.007546542095951736, + "outcome": "passed" + }, + "call": { + "duration": 0.9360042089829221, + "outcome": "passed" + }, + "teardown": { + "duration": 0.00020483299158513546, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[accounts/fireworks/models/llama4-maverick-instruct-basic-saturn]", + "lineno": 91, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_basic[accounts/fireworks/models/llama4-maverick-instruct-basic-saturn]", + "parametrize", + "pytestmark", + "accounts/fireworks/models/llama4-maverick-instruct-basic-saturn", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "accounts/fireworks/models/llama4-maverick-instruct-basic", + "case_id": "saturn" + }, + "setup": { + "duration": 0.046697250101715326, + "outcome": "passed" + }, + "call": { + "duration": 0.668349124956876, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0005031249020248652, + "outcome": "passed" + } + }, + { + "nodeid": 
"tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]", + "lineno": 115, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_image[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]", + "parametrize", + "pytestmark", + "accounts/fireworks/models/llama-v3p3-70b-instruct-case0", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "accounts/fireworks/models/llama-v3p3-70b-instruct", + "case_id": "case0" + }, + "setup": { + "duration": 0.012287458986975253, + "outcome": "passed" + }, + "call": { + "duration": 0.00015287497080862522, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py', 124, 'Skipped: Skipping test_chat_non_streaming_image for model accounts/fireworks/models/llama-v3p3-70b-instruct on provider fireworks based on config.')" + }, + "teardown": { + "duration": 0.00012162502389401197, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[accounts/fireworks/models/llama4-scout-instruct-basic-case0]", + "lineno": 115, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_image[accounts/fireworks/models/llama4-scout-instruct-basic-case0]", + "parametrize", + "pytestmark", + "accounts/fireworks/models/llama4-scout-instruct-basic-case0", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "accounts/fireworks/models/llama4-scout-instruct-basic", + "case_id": "case0" + }, + "setup": { + "duration": 0.007204124936833978, + "outcome": "passed" + }, + "call": { + "duration": 1.8676417920505628, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0001557499635964632, + "outcome": "passed" + } + }, + { + "nodeid": 
"tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]", + "lineno": 115, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_image[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]", + "parametrize", + "pytestmark", + "accounts/fireworks/models/llama4-maverick-instruct-basic-case0", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "accounts/fireworks/models/llama4-maverick-instruct-basic", + "case_id": "case0" + }, + "setup": { + "duration": 0.008226625039242208, + "outcome": "passed" + }, + "call": { + "duration": 3.2724285409785807, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0002898330567404628, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]", + "lineno": 134, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_image[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]", + "parametrize", + "pytestmark", + "accounts/fireworks/models/llama-v3p3-70b-instruct-case0", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "accounts/fireworks/models/llama-v3p3-70b-instruct", + "case_id": "case0" + }, + "setup": { + "duration": 0.011927249957807362, + "outcome": "passed" + }, + "call": { + "duration": 0.00017358292825520039, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py', 143, 'Skipped: Skipping test_chat_streaming_image for model accounts/fireworks/models/llama-v3p3-70b-instruct on provider fireworks based on config.')" + }, + "teardown": { + "duration": 0.00014037499204277992, + "outcome": "passed" + } + }, + { + "nodeid": 
"tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[accounts/fireworks/models/llama4-scout-instruct-basic-case0]", + "lineno": 134, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_image[accounts/fireworks/models/llama4-scout-instruct-basic-case0]", + "parametrize", + "pytestmark", + "accounts/fireworks/models/llama4-scout-instruct-basic-case0", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "accounts/fireworks/models/llama4-scout-instruct-basic", + "case_id": "case0" + }, + "setup": { + "duration": 0.008731417008675635, + "outcome": "passed" + }, + "call": { + "duration": 2.8333610829431564, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0005132080987095833, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]", + "lineno": 134, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_image[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]", + "parametrize", + "pytestmark", + "accounts/fireworks/models/llama4-maverick-instruct-basic-case0", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "accounts/fireworks/models/llama4-maverick-instruct-basic", + "case_id": "case0" + }, + "setup": { + "duration": 0.016569208004511893, + "outcome": "passed" + }, + "call": { + "duration": 2.302010750048794, + "outcome": "passed" + }, + "teardown": { + "duration": 0.00016108399722725153, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[accounts/fireworks/models/llama-v3p3-70b-instruct-calendar]", + "lineno": 158, + "outcome": "passed", + "keywords": [ + 
"test_chat_non_streaming_structured_output[accounts/fireworks/models/llama-v3p3-70b-instruct-calendar]", + "parametrize", + "pytestmark", + "accounts/fireworks/models/llama-v3p3-70b-instruct-calendar", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "accounts/fireworks/models/llama-v3p3-70b-instruct", + "case_id": "calendar" + }, + "setup": { + "duration": 0.039960999973118305, + "outcome": "passed" + }, + "call": { + "duration": 7.661373125039972, + "outcome": "passed" + }, + "teardown": { + "duration": 0.00015833403449505568, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[accounts/fireworks/models/llama-v3p3-70b-instruct-math]", + "lineno": 158, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_structured_output[accounts/fireworks/models/llama-v3p3-70b-instruct-math]", + "parametrize", + "pytestmark", + "accounts/fireworks/models/llama-v3p3-70b-instruct-math", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "accounts/fireworks/models/llama-v3p3-70b-instruct", + "case_id": "math" + }, + "setup": { + "duration": 0.006928625050932169, + "outcome": "passed" + }, + "call": { + "duration": 2.762534625013359, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0006561250193044543, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[accounts/fireworks/models/llama4-scout-instruct-basic-calendar]", + "lineno": 158, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_structured_output[accounts/fireworks/models/llama4-scout-instruct-basic-calendar]", + "parametrize", + "pytestmark", + "accounts/fireworks/models/llama4-scout-instruct-basic-calendar", + "test_chat_completion.py", + 
"openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "accounts/fireworks/models/llama4-scout-instruct-basic", + "case_id": "calendar" + }, + "setup": { + "duration": 0.008602249901741743, + "outcome": "passed" + }, + "call": { + "duration": 0.8311484589939937, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0005021670367568731, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[accounts/fireworks/models/llama4-scout-instruct-basic-math]", + "lineno": 158, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_structured_output[accounts/fireworks/models/llama4-scout-instruct-basic-math]", + "parametrize", + "pytestmark", + "accounts/fireworks/models/llama4-scout-instruct-basic-math", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "accounts/fireworks/models/llama4-scout-instruct-basic", + "case_id": "math" + }, + "setup": { + "duration": 0.015500334091484547, + "outcome": "passed" + }, + "call": { + "duration": 2.505719291046262, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0002619170118123293, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[accounts/fireworks/models/llama4-maverick-instruct-basic-calendar]", + "lineno": 158, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_structured_output[accounts/fireworks/models/llama4-maverick-instruct-basic-calendar]", + "parametrize", + "pytestmark", + "accounts/fireworks/models/llama4-maverick-instruct-basic-calendar", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "accounts/fireworks/models/llama4-maverick-instruct-basic", + "case_id": "calendar" + }, + "setup": { + "duration": 
0.01948041608557105, + "outcome": "passed" + }, + "call": { + "duration": 0.6336237500654534, + "outcome": "passed" + }, + "teardown": { + "duration": 0.00016637507360428572, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[accounts/fireworks/models/llama4-maverick-instruct-basic-math]", + "lineno": 158, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_structured_output[accounts/fireworks/models/llama4-maverick-instruct-basic-math]", + "parametrize", + "pytestmark", + "accounts/fireworks/models/llama4-maverick-instruct-basic-math", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "accounts/fireworks/models/llama4-maverick-instruct-basic", + "case_id": "math" + }, + "setup": { + "duration": 0.006810749997384846, + "outcome": "passed" + }, + "call": { + "duration": 1.9086956249084324, + "outcome": "passed" + }, + "teardown": { + "duration": 0.00018824997823685408, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[accounts/fireworks/models/llama-v3p3-70b-instruct-calendar]", + "lineno": 181, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_structured_output[accounts/fireworks/models/llama-v3p3-70b-instruct-calendar]", + "parametrize", + "pytestmark", + "accounts/fireworks/models/llama-v3p3-70b-instruct-calendar", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "accounts/fireworks/models/llama-v3p3-70b-instruct", + "case_id": "calendar" + }, + "setup": { + "duration": 0.007881582947447896, + "outcome": "passed" + }, + "call": { + "duration": 0.7142562499502674, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0007035828894004226, + "outcome": "passed" + } + }, + { + "nodeid": 
"tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[accounts/fireworks/models/llama-v3p3-70b-instruct-math]", + "lineno": 181, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_structured_output[accounts/fireworks/models/llama-v3p3-70b-instruct-math]", + "parametrize", + "pytestmark", + "accounts/fireworks/models/llama-v3p3-70b-instruct-math", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "accounts/fireworks/models/llama-v3p3-70b-instruct", + "case_id": "math" + }, + "setup": { + "duration": 0.00848070892971009, + "outcome": "passed" + }, + "call": { + "duration": 1.5210869159782305, + "outcome": "passed" + }, + "teardown": { + "duration": 0.00021216599270701408, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[accounts/fireworks/models/llama4-scout-instruct-basic-calendar]", + "lineno": 181, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_structured_output[accounts/fireworks/models/llama4-scout-instruct-basic-calendar]", + "parametrize", + "pytestmark", + "accounts/fireworks/models/llama4-scout-instruct-basic-calendar", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "accounts/fireworks/models/llama4-scout-instruct-basic", + "case_id": "calendar" + }, + "setup": { + "duration": 0.009669666993431747, + "outcome": "passed" + }, + "call": { + "duration": 1.3105999580584466, + "outcome": "passed" + }, + "teardown": { + "duration": 0.000588166993111372, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[accounts/fireworks/models/llama4-scout-instruct-basic-math]", + "lineno": 181, + "outcome": "passed", + "keywords": [ + 
"test_chat_streaming_structured_output[accounts/fireworks/models/llama4-scout-instruct-basic-math]", + "parametrize", + "pytestmark", + "accounts/fireworks/models/llama4-scout-instruct-basic-math", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "accounts/fireworks/models/llama4-scout-instruct-basic", + "case_id": "math" + }, + "setup": { + "duration": 0.007745541981421411, + "outcome": "passed" + }, + "call": { + "duration": 3.250162083073519, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0001455000601708889, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[accounts/fireworks/models/llama4-maverick-instruct-basic-calendar]", + "lineno": 181, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_structured_output[accounts/fireworks/models/llama4-maverick-instruct-basic-calendar]", + "parametrize", + "pytestmark", + "accounts/fireworks/models/llama4-maverick-instruct-basic-calendar", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "accounts/fireworks/models/llama4-maverick-instruct-basic", + "case_id": "calendar" + }, + "setup": { + "duration": 0.009726207936182618, + "outcome": "passed" + }, + "call": { + "duration": 0.5564592910232022, + "outcome": "passed" + }, + "teardown": { + "duration": 0.00019470800179988146, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[accounts/fireworks/models/llama4-maverick-instruct-basic-math]", + "lineno": 181, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_structured_output[accounts/fireworks/models/llama4-maverick-instruct-basic-math]", + "parametrize", + "pytestmark", + "accounts/fireworks/models/llama4-maverick-instruct-basic-math", + 
"test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "accounts/fireworks/models/llama4-maverick-instruct-basic", + "case_id": "math" + }, + "setup": { + "duration": 0.018431040924042463, + "outcome": "passed" + }, + "call": { + "duration": 3.8501765420660377, + "outcome": "passed" + }, + "teardown": { + "duration": 0.00015279196668416262, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]", + "lineno": 203, + "outcome": "failed", + "keywords": [ + "test_chat_non_streaming_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]", + "parametrize", + "pytestmark", + "accounts/fireworks/models/llama-v3p3-70b-instruct-case0", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "accounts/fireworks/models/llama-v3p3-70b-instruct", + "case_id": "case0" + }, + "setup": { + "duration": 0.007509749964810908, + "outcome": "passed" + }, + "call": { + "duration": 0.4906975000631064, + "outcome": "failed", + "crash": { + "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", + "lineno": 222, + "message": "TypeError: object of type 'NoneType' has no len()" + }, + "traceback": [ + { + "path": "tests/verifications/openai_api/test_chat_completion.py", + "lineno": 222, + "message": "TypeError" + } + ], + "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama-v3p3-70b-instruct'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'input': {'messages': 
[{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_non_streaming_tool_calling(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n stream=False,\n )\n \n assert response.choices[0].message.role == \"assistant\"\n> assert len(response.choices[0].message.tool_calls) > 0\nE TypeError: object of type 'NoneType' has no len()\n\ntests/verifications/openai_api/test_chat_completion.py:222: TypeError" + }, + "teardown": { + "duration": 0.00023995805531740189, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-case0]", + "lineno": 203, + "outcome": "failed", + "keywords": [ + "test_chat_non_streaming_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-case0]", + "parametrize", + "pytestmark", + "accounts/fireworks/models/llama4-scout-instruct-basic-case0", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "accounts/fireworks/models/llama4-scout-instruct-basic", + "case_id": "case0" + }, + "setup": { + "duration": 0.007144959061406553, + "outcome": "passed" + }, + "call": { + "duration": 3.818257624981925, + "outcome": "failed", 
+ "crash": { + "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", + "lineno": 222, + "message": "TypeError: object of type 'NoneType' has no len()" + }, + "traceback": [ + { + "path": "tests/verifications/openai_api/test_chat_completion.py", + "lineno": 222, + "message": "TypeError" + } + ], + "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-scout-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_non_streaming_tool_calling(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n stream=False,\n )\n \n assert response.choices[0].message.role == \"assistant\"\n> assert len(response.choices[0].message.tool_calls) > 0\nE TypeError: object of type 'NoneType' has no len()\n\ntests/verifications/openai_api/test_chat_completion.py:222: TypeError" + }, + "teardown": { + "duration": 0.0002668750239536166, + "outcome": "passed" + } + }, + { + 
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]", + "lineno": 203, + "outcome": "failed", + "keywords": [ + "test_chat_non_streaming_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]", + "parametrize", + "pytestmark", + "accounts/fireworks/models/llama4-maverick-instruct-basic-case0", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "accounts/fireworks/models/llama4-maverick-instruct-basic", + "case_id": "case0" + }, + "setup": { + "duration": 0.015290249953977764, + "outcome": "passed" + }, + "call": { + "duration": 1.5883799999719486, + "outcome": "failed", + "crash": { + "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", + "lineno": 222, + "message": "TypeError: object of type 'NoneType' has no len()" + }, + "traceback": [ + { + "path": "tests/verifications/openai_api/test_chat_completion.py", + "lineno": 222, + "message": "TypeError" + } + ], + "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-maverick-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_non_streaming_tool_calling(request, 
openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n stream=False,\n )\n \n assert response.choices[0].message.role == \"assistant\"\n> assert len(response.choices[0].message.tool_calls) > 0\nE TypeError: object of type 'NoneType' has no len()\n\ntests/verifications/openai_api/test_chat_completion.py:222: TypeError" + }, + "teardown": { + "duration": 0.0008049579337239265, + "outcome": "passed" + } + } + ] +} diff --git a/tests/verifications/test_results/openai_1744154522.json b/tests/verifications/test_results/openai_1744154522.json deleted file mode 100644 index 310f3500d..000000000 --- a/tests/verifications/test_results/openai_1744154522.json +++ /dev/null @@ -1,2672 +0,0 @@ -{ - "created": 1744154576.251519, - "duration": 51.50739002227783, - "exitcode": 0, - "root": "/Users/erichuang/projects/llama-stack", - "environment": {}, - "summary": { - "skipped": 61, - "passed": 22, - "total": 83, - "collected": 83 - }, - "collectors": [ - { - "nodeid": "", - "outcome": "passed", - "result": [ - { - "nodeid": "tests/verifications/openai/test_chat_completion.py", - "type": "Module" - } - ] - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py", - "outcome": "passed", - "result": [ - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-3.3-8B-Instruct]", - "type": "Function", - "lineno": 25 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-3.3-70B-Instruct]", - "type": "Function", - "lineno": 25 - }, - { - "nodeid": 
"tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-4-Scout-17B-16E]", - "type": "Function", - "lineno": 25 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "type": "Function", - "lineno": 25 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-4-Maverick-17B-128E]", - "type": "Function", - "lineno": 25 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "type": "Function", - "lineno": 25 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-gpt-4o]", - "type": "Function", - "lineno": 25 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-gpt-4o-mini]", - "type": "Function", - "lineno": 25 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-3.3-8B-Instruct]", - "type": "Function", - "lineno": 25 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-3.3-70B-Instruct]", - "type": "Function", - "lineno": 25 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-4-Scout-17B-16E]", - "type": "Function", - "lineno": 25 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-4-Scout-17B-16E-Instruct]", - "type": "Function", - "lineno": 25 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-4-Maverick-17B-128E]", - "type": "Function", - "lineno": 25 - }, - { - "nodeid": 
"tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-4-Maverick-17B-128E-Instruct]", - "type": "Function", - "lineno": 25 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-gpt-4o]", - "type": "Function", - "lineno": 25 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-gpt-4o-mini]", - "type": "Function", - "lineno": 25 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-3.3-8B-Instruct]", - "type": "Function", - "lineno": 40 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-3.3-70B-Instruct]", - "type": "Function", - "lineno": 40 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-4-Scout-17B-16E]", - "type": "Function", - "lineno": 40 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "type": "Function", - "lineno": 40 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-4-Maverick-17B-128E]", - "type": "Function", - "lineno": 40 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "type": "Function", - "lineno": 40 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-gpt-4o]", - "type": "Function", - "lineno": 40 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-gpt-4o-mini]", - "type": "Function", - "lineno": 40 - }, - { - "nodeid": 
"tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-3.3-8B-Instruct]", - "type": "Function", - "lineno": 40 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-3.3-70B-Instruct]", - "type": "Function", - "lineno": 40 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-4-Scout-17B-16E]", - "type": "Function", - "lineno": 40 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-4-Scout-17B-16E-Instruct]", - "type": "Function", - "lineno": 40 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-4-Maverick-17B-128E]", - "type": "Function", - "lineno": 40 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-4-Maverick-17B-128E-Instruct]", - "type": "Function", - "lineno": 40 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-gpt-4o]", - "type": "Function", - "lineno": 40 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-gpt-4o-mini]", - "type": "Function", - "lineno": 40 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-Llama-4-Scout-17B-16E]", - "type": "Function", - "lineno": 60 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "type": "Function", - "lineno": 60 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-Llama-4-Maverick-17B-128E]", - "type": "Function", - "lineno": 60 - }, - { - "nodeid": 
"tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "type": "Function", - "lineno": 60 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-gpt-4o]", - "type": "Function", - "lineno": 60 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-gpt-4o-mini]", - "type": "Function", - "lineno": 60 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-Llama-4-Scout-17B-16E]", - "type": "Function", - "lineno": 75 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "type": "Function", - "lineno": 75 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-Llama-4-Maverick-17B-128E]", - "type": "Function", - "lineno": 75 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "type": "Function", - "lineno": 75 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-gpt-4o]", - "type": "Function", - "lineno": 75 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-gpt-4o-mini]", - "type": "Function", - "lineno": 75 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-3.3-8B-Instruct]", - "type": "Function", - "lineno": 95 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-3.3-70B-Instruct]", - "type": "Function", - "lineno": 95 - }, - { - "nodeid": 
"tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E]", - "type": "Function", - "lineno": 95 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "type": "Function", - "lineno": 95 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E]", - "type": "Function", - "lineno": 95 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "type": "Function", - "lineno": 95 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-gpt-4o]", - "type": "Function", - "lineno": 95 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-gpt-4o-mini]", - "type": "Function", - "lineno": 95 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-3.3-8B-Instruct]", - "type": "Function", - "lineno": 95 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-3.3-70B-Instruct]", - "type": "Function", - "lineno": 95 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E]", - "type": "Function", - "lineno": 95 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E-Instruct]", - "type": "Function", - "lineno": 95 - }, - { - "nodeid": 
"tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E]", - "type": "Function", - "lineno": 95 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E-Instruct]", - "type": "Function", - "lineno": 95 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-gpt-4o]", - "type": "Function", - "lineno": 95 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-gpt-4o-mini]", - "type": "Function", - "lineno": 95 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-3.3-8B-Instruct]", - "type": "Function", - "lineno": 117 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-3.3-70B-Instruct]", - "type": "Function", - "lineno": 117 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E]", - "type": "Function", - "lineno": 117 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "type": "Function", - "lineno": 117 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E]", - "type": "Function", - "lineno": 117 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "type": "Function", - "lineno": 117 - }, - { - "nodeid": 
"tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-gpt-4o]", - "type": "Function", - "lineno": 117 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-gpt-4o-mini]", - "type": "Function", - "lineno": 117 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-3.3-8B-Instruct]", - "type": "Function", - "lineno": 117 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-3.3-70B-Instruct]", - "type": "Function", - "lineno": 117 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E]", - "type": "Function", - "lineno": 117 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E-Instruct]", - "type": "Function", - "lineno": 117 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E]", - "type": "Function", - "lineno": 117 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E-Instruct]", - "type": "Function", - "lineno": 117 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-gpt-4o]", - "type": "Function", - "lineno": 117 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-gpt-4o-mini]", - "type": "Function", - "lineno": 117 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-Llama-3.3-70B-Instruct]", - 
"type": "Function", - "lineno": 138 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-Llama-4-Scout-17B-16E]", - "type": "Function", - "lineno": 138 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "type": "Function", - "lineno": 138 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-Llama-4-Maverick-17B-128E]", - "type": "Function", - "lineno": 138 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "type": "Function", - "lineno": 138 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-gpt-4o]", - "type": "Function", - "lineno": 138 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-gpt-4o-mini]", - "type": "Function", - "lineno": 138 - } - ] - } - ], - "tests": [ - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-3.3-8B-Instruct]", - "lineno": 25, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_basic[input_output0-Llama-3.3-8B-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-3.3-8B-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.0531630830373615, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider openai does not support model Llama-3.3-8B-Instruct')" - }, - "teardown": { - "duration": 0.0001657919492572546, - "outcome": "passed" - } - }, - { - "nodeid": 
"tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-3.3-70B-Instruct]", - "lineno": 25, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_basic[input_output0-Llama-3.3-70B-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-3.3-70B-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.006063499953597784, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider openai does not support model Llama-3.3-70B-Instruct')" - }, - "teardown": { - "duration": 0.00014004099648445845, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-4-Scout-17B-16E]", - "lineno": 25, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_basic[input_output0-Llama-4-Scout-17B-16E]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Scout-17B-16E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.005356832989491522, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider openai does not support model Llama-4-Scout-17B-16E')" - }, - "teardown": { - "duration": 0.00016508297994732857, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "lineno": 25, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_basic[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Scout-17B-16E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", 
- "llama-stack", - "" - ], - "setup": { - "duration": 0.006139832898043096, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider openai does not support model Llama-4-Scout-17B-16E-Instruct')" - }, - "teardown": { - "duration": 0.00014450005255639553, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-4-Maverick-17B-128E]", - "lineno": 25, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_basic[input_output0-Llama-4-Maverick-17B-128E]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Maverick-17B-128E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.00542324990965426, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider openai does not support model Llama-4-Maverick-17B-128E')" - }, - "teardown": { - "duration": 0.00014112505596131086, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "lineno": 25, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_basic[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Maverick-17B-128E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.004965625004842877, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider openai does not support model Llama-4-Maverick-17B-128E-Instruct')" - }, - "teardown": { - "duration": 0.00013720791321247816, - 
"outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-gpt-4o]", - "lineno": 25, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_basic[input_output0-gpt-4o]", - "parametrize", - "pytestmark", - "input_output0-gpt-4o", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.005054084002040327, - "outcome": "passed" - }, - "call": { - "duration": 0.6271341659594327, - "outcome": "passed" - }, - "teardown": { - "duration": 0.00043925002682954073, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-gpt-4o-mini]", - "lineno": 25, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_basic[input_output0-gpt-4o-mini]", - "parametrize", - "pytestmark", - "input_output0-gpt-4o-mini", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.0159178749890998, - "outcome": "passed" - }, - "call": { - "duration": 0.44088316697161645, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0006467089988291264, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-3.3-8B-Instruct]", - "lineno": 25, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_basic[input_output1-Llama-3.3-8B-Instruct]", - "parametrize", - "pytestmark", - "input_output1-Llama-3.3-8B-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.016705541987903416, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider openai does not support model Llama-3.3-8B-Instruct')" - }, - 
"teardown": { - "duration": 0.0005769169656559825, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-3.3-70B-Instruct]", - "lineno": 25, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_basic[input_output1-Llama-3.3-70B-Instruct]", - "parametrize", - "pytestmark", - "input_output1-Llama-3.3-70B-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.012067249976098537, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider openai does not support model Llama-3.3-70B-Instruct')" - }, - "teardown": { - "duration": 0.00016683305148035288, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-4-Scout-17B-16E]", - "lineno": 25, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_basic[input_output1-Llama-4-Scout-17B-16E]", - "parametrize", - "pytestmark", - "input_output1-Llama-4-Scout-17B-16E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.009295083000324667, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider openai does not support model Llama-4-Scout-17B-16E')" - }, - "teardown": { - "duration": 0.00017204193864017725, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-4-Scout-17B-16E-Instruct]", - "lineno": 25, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_basic[input_output1-Llama-4-Scout-17B-16E-Instruct]", - "parametrize", - "pytestmark", - 
"input_output1-Llama-4-Scout-17B-16E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.009534333017654717, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider openai does not support model Llama-4-Scout-17B-16E-Instruct')" - }, - "teardown": { - "duration": 0.00020175008103251457, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-4-Maverick-17B-128E]", - "lineno": 25, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_basic[input_output1-Llama-4-Maverick-17B-128E]", - "parametrize", - "pytestmark", - "input_output1-Llama-4-Maverick-17B-128E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.006628665956668556, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider openai does not support model Llama-4-Maverick-17B-128E')" - }, - "teardown": { - "duration": 0.0003687090938910842, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-4-Maverick-17B-128E-Instruct]", - "lineno": 25, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_basic[input_output1-Llama-4-Maverick-17B-128E-Instruct]", - "parametrize", - "pytestmark", - "input_output1-Llama-4-Maverick-17B-128E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.0061322919791564345, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider openai does 
not support model Llama-4-Maverick-17B-128E-Instruct')" - }, - "teardown": { - "duration": 0.0003664169926196337, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-gpt-4o]", - "lineno": 25, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_basic[input_output1-gpt-4o]", - "parametrize", - "pytestmark", - "input_output1-gpt-4o", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.00623433303553611, - "outcome": "passed" - }, - "call": { - "duration": 0.7898445830214769, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0006602079374715686, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-gpt-4o-mini]", - "lineno": 25, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_basic[input_output1-gpt-4o-mini]", - "parametrize", - "pytestmark", - "input_output1-gpt-4o-mini", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.014758958015590906, - "outcome": "passed" - }, - "call": { - "duration": 1.1555478329537436, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0011781250359490514, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-3.3-8B-Instruct]", - "lineno": 40, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_basic[input_output0-Llama-3.3-8B-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-3.3-8B-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.03454475000035018, - "outcome": "skipped", - "longrepr": 
"('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider openai does not support model Llama-3.3-8B-Instruct')" - }, - "teardown": { - "duration": 0.000967124942690134, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-3.3-70B-Instruct]", - "lineno": 40, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_basic[input_output0-Llama-3.3-70B-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-3.3-70B-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.025206666090525687, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider openai does not support model Llama-3.3-70B-Instruct')" - }, - "teardown": { - "duration": 0.000189624959602952, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-4-Scout-17B-16E]", - "lineno": 40, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_basic[input_output0-Llama-4-Scout-17B-16E]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Scout-17B-16E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.014331333106383681, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider openai does not support model Llama-4-Scout-17B-16E')" - }, - "teardown": { - "duration": 0.00023133307695388794, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "lineno": 40, - "outcome": 
"skipped", - "keywords": [ - "test_chat_streaming_basic[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Scout-17B-16E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.009339665994048119, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider openai does not support model Llama-4-Scout-17B-16E-Instruct')" - }, - "teardown": { - "duration": 0.00020329200197011232, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-4-Maverick-17B-128E]", - "lineno": 40, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_basic[input_output0-Llama-4-Maverick-17B-128E]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Maverick-17B-128E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.010387042071670294, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider openai does not support model Llama-4-Maverick-17B-128E')" - }, - "teardown": { - "duration": 0.00018254201859235764, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "lineno": 40, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_basic[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Maverick-17B-128E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.012297999928705394, - "outcome": "skipped", - "longrepr": 
"('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider openai does not support model Llama-4-Maverick-17B-128E-Instruct')" - }, - "teardown": { - "duration": 0.00018662505317479372, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-gpt-4o]", - "lineno": 40, - "outcome": "passed", - "keywords": [ - "test_chat_streaming_basic[input_output0-gpt-4o]", - "parametrize", - "pytestmark", - "input_output0-gpt-4o", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.006984042003750801, - "outcome": "passed" - }, - "call": { - "duration": 0.32529433304443955, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0033042499562725425, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-gpt-4o-mini]", - "lineno": 40, - "outcome": "passed", - "keywords": [ - "test_chat_streaming_basic[input_output0-gpt-4o-mini]", - "parametrize", - "pytestmark", - "input_output0-gpt-4o-mini", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.01832079200539738, - "outcome": "passed" - }, - "call": { - "duration": 0.48440287495031953, - "outcome": "passed" - }, - "teardown": { - "duration": 0.00047233293298631907, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-3.3-8B-Instruct]", - "lineno": 40, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_basic[input_output1-Llama-3.3-8B-Instruct]", - "parametrize", - "pytestmark", - "input_output1-Llama-3.3-8B-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 
0.02893691696226597, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider openai does not support model Llama-3.3-8B-Instruct')" - }, - "teardown": { - "duration": 0.0001747499918565154, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-3.3-70B-Instruct]", - "lineno": 40, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_basic[input_output1-Llama-3.3-70B-Instruct]", - "parametrize", - "pytestmark", - "input_output1-Llama-3.3-70B-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.006553041050210595, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider openai does not support model Llama-3.3-70B-Instruct')" - }, - "teardown": { - "duration": 0.00016829196829348803, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-4-Scout-17B-16E]", - "lineno": 40, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_basic[input_output1-Llama-4-Scout-17B-16E]", - "parametrize", - "pytestmark", - "input_output1-Llama-4-Scout-17B-16E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.013746666954830289, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider openai does not support model Llama-4-Scout-17B-16E')" - }, - "teardown": { - "duration": 0.00019237503875046968, - "outcome": "passed" - } - }, - { - "nodeid": 
"tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-4-Scout-17B-16E-Instruct]", - "lineno": 40, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_basic[input_output1-Llama-4-Scout-17B-16E-Instruct]", - "parametrize", - "pytestmark", - "input_output1-Llama-4-Scout-17B-16E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.007175332983024418, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider openai does not support model Llama-4-Scout-17B-16E-Instruct')" - }, - "teardown": { - "duration": 0.0001873329747468233, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-4-Maverick-17B-128E]", - "lineno": 40, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_basic[input_output1-Llama-4-Maverick-17B-128E]", - "parametrize", - "pytestmark", - "input_output1-Llama-4-Maverick-17B-128E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.006127291941083968, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider openai does not support model Llama-4-Maverick-17B-128E')" - }, - "teardown": { - "duration": 0.00019004102796316147, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-4-Maverick-17B-128E-Instruct]", - "lineno": 40, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_basic[input_output1-Llama-4-Maverick-17B-128E-Instruct]", - "parametrize", - "pytestmark", - "input_output1-Llama-4-Maverick-17B-128E-Instruct", - "test_chat_completion.py", - 
"openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.006421791040338576, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider openai does not support model Llama-4-Maverick-17B-128E-Instruct')" - }, - "teardown": { - "duration": 0.0001611249754205346, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-gpt-4o]", - "lineno": 40, - "outcome": "passed", - "keywords": [ - "test_chat_streaming_basic[input_output1-gpt-4o]", - "parametrize", - "pytestmark", - "input_output1-gpt-4o", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.009806249989196658, - "outcome": "passed" - }, - "call": { - "duration": 0.9556747920578346, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0004937920020893216, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-gpt-4o-mini]", - "lineno": 40, - "outcome": "passed", - "keywords": [ - "test_chat_streaming_basic[input_output1-gpt-4o-mini]", - "parametrize", - "pytestmark", - "input_output1-gpt-4o-mini", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.03146500000730157, - "outcome": "passed" - }, - "call": { - "duration": 1.082494750036858, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0006242080125957727, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-Llama-4-Scout-17B-16E]", - "lineno": 60, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_image[input_output0-Llama-4-Scout-17B-16E]", - "parametrize", - "pytestmark", - 
"input_output0-Llama-4-Scout-17B-16E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.021534667001105845, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 61, 'Skipped: Provider openai does not support model Llama-4-Scout-17B-16E')" - }, - "teardown": { - "duration": 0.0003469999646767974, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "lineno": 60, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_image[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Scout-17B-16E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.025929750059731305, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 61, 'Skipped: Provider openai does not support model Llama-4-Scout-17B-16E-Instruct')" - }, - "teardown": { - "duration": 0.0008774169255048037, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-Llama-4-Maverick-17B-128E]", - "lineno": 60, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_image[input_output0-Llama-4-Maverick-17B-128E]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Maverick-17B-128E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.012507125036790967, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 61, 'Skipped: Provider openai does not support model 
Llama-4-Maverick-17B-128E')" - }, - "teardown": { - "duration": 0.00022008304949849844, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "lineno": 60, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_image[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Maverick-17B-128E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.008156375028192997, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 61, 'Skipped: Provider openai does not support model Llama-4-Maverick-17B-128E-Instruct')" - }, - "teardown": { - "duration": 0.0002079169498756528, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-gpt-4o]", - "lineno": 60, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_image[input_output0-gpt-4o]", - "parametrize", - "pytestmark", - "input_output0-gpt-4o", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.012587749981321394, - "outcome": "passed" - }, - "call": { - "duration": 2.7379885419504717, - "outcome": "passed" - }, - "teardown": { - "duration": 0.00044579198583960533, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-gpt-4o-mini]", - "lineno": 60, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_image[input_output0-gpt-4o-mini]", - "parametrize", - "pytestmark", - "input_output0-gpt-4o-mini", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - 
"setup": { - "duration": 0.017111250082962215, - "outcome": "passed" - }, - "call": { - "duration": 2.599374584038742, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0009177909232676029, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-Llama-4-Scout-17B-16E]", - "lineno": 75, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_image[input_output0-Llama-4-Scout-17B-16E]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Scout-17B-16E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.02198700001463294, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 76, 'Skipped: Provider openai does not support model Llama-4-Scout-17B-16E')" - }, - "teardown": { - "duration": 0.00042749999556690454, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "lineno": 75, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_image[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Scout-17B-16E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.015032917028293014, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 76, 'Skipped: Provider openai does not support model Llama-4-Scout-17B-16E-Instruct')" - }, - "teardown": { - "duration": 0.00041016703471541405, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-Llama-4-Maverick-17B-128E]", - "lineno": 75, - "outcome": 
"skipped", - "keywords": [ - "test_chat_streaming_image[input_output0-Llama-4-Maverick-17B-128E]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Maverick-17B-128E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.013976250076666474, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 76, 'Skipped: Provider openai does not support model Llama-4-Maverick-17B-128E')" - }, - "teardown": { - "duration": 0.00027600000612437725, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "lineno": 75, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_image[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Maverick-17B-128E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.00799729092977941, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 76, 'Skipped: Provider openai does not support model Llama-4-Maverick-17B-128E-Instruct')" - }, - "teardown": { - "duration": 0.00020320899784564972, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-gpt-4o]", - "lineno": 75, - "outcome": "passed", - "keywords": [ - "test_chat_streaming_image[input_output0-gpt-4o]", - "parametrize", - "pytestmark", - "input_output0-gpt-4o", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.010483540943823755, - "outcome": "passed" - }, - "call": { - "duration": 4.249965250026435, - "outcome": "passed" - }, - 
"teardown": { - "duration": 0.0008596250554546714, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-gpt-4o-mini]", - "lineno": 75, - "outcome": "passed", - "keywords": [ - "test_chat_streaming_image[input_output0-gpt-4o-mini]", - "parametrize", - "pytestmark", - "input_output0-gpt-4o-mini", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.018141582957468927, - "outcome": "passed" - }, - "call": { - "duration": 2.297856790944934, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0005075830267742276, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-3.3-8B-Instruct]", - "lineno": 95, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_structured_output[input_output0-Llama-3.3-8B-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-3.3-8B-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.017144332989118993, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider openai does not support model Llama-3.3-8B-Instruct')" - }, - "teardown": { - "duration": 0.0006829580524936318, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-3.3-70B-Instruct]", - "lineno": 95, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_structured_output[input_output0-Llama-3.3-70B-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-3.3-70B-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": 
{ - "duration": 0.009827250032685697, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider openai does not support model Llama-3.3-70B-Instruct')" - }, - "teardown": { - "duration": 0.00024204188957810402, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E]", - "lineno": 95, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Scout-17B-16E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.006737958989106119, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider openai does not support model Llama-4-Scout-17B-16E')" - }, - "teardown": { - "duration": 0.00022729102056473494, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "lineno": 95, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Scout-17B-16E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.006030917051248252, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider openai does not support model Llama-4-Scout-17B-16E-Instruct')" - }, - "teardown": { - "duration": 0.00022229203023016453, - "outcome": "passed" - } - }, - 
{ - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E]", - "lineno": 95, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Maverick-17B-128E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.009183833957649767, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider openai does not support model Llama-4-Maverick-17B-128E')" - }, - "teardown": { - "duration": 0.00022629194427281618, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "lineno": 95, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Maverick-17B-128E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.007097500027157366, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider openai does not support model Llama-4-Maverick-17B-128E-Instruct')" - }, - "teardown": { - "duration": 0.00826825003605336, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-gpt-4o]", - "lineno": 95, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_structured_output[input_output0-gpt-4o]", - "parametrize", - "pytestmark", - 
"input_output0-gpt-4o", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.006604874972254038, - "outcome": "passed" - }, - "call": { - "duration": 1.4057738750707358, - "outcome": "passed" - }, - "teardown": { - "duration": 0.000506040989421308, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-gpt-4o-mini]", - "lineno": 95, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_structured_output[input_output0-gpt-4o-mini]", - "parametrize", - "pytestmark", - "input_output0-gpt-4o-mini", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.015966624952852726, - "outcome": "passed" - }, - "call": { - "duration": 0.540478374925442, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0009536249563097954, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-3.3-8B-Instruct]", - "lineno": 95, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_structured_output[input_output1-Llama-3.3-8B-Instruct]", - "parametrize", - "pytestmark", - "input_output1-Llama-3.3-8B-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.020631707971915603, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider openai does not support model Llama-3.3-8B-Instruct')" - }, - "teardown": { - "duration": 0.0004928340204060078, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-3.3-70B-Instruct]", - "lineno": 95, - 
"outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_structured_output[input_output1-Llama-3.3-70B-Instruct]", - "parametrize", - "pytestmark", - "input_output1-Llama-3.3-70B-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.016745459055528045, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider openai does not support model Llama-3.3-70B-Instruct')" - }, - "teardown": { - "duration": 0.0003412909572944045, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E]", - "lineno": 95, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E]", - "parametrize", - "pytestmark", - "input_output1-Llama-4-Scout-17B-16E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.012252667103894055, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider openai does not support model Llama-4-Scout-17B-16E')" - }, - "teardown": { - "duration": 0.00028650008607655764, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E-Instruct]", - "lineno": 95, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E-Instruct]", - "parametrize", - "pytestmark", - "input_output1-Llama-4-Scout-17B-16E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.01128904102370143, - 
"outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider openai does not support model Llama-4-Scout-17B-16E-Instruct')" - }, - "teardown": { - "duration": 0.00027041707653552294, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E]", - "lineno": 95, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E]", - "parametrize", - "pytestmark", - "input_output1-Llama-4-Maverick-17B-128E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.009191332967020571, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider openai does not support model Llama-4-Maverick-17B-128E')" - }, - "teardown": { - "duration": 0.0002074999501928687, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E-Instruct]", - "lineno": 95, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E-Instruct]", - "parametrize", - "pytestmark", - "input_output1-Llama-4-Maverick-17B-128E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.007687666919082403, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider openai does not support model Llama-4-Maverick-17B-128E-Instruct')" - }, - "teardown": { - "duration": 0.0002027079463005066, - "outcome": "passed" - } - }, - { 
- "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-gpt-4o]", - "lineno": 95, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_structured_output[input_output1-gpt-4o]", - "parametrize", - "pytestmark", - "input_output1-gpt-4o", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.007542708073742688, - "outcome": "passed" - }, - "call": { - "duration": 4.244797708000988, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0012778330128639936, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-gpt-4o-mini]", - "lineno": 95, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_structured_output[input_output1-gpt-4o-mini]", - "parametrize", - "pytestmark", - "input_output1-gpt-4o-mini", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.026919999974779785, - "outcome": "passed" - }, - "call": { - "duration": 9.006108874920756, - "outcome": "passed" - }, - "teardown": { - "duration": 0.00046324997674673796, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-3.3-8B-Instruct]", - "lineno": 117, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_structured_output[input_output0-Llama-3.3-8B-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-3.3-8B-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.01554666692391038, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider openai does not support model 
Llama-3.3-8B-Instruct')" - }, - "teardown": { - "duration": 0.0004023330984637141, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-3.3-70B-Instruct]", - "lineno": 117, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_structured_output[input_output0-Llama-3.3-70B-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-3.3-70B-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.007354958914220333, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider openai does not support model Llama-3.3-70B-Instruct')" - }, - "teardown": { - "duration": 0.0002900830004364252, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E]", - "lineno": 117, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Scout-17B-16E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.017274250043556094, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider openai does not support model Llama-4-Scout-17B-16E')" - }, - "teardown": { - "duration": 0.0002668329980224371, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "lineno": 117, - "outcome": "skipped", - "keywords": [ - 
"test_chat_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Scout-17B-16E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.006813667016103864, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider openai does not support model Llama-4-Scout-17B-16E-Instruct')" - }, - "teardown": { - "duration": 0.00024500000290572643, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E]", - "lineno": 117, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Maverick-17B-128E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.007385291974060237, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider openai does not support model Llama-4-Maverick-17B-128E')" - }, - "teardown": { - "duration": 0.00017024995759129524, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "lineno": 117, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Maverick-17B-128E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.00857366609852761, - 
"outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider openai does not support model Llama-4-Maverick-17B-128E-Instruct')" - }, - "teardown": { - "duration": 0.00016850000247359276, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-gpt-4o]", - "lineno": 117, - "outcome": "passed", - "keywords": [ - "test_chat_streaming_structured_output[input_output0-gpt-4o]", - "parametrize", - "pytestmark", - "input_output0-gpt-4o", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.005570041947066784, - "outcome": "passed" - }, - "call": { - "duration": 0.8564215000951663, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0004029169213026762, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-gpt-4o-mini]", - "lineno": 117, - "outcome": "passed", - "keywords": [ - "test_chat_streaming_structured_output[input_output0-gpt-4o-mini]", - "parametrize", - "pytestmark", - "input_output0-gpt-4o-mini", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.00786762498319149, - "outcome": "passed" - }, - "call": { - "duration": 0.6419672920601442, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0005102079594507813, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-3.3-8B-Instruct]", - "lineno": 117, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_structured_output[input_output1-Llama-3.3-8B-Instruct]", - "parametrize", - "pytestmark", - "input_output1-Llama-3.3-8B-Instruct", - "test_chat_completion.py", - 
"openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.017147499951533973, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider openai does not support model Llama-3.3-8B-Instruct')" - }, - "teardown": { - "duration": 0.00032350001856684685, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-3.3-70B-Instruct]", - "lineno": 117, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_structured_output[input_output1-Llama-3.3-70B-Instruct]", - "parametrize", - "pytestmark", - "input_output1-Llama-3.3-70B-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.01194737502373755, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider openai does not support model Llama-3.3-70B-Instruct')" - }, - "teardown": { - "duration": 0.0005004579434171319, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E]", - "lineno": 117, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E]", - "parametrize", - "pytestmark", - "input_output1-Llama-4-Scout-17B-16E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.010250666993670166, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider openai does not support model Llama-4-Scout-17B-16E')" - }, - "teardown": { - "duration": 0.00022554199676960707, - 
"outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E-Instruct]", - "lineno": 117, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E-Instruct]", - "parametrize", - "pytestmark", - "input_output1-Llama-4-Scout-17B-16E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.007847042055800557, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider openai does not support model Llama-4-Scout-17B-16E-Instruct')" - }, - "teardown": { - "duration": 0.000283458037301898, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E]", - "lineno": 117, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E]", - "parametrize", - "pytestmark", - "input_output1-Llama-4-Maverick-17B-128E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.008078000042587519, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider openai does not support model Llama-4-Maverick-17B-128E')" - }, - "teardown": { - "duration": 0.0001794169656932354, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E-Instruct]", - "lineno": 117, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E-Instruct]", - 
"parametrize", - "pytestmark", - "input_output1-Llama-4-Maverick-17B-128E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.007204750087112188, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider openai does not support model Llama-4-Maverick-17B-128E-Instruct')" - }, - "teardown": { - "duration": 0.00017725001089274883, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-gpt-4o]", - "lineno": 117, - "outcome": "passed", - "keywords": [ - "test_chat_streaming_structured_output[input_output1-gpt-4o]", - "parametrize", - "pytestmark", - "input_output1-gpt-4o", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.006797667010687292, - "outcome": "passed" - }, - "call": { - "duration": 5.411579457926564, - "outcome": "passed" - }, - "teardown": { - "duration": 0.001134666963480413, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-gpt-4o-mini]", - "lineno": 117, - "outcome": "passed", - "keywords": [ - "test_chat_streaming_structured_output[input_output1-gpt-4o-mini]", - "parametrize", - "pytestmark", - "input_output1-gpt-4o-mini", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.025059624924324453, - "outcome": "passed" - }, - "call": { - "duration": 9.112342999898829, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0009202499641105533, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-Llama-3.3-70B-Instruct]", - "lineno": 
138, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_tool_calling[input_output0-Llama-3.3-70B-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-3.3-70B-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.024287916952744126, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 139, 'Skipped: Provider openai does not support model Llama-3.3-70B-Instruct')" - }, - "teardown": { - "duration": 0.00015587499365210533, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-Llama-4-Scout-17B-16E]", - "lineno": 138, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_tool_calling[input_output0-Llama-4-Scout-17B-16E]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Scout-17B-16E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.006531457998789847, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 139, 'Skipped: Provider openai does not support model Llama-4-Scout-17B-16E')" - }, - "teardown": { - "duration": 0.00014670798555016518, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "lineno": 138, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_tool_calling[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Scout-17B-16E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.006190375075675547, - "outcome": 
"skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 139, 'Skipped: Provider openai does not support model Llama-4-Scout-17B-16E-Instruct')" - }, - "teardown": { - "duration": 0.0001603750279173255, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-Llama-4-Maverick-17B-128E]", - "lineno": 138, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_tool_calling[input_output0-Llama-4-Maverick-17B-128E]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Maverick-17B-128E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.005670750048011541, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 139, 'Skipped: Provider openai does not support model Llama-4-Maverick-17B-128E')" - }, - "teardown": { - "duration": 0.0001479999627918005, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "lineno": 138, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_tool_calling[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Maverick-17B-128E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.005662833107635379, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 139, 'Skipped: Provider openai does not support model Llama-4-Maverick-17B-128E-Instruct')" - }, - "teardown": { - "duration": 0.0001480829669162631, - "outcome": "passed" - } - }, - { - "nodeid": 
"tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-gpt-4o]", - "lineno": 138, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_tool_calling[input_output0-gpt-4o]", - "parametrize", - "pytestmark", - "input_output0-gpt-4o", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.00573637499473989, - "outcome": "passed" - }, - "call": { - "duration": 0.6269576249178499, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0010142088867723942, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-gpt-4o-mini]", - "lineno": 138, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_tool_calling[input_output0-gpt-4o-mini]", - "parametrize", - "pytestmark", - "input_output0-gpt-4o-mini", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.01623620803002268, - "outcome": "passed" - }, - "call": { - "duration": 0.7144521250156686, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0011040839599445462, - "outcome": "passed" - } - } - ] -} diff --git a/tests/verifications/test_results/openai_1744264304.json b/tests/verifications/test_results/openai_1744264304.json new file mode 100644 index 000000000..fe9c2fcac --- /dev/null +++ b/tests/verifications/test_results/openai_1744264304.json @@ -0,0 +1,868 @@ +{ + "created": 1744264338.9923031, + "duration": 32.825536012649536, + "exitcode": 0, + "root": "/Users/erichuang/projects/llama-stack", + "environment": {}, + "summary": { + "passed": 22, + "total": 22, + "collected": 22 + }, + "collectors": [ + { + "nodeid": "", + "outcome": "passed", + "result": [ + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py", + "type": "Module" + } + ] + }, + { + "nodeid": 
"tests/verifications/openai_api/test_chat_completion.py", + "outcome": "passed", + "result": [ + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[gpt-4o-earth]", + "type": "Function", + "lineno": 72 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[gpt-4o-saturn]", + "type": "Function", + "lineno": 72 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[gpt-4o-mini-earth]", + "type": "Function", + "lineno": 72 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[gpt-4o-mini-saturn]", + "type": "Function", + "lineno": 72 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[gpt-4o-earth]", + "type": "Function", + "lineno": 91 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[gpt-4o-saturn]", + "type": "Function", + "lineno": 91 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[gpt-4o-mini-earth]", + "type": "Function", + "lineno": 91 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[gpt-4o-mini-saturn]", + "type": "Function", + "lineno": 91 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[gpt-4o-case0]", + "type": "Function", + "lineno": 115 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[gpt-4o-mini-case0]", + "type": "Function", + "lineno": 115 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[gpt-4o-case0]", + "type": "Function", + "lineno": 134 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[gpt-4o-mini-case0]", + 
"type": "Function", + "lineno": 134 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[gpt-4o-calendar]", + "type": "Function", + "lineno": 158 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[gpt-4o-math]", + "type": "Function", + "lineno": 158 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[gpt-4o-mini-calendar]", + "type": "Function", + "lineno": 158 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[gpt-4o-mini-math]", + "type": "Function", + "lineno": 158 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[gpt-4o-calendar]", + "type": "Function", + "lineno": 181 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[gpt-4o-math]", + "type": "Function", + "lineno": 181 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[gpt-4o-mini-calendar]", + "type": "Function", + "lineno": 181 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[gpt-4o-mini-math]", + "type": "Function", + "lineno": 181 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[gpt-4o-case0]", + "type": "Function", + "lineno": 203 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[gpt-4o-mini-case0]", + "type": "Function", + "lineno": 203 + } + ] + } + ], + "tests": [ + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[gpt-4o-earth]", + "lineno": 72, + "outcome": "passed", + "keywords": [ + 
"test_chat_non_streaming_basic[gpt-4o-earth]", + "parametrize", + "pytestmark", + "gpt-4o-earth", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "gpt-4o", + "case_id": "earth" + }, + "setup": { + "duration": 0.05381445901002735, + "outcome": "passed" + }, + "call": { + "duration": 0.49848275003023446, + "outcome": "passed" + }, + "teardown": { + "duration": 0.00018287496641278267, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[gpt-4o-saturn]", + "lineno": 72, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_basic[gpt-4o-saturn]", + "parametrize", + "pytestmark", + "gpt-4o-saturn", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "gpt-4o", + "case_id": "saturn" + }, + "setup": { + "duration": 0.007965500000864267, + "outcome": "passed" + }, + "call": { + "duration": 0.9293275829404593, + "outcome": "passed" + }, + "teardown": { + "duration": 0.00018229195848107338, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[gpt-4o-mini-earth]", + "lineno": 72, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_basic[gpt-4o-mini-earth]", + "parametrize", + "pytestmark", + "gpt-4o-mini-earth", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "gpt-4o-mini", + "case_id": "earth" + }, + "setup": { + "duration": 0.00875679193995893, + "outcome": "passed" + }, + "call": { + "duration": 0.5793640419142321, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0005307920509949327, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[gpt-4o-mini-saturn]", + 
"lineno": 72, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_basic[gpt-4o-mini-saturn]", + "parametrize", + "pytestmark", + "gpt-4o-mini-saturn", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "gpt-4o-mini", + "case_id": "saturn" + }, + "setup": { + "duration": 0.01076845801435411, + "outcome": "passed" + }, + "call": { + "duration": 0.8752291660057381, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0004834589781239629, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[gpt-4o-earth]", + "lineno": 91, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_basic[gpt-4o-earth]", + "parametrize", + "pytestmark", + "gpt-4o-earth", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "gpt-4o", + "case_id": "earth" + }, + "setup": { + "duration": 0.01662245800253004, + "outcome": "passed" + }, + "call": { + "duration": 0.8336971249664202, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0024086670018732548, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[gpt-4o-saturn]", + "lineno": 91, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_basic[gpt-4o-saturn]", + "parametrize", + "pytestmark", + "gpt-4o-saturn", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "gpt-4o", + "case_id": "saturn" + }, + "setup": { + "duration": 0.009416291955858469, + "outcome": "passed" + }, + "call": { + "duration": 0.43594495789147913, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0009131249971687794, + "outcome": "passed" + } + }, + { + "nodeid": 
"tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[gpt-4o-mini-earth]", + "lineno": 91, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_basic[gpt-4o-mini-earth]", + "parametrize", + "pytestmark", + "gpt-4o-mini-earth", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "gpt-4o-mini", + "case_id": "earth" + }, + "setup": { + "duration": 0.013155042077414691, + "outcome": "passed" + }, + "call": { + "duration": 0.6119836670113727, + "outcome": "passed" + }, + "teardown": { + "duration": 0.00023804197553545237, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[gpt-4o-mini-saturn]", + "lineno": 91, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_basic[gpt-4o-mini-saturn]", + "parametrize", + "pytestmark", + "gpt-4o-mini-saturn", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "gpt-4o-mini", + "case_id": "saturn" + }, + "setup": { + "duration": 0.009004916995763779, + "outcome": "passed" + }, + "call": { + "duration": 0.8327413749648258, + "outcome": "passed" + }, + "teardown": { + "duration": 0.00046841695439070463, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[gpt-4o-case0]", + "lineno": 115, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_image[gpt-4o-case0]", + "parametrize", + "pytestmark", + "gpt-4o-case0", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "gpt-4o", + "case_id": "case0" + }, + "setup": { + "duration": 0.009574208059348166, + "outcome": "passed" + }, + "call": { + "duration": 2.221839000005275, + "outcome": "passed" + }, + "teardown": { + "duration": 0.00015945907216519117, 
+ "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[gpt-4o-mini-case0]", + "lineno": 115, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_image[gpt-4o-mini-case0]", + "parametrize", + "pytestmark", + "gpt-4o-mini-case0", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "gpt-4o-mini", + "case_id": "case0" + }, + "setup": { + "duration": 0.0084402080392465, + "outcome": "passed" + }, + "call": { + "duration": 2.298736457945779, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0002423750702291727, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[gpt-4o-case0]", + "lineno": 134, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_image[gpt-4o-case0]", + "parametrize", + "pytestmark", + "gpt-4o-case0", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "gpt-4o", + "case_id": "case0" + }, + "setup": { + "duration": 0.007330416003242135, + "outcome": "passed" + }, + "call": { + "duration": 4.062959833070636, + "outcome": "passed" + }, + "teardown": { + "duration": 0.00015470804646611214, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[gpt-4o-mini-case0]", + "lineno": 134, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_image[gpt-4o-mini-case0]", + "parametrize", + "pytestmark", + "gpt-4o-mini-case0", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "gpt-4o-mini", + "case_id": "case0" + }, + "setup": { + "duration": 0.019998832955025136, + "outcome": "passed" + }, + "call": { + "duration": 2.609432084020227, + "outcome": "passed" + }, + "teardown": 
{ + "duration": 0.005618917057290673, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[gpt-4o-calendar]", + "lineno": 158, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_structured_output[gpt-4o-calendar]", + "parametrize", + "pytestmark", + "gpt-4o-calendar", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "gpt-4o", + "case_id": "calendar" + }, + "setup": { + "duration": 0.00867662497330457, + "outcome": "passed" + }, + "call": { + "duration": 0.6856697499752045, + "outcome": "passed" + }, + "teardown": { + "duration": 0.00018445902969688177, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[gpt-4o-math]", + "lineno": 158, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_structured_output[gpt-4o-math]", + "parametrize", + "pytestmark", + "gpt-4o-math", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "gpt-4o", + "case_id": "math" + }, + "setup": { + "duration": 0.01139050000347197, + "outcome": "passed" + }, + "call": { + "duration": 2.764390083961189, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0003164170775562525, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[gpt-4o-mini-calendar]", + "lineno": 158, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_structured_output[gpt-4o-mini-calendar]", + "parametrize", + "pytestmark", + "gpt-4o-mini-calendar", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "gpt-4o-mini", + "case_id": "calendar" + }, + "setup": { + "duration": 
0.01321374997496605, + "outcome": "passed" + }, + "call": { + "duration": 0.8284227909753099, + "outcome": "passed" + }, + "teardown": { + "duration": 0.00030170800164341927, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[gpt-4o-mini-math]", + "lineno": 158, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_structured_output[gpt-4o-mini-math]", + "parametrize", + "pytestmark", + "gpt-4o-mini-math", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "gpt-4o-mini", + "case_id": "math" + }, + "setup": { + "duration": 0.013477458036504686, + "outcome": "passed" + }, + "call": { + "duration": 2.4146235829684883, + "outcome": "passed" + }, + "teardown": { + "duration": 0.00025754200760275126, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[gpt-4o-calendar]", + "lineno": 181, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_structured_output[gpt-4o-calendar]", + "parametrize", + "pytestmark", + "gpt-4o-calendar", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "gpt-4o", + "case_id": "calendar" + }, + "setup": { + "duration": 0.006940583931282163, + "outcome": "passed" + }, + "call": { + "duration": 0.5102092920569703, + "outcome": "passed" + }, + "teardown": { + "duration": 0.00023379107005894184, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[gpt-4o-math]", + "lineno": 181, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_structured_output[gpt-4o-math]", + "parametrize", + "pytestmark", + "gpt-4o-math", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + 
"llama-stack", + "" + ], + "metadata": { + "model": "gpt-4o", + "case_id": "math" + }, + "setup": { + "duration": 0.007166999974288046, + "outcome": "passed" + }, + "call": { + "duration": 3.5751801669830456, + "outcome": "passed" + }, + "teardown": { + "duration": 0.00015041697770357132, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[gpt-4o-mini-calendar]", + "lineno": 181, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_structured_output[gpt-4o-mini-calendar]", + "parametrize", + "pytestmark", + "gpt-4o-mini-calendar", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "gpt-4o-mini", + "case_id": "calendar" + }, + "setup": { + "duration": 0.010652625001966953, + "outcome": "passed" + }, + "call": { + "duration": 0.6648182499920949, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0008647920330986381, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[gpt-4o-mini-math]", + "lineno": 181, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_structured_output[gpt-4o-mini-math]", + "parametrize", + "pytestmark", + "gpt-4o-mini-math", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "gpt-4o-mini", + "case_id": "math" + }, + "setup": { + "duration": 0.007372208056040108, + "outcome": "passed" + }, + "call": { + "duration": 2.80747462506406, + "outcome": "passed" + }, + "teardown": { + "duration": 0.00028124998789280653, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[gpt-4o-case0]", + "lineno": 203, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_tool_calling[gpt-4o-case0]", + 
"parametrize", + "pytestmark", + "gpt-4o-case0", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "gpt-4o", + "case_id": "case0" + }, + "setup": { + "duration": 0.01625587500166148, + "outcome": "passed" + }, + "call": { + "duration": 0.6878769160248339, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0002637499710544944, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[gpt-4o-mini-case0]", + "lineno": 203, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_tool_calling[gpt-4o-mini-case0]", + "parametrize", + "pytestmark", + "gpt-4o-mini-case0", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "gpt-4o-mini", + "case_id": "case0" + }, + "setup": { + "duration": 0.008817250025458634, + "outcome": "passed" + }, + "call": { + "duration": 0.7181202919455245, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0017147079342976213, + "outcome": "passed" + } + } + ] +} diff --git a/tests/verifications/test_results/together_1744154399.json b/tests/verifications/test_results/together_1744154399.json deleted file mode 100644 index ae801e83b..000000000 --- a/tests/verifications/test_results/together_1744154399.json +++ /dev/null @@ -1,2830 +0,0 @@ -{ - "created": 1744154470.9868789, - "duration": 59.6187219619751, - "exitcode": 1, - "root": "/Users/erichuang/projects/llama-stack", - "environment": {}, - "summary": { - "skipped": 52, - "passed": 21, - "failed": 10, - "total": 83, - "collected": 83 - }, - "collectors": [ - { - "nodeid": "", - "outcome": "passed", - "result": [ - { - "nodeid": "tests/verifications/openai/test_chat_completion.py", - "type": "Module" - } - ] - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py", - "outcome": "passed", - "result": [ - { - "nodeid": 
"tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-3.3-8B-Instruct]", - "type": "Function", - "lineno": 25 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-3.3-70B-Instruct]", - "type": "Function", - "lineno": 25 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-4-Scout-17B-16E]", - "type": "Function", - "lineno": 25 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "type": "Function", - "lineno": 25 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-4-Maverick-17B-128E]", - "type": "Function", - "lineno": 25 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "type": "Function", - "lineno": 25 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-gpt-4o]", - "type": "Function", - "lineno": 25 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-gpt-4o-mini]", - "type": "Function", - "lineno": 25 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-3.3-8B-Instruct]", - "type": "Function", - "lineno": 25 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-3.3-70B-Instruct]", - "type": "Function", - "lineno": 25 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-4-Scout-17B-16E]", - "type": "Function", - "lineno": 25 - }, - { - "nodeid": 
"tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-4-Scout-17B-16E-Instruct]", - "type": "Function", - "lineno": 25 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-4-Maverick-17B-128E]", - "type": "Function", - "lineno": 25 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-4-Maverick-17B-128E-Instruct]", - "type": "Function", - "lineno": 25 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-gpt-4o]", - "type": "Function", - "lineno": 25 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-gpt-4o-mini]", - "type": "Function", - "lineno": 25 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-3.3-8B-Instruct]", - "type": "Function", - "lineno": 40 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-3.3-70B-Instruct]", - "type": "Function", - "lineno": 40 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-4-Scout-17B-16E]", - "type": "Function", - "lineno": 40 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "type": "Function", - "lineno": 40 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-4-Maverick-17B-128E]", - "type": "Function", - "lineno": 40 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "type": "Function", - "lineno": 40 - }, - { - "nodeid": 
"tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-gpt-4o]", - "type": "Function", - "lineno": 40 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-gpt-4o-mini]", - "type": "Function", - "lineno": 40 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-3.3-8B-Instruct]", - "type": "Function", - "lineno": 40 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-3.3-70B-Instruct]", - "type": "Function", - "lineno": 40 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-4-Scout-17B-16E]", - "type": "Function", - "lineno": 40 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-4-Scout-17B-16E-Instruct]", - "type": "Function", - "lineno": 40 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-4-Maverick-17B-128E]", - "type": "Function", - "lineno": 40 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-4-Maverick-17B-128E-Instruct]", - "type": "Function", - "lineno": 40 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-gpt-4o]", - "type": "Function", - "lineno": 40 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-gpt-4o-mini]", - "type": "Function", - "lineno": 40 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-Llama-4-Scout-17B-16E]", - "type": "Function", - "lineno": 60 - }, - { - "nodeid": 
"tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "type": "Function", - "lineno": 60 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-Llama-4-Maverick-17B-128E]", - "type": "Function", - "lineno": 60 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "type": "Function", - "lineno": 60 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-gpt-4o]", - "type": "Function", - "lineno": 60 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-gpt-4o-mini]", - "type": "Function", - "lineno": 60 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-Llama-4-Scout-17B-16E]", - "type": "Function", - "lineno": 75 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "type": "Function", - "lineno": 75 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-Llama-4-Maverick-17B-128E]", - "type": "Function", - "lineno": 75 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "type": "Function", - "lineno": 75 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-gpt-4o]", - "type": "Function", - "lineno": 75 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-gpt-4o-mini]", - "type": "Function", - "lineno": 75 - }, - { - "nodeid": 
"tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-3.3-8B-Instruct]", - "type": "Function", - "lineno": 95 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-3.3-70B-Instruct]", - "type": "Function", - "lineno": 95 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E]", - "type": "Function", - "lineno": 95 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "type": "Function", - "lineno": 95 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E]", - "type": "Function", - "lineno": 95 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "type": "Function", - "lineno": 95 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-gpt-4o]", - "type": "Function", - "lineno": 95 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-gpt-4o-mini]", - "type": "Function", - "lineno": 95 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-3.3-8B-Instruct]", - "type": "Function", - "lineno": 95 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-3.3-70B-Instruct]", - "type": "Function", - "lineno": 95 - }, - { - "nodeid": 
"tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E]", - "type": "Function", - "lineno": 95 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E-Instruct]", - "type": "Function", - "lineno": 95 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E]", - "type": "Function", - "lineno": 95 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E-Instruct]", - "type": "Function", - "lineno": 95 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-gpt-4o]", - "type": "Function", - "lineno": 95 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-gpt-4o-mini]", - "type": "Function", - "lineno": 95 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-3.3-8B-Instruct]", - "type": "Function", - "lineno": 117 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-3.3-70B-Instruct]", - "type": "Function", - "lineno": 117 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E]", - "type": "Function", - "lineno": 117 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "type": "Function", - "lineno": 117 - }, - { - "nodeid": 
"tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E]", - "type": "Function", - "lineno": 117 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "type": "Function", - "lineno": 117 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-gpt-4o]", - "type": "Function", - "lineno": 117 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-gpt-4o-mini]", - "type": "Function", - "lineno": 117 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-3.3-8B-Instruct]", - "type": "Function", - "lineno": 117 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-3.3-70B-Instruct]", - "type": "Function", - "lineno": 117 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E]", - "type": "Function", - "lineno": 117 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E-Instruct]", - "type": "Function", - "lineno": 117 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E]", - "type": "Function", - "lineno": 117 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E-Instruct]", - "type": "Function", - "lineno": 117 - }, - { - "nodeid": 
"tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-gpt-4o]", - "type": "Function", - "lineno": 117 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-gpt-4o-mini]", - "type": "Function", - "lineno": 117 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-Llama-3.3-70B-Instruct]", - "type": "Function", - "lineno": 138 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-Llama-4-Scout-17B-16E]", - "type": "Function", - "lineno": 138 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "type": "Function", - "lineno": 138 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-Llama-4-Maverick-17B-128E]", - "type": "Function", - "lineno": 138 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "type": "Function", - "lineno": 138 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-gpt-4o]", - "type": "Function", - "lineno": 138 - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-gpt-4o-mini]", - "type": "Function", - "lineno": 138 - } - ] - } - ], - "tests": [ - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-3.3-8B-Instruct]", - "lineno": 25, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_basic[input_output0-Llama-3.3-8B-Instruct]", - "parametrize", - "pytestmark", - 
"input_output0-Llama-3.3-8B-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.39231995795853436, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider together does not support model Llama-3.3-8B-Instruct')" - }, - "teardown": { - "duration": 0.0002014160854741931, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-3.3-70B-Instruct]", - "lineno": 25, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_basic[input_output0-Llama-3.3-70B-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-3.3-70B-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.0071710830088704824, - "outcome": "passed" - }, - "call": { - "duration": 0.7968309168936685, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0004362498875707388, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-4-Scout-17B-16E]", - "lineno": 25, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_basic[input_output0-Llama-4-Scout-17B-16E]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Scout-17B-16E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.012780916062183678, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider together does not support model Llama-4-Scout-17B-16E')" - }, - "teardown": { - "duration": 0.00029158301185816526, - "outcome": "passed" - } - }, - { - "nodeid": 
"tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "lineno": 25, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_basic[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Scout-17B-16E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.013563874992541969, - "outcome": "passed" - }, - "call": { - "duration": 0.5071627920260653, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0005456249928101897, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-4-Maverick-17B-128E]", - "lineno": 25, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_basic[input_output0-Llama-4-Maverick-17B-128E]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Maverick-17B-128E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.020708917058072984, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider together does not support model Llama-4-Maverick-17B-128E')" - }, - "teardown": { - "duration": 0.00030325003899633884, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "lineno": 25, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_basic[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Maverick-17B-128E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 
0.014170082984492183, - "outcome": "passed" - }, - "call": { - "duration": 1.2383921250002459, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0009597090538591146, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-gpt-4o]", - "lineno": 25, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_basic[input_output0-gpt-4o]", - "parametrize", - "pytestmark", - "input_output0-gpt-4o", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.013402250013314188, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider together does not support model gpt-4o')" - }, - "teardown": { - "duration": 0.00028245802968740463, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output0-gpt-4o-mini]", - "lineno": 25, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_basic[input_output0-gpt-4o-mini]", - "parametrize", - "pytestmark", - "input_output0-gpt-4o-mini", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.008693707990460098, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider together does not support model gpt-4o-mini')" - }, - "teardown": { - "duration": 0.00016249995678663254, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-3.3-8B-Instruct]", - "lineno": 25, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_basic[input_output1-Llama-3.3-8B-Instruct]", - "parametrize", - "pytestmark", - 
"input_output1-Llama-3.3-8B-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.005904874997213483, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider together does not support model Llama-3.3-8B-Instruct')" - }, - "teardown": { - "duration": 0.0001960420049726963, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-3.3-70B-Instruct]", - "lineno": 25, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_basic[input_output1-Llama-3.3-70B-Instruct]", - "parametrize", - "pytestmark", - "input_output1-Llama-3.3-70B-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.006532749976031482, - "outcome": "passed" - }, - "call": { - "duration": 0.5410778749501333, - "outcome": "passed" - }, - "teardown": { - "duration": 0.00019516597967594862, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-4-Scout-17B-16E]", - "lineno": 25, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_basic[input_output1-Llama-4-Scout-17B-16E]", - "parametrize", - "pytestmark", - "input_output1-Llama-4-Scout-17B-16E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.009374375105835497, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider together does not support model Llama-4-Scout-17B-16E')" - }, - "teardown": { - "duration": 0.00015524995978921652, - "outcome": "passed" - } - }, - { - "nodeid": 
"tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-4-Scout-17B-16E-Instruct]", - "lineno": 25, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_basic[input_output1-Llama-4-Scout-17B-16E-Instruct]", - "parametrize", - "pytestmark", - "input_output1-Llama-4-Scout-17B-16E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.007205875008367002, - "outcome": "passed" - }, - "call": { - "duration": 0.42584729101508856, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0009506250498816371, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-4-Maverick-17B-128E]", - "lineno": 25, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_basic[input_output1-Llama-4-Maverick-17B-128E]", - "parametrize", - "pytestmark", - "input_output1-Llama-4-Maverick-17B-128E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.029625958995893598, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider together does not support model Llama-4-Maverick-17B-128E')" - }, - "teardown": { - "duration": 0.0001860830234363675, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-Llama-4-Maverick-17B-128E-Instruct]", - "lineno": 25, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_basic[input_output1-Llama-4-Maverick-17B-128E-Instruct]", - "parametrize", - "pytestmark", - "input_output1-Llama-4-Maverick-17B-128E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 
0.023576707928441465, - "outcome": "passed" - }, - "call": { - "duration": 1.2249365829629824, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0004278330598026514, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-gpt-4o]", - "lineno": 25, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_basic[input_output1-gpt-4o]", - "parametrize", - "pytestmark", - "input_output1-gpt-4o", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.014816291979514062, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider together does not support model gpt-4o')" - }, - "teardown": { - "duration": 0.00029558304231613874, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_basic[input_output1-gpt-4o-mini]", - "lineno": 25, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_basic[input_output1-gpt-4o-mini]", - "parametrize", - "pytestmark", - "input_output1-gpt-4o-mini", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.012769333901815116, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 26, 'Skipped: Provider together does not support model gpt-4o-mini')" - }, - "teardown": { - "duration": 0.00024329195730388165, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-3.3-8B-Instruct]", - "lineno": 40, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_basic[input_output0-Llama-3.3-8B-Instruct]", - "parametrize", - "pytestmark", - 
"input_output0-Llama-3.3-8B-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.009145625052042305, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider together does not support model Llama-3.3-8B-Instruct')" - }, - "teardown": { - "duration": 0.00021195888984948397, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-3.3-70B-Instruct]", - "lineno": 40, - "outcome": "passed", - "keywords": [ - "test_chat_streaming_basic[input_output0-Llama-3.3-70B-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-3.3-70B-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.0133140409598127, - "outcome": "passed" - }, - "call": { - "duration": 0.7228892090497538, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0004301250446587801, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-4-Scout-17B-16E]", - "lineno": 40, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_basic[input_output0-Llama-4-Scout-17B-16E]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Scout-17B-16E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.013998750015161932, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider together does not support model Llama-4-Scout-17B-16E')" - }, - "teardown": { - "duration": 0.0002961249556392431, - "outcome": "passed" - } - }, - { - "nodeid": 
"tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "lineno": 40, - "outcome": "failed", - "keywords": [ - "test_chat_streaming_basic[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Scout-17B-16E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.012570249964483082, - "outcome": "passed" - }, - "call": { - "duration": 0.7193170419195667, - "outcome": "failed", - "crash": { - "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py", - "lineno": 54, - "message": "IndexError: list index out of range" - }, - "traceback": [ - { - "path": "tests/verifications/openai/test_chat_completion.py", - "lineno": 54, - "message": "IndexError" - } - ], - "longrepr": "openai_client = \ninput_output = {'input': {'messages': [{'content': 'Which planet do humans live on?', 'role': 'user'}]}, 'output': 'Earth'}\ncorrect_model_name = 'meta-llama/Llama-4-Scout-17B-16E-Instruct'\n\n @pytest.mark.parametrize(\"model\", chat_completion_test_cases[\"test_chat_basic\"][\"test_params\"][\"model\"])\n @pytest.mark.parametrize(\n \"input_output\",\n chat_completion_test_cases[\"test_chat_basic\"][\"test_params\"][\"input_output\"],\n )\n def test_chat_streaming_basic(openai_client, input_output, correct_model_name):\n response = openai_client.chat.completions.create(\n model=correct_model_name,\n messages=input_output[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai/test_chat_completion.py:54: IndexError" - }, - "teardown": { - "duration": 0.00022504094522446394, - "outcome": "passed" - } - }, - { - "nodeid": 
"tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-4-Maverick-17B-128E]", - "lineno": 40, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_basic[input_output0-Llama-4-Maverick-17B-128E]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Maverick-17B-128E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.006660082959569991, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider together does not support model Llama-4-Maverick-17B-128E')" - }, - "teardown": { - "duration": 0.0001445829402655363, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "lineno": 40, - "outcome": "failed", - "keywords": [ - "test_chat_streaming_basic[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Maverick-17B-128E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.021228999947197735, - "outcome": "passed" - }, - "call": { - "duration": 1.5670281670754775, - "outcome": "failed", - "crash": { - "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py", - "lineno": 54, - "message": "IndexError: list index out of range" - }, - "traceback": [ - { - "path": "tests/verifications/openai/test_chat_completion.py", - "lineno": 54, - "message": "IndexError" - } - ], - "longrepr": "openai_client = \ninput_output = {'input': {'messages': [{'content': 'Which planet do humans live on?', 'role': 'user'}]}, 'output': 'Earth'}\ncorrect_model_name = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\n\n @pytest.mark.parametrize(\"model\", 
chat_completion_test_cases[\"test_chat_basic\"][\"test_params\"][\"model\"])\n @pytest.mark.parametrize(\n \"input_output\",\n chat_completion_test_cases[\"test_chat_basic\"][\"test_params\"][\"input_output\"],\n )\n def test_chat_streaming_basic(openai_client, input_output, correct_model_name):\n response = openai_client.chat.completions.create(\n model=correct_model_name,\n messages=input_output[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai/test_chat_completion.py:54: IndexError" - }, - "teardown": { - "duration": 0.0004656669916585088, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-gpt-4o]", - "lineno": 40, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_basic[input_output0-gpt-4o]", - "parametrize", - "pytestmark", - "input_output0-gpt-4o", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.009595917072147131, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider together does not support model gpt-4o')" - }, - "teardown": { - "duration": 0.00025625003036111593, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output0-gpt-4o-mini]", - "lineno": 40, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_basic[input_output0-gpt-4o-mini]", - "parametrize", - "pytestmark", - "input_output0-gpt-4o-mini", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.009242708911187947, - "outcome": "skipped", - "longrepr": 
"('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider together does not support model gpt-4o-mini')" - }, - "teardown": { - "duration": 0.0002484159776940942, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-3.3-8B-Instruct]", - "lineno": 40, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_basic[input_output1-Llama-3.3-8B-Instruct]", - "parametrize", - "pytestmark", - "input_output1-Llama-3.3-8B-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.00905474997125566, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider together does not support model Llama-3.3-8B-Instruct')" - }, - "teardown": { - "duration": 0.00023312494158744812, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-3.3-70B-Instruct]", - "lineno": 40, - "outcome": "passed", - "keywords": [ - "test_chat_streaming_basic[input_output1-Llama-3.3-70B-Instruct]", - "parametrize", - "pytestmark", - "input_output1-Llama-3.3-70B-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.007183165987953544, - "outcome": "passed" - }, - "call": { - "duration": 1.0667660840554163, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0005163750611245632, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-4-Scout-17B-16E]", - "lineno": 40, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_basic[input_output1-Llama-4-Scout-17B-16E]", - "parametrize", - "pytestmark", - 
"input_output1-Llama-4-Scout-17B-16E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.05233616603072733, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider together does not support model Llama-4-Scout-17B-16E')" - }, - "teardown": { - "duration": 0.0003471659729257226, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-4-Scout-17B-16E-Instruct]", - "lineno": 40, - "outcome": "failed", - "keywords": [ - "test_chat_streaming_basic[input_output1-Llama-4-Scout-17B-16E-Instruct]", - "parametrize", - "pytestmark", - "input_output1-Llama-4-Scout-17B-16E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.015932541922666132, - "outcome": "passed" - }, - "call": { - "duration": 0.41540695796720684, - "outcome": "failed", - "crash": { - "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py", - "lineno": 54, - "message": "IndexError: list index out of range" - }, - "traceback": [ - { - "path": "tests/verifications/openai/test_chat_completion.py", - "lineno": 54, - "message": "IndexError" - } - ], - "longrepr": "openai_client = \ninput_output = {'input': {'messages': [{'content': 'Which planet has rings around it with a name starting with letter S?', 'role': 'user'}]}, 'output': 'Saturn'}\ncorrect_model_name = 'meta-llama/Llama-4-Scout-17B-16E-Instruct'\n\n @pytest.mark.parametrize(\"model\", chat_completion_test_cases[\"test_chat_basic\"][\"test_params\"][\"model\"])\n @pytest.mark.parametrize(\n \"input_output\",\n chat_completion_test_cases[\"test_chat_basic\"][\"test_params\"][\"input_output\"],\n )\n def test_chat_streaming_basic(openai_client, input_output, 
correct_model_name):\n response = openai_client.chat.completions.create(\n model=correct_model_name,\n messages=input_output[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai/test_chat_completion.py:54: IndexError" - }, - "teardown": { - "duration": 0.0002845840062946081, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-4-Maverick-17B-128E]", - "lineno": 40, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_basic[input_output1-Llama-4-Maverick-17B-128E]", - "parametrize", - "pytestmark", - "input_output1-Llama-4-Maverick-17B-128E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.007243875064887106, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider together does not support model Llama-4-Maverick-17B-128E')" - }, - "teardown": { - "duration": 0.00016258296091109514, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-Llama-4-Maverick-17B-128E-Instruct]", - "lineno": 40, - "outcome": "failed", - "keywords": [ - "test_chat_streaming_basic[input_output1-Llama-4-Maverick-17B-128E-Instruct]", - "parametrize", - "pytestmark", - "input_output1-Llama-4-Maverick-17B-128E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.009275624994188547, - "outcome": "passed" - }, - "call": { - "duration": 1.43309554096777, - "outcome": "failed", - "crash": { - "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py", - "lineno": 
54, - "message": "IndexError: list index out of range" - }, - "traceback": [ - { - "path": "tests/verifications/openai/test_chat_completion.py", - "lineno": 54, - "message": "IndexError" - } - ], - "longrepr": "openai_client = \ninput_output = {'input': {'messages': [{'content': 'Which planet has rings around it with a name starting with letter S?', 'role': 'user'}]}, 'output': 'Saturn'}\ncorrect_model_name = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\n\n @pytest.mark.parametrize(\"model\", chat_completion_test_cases[\"test_chat_basic\"][\"test_params\"][\"model\"])\n @pytest.mark.parametrize(\n \"input_output\",\n chat_completion_test_cases[\"test_chat_basic\"][\"test_params\"][\"input_output\"],\n )\n def test_chat_streaming_basic(openai_client, input_output, correct_model_name):\n response = openai_client.chat.completions.create(\n model=correct_model_name,\n messages=input_output[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai/test_chat_completion.py:54: IndexError" - }, - "teardown": { - "duration": 0.0003690000157803297, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-gpt-4o]", - "lineno": 40, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_basic[input_output1-gpt-4o]", - "parametrize", - "pytestmark", - "input_output1-gpt-4o", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.011570582981221378, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider together does not support model gpt-4o')" - }, - "teardown": { - "duration": 0.00024937500711530447, - "outcome": "passed" - } - }, - { - "nodeid": 
"tests/verifications/openai/test_chat_completion.py::test_chat_streaming_basic[input_output1-gpt-4o-mini]", - "lineno": 40, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_basic[input_output1-gpt-4o-mini]", - "parametrize", - "pytestmark", - "input_output1-gpt-4o-mini", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.010756584000773728, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 41, 'Skipped: Provider together does not support model gpt-4o-mini')" - }, - "teardown": { - "duration": 0.00026183295994997025, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-Llama-4-Scout-17B-16E]", - "lineno": 60, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_image[input_output0-Llama-4-Scout-17B-16E]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Scout-17B-16E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.008863041992299259, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 61, 'Skipped: Provider together does not support model Llama-4-Scout-17B-16E')" - }, - "teardown": { - "duration": 0.00023283297196030617, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "lineno": 60, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_image[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Scout-17B-16E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - 
"duration": 0.007975792046636343, - "outcome": "passed" - }, - "call": { - "duration": 2.1585817909799516, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0005107080796733499, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-Llama-4-Maverick-17B-128E]", - "lineno": 60, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_image[input_output0-Llama-4-Maverick-17B-128E]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Maverick-17B-128E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.05228079203516245, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 61, 'Skipped: Provider together does not support model Llama-4-Maverick-17B-128E')" - }, - "teardown": { - "duration": 0.0017226670170202851, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "lineno": 60, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_image[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Maverick-17B-128E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.009964749915525317, - "outcome": "passed" - }, - "call": { - "duration": 4.6593364590080455, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0009852920193225145, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-gpt-4o]", - "lineno": 60, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_image[input_output0-gpt-4o]", - "parametrize", - "pytestmark", 
- "input_output0-gpt-4o", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.023214041953906417, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 61, 'Skipped: Provider together does not support model gpt-4o')" - }, - "teardown": { - "duration": 0.0003567079547792673, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_image[input_output0-gpt-4o-mini]", - "lineno": 60, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_image[input_output0-gpt-4o-mini]", - "parametrize", - "pytestmark", - "input_output0-gpt-4o-mini", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.01705008395947516, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 61, 'Skipped: Provider together does not support model gpt-4o-mini')" - }, - "teardown": { - "duration": 0.0003085409989580512, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-Llama-4-Scout-17B-16E]", - "lineno": 75, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_image[input_output0-Llama-4-Scout-17B-16E]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Scout-17B-16E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.014711958006955683, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 76, 'Skipped: Provider together does not support model Llama-4-Scout-17B-16E')" - }, - "teardown": { - "duration": 0.0003121249610558152, - "outcome": "passed" - } - }, - { - 
"nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "lineno": 75, - "outcome": "failed", - "keywords": [ - "test_chat_streaming_image[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Scout-17B-16E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.01843333407305181, - "outcome": "passed" - }, - "call": { - "duration": 2.8683876669965684, - "outcome": "failed", - "crash": { - "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py", - "lineno": 89, - "message": "IndexError: list index out of range" - }, - "traceback": [ - { - "path": "tests/verifications/openai/test_chat_completion.py", - "lineno": 89, - "message": "IndexError" - } - ], - "longrepr": "openai_client = \ninput_output = {'input': {'messages': [{'content': [{'text': 'What is in this image?', 'type': 'text'}, {'image_url': {...}, 'type': 'image_url'}], 'role': 'user'}]}, 'output': 'llama'}\ncorrect_model_name = 'meta-llama/Llama-4-Scout-17B-16E-Instruct'\n\n @pytest.mark.parametrize(\"model\", chat_completion_test_cases[\"test_chat_image\"][\"test_params\"][\"model\"])\n @pytest.mark.parametrize(\n \"input_output\",\n chat_completion_test_cases[\"test_chat_image\"][\"test_params\"][\"input_output\"],\n )\n def test_chat_streaming_image(openai_client, input_output, correct_model_name):\n response = openai_client.chat.completions.create(\n model=correct_model_name,\n messages=input_output[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai/test_chat_completion.py:89: IndexError" - }, - "teardown": { - "duration": 0.00028662499971687794, - "outcome": "passed" - } - }, - { - "nodeid": 
"tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-Llama-4-Maverick-17B-128E]", - "lineno": 75, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_image[input_output0-Llama-4-Maverick-17B-128E]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Maverick-17B-128E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.00653208396397531, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 76, 'Skipped: Provider together does not support model Llama-4-Maverick-17B-128E')" - }, - "teardown": { - "duration": 0.00021291698794811964, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "lineno": 75, - "outcome": "failed", - "keywords": [ - "test_chat_streaming_image[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Maverick-17B-128E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.006028458010405302, - "outcome": "passed" - }, - "call": { - "duration": 4.981105040991679, - "outcome": "failed", - "crash": { - "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py", - "lineno": 89, - "message": "IndexError: list index out of range" - }, - "traceback": [ - { - "path": "tests/verifications/openai/test_chat_completion.py", - "lineno": 89, - "message": "IndexError" - } - ], - "longrepr": "openai_client = \ninput_output = {'input': {'messages': [{'content': [{'text': 'What is in this image?', 'type': 'text'}, {'image_url': {...}, 'type': 'image_url'}], 'role': 'user'}]}, 'output': 'llama'}\ncorrect_model_name = 
'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\n\n @pytest.mark.parametrize(\"model\", chat_completion_test_cases[\"test_chat_image\"][\"test_params\"][\"model\"])\n @pytest.mark.parametrize(\n \"input_output\",\n chat_completion_test_cases[\"test_chat_image\"][\"test_params\"][\"input_output\"],\n )\n def test_chat_streaming_image(openai_client, input_output, correct_model_name):\n response = openai_client.chat.completions.create(\n model=correct_model_name,\n messages=input_output[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai/test_chat_completion.py:89: IndexError" - }, - "teardown": { - "duration": 0.0010110830189660192, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-gpt-4o]", - "lineno": 75, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_image[input_output0-gpt-4o]", - "parametrize", - "pytestmark", - "input_output0-gpt-4o", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.01591233303770423, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 76, 'Skipped: Provider together does not support model gpt-4o')" - }, - "teardown": { - "duration": 0.0003783750580623746, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_image[input_output0-gpt-4o-mini]", - "lineno": 75, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_image[input_output0-gpt-4o-mini]", - "parametrize", - "pytestmark", - "input_output0-gpt-4o-mini", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.010691000032238662, 
- "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 76, 'Skipped: Provider together does not support model gpt-4o-mini')" - }, - "teardown": { - "duration": 0.00027445796877145767, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-3.3-8B-Instruct]", - "lineno": 95, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_structured_output[input_output0-Llama-3.3-8B-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-3.3-8B-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.01258529198821634, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider together does not support model Llama-3.3-8B-Instruct')" - }, - "teardown": { - "duration": 0.0002044580178335309, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-3.3-70B-Instruct]", - "lineno": 95, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_structured_output[input_output0-Llama-3.3-70B-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-3.3-70B-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.010904791066423059, - "outcome": "passed" - }, - "call": { - "duration": 0.8311828339938074, - "outcome": "passed" - }, - "teardown": { - "duration": 0.00048687495291233063, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E]", - "lineno": 95, - "outcome": "skipped", - 
"keywords": [ - "test_chat_non_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Scout-17B-16E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.029216791968792677, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider together does not support model Llama-4-Scout-17B-16E')" - }, - "teardown": { - "duration": 0.0002269580727443099, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "lineno": 95, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Scout-17B-16E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.013182583032175899, - "outcome": "passed" - }, - "call": { - "duration": 1.7446029160637408, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0008087089518085122, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E]", - "lineno": 95, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Maverick-17B-128E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.02009516698308289, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 
'Skipped: Provider together does not support model Llama-4-Maverick-17B-128E')" - }, - "teardown": { - "duration": 0.000320291961543262, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "lineno": 95, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Maverick-17B-128E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.015216833096928895, - "outcome": "passed" - }, - "call": { - "duration": 0.8049291669158265, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0005109170451760292, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-gpt-4o]", - "lineno": 95, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_structured_output[input_output0-gpt-4o]", - "parametrize", - "pytestmark", - "input_output0-gpt-4o", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.0171551660168916, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider together does not support model gpt-4o')" - }, - "teardown": { - "duration": 0.0005707499803975224, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output0-gpt-4o-mini]", - "lineno": 95, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_structured_output[input_output0-gpt-4o-mini]", - "parametrize", - "pytestmark", - "input_output0-gpt-4o-mini", - 
"test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.01131124992389232, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider together does not support model gpt-4o-mini')" - }, - "teardown": { - "duration": 0.0003044159384444356, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-3.3-8B-Instruct]", - "lineno": 95, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_structured_output[input_output1-Llama-3.3-8B-Instruct]", - "parametrize", - "pytestmark", - "input_output1-Llama-3.3-8B-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.0054290409898385406, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider together does not support model Llama-3.3-8B-Instruct')" - }, - "teardown": { - "duration": 0.00014645792543888092, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-3.3-70B-Instruct]", - "lineno": 95, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_structured_output[input_output1-Llama-3.3-70B-Instruct]", - "parametrize", - "pytestmark", - "input_output1-Llama-3.3-70B-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.011368000064976513, - "outcome": "passed" - }, - "call": { - "duration": 4.363120499998331, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0003998749889433384, - "outcome": "passed" - } - }, - { - "nodeid": 
"tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E]", - "lineno": 95, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E]", - "parametrize", - "pytestmark", - "input_output1-Llama-4-Scout-17B-16E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.04945958300959319, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider together does not support model Llama-4-Scout-17B-16E')" - }, - "teardown": { - "duration": 0.0002401659730821848, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E-Instruct]", - "lineno": 95, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E-Instruct]", - "parametrize", - "pytestmark", - "input_output1-Llama-4-Scout-17B-16E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.011090958025306463, - "outcome": "passed" - }, - "call": { - "duration": 4.699277375009842, - "outcome": "passed" - }, - "teardown": { - "duration": 0.000689250067807734, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E]", - "lineno": 95, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E]", - "parametrize", - "pytestmark", - "input_output1-Llama-4-Maverick-17B-128E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": 
{ - "duration": 0.020744459005072713, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider together does not support model Llama-4-Maverick-17B-128E')" - }, - "teardown": { - "duration": 0.0001836250303313136, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E-Instruct]", - "lineno": 95, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E-Instruct]", - "parametrize", - "pytestmark", - "input_output1-Llama-4-Maverick-17B-128E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.005926624988205731, - "outcome": "passed" - }, - "call": { - "duration": 2.7814464160474017, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0009554170537739992, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-gpt-4o]", - "lineno": 95, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_structured_output[input_output1-gpt-4o]", - "parametrize", - "pytestmark", - "input_output1-gpt-4o", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.03027112502604723, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider together does not support model gpt-4o')" - }, - "teardown": { - "duration": 0.0003245410043746233, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_structured_output[input_output1-gpt-4o-mini]", - "lineno": 95, - "outcome": 
"skipped", - "keywords": [ - "test_chat_non_streaming_structured_output[input_output1-gpt-4o-mini]", - "parametrize", - "pytestmark", - "input_output1-gpt-4o-mini", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.009138708002865314, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 96, 'Skipped: Provider together does not support model gpt-4o-mini')" - }, - "teardown": { - "duration": 0.0001919999485835433, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-3.3-8B-Instruct]", - "lineno": 117, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_structured_output[input_output0-Llama-3.3-8B-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-3.3-8B-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.0064505410846322775, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider together does not support model Llama-3.3-8B-Instruct')" - }, - "teardown": { - "duration": 0.00015720794908702374, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-3.3-70B-Instruct]", - "lineno": 117, - "outcome": "passed", - "keywords": [ - "test_chat_streaming_structured_output[input_output0-Llama-3.3-70B-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-3.3-70B-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.00582624995149672, - "outcome": "passed" - }, - "call": { - "duration": 0.8302567919017747, - 
"outcome": "passed" - }, - "teardown": { - "duration": 0.00020354206208139658, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E]", - "lineno": 117, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Scout-17B-16E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.014151416951790452, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider together does not support model Llama-4-Scout-17B-16E')" - }, - "teardown": { - "duration": 0.00034970801789313555, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "lineno": 117, - "outcome": "failed", - "keywords": [ - "test_chat_streaming_structured_output[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Scout-17B-16E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.012150791939347982, - "outcome": "passed" - }, - "call": { - "duration": 0.7078855830477551, - "outcome": "failed", - "crash": { - "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py", - "lineno": 135, - "message": "IndexError: list index out of range" - }, - "traceback": [ - { - "path": "tests/verifications/openai/test_chat_completion.py", - "lineno": 135, - "message": "IndexError" - } - ], - "longrepr": "openai_client = \ninput_output = {'input': {'messages': [{'content': 'Extract the event information.', 'role': 
'system'}, {'content': 'Alice and Bob ar...articipants'], 'title': 'CalendarEvent', 'type': 'object'}}, 'type': 'json_schema'}}, 'output': 'valid_calendar_event'}\ncorrect_model_name = 'meta-llama/Llama-4-Scout-17B-16E-Instruct'\n\n @pytest.mark.parametrize(\n \"model\",\n chat_completion_test_cases[\"test_chat_structured_output\"][\"test_params\"][\"model\"],\n )\n @pytest.mark.parametrize(\n \"input_output\",\n chat_completion_test_cases[\"test_chat_structured_output\"][\"test_params\"][\"input_output\"],\n )\n def test_chat_streaming_structured_output(openai_client, input_output, correct_model_name):\n response = openai_client.chat.completions.create(\n model=correct_model_name,\n messages=input_output[\"input\"][\"messages\"],\n response_format=input_output[\"input\"][\"response_format\"],\n stream=True,\n )\n maybe_json_content = \"\"\n for chunk in response:\n> maybe_json_content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai/test_chat_completion.py:135: IndexError" - }, - "teardown": { - "duration": 0.0008542909054085612, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E]", - "lineno": 117, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Maverick-17B-128E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.022667833953164518, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider together does not support model Llama-4-Maverick-17B-128E')" - }, - "teardown": { - "duration": 0.0006820419803261757, - "outcome": "passed" - } - }, - { - "nodeid": 
"tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "lineno": 117, - "outcome": "failed", - "keywords": [ - "test_chat_streaming_structured_output[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Maverick-17B-128E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.01285991701297462, - "outcome": "passed" - }, - "call": { - "duration": 0.6888671671040356, - "outcome": "failed", - "crash": { - "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py", - "lineno": 135, - "message": "IndexError: list index out of range" - }, - "traceback": [ - { - "path": "tests/verifications/openai/test_chat_completion.py", - "lineno": 135, - "message": "IndexError" - } - ], - "longrepr": "openai_client = \ninput_output = {'input': {'messages': [{'content': 'Extract the event information.', 'role': 'system'}, {'content': 'Alice and Bob ar...articipants'], 'title': 'CalendarEvent', 'type': 'object'}}, 'type': 'json_schema'}}, 'output': 'valid_calendar_event'}\ncorrect_model_name = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\n\n @pytest.mark.parametrize(\n \"model\",\n chat_completion_test_cases[\"test_chat_structured_output\"][\"test_params\"][\"model\"],\n )\n @pytest.mark.parametrize(\n \"input_output\",\n chat_completion_test_cases[\"test_chat_structured_output\"][\"test_params\"][\"input_output\"],\n )\n def test_chat_streaming_structured_output(openai_client, input_output, correct_model_name):\n response = openai_client.chat.completions.create(\n model=correct_model_name,\n messages=input_output[\"input\"][\"messages\"],\n response_format=input_output[\"input\"][\"response_format\"],\n stream=True,\n )\n maybe_json_content = \"\"\n for chunk in response:\n> maybe_json_content += 
chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai/test_chat_completion.py:135: IndexError" - }, - "teardown": { - "duration": 0.0007953330641612411, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-gpt-4o]", - "lineno": 117, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_structured_output[input_output0-gpt-4o]", - "parametrize", - "pytestmark", - "input_output0-gpt-4o", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.015029000001959503, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider together does not support model gpt-4o')" - }, - "teardown": { - "duration": 0.00015666603576391935, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output0-gpt-4o-mini]", - "lineno": 117, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_structured_output[input_output0-gpt-4o-mini]", - "parametrize", - "pytestmark", - "input_output0-gpt-4o-mini", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.00622316705994308, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider together does not support model gpt-4o-mini')" - }, - "teardown": { - "duration": 0.0001533749746158719, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-3.3-8B-Instruct]", - "lineno": 117, - "outcome": "skipped", - "keywords": [ - 
"test_chat_streaming_structured_output[input_output1-Llama-3.3-8B-Instruct]", - "parametrize", - "pytestmark", - "input_output1-Llama-3.3-8B-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.005598834017291665, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider together does not support model Llama-3.3-8B-Instruct')" - }, - "teardown": { - "duration": 0.00013062497600913048, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-3.3-70B-Instruct]", - "lineno": 117, - "outcome": "passed", - "keywords": [ - "test_chat_streaming_structured_output[input_output1-Llama-3.3-70B-Instruct]", - "parametrize", - "pytestmark", - "input_output1-Llama-3.3-70B-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.005876541952602565, - "outcome": "passed" - }, - "call": { - "duration": 7.561108374968171, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0004579999949783087, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E]", - "lineno": 117, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E]", - "parametrize", - "pytestmark", - "input_output1-Llama-4-Scout-17B-16E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.018791542039252818, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider together does not support model 
Llama-4-Scout-17B-16E')" - }, - "teardown": { - "duration": 0.0004900830099359155, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E-Instruct]", - "lineno": 117, - "outcome": "failed", - "keywords": [ - "test_chat_streaming_structured_output[input_output1-Llama-4-Scout-17B-16E-Instruct]", - "parametrize", - "pytestmark", - "input_output1-Llama-4-Scout-17B-16E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.0065952910808846354, - "outcome": "passed" - }, - "call": { - "duration": 2.6826554159633815, - "outcome": "failed", - "crash": { - "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py", - "lineno": 135, - "message": "IndexError: list index out of range" - }, - "traceback": [ - { - "path": "tests/verifications/openai/test_chat_completion.py", - "lineno": 135, - "message": "IndexError" - } - ], - "longrepr": "openai_client = \ninput_output = {'input': {'messages': [{'content': 'You are a helpful math tutor. Guide the user through the solution step by step.',... 
['steps', 'final_answer'], 'title': 'MathReasoning', ...}}, 'type': 'json_schema'}}, 'output': 'valid_math_reasoning'}\ncorrect_model_name = 'meta-llama/Llama-4-Scout-17B-16E-Instruct'\n\n @pytest.mark.parametrize(\n \"model\",\n chat_completion_test_cases[\"test_chat_structured_output\"][\"test_params\"][\"model\"],\n )\n @pytest.mark.parametrize(\n \"input_output\",\n chat_completion_test_cases[\"test_chat_structured_output\"][\"test_params\"][\"input_output\"],\n )\n def test_chat_streaming_structured_output(openai_client, input_output, correct_model_name):\n response = openai_client.chat.completions.create(\n model=correct_model_name,\n messages=input_output[\"input\"][\"messages\"],\n response_format=input_output[\"input\"][\"response_format\"],\n stream=True,\n )\n maybe_json_content = \"\"\n for chunk in response:\n> maybe_json_content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai/test_chat_completion.py:135: IndexError" - }, - "teardown": { - "duration": 0.0009669580031186342, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E]", - "lineno": 117, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E]", - "parametrize", - "pytestmark", - "input_output1-Llama-4-Maverick-17B-128E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.019489208003506064, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider together does not support model Llama-4-Maverick-17B-128E')" - }, - "teardown": { - "duration": 0.0007419160101562738, - "outcome": "passed" - } - }, - { - "nodeid": 
"tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E-Instruct]", - "lineno": 117, - "outcome": "failed", - "keywords": [ - "test_chat_streaming_structured_output[input_output1-Llama-4-Maverick-17B-128E-Instruct]", - "parametrize", - "pytestmark", - "input_output1-Llama-4-Maverick-17B-128E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.012299792026169598, - "outcome": "passed" - }, - "call": { - "duration": 2.829678333015181, - "outcome": "failed", - "crash": { - "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py", - "lineno": 135, - "message": "IndexError: list index out of range" - }, - "traceback": [ - { - "path": "tests/verifications/openai/test_chat_completion.py", - "lineno": 135, - "message": "IndexError" - } - ], - "longrepr": "openai_client = \ninput_output = {'input': {'messages': [{'content': 'You are a helpful math tutor. Guide the user through the solution step by step.',... 
['steps', 'final_answer'], 'title': 'MathReasoning', ...}}, 'type': 'json_schema'}}, 'output': 'valid_math_reasoning'}\ncorrect_model_name = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\n\n @pytest.mark.parametrize(\n \"model\",\n chat_completion_test_cases[\"test_chat_structured_output\"][\"test_params\"][\"model\"],\n )\n @pytest.mark.parametrize(\n \"input_output\",\n chat_completion_test_cases[\"test_chat_structured_output\"][\"test_params\"][\"input_output\"],\n )\n def test_chat_streaming_structured_output(openai_client, input_output, correct_model_name):\n response = openai_client.chat.completions.create(\n model=correct_model_name,\n messages=input_output[\"input\"][\"messages\"],\n response_format=input_output[\"input\"][\"response_format\"],\n stream=True,\n )\n maybe_json_content = \"\"\n for chunk in response:\n> maybe_json_content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai/test_chat_completion.py:135: IndexError" - }, - "teardown": { - "duration": 0.0010418329620733857, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-gpt-4o]", - "lineno": 117, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_structured_output[input_output1-gpt-4o]", - "parametrize", - "pytestmark", - "input_output1-gpt-4o", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.016189916990697384, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider together does not support model gpt-4o')" - }, - "teardown": { - "duration": 0.00027966592460870743, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_streaming_structured_output[input_output1-gpt-4o-mini]", - "lineno": 
117, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_structured_output[input_output1-gpt-4o-mini]", - "parametrize", - "pytestmark", - "input_output1-gpt-4o-mini", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.010247125057503581, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 118, 'Skipped: Provider together does not support model gpt-4o-mini')" - }, - "teardown": { - "duration": 0.00023291702382266521, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-Llama-3.3-70B-Instruct]", - "lineno": 138, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_tool_calling[input_output0-Llama-3.3-70B-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-3.3-70B-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.012632582918740809, - "outcome": "passed" - }, - "call": { - "duration": 0.40774812502786517, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0007319580763578415, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-Llama-4-Scout-17B-16E]", - "lineno": 138, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_tool_calling[input_output0-Llama-4-Scout-17B-16E]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Scout-17B-16E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.019890791969373822, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 139, 'Skipped: Provider together does not support model 
Llama-4-Scout-17B-16E')" - }, - "teardown": { - "duration": 0.0006391670322045684, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "lineno": 138, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_tool_calling[input_output0-Llama-4-Scout-17B-16E-Instruct]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Scout-17B-16E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.0178165000397712, - "outcome": "passed" - }, - "call": { - "duration": 0.38229950005188584, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0010000420734286308, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-Llama-4-Maverick-17B-128E]", - "lineno": 138, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_tool_calling[input_output0-Llama-4-Maverick-17B-128E]", - "parametrize", - "pytestmark", - "input_output0-Llama-4-Maverick-17B-128E", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.024259291938506067, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 139, 'Skipped: Provider together does not support model Llama-4-Maverick-17B-128E')" - }, - "teardown": { - "duration": 0.0003602079814299941, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "lineno": 138, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_tool_calling[input_output0-Llama-4-Maverick-17B-128E-Instruct]", - "parametrize", - "pytestmark", - 
"input_output0-Llama-4-Maverick-17B-128E-Instruct", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.012425708002410829, - "outcome": "passed" - }, - "call": { - "duration": 0.7610744580160826, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0005935420049354434, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-gpt-4o]", - "lineno": 138, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_tool_calling[input_output0-gpt-4o]", - "parametrize", - "pytestmark", - "input_output0-gpt-4o", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.018717541941441596, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 139, 'Skipped: Provider together does not support model gpt-4o')" - }, - "teardown": { - "duration": 0.000659791985526681, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai/test_chat_completion.py::test_chat_non_streaming_tool_calling[input_output0-gpt-4o-mini]", - "lineno": 138, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_tool_calling[input_output0-gpt-4o-mini]", - "parametrize", - "pytestmark", - "input_output0-gpt-4o-mini", - "test_chat_completion.py", - "openai", - "verifications", - "tests", - "llama-stack", - "" - ], - "setup": { - "duration": 0.012784749967977405, - "outcome": "skipped", - "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai/test_chat_completion.py', 139, 'Skipped: Provider together does not support model gpt-4o-mini')" - }, - "teardown": { - "duration": 0.0002145830076187849, - "outcome": "passed" - } - } - ] -} diff --git a/tests/verifications/test_results/together_1744264258.json 
b/tests/verifications/test_results/together_1744264258.json new file mode 100644 index 000000000..c38dd52b5 --- /dev/null +++ b/tests/verifications/test_results/together_1744264258.json @@ -0,0 +1,1420 @@ +{ + "created": 1744264304.064288, + "duration": 42.470197916030884, + "exitcode": 1, + "root": "/Users/erichuang/projects/llama-stack", + "environment": {}, + "summary": { + "passed": 21, + "failed": 10, + "skipped": 2, + "total": 33, + "collected": 33 + }, + "collectors": [ + { + "nodeid": "", + "outcome": "passed", + "result": [ + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py", + "type": "Module" + } + ] + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py", + "outcome": "passed", + "result": [ + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-earth]", + "type": "Function", + "lineno": 72 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-saturn]", + "type": "Function", + "lineno": 72 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-earth]", + "type": "Function", + "lineno": 72 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-saturn]", + "type": "Function", + "lineno": 72 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-earth]", + "type": "Function", + "lineno": 72 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-saturn]", + "type": "Function", + "lineno": 72 + }, + { + "nodeid": 
"tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-earth]", + "type": "Function", + "lineno": 91 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-saturn]", + "type": "Function", + "lineno": 91 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-earth]", + "type": "Function", + "lineno": 91 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-saturn]", + "type": "Function", + "lineno": 91 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-earth]", + "type": "Function", + "lineno": 91 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-saturn]", + "type": "Function", + "lineno": 91 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", + "type": "Function", + "lineno": 115 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", + "type": "Function", + "lineno": 115 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", + "type": "Function", + "lineno": 115 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", + "type": "Function", + "lineno": 134 + }, + { + "nodeid": 
"tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", + "type": "Function", + "lineno": 134 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", + "type": "Function", + "lineno": 134 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-calendar]", + "type": "Function", + "lineno": 158 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-math]", + "type": "Function", + "lineno": 158 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-calendar]", + "type": "Function", + "lineno": 158 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-math]", + "type": "Function", + "lineno": 158 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-calendar]", + "type": "Function", + "lineno": 158 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-math]", + "type": "Function", + "lineno": 158 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-calendar]", + "type": "Function", + "lineno": 181 + }, + { + "nodeid": 
"tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-math]", + "type": "Function", + "lineno": 181 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-calendar]", + "type": "Function", + "lineno": 181 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-math]", + "type": "Function", + "lineno": 181 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-calendar]", + "type": "Function", + "lineno": 181 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-math]", + "type": "Function", + "lineno": 181 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", + "type": "Function", + "lineno": 203 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", + "type": "Function", + "lineno": 203 + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", + "type": "Function", + "lineno": 203 + } + ] + } + ], + "tests": [ + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-earth]", + "lineno": 72, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-earth]", + "parametrize", + "pytestmark", + 
"meta-llama/Llama-3.3-70B-Instruct-Turbo-earth", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo", + "case_id": "earth" + }, + "setup": { + "duration": 0.06113254197407514, + "outcome": "passed" + }, + "call": { + "duration": 1.0720349580515176, + "outcome": "passed" + }, + "teardown": { + "duration": 0.00015966698992997408, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-saturn]", + "lineno": 72, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-saturn]", + "parametrize", + "pytestmark", + "meta-llama/Llama-3.3-70B-Instruct-Turbo-saturn", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo", + "case_id": "saturn" + }, + "setup": { + "duration": 0.006908083101734519, + "outcome": "passed" + }, + "call": { + "duration": 0.5013210839824751, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0005375830223783851, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-earth]", + "lineno": 72, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-earth]", + "parametrize", + "pytestmark", + "meta-llama/Llama-4-Scout-17B-16E-Instruct-earth", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "case_id": "earth" + }, + "setup": { + "duration": 0.006910792086273432, + "outcome": "passed" + }, + "call": { + "duration": 0.5142245410243049, + 
"outcome": "passed" + }, + "teardown": { + "duration": 0.0004069580463692546, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-saturn]", + "lineno": 72, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-saturn]", + "parametrize", + "pytestmark", + "meta-llama/Llama-4-Scout-17B-16E-Instruct-saturn", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "case_id": "saturn" + }, + "setup": { + "duration": 0.009730000048875809, + "outcome": "passed" + }, + "call": { + "duration": 0.40133179200347513, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0004558749496936798, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-earth]", + "lineno": 72, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-earth]", + "parametrize", + "pytestmark", + "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-earth", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", + "case_id": "earth" + }, + "setup": { + "duration": 0.008247417048551142, + "outcome": "passed" + }, + "call": { + "duration": 0.7914331250358373, + "outcome": "passed" + }, + "teardown": { + "duration": 0.00020262505859136581, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-saturn]", + "lineno": 72, + "outcome": "passed", + "keywords": [ + 
"test_chat_non_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-saturn]", + "parametrize", + "pytestmark", + "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-saturn", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", + "case_id": "saturn" + }, + "setup": { + "duration": 0.00922900007572025, + "outcome": "passed" + }, + "call": { + "duration": 1.2742049579974264, + "outcome": "passed" + }, + "teardown": { + "duration": 0.000688415952026844, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-earth]", + "lineno": 91, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-earth]", + "parametrize", + "pytestmark", + "meta-llama/Llama-3.3-70B-Instruct-Turbo-earth", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo", + "case_id": "earth" + }, + "setup": { + "duration": 0.006949124974198639, + "outcome": "passed" + }, + "call": { + "duration": 0.4681705000111833, + "outcome": "passed" + }, + "teardown": { + "duration": 0.00017795804888010025, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-saturn]", + "lineno": 91, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-saturn]", + "parametrize", + "pytestmark", + "meta-llama/Llama-3.3-70B-Instruct-Turbo-saturn", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo", + "case_id": "saturn" + }, + 
"setup": { + "duration": 0.008564374991692603, + "outcome": "passed" + }, + "call": { + "duration": 1.7430362500017509, + "outcome": "passed" + }, + "teardown": { + "duration": 0.00015312491450458765, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-earth]", + "lineno": 91, + "outcome": "failed", + "keywords": [ + "test_chat_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-earth]", + "parametrize", + "pytestmark", + "meta-llama/Llama-4-Scout-17B-16E-Instruct-earth", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "case_id": "earth" + }, + "setup": { + "duration": 0.007404124946333468, + "outcome": "passed" + }, + "call": { + "duration": 0.515926624997519, + "outcome": "failed", + "crash": { + "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", + "lineno": 109, + "message": "IndexError: list index out of range" + }, + "traceback": [ + { + "path": "tests/verifications/openai_api/test_chat_completion.py", + "lineno": 109, + "message": "IndexError" + } + ], + "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'case_id': 'earth', 'input': {'messages': [{'content': 'Which planet do humans live on?', 'role': 'user'}]}, 'output': 'Earth'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_basic\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_basic(request, 
openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:109: IndexError" + }, + "teardown": { + "duration": 0.0002389999572187662, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-saturn]", + "lineno": 91, + "outcome": "failed", + "keywords": [ + "test_chat_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-saturn]", + "parametrize", + "pytestmark", + "meta-llama/Llama-4-Scout-17B-16E-Instruct-saturn", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "case_id": "saturn" + }, + "setup": { + "duration": 0.0071305419551208615, + "outcome": "passed" + }, + "call": { + "duration": 0.37054662499576807, + "outcome": "failed", + "crash": { + "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", + "lineno": 109, + "message": "IndexError: list index out of range" + }, + "traceback": [ + { + "path": "tests/verifications/openai_api/test_chat_completion.py", + "lineno": 109, + "message": "IndexError" + } + ], + "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 
'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'case_id': 'saturn', 'input': {'messages': [{'content': 'Which planet has rings around it with a name starting with letter S?', 'role': 'user'}]}, 'output': 'Saturn'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_basic\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_basic(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:109: IndexError" + }, + "teardown": { + "duration": 0.0006014580139890313, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-earth]", + "lineno": 91, + "outcome": "failed", + "keywords": [ + "test_chat_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-earth]", + "parametrize", + "pytestmark", + "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-earth", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", + "case_id": "earth" + }, + "setup": { + "duration": 0.007489709067158401, + "outcome": "passed" + }, + "call": { + "duration": 0.7767745839664713, + "outcome": "failed", + 
"crash": { + "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", + "lineno": 109, + "message": "IndexError: list index out of range" + }, + "traceback": [ + { + "path": "tests/verifications/openai_api/test_chat_completion.py", + "lineno": 109, + "message": "IndexError" + } + ], + "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'case_id': 'earth', 'input': {'messages': [{'content': 'Which planet do humans live on?', 'role': 'user'}]}, 'output': 'Earth'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_basic\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_basic(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:109: IndexError" + }, + "teardown": { + "duration": 0.00025491707492619753, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-saturn]", + "lineno": 91, + "outcome": "failed", + "keywords": [ + 
"test_chat_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-saturn]", + "parametrize", + "pytestmark", + "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-saturn", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", + "case_id": "saturn" + }, + "setup": { + "duration": 0.006736499955877662, + "outcome": "passed" + }, + "call": { + "duration": 0.43948554201051593, + "outcome": "failed", + "crash": { + "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", + "lineno": 109, + "message": "IndexError: list index out of range" + }, + "traceback": [ + { + "path": "tests/verifications/openai_api/test_chat_completion.py", + "lineno": 109, + "message": "IndexError" + } + ], + "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'case_id': 'saturn', 'input': {'messages': [{'content': 'Which planet has rings around it with a name starting with letter S?', 'role': 'user'}]}, 'output': 'Saturn'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_basic\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_basic(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n 
messages=case[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:109: IndexError" + }, + "teardown": { + "duration": 0.0002264160430058837, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", + "lineno": 115, + "outcome": "skipped", + "keywords": [ + "test_chat_non_streaming_image[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", + "parametrize", + "pytestmark", + "meta-llama/Llama-3.3-70B-Instruct-Turbo-case0", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo", + "case_id": "case0" + }, + "setup": { + "duration": 0.007171708042733371, + "outcome": "passed" + }, + "call": { + "duration": 0.00013554200995713472, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py', 124, 'Skipped: Skipping test_chat_non_streaming_image for model meta-llama/Llama-3.3-70B-Instruct-Turbo on provider together based on config.')" + }, + "teardown": { + "duration": 0.0001235839445143938, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", + "lineno": 115, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_image[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", + "parametrize", + "pytestmark", + "meta-llama/Llama-4-Scout-17B-16E-Instruct-case0", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "case_id": 
"case0" + }, + "setup": { + "duration": 0.008639499894343317, + "outcome": "passed" + }, + "call": { + "duration": 1.4001279999502003, + "outcome": "passed" + }, + "teardown": { + "duration": 0.00014812499284744263, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", + "lineno": 115, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_image[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", + "parametrize", + "pytestmark", + "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", + "case_id": "case0" + }, + "setup": { + "duration": 0.015450250008143485, + "outcome": "passed" + }, + "call": { + "duration": 3.3522649579681456, + "outcome": "passed" + }, + "teardown": { + "duration": 0.00041629199404269457, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", + "lineno": 134, + "outcome": "skipped", + "keywords": [ + "test_chat_streaming_image[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", + "parametrize", + "pytestmark", + "meta-llama/Llama-3.3-70B-Instruct-Turbo-case0", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo", + "case_id": "case0" + }, + "setup": { + "duration": 0.007634000037796795, + "outcome": "passed" + }, + "call": { + "duration": 0.0001563339028507471, + "outcome": "skipped", + "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py', 143, 'Skipped: Skipping test_chat_streaming_image for model 
meta-llama/Llama-3.3-70B-Instruct-Turbo on provider together based on config.')" + }, + "teardown": { + "duration": 0.0001324999611824751, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", + "lineno": 134, + "outcome": "failed", + "keywords": [ + "test_chat_streaming_image[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", + "parametrize", + "pytestmark", + "meta-llama/Llama-4-Scout-17B-16E-Instruct-case0", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "case_id": "case0" + }, + "setup": { + "duration": 0.007050334010273218, + "outcome": "passed" + }, + "call": { + "duration": 1.7063317500287667, + "outcome": "failed", + "crash": { + "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", + "lineno": 152, + "message": "IndexError: list index out of range" + }, + "traceback": [ + { + "path": "tests/verifications/openai_api/test_chat_completion.py", + "lineno": 152, + "message": "IndexError" + } + ], + "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'input': {'messages': [{'content': [{'text': 'What is in this image?', 'type': 'text'}, {'image_url': {...}, 'type': 'image_url'}], 'role': 'user'}]}, 'output': 'llama'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_image\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_image(request, openai_client, model, 
provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:152: IndexError" + }, + "teardown": { + "duration": 0.0002109999768435955, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", + "lineno": 134, + "outcome": "failed", + "keywords": [ + "test_chat_streaming_image[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", + "parametrize", + "pytestmark", + "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", + "case_id": "case0" + }, + "setup": { + "duration": 0.006729208980686963, + "outcome": "passed" + }, + "call": { + "duration": 3.829621708020568, + "outcome": "failed", + "crash": { + "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", + "lineno": 152, + "message": "IndexError: list index out of range" + }, + "traceback": [ + { + "path": "tests/verifications/openai_api/test_chat_completion.py", + "lineno": 152, + "message": "IndexError" + } + ], + "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 
'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'input': {'messages': [{'content': [{'text': 'What is in this image?', 'type': 'text'}, {'image_url': {...}, 'type': 'image_url'}], 'role': 'user'}]}, 'output': 'llama'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_image\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_image(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:152: IndexError" + }, + "teardown": { + "duration": 0.0002882500411942601, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-calendar]", + "lineno": 158, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-calendar]", + "parametrize", + "pytestmark", + "meta-llama/Llama-3.3-70B-Instruct-Turbo-calendar", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo", + "case_id": "calendar" + }, + "setup": { + "duration": 0.007713916013017297, + "outcome": "passed" + }, + "call": { + "duration": 2.48285808309447, + 
"outcome": "passed" + }, + "teardown": { + "duration": 0.00020350003615021706, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-math]", + "lineno": 158, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-math]", + "parametrize", + "pytestmark", + "meta-llama/Llama-3.3-70B-Instruct-Turbo-math", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo", + "case_id": "math" + }, + "setup": { + "duration": 0.010098082944750786, + "outcome": "passed" + }, + "call": { + "duration": 1.6994713749736547, + "outcome": "passed" + }, + "teardown": { + "duration": 0.00014512497000396252, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-calendar]", + "lineno": 158, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-calendar]", + "parametrize", + "pytestmark", + "meta-llama/Llama-4-Scout-17B-16E-Instruct-calendar", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "case_id": "calendar" + }, + "setup": { + "duration": 0.006934792036190629, + "outcome": "passed" + }, + "call": { + "duration": 1.277176082949154, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0004985419800505042, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-math]", + "lineno": 158, + "outcome": "passed", + 
"keywords": [ + "test_chat_non_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-math]", + "parametrize", + "pytestmark", + "meta-llama/Llama-4-Scout-17B-16E-Instruct-math", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "case_id": "math" + }, + "setup": { + "duration": 0.012558708898723125, + "outcome": "passed" + }, + "call": { + "duration": 2.442075416096486, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0003505420172587037, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-calendar]", + "lineno": 158, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-calendar]", + "parametrize", + "pytestmark", + "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-calendar", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", + "case_id": "calendar" + }, + "setup": { + "duration": 0.012642999994568527, + "outcome": "passed" + }, + "call": { + "duration": 0.9305703329155222, + "outcome": "passed" + }, + "teardown": { + "duration": 0.00016004196368157864, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-math]", + "lineno": 158, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-math]", + "parametrize", + "pytestmark", + "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-math", + "test_chat_completion.py", + "openai_api", + "verifications", + 
"tests", + "llama-stack", + "" + ], + "metadata": { + "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", + "case_id": "math" + }, + "setup": { + "duration": 0.008792415959760547, + "outcome": "passed" + }, + "call": { + "duration": 2.194098167004995, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0003667499404400587, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-calendar]", + "lineno": 181, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-calendar]", + "parametrize", + "pytestmark", + "meta-llama/Llama-3.3-70B-Instruct-Turbo-calendar", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo", + "case_id": "calendar" + }, + "setup": { + "duration": 0.01219504198525101, + "outcome": "passed" + }, + "call": { + "duration": 2.045097667025402, + "outcome": "passed" + }, + "teardown": { + "duration": 0.00029958400409668684, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-math]", + "lineno": 181, + "outcome": "passed", + "keywords": [ + "test_chat_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-math]", + "parametrize", + "pytestmark", + "meta-llama/Llama-3.3-70B-Instruct-Turbo-math", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo", + "case_id": "math" + }, + "setup": { + "duration": 0.014203459024429321, + "outcome": "passed" + }, + "call": { + "duration": 1.3079068749211729, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0001914579188451171, + 
"outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-calendar]", + "lineno": 181, + "outcome": "failed", + "keywords": [ + "test_chat_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-calendar]", + "parametrize", + "pytestmark", + "meta-llama/Llama-4-Scout-17B-16E-Instruct-calendar", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "case_id": "calendar" + }, + "setup": { + "duration": 0.04714570892974734, + "outcome": "passed" + }, + "call": { + "duration": 0.44743770791683346, + "outcome": "failed", + "crash": { + "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", + "lineno": 200, + "message": "IndexError: list index out of range" + }, + "traceback": [ + { + "path": "tests/verifications/openai_api/test_chat_completion.py", + "lineno": 200, + "message": "IndexError" + } + ], + "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'case_id': 'calendar', 'input': {'messages': [{'content': 'Extract the event information.', 'role': 'system'}, {'cont...articipants'], 'title': 'CalendarEvent', 'type': 'object'}}, 'type': 'json_schema'}}, 'output': 'valid_calendar_event'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_structured_output\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_structured_output(request, openai_client, model, provider, 
verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n response_format=case[\"input\"][\"response_format\"],\n stream=True,\n )\n maybe_json_content = \"\"\n for chunk in response:\n> maybe_json_content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:200: IndexError" + }, + "teardown": { + "duration": 0.00022199994418770075, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-math]", + "lineno": 181, + "outcome": "failed", + "keywords": [ + "test_chat_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-math]", + "parametrize", + "pytestmark", + "meta-llama/Llama-4-Scout-17B-16E-Instruct-math", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "case_id": "math" + }, + "setup": { + "duration": 0.012237709015607834, + "outcome": "passed" + }, + "call": { + "duration": 3.180020791012794, + "outcome": "failed", + "crash": { + "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", + "lineno": 200, + "message": "IndexError: list index out of range" + }, + "traceback": [ + { + "path": "tests/verifications/openai_api/test_chat_completion.py", + "lineno": 200, + "message": "IndexError" + } + ], + "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': 
{'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'case_id': 'math', 'input': {'messages': [{'content': 'You are a helpful math tutor. Guide the user through the solut... ['steps', 'final_answer'], 'title': 'MathReasoning', ...}}, 'type': 'json_schema'}}, 'output': 'valid_math_reasoning'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_structured_output\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_structured_output(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n response_format=case[\"input\"][\"response_format\"],\n stream=True,\n )\n maybe_json_content = \"\"\n for chunk in response:\n> maybe_json_content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:200: IndexError" + }, + "teardown": { + "duration": 0.000273333047516644, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-calendar]", + "lineno": 181, + "outcome": "failed", + "keywords": [ + "test_chat_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-calendar]", + "parametrize", + "pytestmark", + "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-calendar", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + 
"llama-stack", + "" + ], + "metadata": { + "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", + "case_id": "calendar" + }, + "setup": { + "duration": 0.013312208000570536, + "outcome": "passed" + }, + "call": { + "duration": 0.4110311249969527, + "outcome": "failed", + "crash": { + "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", + "lineno": 200, + "message": "IndexError: list index out of range" + }, + "traceback": [ + { + "path": "tests/verifications/openai_api/test_chat_completion.py", + "lineno": 200, + "message": "IndexError" + } + ], + "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'case_id': 'calendar', 'input': {'messages': [{'content': 'Extract the event information.', 'role': 'system'}, {'cont...articipants'], 'title': 'CalendarEvent', 'type': 'object'}}, 'type': 'json_schema'}}, 'output': 'valid_calendar_event'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_structured_output\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_structured_output(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n response_format=case[\"input\"][\"response_format\"],\n stream=True,\n )\n maybe_json_content = \"\"\n for chunk in response:\n> 
maybe_json_content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:200: IndexError" + }, + "teardown": { + "duration": 0.00022975006140768528, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-math]", + "lineno": 181, + "outcome": "failed", + "keywords": [ + "test_chat_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-math]", + "parametrize", + "pytestmark", + "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-math", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", + "case_id": "math" + }, + "setup": { + "duration": 0.006676917080767453, + "outcome": "passed" + }, + "call": { + "duration": 2.316411833046004, + "outcome": "failed", + "crash": { + "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", + "lineno": 200, + "message": "IndexError: list index out of range" + }, + "traceback": [ + { + "path": "tests/verifications/openai_api/test_chat_completion.py", + "lineno": 200, + "message": "IndexError" + } + ], + "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'case_id': 'math', 'input': {'messages': [{'content': 'You are a helpful math tutor. Guide the user through the solut... 
['steps', 'final_answer'], 'title': 'MathReasoning', ...}}, 'type': 'json_schema'}}, 'output': 'valid_math_reasoning'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_structured_output\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_structured_output(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n response_format=case[\"input\"][\"response_format\"],\n stream=True,\n )\n maybe_json_content = \"\"\n for chunk in response:\n> maybe_json_content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:200: IndexError" + }, + "teardown": { + "duration": 0.000245374976657331, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", + "lineno": 203, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", + "parametrize", + "pytestmark", + "meta-llama/Llama-3.3-70B-Instruct-Turbo-case0", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo", + "case_id": "case0" + }, + "setup": { + "duration": 0.007064500008709729, + "outcome": "passed" + }, + "call": { + "duration": 0.606806542025879, + "outcome": "passed" + }, + "teardown": { + "duration": 0.00046320806723088026, + "outcome": "passed" + } + }, + { + "nodeid": 
"tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", + "lineno": 203, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", + "parametrize", + "pytestmark", + "meta-llama/Llama-4-Scout-17B-16E-Instruct-case0", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "case_id": "case0" + }, + "setup": { + "duration": 0.009071375010535121, + "outcome": "passed" + }, + "call": { + "duration": 0.41908070899080485, + "outcome": "passed" + }, + "teardown": { + "duration": 0.00026074994821101427, + "outcome": "passed" + } + }, + { + "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", + "lineno": 203, + "outcome": "passed", + "keywords": [ + "test_chat_non_streaming_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", + "parametrize", + "pytestmark", + "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0", + "test_chat_completion.py", + "openai_api", + "verifications", + "tests", + "llama-stack", + "" + ], + "metadata": { + "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", + "case_id": "case0" + }, + "setup": { + "duration": 0.0068333749659359455, + "outcome": "passed" + }, + "call": { + "duration": 0.8904451669659466, + "outcome": "passed" + }, + "teardown": { + "duration": 0.0005833340110257268, + "outcome": "passed" + } + } + ] +} From de6ec5803e18e336c936c5d5f8d9d8a9302b14bf Mon Sep 17 00:00:00 2001 From: Francisco Arceo Date: Thu, 10 Apr 2025 11:37:31 -0600 Subject: [PATCH 11/11] fix: Fix linter failures from #1921 (#1932) # What does this PR do? 
fix: Fix linter failures from #1921 Signed-off-by: Francisco Javier Arceo --- tests/verifications/conf/cerebras.yaml | 2 +- tests/verifications/conf/fireworks.yaml | 2 +- tests/verifications/conf/groq.yaml | 2 +- tests/verifications/conf/openai.yaml | 2 +- tests/verifications/conf/together.yaml | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/verifications/conf/cerebras.yaml b/tests/verifications/conf/cerebras.yaml index 32a60e766..5b19b4916 100644 --- a/tests/verifications/conf/cerebras.yaml +++ b/tests/verifications/conf/cerebras.yaml @@ -7,4 +7,4 @@ model_display_names: test_exclusions: llama-3.3-70b: - test_chat_non_streaming_image - - test_chat_streaming_image \ No newline at end of file + - test_chat_streaming_image diff --git a/tests/verifications/conf/fireworks.yaml b/tests/verifications/conf/fireworks.yaml index 30d6e4d75..f55b707ba 100644 --- a/tests/verifications/conf/fireworks.yaml +++ b/tests/verifications/conf/fireworks.yaml @@ -11,4 +11,4 @@ model_display_names: test_exclusions: accounts/fireworks/models/llama-v3p3-70b-instruct: - test_chat_non_streaming_image - - test_chat_streaming_image \ No newline at end of file + - test_chat_streaming_image diff --git a/tests/verifications/conf/groq.yaml b/tests/verifications/conf/groq.yaml index ef31a66e5..7871036dc 100644 --- a/tests/verifications/conf/groq.yaml +++ b/tests/verifications/conf/groq.yaml @@ -11,4 +11,4 @@ model_display_names: test_exclusions: llama-3.3-70b-versatile: - test_chat_non_streaming_image - - test_chat_streaming_image \ No newline at end of file + - test_chat_streaming_image diff --git a/tests/verifications/conf/openai.yaml b/tests/verifications/conf/openai.yaml index 89ae698f3..95a6259f7 100644 --- a/tests/verifications/conf/openai.yaml +++ b/tests/verifications/conf/openai.yaml @@ -6,4 +6,4 @@ models: model_display_names: gpt-4o: gpt-4o gpt-4o-mini: gpt-4o-mini -test_exclusions: {} \ No newline at end of file +test_exclusions: {} diff --git 
a/tests/verifications/conf/together.yaml b/tests/verifications/conf/together.yaml index 80e86fa77..258616662 100644 --- a/tests/verifications/conf/together.yaml +++ b/tests/verifications/conf/together.yaml @@ -11,4 +11,4 @@ model_display_names: test_exclusions: meta-llama/Llama-3.3-70B-Instruct-Turbo: - test_chat_non_streaming_image - - test_chat_streaming_image \ No newline at end of file + - test_chat_streaming_image