{ "created": 1744328847.853437, "duration": 49.9419469833374, "exitcode": 1, "root": "/Users/erichuang/projects/llama-stack", "environment": {}, "summary": { "passed": 22, "failed": 12, "skipped": 2, "total": 36, "collected": 36 }, "collectors": [ { "nodeid": "", "outcome": "passed", "result": [ { "nodeid": "tests/verifications/openai_api/test_chat_completion.py", "type": "Module" } ] }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py", "outcome": "passed", "result": [ { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-earth]", "type": "Function", "lineno": 73 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-saturn]", "type": "Function", "lineno": 73 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-earth]", "type": "Function", "lineno": 73 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-saturn]", "type": "Function", "lineno": 73 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-earth]", "type": "Function", "lineno": 73 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-saturn]", "type": "Function", "lineno": 73 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-earth]", "type": "Function", "lineno": 92 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-saturn]", "type": "Function", "lineno": 92 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-earth]", "type": "Function", "lineno": 92 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-saturn]", "type": "Function", "lineno": 92 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-earth]", "type": "Function", "lineno": 92 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-saturn]", "type": "Function", "lineno": 92 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", "type": "Function", "lineno": 116 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", "type": "Function", "lineno": 116 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", "type": "Function", "lineno": 116 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", "type": "Function", "lineno": 135 }, { "nodeid": 
"tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", "type": "Function", "lineno": 135 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", "type": "Function", "lineno": 135 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-calendar]", "type": "Function", "lineno": 159 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-math]", "type": "Function", "lineno": 159 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-calendar]", "type": "Function", "lineno": 159 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-math]", "type": "Function", "lineno": 159 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-calendar]", "type": "Function", "lineno": 159 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-math]", "type": "Function", "lineno": 159 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-calendar]", "type": "Function", "lineno": 182 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-math]", "type": "Function", "lineno": 182 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-calendar]", "type": "Function", "lineno": 182 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-math]", "type": "Function", "lineno": 182 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-calendar]", "type": "Function", "lineno": 182 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-math]", "type": "Function", "lineno": 182 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", "type": "Function", "lineno": 204 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", "type": "Function", "lineno": 204 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", "type": "Function", "lineno": 204 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", "type": "Function", "lineno": 228 }, { "nodeid": 
"tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", "type": "Function", "lineno": 228 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", "type": "Function", "lineno": 228 } ] } ], "tests": [ { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-earth]", "lineno": 73, "outcome": "passed", "keywords": [ "test_chat_non_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-earth]", "parametrize", "pytestmark", "meta-llama/Llama-3.3-70B-Instruct-Turbo-earth", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo", "case_id": "earth" }, "setup": { "duration": 0.15774220903404057, "outcome": "passed" }, "call": { "duration": 0.5396400419995189, "outcome": "passed" }, "teardown": { "duration": 0.0002977499971166253, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-saturn]", "lineno": 73, "outcome": "passed", "keywords": [ "test_chat_non_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-saturn]", "parametrize", "pytestmark", "meta-llama/Llama-3.3-70B-Instruct-Turbo-saturn", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo", "case_id": "saturn" }, "setup": { "duration": 0.015632833004929125, "outcome": "passed" }, "call": { "duration": 0.4675290420418605, "outcome": "passed" }, "teardown": { "duration": 0.00029129208996891975, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-earth]", "lineno": 73, "outcome": "passed", "keywords": [ "test_chat_non_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-earth]", "parametrize", "pytestmark", "meta-llama/Llama-4-Scout-17B-16E-Instruct-earth", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", "case_id": "earth" }, "setup": { "duration": 0.01530187507160008, "outcome": "passed" }, "call": { "duration": 0.501894542016089, "outcome": "passed" }, "teardown": { "duration": 0.0002060839906334877, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-saturn]", "lineno": 73, "outcome": "passed", "keywords": [ "test_chat_non_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-saturn]", "parametrize", "pytestmark", "meta-llama/Llama-4-Scout-17B-16E-Instruct-saturn", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", "case_id": "saturn" }, "setup": { "duration": 0.014841833035461605, "outcome": "passed" }, "call": { "duration": 0.4202229160582647, "outcome": "passed" }, "teardown": { "duration": 0.0005559159908443689, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-earth]", "lineno": 73, "outcome": 
"passed", "keywords": [ "test_chat_non_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-earth]", "parametrize", "pytestmark", "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-earth", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", "case_id": "earth" }, "setup": { "duration": 0.008204624988138676, "outcome": "passed" }, "call": { "duration": 1.991508833016269, "outcome": "passed" }, "teardown": { "duration": 0.000539042055606842, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-saturn]", "lineno": 73, "outcome": "passed", "keywords": [ "test_chat_non_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-saturn]", "parametrize", "pytestmark", "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-saturn", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", "case_id": "saturn" }, "setup": { "duration": 0.022528667002916336, "outcome": "passed" }, "call": { "duration": 0.37111237505450845, "outcome": "passed" }, "teardown": { "duration": 0.0005334159359335899, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-earth]", "lineno": 92, "outcome": "passed", "keywords": [ "test_chat_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-earth]", "parametrize", "pytestmark", "meta-llama/Llama-3.3-70B-Instruct-Turbo-earth", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo", "case_id": "earth" }, "setup": { "duration": 0.00922920904122293, "outcome": "passed" }, "call": { "duration": 1.1684916669037193, "outcome": "passed" }, "teardown": { "duration": 0.0002740409690886736, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-saturn]", "lineno": 92, "outcome": "passed", "keywords": [ "test_chat_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-saturn]", "parametrize", "pytestmark", "meta-llama/Llama-3.3-70B-Instruct-Turbo-saturn", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo", "case_id": "saturn" }, "setup": { "duration": 0.010883333045057952, "outcome": "passed" }, "call": { "duration": 0.4275277080014348, "outcome": "passed" }, "teardown": { "duration": 0.00043112505227327347, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-earth]", "lineno": 92, "outcome": "failed", "keywords": [ "test_chat_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-earth]", "parametrize", "pytestmark", "meta-llama/Llama-4-Scout-17B-16E-Instruct-earth", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", "case_id": "earth" }, "setup": { "duration": 0.012945958063937724, "outcome": "passed" }, "call": { "duration": 0.5551295839250088, "outcome": "failed", "crash": { "path": 
"/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", "lineno": 110, "message": "IndexError: list index out of range" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", "lineno": 110, "message": "IndexError" } ], "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'case_id': 'earth', 'input': {'messages': [{'content': 'Which planet do humans live on?', 'role': 'user'}]}, 'output': 'Earth'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_basic\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_basic(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:110: IndexError" }, "teardown": { "duration": 0.0002744169905781746, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-saturn]", "lineno": 92, "outcome": "failed", "keywords": [ "test_chat_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-saturn]", "parametrize", "pytestmark", "meta-llama/Llama-4-Scout-17B-16E-Instruct-saturn", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", "case_id": "saturn" }, "setup": { "duration": 0.017372542060911655, "outcome": "passed" }, "call": { "duration": 0.3579877089941874, "outcome": "failed", "crash": { "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", "lineno": 110, "message": "IndexError: list index out of range" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", "lineno": 110, "message": "IndexError" } ], "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'case_id': 'saturn', 'input': {'messages': [{'content': 'Which planet has rings around it with a name starting with letter S?', 'role': 'user'}]}, 'output': 'Saturn'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_basic\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_basic(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if 
should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:110: IndexError" }, "teardown": { "duration": 0.0005445419810712337, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-earth]", "lineno": 92, "outcome": "failed", "keywords": [ "test_chat_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-earth]", "parametrize", "pytestmark", "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-earth", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", "case_id": "earth" }, "setup": { "duration": 0.014297832967713475, "outcome": "passed" }, "call": { "duration": 0.8067362919682637, "outcome": "failed", "crash": { "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", "lineno": 110, "message": "IndexError: list index out of range" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", "lineno": 110, "message": "IndexError" } ], "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'case_id': 'earth', 'input': {'messages': [{'content': 'Which planet do humans live on?', 'role': 'user'}]}, 'output': 'Earth'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_basic\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_basic(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:110: IndexError" }, "teardown": { "duration": 0.0003220830112695694, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-saturn]", "lineno": 92, "outcome": "failed", "keywords": [ "test_chat_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-saturn]", "parametrize", "pytestmark", "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-saturn", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": 
"meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", "case_id": "saturn" }, "setup": { "duration": 0.008816750021651387, "outcome": "passed" }, "call": { "duration": 0.5383605000097305, "outcome": "failed", "crash": { "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", "lineno": 110, "message": "IndexError: list index out of range" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", "lineno": 110, "message": "IndexError" } ], "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'case_id': 'saturn', 'input': {'messages': [{'content': 'Which planet has rings around it with a name starting with letter S?', 'role': 'user'}]}, 'output': 'Saturn'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_basic\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_basic(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:110: IndexError" }, "teardown": { "duration": 0.00018316600471735, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", "lineno": 116, "outcome": "skipped", "keywords": [ "test_chat_non_streaming_image[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", "parametrize", "pytestmark", "meta-llama/Llama-3.3-70B-Instruct-Turbo-case0", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo", "case_id": "case0" }, "setup": { "duration": 0.0074389580404385924, "outcome": "passed" }, "call": { "duration": 0.00014933396596461535, "outcome": "skipped", "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py', 125, 'Skipped: Skipping test_chat_non_streaming_image for model meta-llama/Llama-3.3-70B-Instruct-Turbo on provider together based on config.')" }, "teardown": { "duration": 0.00012462493032217026, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", "lineno": 116, "outcome": "passed", "keywords": [ "test_chat_non_streaming_image[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", "parametrize", "pytestmark", "meta-llama/Llama-4-Scout-17B-16E-Instruct-case0", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", "case_id": "case0" }, "setup": { "duration": 
0.013580625061877072, "outcome": "passed" }, "call": { "duration": 2.89831429196056, "outcome": "passed" }, "teardown": { "duration": 0.000491458922624588, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", "lineno": 116, "outcome": "passed", "keywords": [ "test_chat_non_streaming_image[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", "parametrize", "pytestmark", "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", "case_id": "case0" }, "setup": { "duration": 0.008266666904091835, "outcome": "passed" }, "call": { "duration": 3.8873212080216035, "outcome": "passed" }, "teardown": { "duration": 0.00016850000247359276, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", "lineno": 135, "outcome": "skipped", "keywords": [ "test_chat_streaming_image[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", "parametrize", "pytestmark", "meta-llama/Llama-3.3-70B-Instruct-Turbo-case0", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo", "case_id": "case0" }, "setup": { "duration": 0.0080461660400033, "outcome": "passed" }, "call": { "duration": 0.00014758307952433825, "outcome": "skipped", "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py', 144, 'Skipped: Skipping test_chat_streaming_image for model meta-llama/Llama-3.3-70B-Instruct-Turbo on provider together based on config.')" }, "teardown": { "duration": 0.00012695800978690386, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", "lineno": 135, "outcome": "failed", "keywords": [ "test_chat_streaming_image[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", "parametrize", "pytestmark", "meta-llama/Llama-4-Scout-17B-16E-Instruct-case0", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", "case_id": "case0" }, "setup": { "duration": 0.00845700001809746, "outcome": "passed" }, "call": { "duration": 1.6604419159702957, "outcome": "failed", "crash": { "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", "lineno": 153, "message": "IndexError: list index out of range" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", "lineno": 153, "message": "IndexError" } ], "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'input': {'messages': [{'content': [{'text': 'What is in this image?', 'type': 'text'}, {'image_url': {...}, 'type': 'image_url'}], 'role': 'user'}]}, 'output': 'llama'}\n\n @pytest.mark.parametrize(\n \"case\",\n 
chat_completion_test_cases[\"test_chat_image\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_image(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:153: IndexError" }, "teardown": { "duration": 0.00033458403777331114, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", "lineno": 135, "outcome": "failed", "keywords": [ "test_chat_streaming_image[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", "parametrize", "pytestmark", "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", "case_id": "case0" }, "setup": { "duration": 0.012580333976075053, "outcome": "passed" }, "call": { "duration": 4.728511792025529, "outcome": "failed", "crash": { "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", "lineno": 153, "message": "IndexError: list index out of range" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", "lineno": 153, "message": "IndexError" } ], "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'input': {'messages': [{'content': [{'text': 'What is in this image?', 'type': 'text'}, {'image_url': {...}, 'type': 'image_url'}], 'role': 'user'}]}, 'output': 'llama'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_image\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_image(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:153: IndexError" }, "teardown": { "duration": 0.00023266696371138096, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-calendar]", "lineno": 159, "outcome": "passed", "keywords": [ 
"test_chat_non_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-calendar]", "parametrize", "pytestmark", "meta-llama/Llama-3.3-70B-Instruct-Turbo-calendar", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo", "case_id": "calendar" }, "setup": { "duration": 0.011554082971997559, "outcome": "passed" }, "call": { "duration": 1.3857994999270886, "outcome": "passed" }, "teardown": { "duration": 0.0003951250109821558, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-math]", "lineno": 159, "outcome": "passed", "keywords": [ "test_chat_non_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-math]", "parametrize", "pytestmark", "meta-llama/Llama-3.3-70B-Instruct-Turbo-math", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo", "case_id": "math" }, "setup": { "duration": 0.007673708954825997, "outcome": "passed" }, "call": { "duration": 3.082161583006382, "outcome": "passed" }, "teardown": { "duration": 0.0002532500075176358, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-calendar]", "lineno": 159, "outcome": "passed", "keywords": [ "test_chat_non_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-calendar]", "parametrize", "pytestmark", "meta-llama/Llama-4-Scout-17B-16E-Instruct-calendar", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", "case_id": "calendar" }, "setup": { "duration": 0.014791041961871088, "outcome": "passed" }, "call": { "duration": 0.6918012499809265, "outcome": "passed" }, "teardown": { "duration": 0.00027070799842476845, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-math]", "lineno": 159, "outcome": "passed", "keywords": [ "test_chat_non_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-math]", "parametrize", "pytestmark", "meta-llama/Llama-4-Scout-17B-16E-Instruct-math", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", "case_id": "math" }, "setup": { "duration": 0.014746625092811882, "outcome": "passed" }, "call": { "duration": 3.5890139170223847, "outcome": "passed" }, "teardown": { "duration": 0.00030137505382299423, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-calendar]", "lineno": 159, "outcome": "passed", "keywords": [ "test_chat_non_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-calendar]", "parametrize", "pytestmark", "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-calendar", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", "case_id": "calendar" }, "setup": { "duration": 0.036798374960199, "outcome": "passed" }, "call": { 
"duration": 0.6914895409718156, "outcome": "passed" }, "teardown": { "duration": 0.00023716699797660112, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-math]", "lineno": 159, "outcome": "passed", "keywords": [ "test_chat_non_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-math]", "parametrize", "pytestmark", "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-math", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", "case_id": "math" }, "setup": { "duration": 0.05965254199691117, "outcome": "passed" }, "call": { "duration": 2.609581291093491, "outcome": "passed" }, "teardown": { "duration": 0.0002674580318853259, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-calendar]", "lineno": 182, "outcome": "passed", "keywords": [ "test_chat_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-calendar]", "parametrize", "pytestmark", "meta-llama/Llama-3.3-70B-Instruct-Turbo-calendar", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo", "case_id": "calendar" }, "setup": { "duration": 0.014533916022628546, "outcome": "passed" }, "call": { "duration": 0.6227063750848174, "outcome": "passed" }, "teardown": { "duration": 0.00019699998665601015, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-math]", "lineno": 182, "outcome": "passed", "keywords": [ "test_chat_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-math]", "parametrize", "pytestmark", "meta-llama/Llama-3.3-70B-Instruct-Turbo-math", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo", "case_id": "math" }, "setup": { "duration": 0.009818125050514936, "outcome": "passed" }, "call": { "duration": 5.144610875053331, "outcome": "passed" }, "teardown": { "duration": 0.00045220903120934963, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-calendar]", "lineno": 182, "outcome": "failed", "keywords": [ "test_chat_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-calendar]", "parametrize", "pytestmark", "meta-llama/Llama-4-Scout-17B-16E-Instruct-calendar", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", "case_id": "calendar" }, "setup": { "duration": 0.012392290984280407, "outcome": "passed" }, "call": { "duration": 0.777625665999949, "outcome": "failed", "crash": { "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", "lineno": 201, "message": "IndexError: list index out of range" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", "lineno": 201, "message": "IndexError" } ], "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', 
provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'case_id': 'calendar', 'input': {'messages': [{'content': 'Extract the event information.', 'role': 'system'}, {'cont...articipants'], 'title': 'CalendarEvent', 'type': 'object'}}, 'type': 'json_schema'}}, 'output': 'valid_calendar_event'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_structured_output\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_structured_output(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n response_format=case[\"input\"][\"response_format\"],\n stream=True,\n )\n maybe_json_content = \"\"\n for chunk in response:\n> maybe_json_content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:201: IndexError" }, "teardown": { "duration": 0.000559916952624917, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-math]", "lineno": 182, "outcome": "failed", "keywords": [ "test_chat_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-math]", "parametrize", "pytestmark", "meta-llama/Llama-4-Scout-17B-16E-Instruct-math", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", "case_id": "math" }, "setup": { "duration": 0.010390624986030161, "outcome": "passed" }, "call": { "duration": 2.680094916955568, "outcome": "failed", "crash": { "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", "lineno": 201, "message": "IndexError: list index out of range" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", "lineno": 201, "message": "IndexError" } ], "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'case_id': 'math', 'input': {'messages': [{'content': 'You are a helpful math tutor. Guide the user through the solut... 
['steps', 'final_answer'], 'title': 'MathReasoning', ...}}, 'type': 'json_schema'}}, 'output': 'valid_math_reasoning'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_structured_output\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_structured_output(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n response_format=case[\"input\"][\"response_format\"],\n stream=True,\n )\n maybe_json_content = \"\"\n for chunk in response:\n> maybe_json_content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:201: IndexError" }, "teardown": { "duration": 0.00041987502481788397, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-calendar]", "lineno": 182, "outcome": "failed", "keywords": [ "test_chat_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-calendar]", "parametrize", "pytestmark", "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-calendar", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", "case_id": "calendar" }, "setup": { "duration": 0.01190529193263501, "outcome": "passed" }, "call": { "duration": 0.6690819580107927, "outcome": "failed", "crash": { "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", "lineno": 201, "message": "IndexError: list index out of range" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", "lineno": 201, "message": "IndexError" } ], "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'case_id': 'calendar', 'input': {'messages': [{'content': 'Extract the event information.', 'role': 'system'}, {'cont...articipants'], 'title': 'CalendarEvent', 'type': 'object'}}, 'type': 'json_schema'}}, 'output': 'valid_calendar_event'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_structured_output\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_structured_output(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n response_format=case[\"input\"][\"response_format\"],\n stream=True,\n )\n maybe_json_content = \"\"\n for chunk in response:\n> 
maybe_json_content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:201: IndexError" }, "teardown": { "duration": 0.000247166957706213, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-math]", "lineno": 182, "outcome": "failed", "keywords": [ "test_chat_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-math]", "parametrize", "pytestmark", "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-math", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", "case_id": "math" }, "setup": { "duration": 0.009588208980858326, "outcome": "passed" }, "call": { "duration": 2.4867218340514228, "outcome": "failed", "crash": { "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", "lineno": 201, "message": "IndexError: list index out of range" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", "lineno": 201, "message": "IndexError" } ], "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'case_id': 'math', 'input': {'messages': [{'content': 'You are a helpful math tutor. Guide the user through the solut... 
['steps', 'final_answer'], 'title': 'MathReasoning', ...}}, 'type': 'json_schema'}}, 'output': 'valid_math_reasoning'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_structured_output\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_structured_output(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n response_format=case[\"input\"][\"response_format\"],\n stream=True,\n )\n maybe_json_content = \"\"\n for chunk in response:\n> maybe_json_content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:201: IndexError" }, "teardown": { "duration": 0.00022487505339086056, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", "lineno": 204, "outcome": "passed", "keywords": [ "test_chat_non_streaming_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", "parametrize", "pytestmark", "meta-llama/Llama-3.3-70B-Instruct-Turbo-case0", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo", "case_id": "case0" }, "setup": { "duration": 0.008509417064487934, "outcome": "passed" }, "call": { "duration": 0.45511841599363834, "outcome": "passed" }, "teardown": { "duration": 0.00031033402774482965, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", "lineno": 204, "outcome": "passed", "keywords": [ "test_chat_non_streaming_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", "parametrize", "pytestmark", "meta-llama/Llama-4-Scout-17B-16E-Instruct-case0", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", "case_id": "case0" }, "setup": { "duration": 0.01352791697718203, "outcome": "passed" }, "call": { "duration": 0.7166531670372933, "outcome": "passed" }, "teardown": { "duration": 0.00031470798421651125, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", "lineno": 204, "outcome": "passed", "keywords": [ "test_chat_non_streaming_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", "parametrize", "pytestmark", "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", "case_id": "case0" }, "setup": { "duration": 0.01369225000962615, "outcome": "passed" }, "call": { "duration": 0.34134254103992134, "outcome": "passed" }, "teardown": { "duration": 0.0002922919811680913, "outcome": "passed" } }, { "nodeid": 
"tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", "lineno": 228, "outcome": "passed", "keywords": [ "test_chat_streaming_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", "parametrize", "pytestmark", "meta-llama/Llama-3.3-70B-Instruct-Turbo-case0", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo", "case_id": "case0" }, "setup": { "duration": 0.025748749962076545, "outcome": "passed" }, "call": { "duration": 0.7462511250050738, "outcome": "passed" }, "teardown": { "duration": 0.00030449999030679464, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", "lineno": 228, "outcome": "failed", "keywords": [ "test_chat_streaming_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", "parametrize", "pytestmark", "meta-llama/Llama-4-Scout-17B-16E-Instruct-case0", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", "case_id": "case0" }, "setup": { "duration": 0.015131957945413888, "outcome": "passed" }, "call": { "duration": 0.4556894999695942, "outcome": "failed", "crash": { "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", "lineno": 251, "message": "IndexError: list index out of range" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", "lineno": 251, "message": "IndexError" } ], "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_tool_calling(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n stream = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n stream=True,\n )\n \n # Accumulate partial tool_calls here\n tool_calls_buffer = {}\n current_id = None\n # Process streaming chunks\n for chunk in stream:\n> choice = chunk.choices[0]\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:251: IndexError" }, "teardown": { "duration": 0.000539042055606842, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", "lineno": 228, "outcome": 
"failed", "keywords": [ "test_chat_streaming_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", "parametrize", "pytestmark", "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", "case_id": "case0" }, "setup": { "duration": 0.016429082956165075, "outcome": "passed" }, "call": { "duration": 0.3677835420239717, "outcome": "failed", "crash": { "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", "lineno": 251, "message": "IndexError: list index out of range" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", "lineno": 251, "message": "IndexError" } ], "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_tool_calling(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n stream = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n stream=True,\n )\n \n # Accumulate partial tool_calls here\n tool_calls_buffer = {}\n current_id = None\n # Process streaming chunks\n for chunk in stream:\n> choice = chunk.choices[0]\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:251: IndexError" }, "teardown": { "duration": 0.001610000035725534, "outcome": "passed" } } ], "run_timestamp": 1744328795 }