{ "created": 1744264304.064288, "duration": 42.470197916030884, "exitcode": 1, "root": "/Users/erichuang/projects/llama-stack", "environment": {}, "summary": { "passed": 21, "failed": 10, "skipped": 2, "total": 33, "collected": 33 }, "collectors": [ { "nodeid": "", "outcome": "passed", "result": [ { "nodeid": "tests/verifications/openai_api/test_chat_completion.py", "type": "Module" } ] }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py", "outcome": "passed", "result": [ { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-earth]", "type": "Function", "lineno": 72 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-saturn]", "type": "Function", "lineno": 72 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-earth]", "type": "Function", "lineno": 72 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-saturn]", "type": "Function", "lineno": 72 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-earth]", "type": "Function", "lineno": 72 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-saturn]", "type": "Function", "lineno": 72 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-earth]", "type": "Function", "lineno": 91 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-saturn]", "type": "Function", "lineno": 91 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-earth]", "type": "Function", "lineno": 91 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-saturn]", "type": "Function", "lineno": 91 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-earth]", "type": "Function", "lineno": 91 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-saturn]", "type": "Function", "lineno": 91 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", "type": "Function", "lineno": 115 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", "type": "Function", "lineno": 115 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", "type": "Function", "lineno": 115 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", "type": "Function", "lineno": 134 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", "type": "Function", "lineno": 134 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", "type": "Function", "lineno": 134 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-calendar]", "type": "Function", "lineno": 158 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-math]", "type": "Function", "lineno": 158 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-calendar]", "type": "Function", "lineno": 158 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-math]", "type": "Function", "lineno": 158 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-calendar]", "type": "Function", "lineno": 158 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-math]", "type": "Function", "lineno": 158 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-calendar]", "type": "Function", "lineno": 181 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-math]", "type": "Function", "lineno": 181 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-calendar]", "type": "Function", "lineno": 181 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-math]", "type": "Function", "lineno": 181 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-calendar]", "type": "Function", "lineno": 181 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-math]", "type": "Function", "lineno": 181 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", "type": "Function", "lineno": 203 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", "type": "Function", "lineno": 203 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", "type": "Function", "lineno": 203 } ] } ], "tests": [ { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-earth]", "lineno": 72, "outcome": "passed", "keywords": [ "test_chat_non_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-earth]", "parametrize", "pytestmark", "meta-llama/Llama-3.3-70B-Instruct-Turbo-earth", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo", "case_id": "earth" }, "setup": { "duration": 0.06113254197407514, "outcome": "passed" }, "call": { "duration": 1.0720349580515176, "outcome": "passed" }, "teardown": { "duration": 0.00015966698992997408, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-saturn]", "lineno": 72, "outcome": "passed", "keywords": [ "test_chat_non_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-saturn]", "parametrize", "pytestmark", "meta-llama/Llama-3.3-70B-Instruct-Turbo-saturn", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo", "case_id": "saturn" }, "setup": { "duration": 0.006908083101734519, "outcome": "passed" }, "call": { "duration": 0.5013210839824751, "outcome": "passed" }, "teardown": { "duration": 0.0005375830223783851, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-earth]", "lineno": 72, "outcome": "passed", "keywords": [ "test_chat_non_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-earth]", "parametrize", "pytestmark", "meta-llama/Llama-4-Scout-17B-16E-Instruct-earth", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", "case_id": "earth" }, "setup": { "duration": 0.006910792086273432, "outcome": "passed" }, "call": { "duration": 0.5142245410243049, "outcome": "passed" }, "teardown": { "duration": 0.0004069580463692546, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-saturn]", "lineno": 72, "outcome": "passed", "keywords": [ "test_chat_non_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-saturn]", "parametrize", "pytestmark", "meta-llama/Llama-4-Scout-17B-16E-Instruct-saturn", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", "case_id": "saturn" }, "setup": { "duration": 0.009730000048875809, "outcome": "passed" }, "call": { "duration": 0.40133179200347513, "outcome": "passed" }, "teardown": { "duration": 0.0004558749496936798, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-earth]", "lineno": 72, "outcome": "passed", "keywords": [ "test_chat_non_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-earth]", "parametrize", "pytestmark", "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-earth", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", "case_id": "earth" }, "setup": { "duration": 0.008247417048551142, "outcome": "passed" }, "call": { "duration": 0.7914331250358373, "outcome": "passed" }, "teardown": { "duration": 0.00020262505859136581, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-saturn]", "lineno": 72, "outcome": "passed", "keywords": [ "test_chat_non_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-saturn]", "parametrize", "pytestmark", "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-saturn", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", "case_id": "saturn" }, "setup": { "duration": 0.00922900007572025, "outcome": "passed" }, "call": { "duration": 1.2742049579974264, "outcome": "passed" }, "teardown": { "duration": 0.000688415952026844, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-earth]", "lineno": 91, "outcome": "passed", "keywords": [ "test_chat_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-earth]", "parametrize", "pytestmark", "meta-llama/Llama-3.3-70B-Instruct-Turbo-earth", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo", "case_id": "earth" }, "setup": { "duration": 0.006949124974198639, "outcome": "passed" }, "call": { "duration": 0.4681705000111833, "outcome": "passed" }, "teardown": { "duration": 0.00017795804888010025, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-saturn]", "lineno": 91, "outcome": "passed", "keywords": [ "test_chat_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-saturn]", "parametrize", "pytestmark", "meta-llama/Llama-3.3-70B-Instruct-Turbo-saturn", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo", "case_id": "saturn" }, "setup": { "duration": 0.008564374991692603, "outcome": "passed" }, "call": { "duration": 1.7430362500017509, "outcome": "passed" }, "teardown": { "duration": 0.00015312491450458765, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-earth]", "lineno": 91, "outcome": "failed", "keywords": [ "test_chat_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-earth]", "parametrize", "pytestmark", "meta-llama/Llama-4-Scout-17B-16E-Instruct-earth", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", "case_id": "earth" }, "setup": { "duration": 0.007404124946333468, "outcome": "passed" }, "call": { "duration": 0.515926624997519, "outcome": "failed", "crash": { "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", "lineno": 109, "message": "IndexError: list index out of range" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", "lineno": 109, "message": "IndexError" } ], "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'case_id': 'earth', 'input': {'messages': [{'content': 'Which planet do humans live on?', 'role': 'user'}]}, 'output': 'Earth'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_basic\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_basic(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:109: IndexError" }, "teardown": { "duration": 0.0002389999572187662, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-saturn]", "lineno": 91, "outcome": "failed", "keywords": [ "test_chat_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-saturn]", "parametrize", "pytestmark", "meta-llama/Llama-4-Scout-17B-16E-Instruct-saturn", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", "case_id": "saturn" }, "setup": { "duration": 0.0071305419551208615, "outcome": "passed" }, "call": { "duration": 0.37054662499576807, "outcome": "failed", "crash": { "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", "lineno": 109, "message": "IndexError: list index out of range" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", "lineno": 109, "message": "IndexError" } ], "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'case_id': 'saturn', 'input': {'messages': [{'content': 'Which planet has rings around it with a name starting with letter S?', 'role': 'user'}]}, 'output': 'Saturn'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_basic\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_basic(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:109: IndexError" }, "teardown": { "duration": 0.0006014580139890313, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-earth]", "lineno": 91, "outcome": "failed", "keywords": [ "test_chat_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-earth]", "parametrize", "pytestmark", "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-earth", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", "case_id": "earth" }, "setup": { "duration": 0.007489709067158401, "outcome": "passed" }, "call": { "duration": 0.7767745839664713, "outcome": "failed", "crash": { "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", "lineno": 109, "message": "IndexError: list index out of range" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", "lineno": 109, "message": "IndexError" } ], "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'case_id': 'earth', 'input': {'messages': [{'content': 'Which planet do humans live on?', 'role': 'user'}]}, 'output': 'Earth'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_basic\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_basic(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:109: IndexError" }, "teardown": { "duration": 0.00025491707492619753, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-saturn]", "lineno": 91, "outcome": "failed", "keywords": [ "test_chat_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-saturn]", "parametrize", "pytestmark", "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-saturn", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", "case_id": "saturn" }, "setup": { "duration": 0.006736499955877662, "outcome": "passed" }, "call": { "duration": 0.43948554201051593, "outcome": "failed", "crash": { "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", "lineno": 109, "message": "IndexError: list index out of range" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", "lineno": 109, "message": "IndexError" } ], "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'case_id': 'saturn', 'input': {'messages': [{'content': 'Which planet has rings around it with a name starting with letter S?', 'role': 'user'}]}, 'output': 'Saturn'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_basic\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_basic(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:109: IndexError" }, "teardown": { "duration": 0.0002264160430058837, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", "lineno": 115, "outcome": "skipped", "keywords": [ "test_chat_non_streaming_image[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", "parametrize", "pytestmark", "meta-llama/Llama-3.3-70B-Instruct-Turbo-case0", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo", "case_id": "case0" }, "setup": { "duration": 0.007171708042733371, "outcome": "passed" }, "call": { "duration": 0.00013554200995713472, "outcome": "skipped", "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py', 124, 'Skipped: Skipping test_chat_non_streaming_image for model meta-llama/Llama-3.3-70B-Instruct-Turbo on provider together based on config.')" }, "teardown": { "duration": 0.0001235839445143938, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", "lineno": 115, "outcome": "passed", "keywords": [ "test_chat_non_streaming_image[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", "parametrize", "pytestmark", "meta-llama/Llama-4-Scout-17B-16E-Instruct-case0", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", "case_id": "case0" }, "setup": { "duration": 0.008639499894343317, "outcome": "passed" }, "call": { "duration": 1.4001279999502003, "outcome": "passed" }, "teardown": { "duration": 0.00014812499284744263, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", "lineno": 115, "outcome": "passed", "keywords": [ "test_chat_non_streaming_image[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", "parametrize", "pytestmark", "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", "case_id": "case0" }, "setup": { "duration": 0.015450250008143485, "outcome": "passed" }, "call": { "duration": 3.3522649579681456, "outcome": "passed" }, "teardown": { "duration": 0.00041629199404269457, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", "lineno": 134, "outcome": "skipped", "keywords": [ "test_chat_streaming_image[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", "parametrize", "pytestmark", "meta-llama/Llama-3.3-70B-Instruct-Turbo-case0", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo", "case_id": "case0" }, "setup": { "duration": 0.007634000037796795, "outcome": "passed" }, "call": { "duration": 0.0001563339028507471, "outcome": "skipped", "longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py', 143, 'Skipped: Skipping test_chat_streaming_image for model meta-llama/Llama-3.3-70B-Instruct-Turbo on provider together based on config.')" }, "teardown": { "duration": 0.0001324999611824751, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", "lineno": 134, "outcome": "failed", "keywords": [ "test_chat_streaming_image[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", "parametrize", "pytestmark", "meta-llama/Llama-4-Scout-17B-16E-Instruct-case0", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", "case_id": "case0" }, "setup": { "duration": 0.007050334010273218, "outcome": "passed" }, "call": { "duration": 1.7063317500287667, "outcome": "failed", "crash": { "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", "lineno": 152, "message": "IndexError: list index out of range" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", "lineno": 152, "message": "IndexError" } ], "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'input': {'messages': [{'content': [{'text': 'What is in this image?', 'type': 'text'}, {'image_url': {...}, 'type': 'image_url'}], 'role': 'user'}]}, 'output': 'llama'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_image\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_image(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:152: IndexError" }, "teardown": { "duration": 0.0002109999768435955, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", "lineno": 134, "outcome": "failed", "keywords": [ "test_chat_streaming_image[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", "parametrize", "pytestmark", "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", "case_id": "case0" }, "setup": { "duration": 0.006729208980686963, "outcome": "passed" }, "call": { "duration": 3.829621708020568, "outcome": "failed", "crash": { "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", "lineno": 152, "message": "IndexError: list index out of range" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", "lineno": 152, "message": "IndexError" } ], "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'input': {'messages': [{'content': [{'text': 'What is in this image?', 'type': 'text'}, {'image_url': {...}, 'type': 'image_url'}], 'role': 'user'}]}, 'output': 'llama'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_image\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_image(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:152: IndexError" }, "teardown": { "duration": 0.0002882500411942601, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-calendar]", "lineno": 158, "outcome": "passed", "keywords": [ "test_chat_non_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-calendar]", "parametrize", "pytestmark", "meta-llama/Llama-3.3-70B-Instruct-Turbo-calendar", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo", "case_id": "calendar" }, "setup": { "duration": 0.007713916013017297, "outcome": "passed" }, "call": { "duration": 2.48285808309447, "outcome": "passed" }, "teardown": { "duration": 0.00020350003615021706, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-math]", "lineno": 158, "outcome": "passed", "keywords": [ "test_chat_non_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-math]", "parametrize", "pytestmark", "meta-llama/Llama-3.3-70B-Instruct-Turbo-math", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo", "case_id": "math" }, "setup": { "duration": 0.010098082944750786, "outcome": "passed" }, "call": { "duration": 1.6994713749736547, "outcome": "passed" }, "teardown": { "duration": 0.00014512497000396252, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-calendar]", "lineno": 158, "outcome": "passed", "keywords": [ "test_chat_non_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-calendar]", "parametrize", "pytestmark", "meta-llama/Llama-4-Scout-17B-16E-Instruct-calendar", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", "case_id": "calendar" }, "setup": { "duration": 0.006934792036190629, "outcome": "passed" }, "call": { "duration": 1.277176082949154, "outcome": "passed" }, "teardown": { "duration": 0.0004985419800505042, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-math]", "lineno": 158, "outcome": "passed", "keywords": [ "test_chat_non_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-math]", "parametrize", "pytestmark", "meta-llama/Llama-4-Scout-17B-16E-Instruct-math", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", "case_id": "math" }, "setup": { "duration": 0.012558708898723125, "outcome": "passed" }, "call": { "duration": 2.442075416096486, "outcome": "passed" }, "teardown": { "duration": 0.0003505420172587037, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-calendar]", "lineno": 158, "outcome": "passed", "keywords": [ "test_chat_non_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-calendar]", "parametrize", "pytestmark", "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-calendar", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", "case_id": "calendar" }, "setup": { "duration": 0.012642999994568527, "outcome": "passed" }, "call": { "duration": 0.9305703329155222, "outcome": "passed" }, "teardown": { "duration": 0.00016004196368157864, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-math]", "lineno": 158, "outcome": "passed", "keywords": [ "test_chat_non_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-math]", "parametrize", "pytestmark", "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-math", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", "case_id": "math" }, "setup": { "duration": 0.008792415959760547, "outcome": "passed" }, "call": { "duration": 2.194098167004995, "outcome": "passed" }, "teardown": { "duration": 0.0003667499404400587, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-calendar]", "lineno": 181, "outcome": "passed", "keywords": [ "test_chat_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-calendar]", "parametrize", "pytestmark", "meta-llama/Llama-3.3-70B-Instruct-Turbo-calendar", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo", "case_id": "calendar" }, "setup": { "duration": 0.01219504198525101, "outcome": "passed" }, "call": { "duration": 2.045097667025402, "outcome": "passed" }, "teardown": { "duration": 0.00029958400409668684, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-math]", "lineno": 181, "outcome": "passed", "keywords": [ "test_chat_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-math]", "parametrize", "pytestmark", "meta-llama/Llama-3.3-70B-Instruct-Turbo-math", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo", "case_id": "math" }, "setup": { "duration": 0.014203459024429321, "outcome": "passed" }, "call": { "duration": 1.3079068749211729, "outcome": "passed" }, "teardown": { "duration": 0.0001914579188451171, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-calendar]", "lineno": 181, "outcome": "failed", "keywords": [ "test_chat_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-calendar]", "parametrize", "pytestmark", "meta-llama/Llama-4-Scout-17B-16E-Instruct-calendar", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", "case_id": "calendar" }, "setup": { "duration": 0.04714570892974734, "outcome": "passed" }, "call": { "duration": 0.44743770791683346, "outcome": "failed", "crash": { "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", "lineno": 200, "message": "IndexError: list index out of range" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", "lineno": 200, "message": "IndexError" } ], "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'case_id': 'calendar', 'input': {'messages': [{'content': 'Extract the event information.', 'role': 'system'}, {'cont...articipants'], 'title': 'CalendarEvent', 'type': 'object'}}, 'type': 'json_schema'}}, 'output': 'valid_calendar_event'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_structured_output\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_structured_output(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n response_format=case[\"input\"][\"response_format\"],\n stream=True,\n )\n maybe_json_content = \"\"\n for chunk in response:\n> maybe_json_content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:200: IndexError" }, "teardown": { "duration": 0.00022199994418770075, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-math]", "lineno": 181, "outcome": "failed", "keywords": [ "test_chat_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-math]", "parametrize", "pytestmark", "meta-llama/Llama-4-Scout-17B-16E-Instruct-math", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", "case_id": "math" }, "setup": { "duration": 0.012237709015607834, "outcome": "passed" }, "call": { "duration": 3.180020791012794, "outcome": "failed", "crash": { "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", "lineno": 200, "message": "IndexError: list index out of range" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", "lineno": 200, "message": "IndexError" } ], "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'case_id': 'math', 'input': {'messages': [{'content': 'You are a helpful math tutor. Guide the user through the solut... ['steps', 'final_answer'], 'title': 'MathReasoning', ...}}, 'type': 'json_schema'}}, 'output': 'valid_math_reasoning'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_structured_output\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_structured_output(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n response_format=case[\"input\"][\"response_format\"],\n stream=True,\n )\n maybe_json_content = \"\"\n for chunk in response:\n> maybe_json_content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:200: IndexError" }, "teardown": { "duration": 0.000273333047516644, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-calendar]", "lineno": 181, "outcome": "failed", "keywords": [ "test_chat_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-calendar]", "parametrize", "pytestmark", "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-calendar", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", "case_id": "calendar" }, "setup": { "duration": 0.013312208000570536, "outcome": "passed" }, "call": { "duration": 0.4110311249969527, "outcome": "failed", "crash": { "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", "lineno": 200, "message": "IndexError: list index out of range" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", "lineno": 200, "message": "IndexError" } ], "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'case_id': 'calendar', 'input': {'messages': [{'content': 'Extract the event information.', 'role': 'system'}, {'cont...articipants'], 'title': 'CalendarEvent', 'type': 'object'}}, 'type': 'json_schema'}}, 'output': 'valid_calendar_event'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_structured_output\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_structured_output(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n response_format=case[\"input\"][\"response_format\"],\n stream=True,\n )\n maybe_json_content = \"\"\n for chunk in response:\n> maybe_json_content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:200: IndexError" }, "teardown": { "duration": 0.00022975006140768528, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-math]", "lineno": 181, "outcome": "failed", "keywords": [ "test_chat_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-math]", "parametrize", "pytestmark", "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-math", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", "case_id": "math" }, "setup": { "duration": 0.006676917080767453, "outcome": "passed" }, "call": { "duration": 2.316411833046004, "outcome": "failed", "crash": { "path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py", "lineno": 200, "message": "IndexError: list index out of range" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", "lineno": 200, "message": "IndexError" } ], "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'case_id': 'math', 'input': {'messages': [{'content': 'You are a helpful math tutor. Guide the user through the solut... ['steps', 'final_answer'], 'title': 'MathReasoning', ...}}, 'type': 'json_schema'}}, 'output': 'valid_math_reasoning'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_structured_output\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_structured_output(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n response_format=case[\"input\"][\"response_format\"],\n stream=True,\n )\n maybe_json_content = \"\"\n for chunk in response:\n> maybe_json_content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:200: IndexError" }, "teardown": { "duration": 0.000245374976657331, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", "lineno": 203, "outcome": "passed", "keywords": [ "test_chat_non_streaming_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", "parametrize", "pytestmark", "meta-llama/Llama-3.3-70B-Instruct-Turbo-case0", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo", "case_id": "case0" }, "setup": { "duration": 0.007064500008709729, "outcome": "passed" }, "call": { "duration": 0.606806542025879, "outcome": "passed" }, "teardown": { "duration": 0.00046320806723088026, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", "lineno": 203, "outcome": "passed", "keywords": [ "test_chat_non_streaming_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", "parametrize", "pytestmark", "meta-llama/Llama-4-Scout-17B-16E-Instruct-case0", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", "case_id": "case0" }, "setup": { "duration": 0.009071375010535121, "outcome": "passed" }, "call": { "duration": 0.41908070899080485, "outcome": "passed" }, "teardown": { "duration": 0.00026074994821101427, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", "lineno": 203, "outcome": "passed", "keywords": [ "test_chat_non_streaming_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", "parametrize", "pytestmark", "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", "case_id": "case0" }, "setup": { "duration": 0.0068333749659359455, "outcome": "passed" }, "call": { "duration": 0.8904451669659466, "outcome": "passed" }, "teardown": { "duration": 0.0005833340110257268, "outcome": "passed" } } ] }