{ "created": 1744915672.332456, "duration": 157.25543904304504, "exitcode": 1, "root": "/home/erichuang/llama-stack", "environment": {}, "summary": { "passed": 41, "failed": 39, "skipped": 4, "total": 84, "collected": 84 }, "collectors": [ { "nodeid": "", "outcome": "passed", "result": [ { "nodeid": "tests/verifications/openai_api/test_chat_completion.py", "type": "Module" } ] }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py", "outcome": "passed", "result": [ { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-earth]", "type": "Function", "lineno": 95 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-saturn]", "type": "Function", "lineno": 95 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-earth]", "type": "Function", "lineno": 95 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-saturn]", "type": "Function", "lineno": 95 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-earth]", "type": "Function", "lineno": 95 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-saturn]", "type": "Function", "lineno": 95 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-earth]", "type": "Function", "lineno": 114 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-saturn]", "type": "Function", "lineno": 114 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-earth]", "type": "Function", "lineno": 114 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-saturn]", "type": "Function", "lineno": 114 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-earth]", "type": "Function", "lineno": 114 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-saturn]", "type": "Function", "lineno": 114 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", "type": "Function", "lineno": 138 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", "type": "Function", "lineno": 138 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", "type": "Function", "lineno": 138 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", "type": "Function", "lineno": 157 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", "type": "Function", "lineno": 157 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", "type": "Function", "lineno": 157 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-calendar]", "type": "Function", "lineno": 181 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-math]", "type": "Function", "lineno": 181 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-calendar]", "type": "Function", "lineno": 181 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-math]", "type": "Function", "lineno": 181 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-calendar]", "type": "Function", "lineno": 181 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-math]", "type": "Function", "lineno": 181 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-calendar]", "type": "Function", "lineno": 204 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-math]", "type": "Function", "lineno": 204 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-calendar]", "type": "Function", "lineno": 204 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-math]", "type": "Function", "lineno": 204 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-calendar]", "type": "Function", "lineno": 204 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-math]", "type": "Function", "lineno": 204 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", "type": "Function", "lineno": 226 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", "type": "Function", "lineno": 226 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", "type": "Function", "lineno": 226 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", "type": "Function", "lineno": 250 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", "type": "Function", "lineno": 250 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", "type": "Function", "lineno": 250 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_required[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", "type": "Function", "lineno": 278 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_required[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", "type": "Function", "lineno": 278 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_required[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", "type": "Function", "lineno": 278 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_required[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", "type": "Function", "lineno": 302 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_required[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", "type": "Function", "lineno": 302 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_required[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", "type": "Function", "lineno": 302 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_none[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", "type": "Function", "lineno": 329 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_none[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", "type": "Function", "lineno": 329 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_none[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", "type": "Function", "lineno": 329 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_none[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", "type": "Function", "lineno": 352 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_none[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", "type": "Function", "lineno": 352 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_none[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", "type": "Function", "lineno": 352 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-text_then_weather_tool]", "type": "Function", "lineno": 380 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-weather_tool_then_text]", "type": "Function", "lineno": 380 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-add_product_tool]", "type": "Function", "lineno": 380 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-get_then_create_event_tool]", "type": "Function", "lineno": 380 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-compare_monthly_expense_tool]", "type": "Function", "lineno": 380 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-text_then_weather_tool]", "type": "Function", "lineno": 380 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-weather_tool_then_text]", "type": "Function", "lineno": 380 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-add_product_tool]", "type": "Function", "lineno": 380 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-get_then_create_event_tool]", "type": "Function", "lineno": 380 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-compare_monthly_expense_tool]", "type": "Function", "lineno": 380 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-text_then_weather_tool]", "type": "Function", "lineno": 380 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-weather_tool_then_text]", "type": "Function", "lineno": 380 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-add_product_tool]", "type": "Function", "lineno": 380 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-get_then_create_event_tool]", "type": "Function", "lineno": 380 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-compare_monthly_expense_tool]", "type": "Function", "lineno": 380 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-text_then_weather_tool]", "type": "Function", "lineno": 471 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-weather_tool_then_text]", "type": "Function", "lineno": 471 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-add_product_tool]", "type": "Function", "lineno": 471 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-get_then_create_event_tool]", "type": "Function", "lineno": 471 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-compare_monthly_expense_tool]", "type": "Function", "lineno": 471 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-text_then_weather_tool]", "type": "Function", "lineno": 471 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-weather_tool_then_text]", "type": "Function", "lineno": 471 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-add_product_tool]", "type": "Function", "lineno": 471 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-get_then_create_event_tool]", "type": "Function", "lineno": 471 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-compare_monthly_expense_tool]", "type": "Function", "lineno": 471 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-text_then_weather_tool]", "type": "Function", "lineno": 471 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-weather_tool_then_text]", "type": "Function", "lineno": 471 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-add_product_tool]", "type": "Function", "lineno": 471 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-get_then_create_event_tool]", "type": "Function", "lineno": 471 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-compare_monthly_expense_tool]", "type": "Function", "lineno": 471 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_multi_turn_multiple_images[meta-llama/Llama-3.3-70B-Instruct-Turbo-stream=False]", "type": "Function", "lineno": 554 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_multi_turn_multiple_images[meta-llama/Llama-3.3-70B-Instruct-Turbo-stream=True]", "type": "Function", "lineno": 554 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_multi_turn_multiple_images[meta-llama/Llama-4-Scout-17B-16E-Instruct-stream=False]", "type": "Function", "lineno": 554 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_multi_turn_multiple_images[meta-llama/Llama-4-Scout-17B-16E-Instruct-stream=True]", "type": "Function", "lineno": 554 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_multi_turn_multiple_images[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-stream=False]", "type": "Function", "lineno": 554 }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_multi_turn_multiple_images[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-stream=True]", "type": "Function", "lineno": 554 } ] } ], "tests": [ { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-earth]", "lineno": 95, "outcome": "passed", "keywords": [ "test_chat_non_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-earth]", "parametrize", "pytestmark", "meta-llama/Llama-3.3-70B-Instruct-Turbo-earth", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo", "case_id": "earth" }, "setup": { "duration": 0.11770237609744072, "outcome": "passed" }, "call": { "duration": 0.6406435770913959, "outcome": "passed" }, "teardown": { "duration": 0.0002960069105029106, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-saturn]", "lineno": 95, "outcome": "passed", "keywords": [ "test_chat_non_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-saturn]", "parametrize", "pytestmark", "meta-llama/Llama-3.3-70B-Instruct-Turbo-saturn", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo", "case_id": "saturn" }, "setup": { "duration": 0.07460446748882532, "outcome": "passed" }, "call": { "duration": 0.5787241570651531, "outcome": "passed" }, "teardown": { "duration": 0.00026445742696523666, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-earth]", "lineno": 95, "outcome": "passed", "keywords": [ "test_chat_non_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-earth]", "parametrize", "pytestmark", "meta-llama/Llama-4-Scout-17B-16E-Instruct-earth", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", "case_id": "earth" }, "setup": { "duration": 0.07483412884175777, "outcome": "passed" }, "call": { "duration": 0.8699872978031635, "outcome": "passed" }, "teardown": { "duration": 0.0002666134387254715, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-saturn]", "lineno": 95, "outcome": "passed", "keywords": [ "test_chat_non_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-saturn]", "parametrize", "pytestmark", "meta-llama/Llama-4-Scout-17B-16E-Instruct-saturn", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", "case_id": "saturn" }, "setup": { "duration": 0.07169668562710285, "outcome": "passed" }, "call": { "duration": 0.5061587328091264, "outcome": "passed" }, "teardown": { "duration": 0.00028620287775993347, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-earth]", "lineno": 95, "outcome": "passed", "keywords": [ "test_chat_non_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-earth]", "parametrize", "pytestmark", "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-earth", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", "case_id": "earth" }, "setup": { "duration": 0.0829654112458229, "outcome": "passed" }, "call": { "duration": 1.2450250089168549, "outcome": "passed" }, "teardown": { "duration": 0.00024125166237354279, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-saturn]", "lineno": 95, "outcome": "passed", "keywords": [ "test_chat_non_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-saturn]", "parametrize", "pytestmark", "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-saturn", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", "case_id": "saturn" }, "setup": { "duration": 0.07169047556817532, "outcome": "passed" }, "call": { "duration": 0.7659840155392885, "outcome": "passed" }, "teardown": { "duration": 0.00023942161351442337, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-earth]", "lineno": 114, "outcome": "passed", "keywords": [ "test_chat_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-earth]", "parametrize", "pytestmark", "meta-llama/Llama-3.3-70B-Instruct-Turbo-earth", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo", "case_id": "earth" }, "setup": { "duration": 0.0718865105882287, "outcome": "passed" }, "call": { "duration": 0.9115259740501642, "outcome": "passed" }, "teardown": { "duration": 0.0002334779128432274, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-saturn]", "lineno": 114, "outcome": "passed", "keywords": [ "test_chat_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-saturn]", "parametrize", "pytestmark", "meta-llama/Llama-3.3-70B-Instruct-Turbo-saturn", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo", "case_id": "saturn" }, "setup": { "duration": 0.07380938995629549, "outcome": "passed" }, "call": { "duration": 0.9997824784368277, "outcome": "passed" }, "teardown": { "duration": 0.00029965396970510483, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-earth]", "lineno": 114, "outcome": "failed", "keywords": [ "test_chat_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-earth]", "parametrize", "pytestmark", "meta-llama/Llama-4-Scout-17B-16E-Instruct-earth", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", "case_id": "earth" }, "setup": { "duration": 0.07564573176205158, "outcome": "passed" }, "call": { "duration": 0.7452597729861736, "outcome": "failed", "crash": { "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", "lineno": 132, "message": "IndexError: list index out of range" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", "lineno": 132, "message": "IndexError" } ], "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'earth', 'input': {'messages': [{'content': 'Which planet do humans live on?', 'role': 'user'}]}, 'output': 'Earth'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_basic\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_basic(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:132: IndexError" }, "teardown": { "duration": 0.0003521796315908432, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-saturn]", "lineno": 114, "outcome": "failed", "keywords": [ "test_chat_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-saturn]", "parametrize", "pytestmark", "meta-llama/Llama-4-Scout-17B-16E-Instruct-saturn", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", "case_id": "saturn" }, "setup": { "duration": 0.07307655736804008, "outcome": "passed" }, "call": { "duration": 0.45107892248779535, "outcome": "failed", "crash": { "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", "lineno": 132, "message": "IndexError: list index out of range" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", "lineno": 132, "message": "IndexError" } ], "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'saturn', 'input': {'messages': [{'content': 'Which planet has rings around it with a name starting with letter S?', 'role': 'user'}]}, 'output': 'Saturn'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_basic\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_basic(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:132: IndexError" }, "teardown": { "duration": 0.00031046755611896515, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-earth]", "lineno": 114, "outcome": "failed", "keywords": [ "test_chat_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-earth]", "parametrize", "pytestmark", "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-earth", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", "case_id": "earth" }, "setup": { "duration": 0.07041068747639656, "outcome": "passed" }, "call": { "duration": 1.1565949888899922, "outcome": "failed", "crash": { "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", "lineno": 132, "message": "IndexError: list index out of range" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", "lineno": 132, "message": "IndexError" } ], "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'earth', 'input': {'messages': [{'content': 'Which planet do humans live on?', 'role': 'user'}]}, 'output': 'Earth'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_basic\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_basic(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:132: IndexError" }, "teardown": { "duration": 0.0002977624535560608, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-saturn]", "lineno": 114, "outcome": "failed", "keywords": [ "test_chat_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-saturn]", "parametrize", "pytestmark", "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-saturn", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", "case_id": "saturn" }, "setup": { "duration": 0.07026446517556906, "outcome": "passed" }, "call": { "duration": 0.7347098160535097, "outcome": "failed", "crash": { "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", "lineno": 132, "message": "IndexError: list index out of range" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", "lineno": 132, "message": "IndexError" } ], "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'saturn', 'input': {'messages': [{'content': 'Which planet has rings around it with a name starting with letter S?', 'role': 'user'}]}, 'output': 'Saturn'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_basic\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_basic(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:132: IndexError" }, "teardown": { "duration": 0.000298389233648777, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", "lineno": 138, "outcome": "skipped", "keywords": [ "test_chat_non_streaming_image[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", "parametrize", "pytestmark", "meta-llama/Llama-3.3-70B-Instruct-Turbo-case0", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo", "case_id": "case0" }, "setup": { "duration": 0.07295764330774546, "outcome": "passed" }, "call": { "duration": 0.0002657398581504822, "outcome": "skipped", "longrepr": "('/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py', 147, 'Skipped: Skipping test_chat_non_streaming_image for model meta-llama/Llama-3.3-70B-Instruct-Turbo on provider together based on config.')" }, "teardown": { "duration": 0.00035269279032945633, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", "lineno": 138, "outcome": "passed", "keywords": [ "test_chat_non_streaming_image[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", "parametrize", "pytestmark", "meta-llama/Llama-4-Scout-17B-16E-Instruct-case0", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", "case_id": "case0" }, "setup": { "duration": 0.07182978931814432, "outcome": "passed" }, "call": { "duration": 1.746746251359582, "outcome": "passed" }, "teardown": { "duration": 0.00026807747781276703, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", "lineno": 138, "outcome": "passed", "keywords": [ "test_chat_non_streaming_image[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", "parametrize", "pytestmark", "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", "case_id": "case0" }, "setup": { "duration": 0.07101599592715502, "outcome": "passed" }, "call": { "duration": 5.472218153998256, "outcome": "passed" }, "teardown": { "duration": 0.00029551703482866287, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", "lineno": 157, "outcome": "skipped", "keywords": [ "test_chat_streaming_image[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", "parametrize", "pytestmark", "meta-llama/Llama-3.3-70B-Instruct-Turbo-case0", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo", "case_id": "case0" }, "setup": { "duration": 0.07391759566962719, "outcome": "passed" }, "call": { "duration": 0.00026184599846601486, "outcome": "skipped", "longrepr": "('/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py', 166, 'Skipped: Skipping test_chat_streaming_image for model meta-llama/Llama-3.3-70B-Instruct-Turbo on provider together based on config.')" }, "teardown": { "duration": 0.0002144472673535347, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", "lineno": 157, "outcome": "failed", "keywords": [ "test_chat_streaming_image[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", "parametrize", "pytestmark", "meta-llama/Llama-4-Scout-17B-16E-Instruct-case0", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", "case_id": "case0" }, "setup": { "duration": 0.08702181186527014, "outcome": "passed" }, "call": { "duration": 2.685878618620336, "outcome": "failed", "crash": { "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", "lineno": 175, "message": "IndexError: list index out of range" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", "lineno": 175, "message": "IndexError" } ], "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': [{'text': 'What is in this image?', 'type': 'text'}, {'image_url': {...}, 'type': 'image_url'}], 'role': 'user'}]}, 'output': 'llama'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_image\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_image(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:175: IndexError" }, "teardown": { "duration": 0.00031331367790699005, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", "lineno": 157, "outcome": "failed", "keywords": [ "test_chat_streaming_image[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", "parametrize", "pytestmark", "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", "case_id": "case0" }, "setup": { "duration": 0.07287734653800726, "outcome": "passed" }, "call": { "duration": 3.985588035546243, "outcome": "failed", "crash": { "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", "lineno": 175, "message": "IndexError: list index out of range" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", "lineno": 175, "message": "IndexError" } ], "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': [{'text': 'What is in this image?', 'type': 'text'}, {'image_url': {...}, 'type': 'image_url'}], 'role': 'user'}]}, 'output': 'llama'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_image\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_image(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:175: IndexError" }, "teardown": { "duration": 0.0002881418913602829, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-calendar]", "lineno": 181, "outcome": "passed", "keywords": [ "test_chat_non_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-calendar]", "parametrize", "pytestmark", "meta-llama/Llama-3.3-70B-Instruct-Turbo-calendar", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo", "case_id": "calendar" }, "setup": { "duration": 0.07055713329464197, "outcome": "passed" }, "call": { "duration": 0.7881239131093025, "outcome": "passed" }, "teardown": { "duration": 0.00024167727679014206, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-math]", "lineno": 181, "outcome": "passed", "keywords": [ "test_chat_non_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-math]", "parametrize", "pytestmark", "meta-llama/Llama-3.3-70B-Instruct-Turbo-math", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo", "case_id": "math" }, "setup": { "duration": 0.07159801851958036, "outcome": "passed" }, "call": { "duration": 4.972125994041562, "outcome": "passed" }, "teardown": { "duration": 0.0002335989847779274, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-calendar]", "lineno": 181, "outcome": "passed", "keywords": [ "test_chat_non_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-calendar]", "parametrize", "pytestmark", "meta-llama/Llama-4-Scout-17B-16E-Instruct-calendar", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", "case_id": "calendar" }, "setup": { "duration": 0.07872365694493055, "outcome": "passed" }, "call": { "duration": 0.5325954724103212, "outcome": "passed" }, "teardown": { "duration": 0.0002604750916361809, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-math]", "lineno": 181, "outcome": "passed", "keywords": [ "test_chat_non_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-math]", "parametrize", "pytestmark", "meta-llama/Llama-4-Scout-17B-16E-Instruct-math", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", "case_id": "math" }, "setup": { "duration": 0.07101414445787668, "outcome": "passed" }, "call": { "duration": 2.5978550128638744, "outcome": "passed" }, "teardown": { "duration": 0.00032774079591035843, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-calendar]", "lineno": 181, "outcome": "passed", "keywords": [ "test_chat_non_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-calendar]", "parametrize", "pytestmark", "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-calendar", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", "case_id": "calendar" }, "setup": { "duration": 0.0737495282664895, "outcome": "passed" }, "call": { "duration": 0.7547545190900564, "outcome": "passed" }, "teardown": { "duration": 0.00024818163365125656, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-math]", "lineno": 181, "outcome": "passed", "keywords": [ "test_chat_non_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-math]", "parametrize", "pytestmark", "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-math", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", "case_id": "math" }, "setup": { "duration": 0.07616753969341516, "outcome": "passed" }, "call": { "duration": 4.4260268323123455, "outcome": "passed" }, "teardown": { "duration": 0.00023849774152040482, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-calendar]", "lineno": 204, "outcome": "passed", "keywords": [ "test_chat_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-calendar]", "parametrize", "pytestmark", "meta-llama/Llama-3.3-70B-Instruct-Turbo-calendar", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo", "case_id": "calendar" }, "setup": { "duration": 0.07280991226434708, "outcome": "passed" }, "call": { "duration": 1.2391796316951513, "outcome": "passed" }, "teardown": { "duration": 0.00022371485829353333, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-math]", "lineno": 204, "outcome": "passed", "keywords": [ "test_chat_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-math]", "parametrize", "pytestmark", "meta-llama/Llama-3.3-70B-Instruct-Turbo-math", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo", "case_id": "math" }, "setup": { "duration": 0.07361716963350773, "outcome": "passed" }, "call": { "duration": 6.5637129517272115, "outcome": "passed" }, "teardown": { "duration": 0.00024466682225465775, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-calendar]", "lineno": 204, "outcome": "failed", "keywords": [ "test_chat_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-calendar]", "parametrize", "pytestmark", "meta-llama/Llama-4-Scout-17B-16E-Instruct-calendar", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", "case_id": "calendar" }, "setup": { "duration": 0.07750869635492563, "outcome": "passed" }, "call": { "duration": 0.6057027634233236, "outcome": "failed", "crash": { "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", "lineno": 223, "message": "IndexError: list index out of range" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", "lineno": 223, "message": "IndexError" } ], "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'calendar', 'input': {'messages': [{'content': 'Extract the event information.', 'role': 'system'}, {'cont...articipants'], 'title': 'CalendarEvent', 'type': 'object'}}, 'type': 'json_schema'}}, 'output': 'valid_calendar_event'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_structured_output\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_structured_output(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n response_format=case[\"input\"][\"response_format\"],\n stream=True,\n )\n maybe_json_content = \"\"\n for chunk in response:\n> maybe_json_content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:223: IndexError" }, "teardown": { "duration": 0.00031183846294879913, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-math]", "lineno": 204, "outcome": "failed", "keywords": [ "test_chat_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-math]", "parametrize", "pytestmark", "meta-llama/Llama-4-Scout-17B-16E-Instruct-math", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", "case_id": "math" }, "setup": { "duration": 0.0722011923789978, "outcome": "passed" }, "call": { "duration": 3.635944495908916, "outcome": "failed", "crash": { "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", "lineno": 223, "message": "IndexError: list index out of range" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", "lineno": 223, "message": "IndexError" } ], "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'math', 'input': {'messages': [{'content': 'You are a helpful math tutor. Guide the user through the solut... ['steps', 'final_answer'], 'title': 'MathReasoning', ...}}, 'type': 'json_schema'}}, 'output': 'valid_math_reasoning'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_structured_output\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_structured_output(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n response_format=case[\"input\"][\"response_format\"],\n stream=True,\n )\n maybe_json_content = \"\"\n for chunk in response:\n> maybe_json_content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:223: IndexError" }, "teardown": { "duration": 0.00029693637043237686, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-calendar]", "lineno": 204, "outcome": "failed", "keywords": [ "test_chat_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-calendar]", "parametrize", "pytestmark", "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-calendar", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", "case_id": "calendar" }, "setup": { "duration": 0.07182575855404139, "outcome": "passed" }, "call": { "duration": 0.583567344583571, "outcome": "failed", "crash": { "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", "lineno": 223, "message": "IndexError: list index out of range" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", "lineno": 223, "message": "IndexError" } ], "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'calendar', 'input': {'messages': [{'content': 'Extract the event information.', 'role': 'system'}, {'cont...articipants'], 'title': 'CalendarEvent', 'type': 'object'}}, 'type': 'json_schema'}}, 'output': 'valid_calendar_event'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_structured_output\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_structured_output(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n response_format=case[\"input\"][\"response_format\"],\n stream=True,\n )\n maybe_json_content = \"\"\n for chunk in response:\n> maybe_json_content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:223: IndexError" }, "teardown": { "duration": 0.0002760365605354309, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-math]", "lineno": 204, "outcome": "failed", "keywords": [ "test_chat_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-math]", "parametrize", "pytestmark", "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-math", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", "case_id": "math" }, "setup": { "duration": 0.07146896701306105, "outcome": "passed" }, "call": { "duration": 3.9762256713584065, "outcome": "failed", "crash": { "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", "lineno": 223, "message": "IndexError: list index out of range" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", "lineno": 223, "message": "IndexError" } ], "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'math', 'input': {'messages': [{'content': 'You are a helpful math tutor. Guide the user through the solut... ['steps', 'final_answer'], 'title': 'MathReasoning', ...}}, 'type': 'json_schema'}}, 'output': 'valid_math_reasoning'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_structured_output\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_structured_output(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n response_format=case[\"input\"][\"response_format\"],\n stream=True,\n )\n maybe_json_content = \"\"\n for chunk in response:\n> maybe_json_content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:223: IndexError" }, "teardown": { "duration": 0.00030822213739156723, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", "lineno": 226, "outcome": "passed", "keywords": [ "test_chat_non_streaming_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", "parametrize", "pytestmark", "meta-llama/Llama-3.3-70B-Instruct-Turbo-case0", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo", "case_id": "case0" }, "setup": { "duration": 0.07303964532911777, "outcome": "passed" }, "call": { "duration": 0.7525951210409403, "outcome": "passed" }, "teardown": { "duration": 0.0002357764169573784, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", "lineno": 226, "outcome": "passed", "keywords": [ "test_chat_non_streaming_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", "parametrize", "pytestmark", "meta-llama/Llama-4-Scout-17B-16E-Instruct-case0", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", "case_id": "case0" }, "setup": { "duration": 0.07376459613442421, "outcome": "passed" }, "call": { "duration": 0.4931257301941514, "outcome": "passed" }, "teardown": { "duration": 0.0002181418240070343, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", "lineno": 226, "outcome": "passed", "keywords": [ "test_chat_non_streaming_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", "parametrize", "pytestmark", "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", "case_id": "case0" }, "setup": { "duration": 0.07053922209888697, "outcome": "passed" }, "call": { "duration": 0.5402723103761673, "outcome": "passed" }, "teardown": { "duration": 0.00022673048079013824, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", "lineno": 250, "outcome": "passed", "keywords": [ "test_chat_streaming_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", "parametrize", "pytestmark", "meta-llama/Llama-3.3-70B-Instruct-Turbo-case0", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo", "case_id": "case0" }, "setup": { "duration": 0.07116104569286108, "outcome": "passed" }, "call": { "duration": 0.816182347945869, "outcome": "passed" }, "teardown": { "duration": 0.0003773067146539688, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", "lineno": 250, "outcome": "failed", "keywords": [ "test_chat_streaming_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", "parametrize", "pytestmark", "meta-llama/Llama-4-Scout-17B-16E-Instruct-case0", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", "case_id": "case0" }, "setup": { "duration": 0.07258457317948341, "outcome": "passed" }, "call": { "duration": 0.4603788387030363, "outcome": "failed", "crash": { "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", "lineno": 688, "message": "IndexError: list index out of range" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", "lineno": 268, "message": "" }, { "path": "tests/verifications/openai_api/test_chat_completion.py", "lineno": 688, "message": "IndexError" } ], "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_tool_calling(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n stream = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n stream=True,\n )\n \n> _, tool_calls_buffer = _accumulate_streaming_tool_calls(stream)\n\ntests/verifications/openai_api/test_chat_completion.py:268: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\nstream = \n\n def _accumulate_streaming_tool_calls(stream):\n \"\"\"Accumulates tool calls and content from a streaming ChatCompletion response.\"\"\"\n tool_calls_buffer = {}\n current_id = None\n full_content = \"\" # Initialize content accumulator\n # Process streaming chunks\n for chunk in stream:\n> choice = chunk.choices[0]\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:688: IndexError" }, "teardown": { "duration": 0.0003149937838315964, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", "lineno": 250, "outcome": "failed", "keywords": [ "test_chat_streaming_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", "parametrize", "pytestmark", "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", "case_id": "case0" }, "setup": { "duration": 0.07071060407906771, "outcome": "passed" }, "call": { "duration": 0.5184564171358943, "outcome": "failed", "crash": { "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", "lineno": 688, "message": "IndexError: list index out of range" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", "lineno": 268, "message": "" }, { "path": "tests/verifications/openai_api/test_chat_completion.py", "lineno": 688, "message": "IndexError" } ], "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_tool_calling(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n stream = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n stream=True,\n )\n \n> _, tool_calls_buffer = _accumulate_streaming_tool_calls(stream)\n\ntests/verifications/openai_api/test_chat_completion.py:268: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\nstream = \n\n def _accumulate_streaming_tool_calls(stream):\n \"\"\"Accumulates tool calls and content from a streaming ChatCompletion response.\"\"\"\n tool_calls_buffer = {}\n current_id = None\n full_content = \"\" # Initialize content accumulator\n # Process streaming chunks\n for chunk in stream:\n> choice = chunk.choices[0]\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:688: IndexError" }, "teardown": { "duration": 0.00034826435148715973, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_required[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", "lineno": 278, "outcome": "passed", "keywords": [ "test_chat_non_streaming_tool_choice_required[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", "parametrize", "pytestmark", "meta-llama/Llama-3.3-70B-Instruct-Turbo-case0", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo", "case_id": "case0" }, "setup": { "duration": 0.08638827316462994, "outcome": "passed" }, "call": { "duration": 0.9104743050411344, "outcome": "passed" }, "teardown": { "duration": 0.0002754591405391693, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_required[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", "lineno": 278, "outcome": "passed", "keywords": [ "test_chat_non_streaming_tool_choice_required[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", "parametrize", "pytestmark", "meta-llama/Llama-4-Scout-17B-16E-Instruct-case0", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", "case_id": "case0" }, "setup": { "duration": 0.0742760868743062, "outcome": "passed" }, "call": { "duration": 0.462676758877933, "outcome": "passed" }, "teardown": { "duration": 0.00025860220193862915, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_required[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", "lineno": 278, "outcome": "passed", "keywords": [ "test_chat_non_streaming_tool_choice_required[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", "parametrize", "pytestmark", "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", "case_id": "case0" }, "setup": { "duration": 0.07174691930413246, "outcome": "passed" }, "call": { "duration": 0.9501504441723228, "outcome": "passed" }, "teardown": { "duration": 0.0002819998189806938, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_required[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", "lineno": 302, "outcome": "passed", "keywords": [ "test_chat_streaming_tool_choice_required[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", "parametrize", "pytestmark", "meta-llama/Llama-3.3-70B-Instruct-Turbo-case0", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo", "case_id": "case0" }, "setup": { "duration": 0.0707088652998209, "outcome": "passed" }, "call": { "duration": 1.5536296227946877, "outcome": "passed" }, "teardown": { "duration": 0.0002662409096956253, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_required[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", "lineno": 302, "outcome": "failed", "keywords": [ "test_chat_streaming_tool_choice_required[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", "parametrize", "pytestmark", "meta-llama/Llama-4-Scout-17B-16E-Instruct-case0", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", "case_id": "case0" }, "setup": { "duration": 0.07114748656749725, "outcome": "passed" }, "call": { "duration": 0.7880472335964441, "outcome": "failed", "crash": { "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", "lineno": 688, "message": "IndexError: list index out of range" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", "lineno": 321, "message": "" }, { "path": "tests/verifications/openai_api/test_chat_completion.py", "lineno": 688, "message": "IndexError" } ], "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"], # Reusing existing case for now\n ids=case_id_generator,\n )\n def test_chat_streaming_tool_choice_required(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n stream = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n tool_choice=\"required\", # Force tool call\n stream=True,\n )\n \n> _, tool_calls_buffer = _accumulate_streaming_tool_calls(stream)\n\ntests/verifications/openai_api/test_chat_completion.py:321: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\nstream = \n\n def _accumulate_streaming_tool_calls(stream):\n \"\"\"Accumulates tool calls and content from a streaming ChatCompletion response.\"\"\"\n tool_calls_buffer = {}\n current_id = None\n full_content = \"\" # Initialize content accumulator\n # Process streaming chunks\n for chunk in stream:\n> choice = chunk.choices[0]\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:688: IndexError" }, "teardown": { "duration": 0.00038287416100502014, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_required[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", "lineno": 302, "outcome": "failed", "keywords": [ "test_chat_streaming_tool_choice_required[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", "parametrize", "pytestmark", "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", "case_id": "case0" }, "setup": { "duration": 0.0964375538751483, "outcome": "passed" }, "call": { "duration": 0.5333329876884818, "outcome": "failed", "crash": { "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", "lineno": 688, "message": "IndexError: list index out of range" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", "lineno": 321, "message": "" }, { "path": "tests/verifications/openai_api/test_chat_completion.py", "lineno": 688, "message": "IndexError" } ], "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"], # Reusing existing case for now\n ids=case_id_generator,\n )\n def test_chat_streaming_tool_choice_required(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n stream = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n tool_choice=\"required\", # Force tool call\n stream=True,\n )\n \n> _, tool_calls_buffer = _accumulate_streaming_tool_calls(stream)\n\ntests/verifications/openai_api/test_chat_completion.py:321: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\nstream = \n\n def _accumulate_streaming_tool_calls(stream):\n \"\"\"Accumulates tool calls and content from a streaming ChatCompletion response.\"\"\"\n tool_calls_buffer = {}\n current_id = None\n full_content = \"\" # Initialize content accumulator\n # Process streaming chunks\n for chunk in stream:\n> choice = chunk.choices[0]\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:688: IndexError" }, "teardown": { "duration": 0.0003780834376811981, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_none[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", "lineno": 329, "outcome": "failed", "keywords": [ "test_chat_non_streaming_tool_choice_none[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", "parametrize", "pytestmark", "meta-llama/Llama-3.3-70B-Instruct-Turbo-case0", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo", "case_id": "case0" }, "setup": { "duration": 0.0716709028929472, "outcome": "passed" }, "call": { "duration": 2.4488353775814176, "outcome": "failed", "crash": { "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", "lineno": 349, "message": "AssertionError: Expected no tool calls when tool_choice='none'\nassert [ChatCompletionMessageToolCall(id='call_efyus3lwok2l7czire0yxkqm', function=Function(arguments='{\"location\":\"San Francisco, USA\"}', name='get_weather'), type='function', index=0)] is None\n + where [ChatCompletionMessageToolCall(id='call_efyus3lwok2l7czire0yxkqm', function=Function(arguments='{\"location\":\"San Francisco, USA\"}', name='get_weather'), type='function', index=0)] = ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_efyus3lwok2l7czire0yxkqm', function=Function(arguments='{\"location\":\"San Francisco, USA\"}', name='get_weather'), type='function', index=0)]).tool_calls\n + where ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_efyus3lwok2l7czire0yxkqm', function=Function(arguments='{\"location\":\"San Francisco, USA\"}', name='get_weather'), type='function', index=0)]) = Choice(finish_reason='tool_calls', index=0, logprobs=None, message=ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_efyus3lwok2l7czire0yxkqm', function=Function(arguments='{\"location\":\"San Francisco, USA\"}', name='get_weather'), type='function', index=0)]), seed=10291057599279921000).message" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", "lineno": 349, "message": "AssertionError" } ], "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-3.3-70B-Instruct-Turbo', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"], # Reusing existing case for now\n ids=case_id_generator,\n )\n def test_chat_non_streaming_tool_choice_none(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n tool_choice=\"none\",\n stream=False,\n )\n \n assert response.choices[0].message.role == \"assistant\"\n> assert response.choices[0].message.tool_calls is None, \"Expected no tool calls when tool_choice='none'\"\nE AssertionError: Expected no tool calls when tool_choice='none'\nE assert [ChatCompletionMessageToolCall(id='call_efyus3lwok2l7czire0yxkqm', function=Function(arguments='{\"location\":\"San Francisco, USA\"}', name='get_weather'), type='function', index=0)] is None\nE + where [ChatCompletionMessageToolCall(id='call_efyus3lwok2l7czire0yxkqm', function=Function(arguments='{\"location\":\"San Francisco, USA\"}', name='get_weather'), type='function', index=0)] = ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_efyus3lwok2l7czire0yxkqm', function=Function(arguments='{\"location\":\"San Francisco, USA\"}', name='get_weather'), type='function', index=0)]).tool_calls\nE + where ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_efyus3lwok2l7czire0yxkqm', function=Function(arguments='{\"location\":\"San Francisco, USA\"}', name='get_weather'), type='function', index=0)]) = Choice(finish_reason='tool_calls', index=0, logprobs=None, message=ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_efyus3lwok2l7czire0yxkqm', function=Function(arguments='{\"location\":\"San Francisco, USA\"}', name='get_weather'), type='function', index=0)]), seed=10291057599279921000).message\n\ntests/verifications/openai_api/test_chat_completion.py:349: AssertionError" }, "teardown": { "duration": 0.00031497515738010406, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_none[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", "lineno": 329, "outcome": "failed", "keywords": [ "test_chat_non_streaming_tool_choice_none[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", "parametrize", "pytestmark", "meta-llama/Llama-4-Scout-17B-16E-Instruct-case0", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", "case_id": "case0" }, "setup": { "duration": 0.07444119732826948, "outcome": "passed" }, "call": { "duration": 0.6588975815102458, "outcome": "failed", "crash": { "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", "lineno": 349, "message": "AssertionError: Expected no tool calls when tool_choice='none'\nassert [ChatCompletionMessageToolCall(id='call_biqe1tvadyupbqpgarqi91rn', function=Function(arguments='{\"location\":\"San Francisco, USA\"}', name='get_weather'), type='function', index=0)] is None\n + where [ChatCompletionMessageToolCall(id='call_biqe1tvadyupbqpgarqi91rn', function=Function(arguments='{\"location\":\"San Francisco, USA\"}', name='get_weather'), type='function', index=0)] = ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_biqe1tvadyupbqpgarqi91rn', function=Function(arguments='{\"location\":\"San Francisco, USA\"}', name='get_weather'), type='function', index=0)]).tool_calls\n + where ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_biqe1tvadyupbqpgarqi91rn', function=Function(arguments='{\"location\":\"San Francisco, USA\"}', name='get_weather'), type='function', index=0)]) = Choice(finish_reason='tool_calls', index=0, logprobs=None, message=ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_biqe1tvadyupbqpgarqi91rn', function=Function(arguments='{\"location\":\"San Francisco, USA\"}', name='get_weather'), type='function', index=0)]), seed=None).message" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", "lineno": 349, "message": "AssertionError" } ], "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"], # Reusing existing case for now\n ids=case_id_generator,\n )\n def test_chat_non_streaming_tool_choice_none(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n tool_choice=\"none\",\n stream=False,\n )\n \n assert response.choices[0].message.role == \"assistant\"\n> assert response.choices[0].message.tool_calls is None, \"Expected no tool calls when tool_choice='none'\"\nE AssertionError: Expected no tool calls when tool_choice='none'\nE assert [ChatCompletionMessageToolCall(id='call_biqe1tvadyupbqpgarqi91rn', function=Function(arguments='{\"location\":\"San Francisco, USA\"}', name='get_weather'), type='function', index=0)] is None\nE + where [ChatCompletionMessageToolCall(id='call_biqe1tvadyupbqpgarqi91rn', function=Function(arguments='{\"location\":\"San Francisco, USA\"}', name='get_weather'), type='function', index=0)] = ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_biqe1tvadyupbqpgarqi91rn', function=Function(arguments='{\"location\":\"San Francisco, USA\"}', name='get_weather'), type='function', index=0)]).tool_calls\nE + where ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_biqe1tvadyupbqpgarqi91rn', function=Function(arguments='{\"location\":\"San Francisco, USA\"}', name='get_weather'), type='function', index=0)]) = Choice(finish_reason='tool_calls', index=0, logprobs=None, message=ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_biqe1tvadyupbqpgarqi91rn', function=Function(arguments='{\"location\":\"San Francisco, USA\"}', name='get_weather'), type='function', index=0)]), seed=None).message\n\ntests/verifications/openai_api/test_chat_completion.py:349: AssertionError" }, "teardown": { "duration": 0.000378509983420372, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_none[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", "lineno": 329, "outcome": "failed", "keywords": [ "test_chat_non_streaming_tool_choice_none[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", "parametrize", "pytestmark", "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", "case_id": "case0" }, "setup": { "duration": 0.07218985445797443, "outcome": "passed" }, "call": { "duration": 0.46723131835460663, "outcome": "failed", "crash": { "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", "lineno": 349, "message": "AssertionError: Expected no tool calls when tool_choice='none'\nassert [ChatCompletionMessageToolCall(id='call_ie8s7wjdypmhv2zzqeqkrif5', function=Function(arguments='{\"location\":\"San Francisco\"}', name='get_weather'), type='function', index=0)] is None\n + where [ChatCompletionMessageToolCall(id='call_ie8s7wjdypmhv2zzqeqkrif5', function=Function(arguments='{\"location\":\"San Francisco\"}', name='get_weather'), type='function', index=0)] = ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_ie8s7wjdypmhv2zzqeqkrif5', function=Function(arguments='{\"location\":\"San Francisco\"}', name='get_weather'), type='function', index=0)]).tool_calls\n + where ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_ie8s7wjdypmhv2zzqeqkrif5', function=Function(arguments='{\"location\":\"San Francisco\"}', name='get_weather'), type='function', index=0)]) = Choice(finish_reason='tool_calls', index=0, logprobs=None, message=ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_ie8s7wjdypmhv2zzqeqkrif5', function=Function(arguments='{\"location\":\"San Francisco\"}', name='get_weather'), type='function', index=0)]), seed=None).message" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", "lineno": 349, "message": "AssertionError" } ], "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"], # Reusing existing case for now\n ids=case_id_generator,\n )\n def test_chat_non_streaming_tool_choice_none(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n tool_choice=\"none\",\n stream=False,\n )\n \n assert response.choices[0].message.role == \"assistant\"\n> assert response.choices[0].message.tool_calls is None, \"Expected no tool calls when tool_choice='none'\"\nE AssertionError: Expected no tool calls when tool_choice='none'\nE assert [ChatCompletionMessageToolCall(id='call_ie8s7wjdypmhv2zzqeqkrif5', function=Function(arguments='{\"location\":\"San Francisco\"}', name='get_weather'), type='function', index=0)] is None\nE + where [ChatCompletionMessageToolCall(id='call_ie8s7wjdypmhv2zzqeqkrif5', function=Function(arguments='{\"location\":\"San Francisco\"}', name='get_weather'), type='function', index=0)] = ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_ie8s7wjdypmhv2zzqeqkrif5', function=Function(arguments='{\"location\":\"San Francisco\"}', name='get_weather'), type='function', index=0)]).tool_calls\nE + where ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_ie8s7wjdypmhv2zzqeqkrif5', function=Function(arguments='{\"location\":\"San Francisco\"}', name='get_weather'), type='function', index=0)]) = Choice(finish_reason='tool_calls', index=0, logprobs=None, message=ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_ie8s7wjdypmhv2zzqeqkrif5', function=Function(arguments='{\"location\":\"San Francisco\"}', name='get_weather'), type='function', index=0)]), seed=None).message\n\ntests/verifications/openai_api/test_chat_completion.py:349: AssertionError" }, "teardown": { "duration": 0.00030298903584480286, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_none[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", "lineno": 352, "outcome": "failed", "keywords": [ "test_chat_streaming_tool_choice_none[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", "parametrize", "pytestmark", "meta-llama/Llama-3.3-70B-Instruct-Turbo-case0", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo", "case_id": "case0" }, "setup": { "duration": 0.07074183039367199, "outcome": "passed" }, "call": { "duration": 0.5427122255787253, "outcome": "failed", "crash": { "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", "lineno": 376, "message": "AssertionError: Expected no tool call chunks when tool_choice='none'\nassert not [ChoiceDeltaToolCall(index=0, id='call_b3357o23munvhfy8cqg7wwwd', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')]\n + where [ChoiceDeltaToolCall(index=0, id='call_b3357o23munvhfy8cqg7wwwd', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')] = ChoiceDelta(content=None, function_call=None, refusal=None, role=None, tool_calls=[ChoiceDeltaToolCall(index=0, id='call_b3357o23munvhfy8cqg7wwwd', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')]).tool_calls" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", "lineno": 376, "message": "AssertionError" } ], "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-3.3-70B-Instruct-Turbo', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"], # Reusing existing case for now\n ids=case_id_generator,\n )\n def test_chat_streaming_tool_choice_none(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n stream = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n tool_choice=\"none\",\n stream=True,\n )\n \n content = \"\"\n for chunk in stream:\n delta = chunk.choices[0].delta\n if delta.content:\n content += delta.content\n> assert not delta.tool_calls, \"Expected no tool call chunks when tool_choice='none'\"\nE AssertionError: Expected no tool call chunks when tool_choice='none'\nE assert not [ChoiceDeltaToolCall(index=0, id='call_b3357o23munvhfy8cqg7wwwd', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')]\nE + where [ChoiceDeltaToolCall(index=0, id='call_b3357o23munvhfy8cqg7wwwd', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')] = ChoiceDelta(content=None, function_call=None, refusal=None, role=None, tool_calls=[ChoiceDeltaToolCall(index=0, id='call_b3357o23munvhfy8cqg7wwwd', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')]).tool_calls\n\ntests/verifications/openai_api/test_chat_completion.py:376: AssertionError" }, "teardown": { "duration": 0.00037453509867191315, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_none[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", "lineno": 352, "outcome": "failed", "keywords": [ "test_chat_streaming_tool_choice_none[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", "parametrize", "pytestmark", "meta-llama/Llama-4-Scout-17B-16E-Instruct-case0", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", "case_id": "case0" }, "setup": { "duration": 0.07261296082288027, "outcome": "passed" }, "call": { "duration": 0.44528466928750277, "outcome": "failed", "crash": { "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", "lineno": 376, "message": "AssertionError: Expected no tool call chunks when tool_choice='none'\nassert not [ChoiceDeltaToolCall(index=0, id='call_00xpv69hsw7zrbkwnbq9ewxw', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')]\n + where [ChoiceDeltaToolCall(index=0, id='call_00xpv69hsw7zrbkwnbq9ewxw', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')] = ChoiceDelta(content=None, function_call=None, refusal=None, role=None, tool_calls=[ChoiceDeltaToolCall(index=0, id='call_00xpv69hsw7zrbkwnbq9ewxw', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')]).tool_calls" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", "lineno": 376, "message": "AssertionError" } ], "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"], # Reusing existing case for now\n ids=case_id_generator,\n )\n def test_chat_streaming_tool_choice_none(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n stream = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n tool_choice=\"none\",\n stream=True,\n )\n \n content = \"\"\n for chunk in stream:\n delta = chunk.choices[0].delta\n if delta.content:\n content += delta.content\n> assert not delta.tool_calls, \"Expected no tool call chunks when tool_choice='none'\"\nE AssertionError: Expected no tool call chunks when tool_choice='none'\nE assert not [ChoiceDeltaToolCall(index=0, id='call_00xpv69hsw7zrbkwnbq9ewxw', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')]\nE + where [ChoiceDeltaToolCall(index=0, id='call_00xpv69hsw7zrbkwnbq9ewxw', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')] = ChoiceDelta(content=None, function_call=None, refusal=None, role=None, tool_calls=[ChoiceDeltaToolCall(index=0, id='call_00xpv69hsw7zrbkwnbq9ewxw', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')]).tool_calls\n\ntests/verifications/openai_api/test_chat_completion.py:376: AssertionError" }, "teardown": { "duration": 0.00035339873284101486, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_none[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", "lineno": 352, "outcome": "failed", "keywords": [ "test_chat_streaming_tool_choice_none[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", "parametrize", "pytestmark", "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", "case_id": "case0" }, "setup": { "duration": 0.0712411506101489, "outcome": "passed" }, "call": { "duration": 0.792656259611249, "outcome": "failed", "crash": { "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", "lineno": 376, "message": "AssertionError: Expected no tool call chunks when tool_choice='none'\nassert not [ChoiceDeltaToolCall(index=0, id='call_kiye5b1nrq2b9yf0vy5feki8', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')]\n + where [ChoiceDeltaToolCall(index=0, id='call_kiye5b1nrq2b9yf0vy5feki8', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')] = ChoiceDelta(content=None, function_call=None, refusal=None, role=None, tool_calls=[ChoiceDeltaToolCall(index=0, id='call_kiye5b1nrq2b9yf0vy5feki8', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')]).tool_calls" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", "lineno": 376, "message": "AssertionError" } ], "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"], # Reusing existing case for now\n ids=case_id_generator,\n )\n def test_chat_streaming_tool_choice_none(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n stream = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n tool_choice=\"none\",\n stream=True,\n )\n \n content = \"\"\n for chunk in stream:\n delta = chunk.choices[0].delta\n if delta.content:\n content += delta.content\n> assert not delta.tool_calls, \"Expected no tool call chunks when tool_choice='none'\"\nE AssertionError: Expected no tool call chunks when tool_choice='none'\nE assert not [ChoiceDeltaToolCall(index=0, id='call_kiye5b1nrq2b9yf0vy5feki8', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')]\nE + where [ChoiceDeltaToolCall(index=0, id='call_kiye5b1nrq2b9yf0vy5feki8', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')] = ChoiceDelta(content=None, function_call=None, refusal=None, role=None, tool_calls=[ChoiceDeltaToolCall(index=0, id='call_kiye5b1nrq2b9yf0vy5feki8', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')]).tool_calls\n\ntests/verifications/openai_api/test_chat_completion.py:376: AssertionError" }, "teardown": { "duration": 0.0003104889765381813, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-text_then_weather_tool]", "lineno": 380, "outcome": "failed", "keywords": [ "test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-text_then_weather_tool]", "parametrize", "pytestmark", "meta-llama/Llama-3.3-70B-Instruct-Turbo-text_then_weather_tool", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo", "case_id": "text_then_weather_tool" }, "setup": { "duration": 0.07114225905388594, "outcome": "passed" }, "call": { "duration": 0.9180215634405613, "outcome": "failed", "crash": { "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", "lineno": 439, "message": "AssertionError: Expected 0 tool calls, but got 1\nassert 1 == 0\n + where 1 = len(([ChatCompletionMessageToolCall(id='call_2qsbu2opq85hsx2fyb643xix', function=Function(arguments='{\"location\":\"San Francisco, CA\"}', name='get_weather'), type='function', index=0)]))\n + where [ChatCompletionMessageToolCall(id='call_2qsbu2opq85hsx2fyb643xix', function=Function(arguments='{\"location\":\"San Francisco, CA\"}', name='get_weather'), type='function', index=0)] = ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_2qsbu2opq85hsx2fyb643xix', function=Function(arguments='{\"location\":\"San Francisco, CA\"}', name='get_weather'), type='function', index=0)]).tool_calls" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", "lineno": 439, "message": "AssertionError" } ], "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-3.3-70B-Instruct-Turbo', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'text_then_weather_tool', 'expected': [{'answer': ['sol'], 'num_tool_calls': 0}, {'num_tool_calls': 1, 'to...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_non_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\"\n Test cases for multi-turn tool calling.\n Tool calls are asserted.\n Tool responses are provided in the test case.\n Final response is asserted.\n \"\"\"\n \n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n # Create a copy of the messages list to avoid modifying the original\n messages = []\n tools = case[\"input\"][\"tools\"]\n # Use deepcopy to prevent modification across runs/parametrization\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n # keep going until either\n # 1. we have messages to test in multi-turn\n # 2. no messages but last message is tool response\n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n # do not take new messages if last message is tool response\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n # Ensure new_messages is a list of message objects\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n # If it's a single message object, add it directly\n messages.append(new_messages)\n \n # --- API Call ---\n response = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=False,\n )\n \n # --- Process Response ---\n assistant_message = response.choices[0].message\n messages.append(assistant_message.model_dump(exclude_unset=True))\n \n assert assistant_message.role == \"assistant\"\n \n # Get the expected result data\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n # --- Assertions based on expected result ---\n> assert len(assistant_message.tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(assistant_message.tool_calls or [])}\"\n )\nE AssertionError: Expected 0 tool calls, but got 1\nE assert 1 == 0\nE + where 1 = len(([ChatCompletionMessageToolCall(id='call_2qsbu2opq85hsx2fyb643xix', function=Function(arguments='{\"location\":\"San Francisco, CA\"}', name='get_weather'), type='function', index=0)]))\nE + where [ChatCompletionMessageToolCall(id='call_2qsbu2opq85hsx2fyb643xix', function=Function(arguments='{\"location\":\"San Francisco, CA\"}', name='get_weather'), type='function', index=0)] = ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_2qsbu2opq85hsx2fyb643xix', function=Function(arguments='{\"location\":\"San Francisco, CA\"}', name='get_weather'), type='function', index=0)]).tool_calls\n\ntests/verifications/openai_api/test_chat_completion.py:439: AssertionError" }, "teardown": { "duration": 0.00033766962587833405, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-weather_tool_then_text]", "lineno": 380, "outcome": "passed", "keywords": [ "test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-weather_tool_then_text]", "parametrize", "pytestmark", "meta-llama/Llama-3.3-70B-Instruct-Turbo-weather_tool_then_text", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo", "case_id": "weather_tool_then_text" }, "setup": { "duration": 0.07061670627444983, "outcome": "passed" }, "call": { "duration": 1.9790025427937508, "outcome": "passed" }, "teardown": { "duration": 0.0002498161047697067, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-add_product_tool]", "lineno": 380, "outcome": "passed", "keywords": [ "test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-add_product_tool]", "parametrize", "pytestmark", "meta-llama/Llama-3.3-70B-Instruct-Turbo-add_product_tool", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo", "case_id": "add_product_tool" }, "setup": { "duration": 0.07382979057729244, "outcome": "passed" }, "call": { "duration": 0.8887521298602223, "outcome": "passed" }, "teardown": { "duration": 0.000309688039124012, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-get_then_create_event_tool]", "lineno": 380, "outcome": "passed", "keywords": [ "test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-get_then_create_event_tool]", "parametrize", "pytestmark", "meta-llama/Llama-3.3-70B-Instruct-Turbo-get_then_create_event_tool", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo", "case_id": "get_then_create_event_tool" }, "setup": { "duration": 0.07273934967815876, "outcome": "passed" }, "call": { "duration": 4.823556405492127, "outcome": "passed" }, "teardown": { "duration": 0.00023336336016654968, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-compare_monthly_expense_tool]", "lineno": 380, "outcome": "passed", "keywords": [ "test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-compare_monthly_expense_tool]", "parametrize", "pytestmark", "meta-llama/Llama-3.3-70B-Instruct-Turbo-compare_monthly_expense_tool", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo", "case_id": "compare_monthly_expense_tool" }, "setup": { "duration": 0.07417754456400871, "outcome": "passed" }, "call": { "duration": 15.95331169757992, "outcome": "passed" }, "teardown": { "duration": 0.00023916177451610565, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-text_then_weather_tool]", "lineno": 380, "outcome": "failed", "keywords": [ "test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-text_then_weather_tool]", "parametrize", "pytestmark", "meta-llama/Llama-4-Scout-17B-16E-Instruct-text_then_weather_tool", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", "case_id": "text_then_weather_tool" }, "setup": { "duration": 0.07702007982879877, "outcome": "passed" }, "call": { "duration": 0.735025598667562, "outcome": "failed", "crash": { "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", "lineno": 467, "message": "AssertionError: Expected one of ['sol'] in content, but got: 'I cannot perform this task as it requires additional functionality that is not available in the given functions.'\nassert False\n + where False = any(. at 0x7f3127d3f610>)" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", "lineno": 467, "message": "AssertionError" } ], "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'text_then_weather_tool', 'expected': [{'answer': ['sol'], 'num_tool_calls': 0}, {'num_tool_calls': 1, 'to...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_non_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\"\n Test cases for multi-turn tool calling.\n Tool calls are asserted.\n Tool responses are provided in the test case.\n Final response is asserted.\n \"\"\"\n \n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n # Create a copy of the messages list to avoid modifying the original\n messages = []\n tools = case[\"input\"][\"tools\"]\n # Use deepcopy to prevent modification across runs/parametrization\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n # keep going until either\n # 1. we have messages to test in multi-turn\n # 2. no messages but last message is tool response\n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n # do not take new messages if last message is tool response\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n # Ensure new_messages is a list of message objects\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n # If it's a single message object, add it directly\n messages.append(new_messages)\n \n # --- API Call ---\n response = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=False,\n )\n \n # --- Process Response ---\n assistant_message = response.choices[0].message\n messages.append(assistant_message.model_dump(exclude_unset=True))\n \n assert assistant_message.role == \"assistant\"\n \n # Get the expected result data\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n # --- Assertions based on expected result ---\n assert len(assistant_message.tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(assistant_message.tool_calls or [])}\"\n )\n \n if num_tool_calls > 0:\n tool_call = assistant_message.tool_calls[0]\n assert tool_call.function.name == expected[\"tool_name\"], (\n f\"Expected tool '{expected['tool_name']}', got '{tool_call.function.name}'\"\n )\n # Parse the JSON string arguments before comparing\n actual_arguments = json.loads(tool_call.function.arguments)\n assert actual_arguments == expected[\"tool_arguments\"], (\n f\"Expected arguments '{expected['tool_arguments']}', got '{actual_arguments}'\"\n )\n \n # Prepare and append the tool response for the next turn\n tool_response = tool_responses.pop(0)\n messages.append(\n {\n \"role\": \"tool\",\n \"tool_call_id\": tool_call.id,\n \"content\": tool_response[\"response\"],\n }\n )\n else:\n assert assistant_message.content is not None, \"Expected content, but none received.\"\n expected_answers = expected[\"answer\"] # This is now a list\n content_lower = assistant_message.content.lower()\n> assert any(ans.lower() in content_lower for ans in expected_answers), (\n f\"Expected one of {expected_answers} in content, but got: '{assistant_message.content}'\"\n )\nE AssertionError: Expected one of ['sol'] in content, but got: 'I cannot perform this task as it requires additional functionality that is not available in the given functions.'\nE assert False\nE + where False = any(. at 0x7f3127d3f610>)\n\ntests/verifications/openai_api/test_chat_completion.py:467: AssertionError" }, "teardown": { "duration": 0.00030991993844509125, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-weather_tool_then_text]", "lineno": 380, "outcome": "passed", "keywords": [ "test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-weather_tool_then_text]", "parametrize", "pytestmark", "meta-llama/Llama-4-Scout-17B-16E-Instruct-weather_tool_then_text", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", "case_id": "weather_tool_then_text" }, "setup": { "duration": 0.07215368840843439, "outcome": "passed" }, "call": { "duration": 2.01631036773324, "outcome": "passed" }, "teardown": { "duration": 0.0002633333206176758, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-add_product_tool]", "lineno": 380, "outcome": "passed", "keywords": [ "test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-add_product_tool]", "parametrize", "pytestmark", "meta-llama/Llama-4-Scout-17B-16E-Instruct-add_product_tool", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", "case_id": "add_product_tool" }, "setup": { "duration": 0.0734679875895381, "outcome": "passed" }, "call": { "duration": 1.122296486981213, "outcome": "passed" }, "teardown": { "duration": 0.0003703571856021881, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-get_then_create_event_tool]", "lineno": 380, "outcome": "passed", "keywords": [ "test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-get_then_create_event_tool]", "parametrize", "pytestmark", "meta-llama/Llama-4-Scout-17B-16E-Instruct-get_then_create_event_tool", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", "case_id": "get_then_create_event_tool" }, "setup": { "duration": 0.07337972242385149, "outcome": "passed" }, "call": { "duration": 5.233728421851993, "outcome": "passed" }, "teardown": { "duration": 0.00024013593792915344, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-compare_monthly_expense_tool]", "lineno": 380, "outcome": "passed", "keywords": [ "test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-compare_monthly_expense_tool]", "parametrize", "pytestmark", "meta-llama/Llama-4-Scout-17B-16E-Instruct-compare_monthly_expense_tool", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", "case_id": "compare_monthly_expense_tool" }, "setup": { "duration": 0.07165702432394028, "outcome": "passed" }, "call": { "duration": 2.579101173207164, "outcome": "passed" }, "teardown": { "duration": 0.00022159796208143234, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-text_then_weather_tool]", "lineno": 380, "outcome": "failed", "keywords": [ "test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-text_then_weather_tool]", "parametrize", "pytestmark", "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-text_then_weather_tool", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", "case_id": "text_then_weather_tool" }, "setup": { "duration": 0.07499713078141212, "outcome": "passed" }, "call": { "duration": 0.345451476983726, "outcome": "failed", "crash": { "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", "lineno": 467, "message": "AssertionError: Expected one of ['sol'] in content, but got: '{\"name\": null, \"parameters\": null}'\nassert False\n + where False = any(. at 0x7f3127fc2ab0>)" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", "lineno": 467, "message": "AssertionError" } ], "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'text_then_weather_tool', 'expected': [{'answer': ['sol'], 'num_tool_calls': 0}, {'num_tool_calls': 1, 'to...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_non_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\"\n Test cases for multi-turn tool calling.\n Tool calls are asserted.\n Tool responses are provided in the test case.\n Final response is asserted.\n \"\"\"\n \n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n # Create a copy of the messages list to avoid modifying the original\n messages = []\n tools = case[\"input\"][\"tools\"]\n # Use deepcopy to prevent modification across runs/parametrization\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n # keep going until either\n # 1. we have messages to test in multi-turn\n # 2. no messages but last message is tool response\n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n # do not take new messages if last message is tool response\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n # Ensure new_messages is a list of message objects\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n # If it's a single message object, add it directly\n messages.append(new_messages)\n \n # --- API Call ---\n response = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=False,\n )\n \n # --- Process Response ---\n assistant_message = response.choices[0].message\n messages.append(assistant_message.model_dump(exclude_unset=True))\n \n assert assistant_message.role == \"assistant\"\n \n # Get the expected result data\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n # --- Assertions based on expected result ---\n assert len(assistant_message.tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(assistant_message.tool_calls or [])}\"\n )\n \n if num_tool_calls > 0:\n tool_call = assistant_message.tool_calls[0]\n assert tool_call.function.name == expected[\"tool_name\"], (\n f\"Expected tool '{expected['tool_name']}', got '{tool_call.function.name}'\"\n )\n # Parse the JSON string arguments before comparing\n actual_arguments = json.loads(tool_call.function.arguments)\n assert actual_arguments == expected[\"tool_arguments\"], (\n f\"Expected arguments '{expected['tool_arguments']}', got '{actual_arguments}'\"\n )\n \n # Prepare and append the tool response for the next turn\n tool_response = tool_responses.pop(0)\n messages.append(\n {\n \"role\": \"tool\",\n \"tool_call_id\": tool_call.id,\n \"content\": tool_response[\"response\"],\n }\n )\n else:\n assert assistant_message.content is not None, \"Expected content, but none received.\"\n expected_answers = expected[\"answer\"] # This is now a list\n content_lower = assistant_message.content.lower()\n> assert any(ans.lower() in content_lower for ans in expected_answers), (\n f\"Expected one of {expected_answers} in content, but got: '{assistant_message.content}'\"\n )\nE AssertionError: Expected one of ['sol'] in content, but got: '{\"name\": null, \"parameters\": null}'\nE assert False\nE + where False = any(. at 0x7f3127fc2ab0>)\n\ntests/verifications/openai_api/test_chat_completion.py:467: AssertionError" }, "teardown": { "duration": 0.0003060000017285347, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-weather_tool_then_text]", "lineno": 380, "outcome": "passed", "keywords": [ "test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-weather_tool_then_text]", "parametrize", "pytestmark", "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-weather_tool_then_text", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", "case_id": "weather_tool_then_text" }, "setup": { "duration": 0.07219678908586502, "outcome": "passed" }, "call": { "duration": 2.1842003548517823, "outcome": "passed" }, "teardown": { "duration": 0.00023157335817813873, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-add_product_tool]", "lineno": 380, "outcome": "passed", "keywords": [ "test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-add_product_tool]", "parametrize", "pytestmark", "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-add_product_tool", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", "case_id": "add_product_tool" }, "setup": { "duration": 0.07263681385666132, "outcome": "passed" }, "call": { "duration": 2.064652710221708, "outcome": "passed" }, "teardown": { "duration": 0.0002187909558415413, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-get_then_create_event_tool]", "lineno": 380, "outcome": "passed", "keywords": [ "test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-get_then_create_event_tool]", "parametrize", "pytestmark", "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-get_then_create_event_tool", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", "case_id": "get_then_create_event_tool" }, "setup": { "duration": 0.07442002464085817, "outcome": "passed" }, "call": { "duration": 4.3001746302470565, "outcome": "passed" }, "teardown": { "duration": 0.0002673650160431862, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-compare_monthly_expense_tool]", "lineno": 380, "outcome": "passed", "keywords": [ "test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-compare_monthly_expense_tool]", "parametrize", "pytestmark", "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-compare_monthly_expense_tool", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", "case_id": "compare_monthly_expense_tool" }, "setup": { "duration": 0.0752437636256218, "outcome": "passed" }, "call": { "duration": 3.1100223967805505, "outcome": "passed" }, "teardown": { "duration": 0.00023829564452171326, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-text_then_weather_tool]", "lineno": 471, "outcome": "failed", "keywords": [ "test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-text_then_weather_tool]", "parametrize", "pytestmark", "meta-llama/Llama-3.3-70B-Instruct-Turbo-text_then_weather_tool", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo", "case_id": "text_then_weather_tool" }, "setup": { "duration": 0.07147279102355242, "outcome": "passed" }, "call": { "duration": 0.7697121743112803, "outcome": "failed", "crash": { "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", "lineno": 521, "message": "AssertionError: Expected 0 tool calls, but got 1\nassert 1 == 0\n + where 1 = len(([{'function': {'arguments': '{\"location\":\"San Francisco, CA\"}', 'name': 'get_weather'}, 'id': 'call_kpb5gnyu9klkx2yzft43dxez', 'type': 'function'}]))" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", "lineno": 521, "message": "AssertionError" } ], "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-3.3-70B-Instruct-Turbo', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'text_then_weather_tool', 'expected': [{'answer': ['sol'], 'num_tool_calls': 0}, {'num_tool_calls': 1, 'to...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n \n # --- Construct Assistant Message for History ---\n assistant_message_dict = {\"role\": \"assistant\"}\n if accumulated_content:\n assistant_message_dict[\"content\"] = accumulated_content\n if accumulated_tool_calls:\n assistant_message_dict[\"tool_calls\"] = accumulated_tool_calls\n \n messages.append(assistant_message_dict)\n \n # --- Assertions ---\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n> assert len(accumulated_tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(accumulated_tool_calls or [])}\"\n )\nE AssertionError: Expected 0 tool calls, but got 1\nE assert 1 == 0\nE + where 1 = len(([{'function': {'arguments': '{\"location\":\"San Francisco, CA\"}', 'name': 'get_weather'}, 'id': 'call_kpb5gnyu9klkx2yzft43dxez', 'type': 'function'}]))\n\ntests/verifications/openai_api/test_chat_completion.py:521: AssertionError" }, "teardown": { "duration": 0.0003948565572500229, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-weather_tool_then_text]", "lineno": 471, "outcome": "failed", "keywords": [ "test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-weather_tool_then_text]", "parametrize", "pytestmark", "meta-llama/Llama-3.3-70B-Instruct-Turbo-weather_tool_then_text", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo", "case_id": "weather_tool_then_text" }, "setup": { "duration": 0.0739708598703146, "outcome": "passed" }, "call": { "duration": 0.8291563373059034, "outcome": "failed", "crash": { "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", "lineno": 547, "message": "AssertionError: Expected content, but none received.\nassert ('' is not None and '' != '')" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", "lineno": 547, "message": "AssertionError" } ], "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-3.3-70B-Instruct-Turbo', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'weather_tool_then_text', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'location': 'San Francisco...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n \n # --- Construct Assistant Message for History ---\n assistant_message_dict = {\"role\": \"assistant\"}\n if accumulated_content:\n assistant_message_dict[\"content\"] = accumulated_content\n if accumulated_tool_calls:\n assistant_message_dict[\"tool_calls\"] = accumulated_tool_calls\n \n messages.append(assistant_message_dict)\n \n # --- Assertions ---\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n assert len(accumulated_tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(accumulated_tool_calls or [])}\"\n )\n \n if num_tool_calls > 0:\n # Use the first accumulated tool call for assertion\n tool_call = accumulated_tool_calls[0]\n assert tool_call[\"function\"][\"name\"] == expected[\"tool_name\"], (\n f\"Expected tool '{expected['tool_name']}', got '{tool_call['function']['name']}'\"\n )\n # Parse the accumulated arguments string for comparison\n actual_arguments = json.loads(tool_call[\"function\"][\"arguments\"])\n assert actual_arguments == expected[\"tool_arguments\"], (\n f\"Expected arguments '{expected['tool_arguments']}', got '{actual_arguments}'\"\n )\n \n # Prepare and append the tool response for the next turn\n tool_response = tool_responses.pop(0)\n messages.append(\n {\n \"role\": \"tool\",\n \"tool_call_id\": tool_call[\"id\"],\n \"content\": tool_response[\"response\"],\n }\n )\n else:\n> assert accumulated_content is not None and accumulated_content != \"\", \"Expected content, but none received.\"\nE AssertionError: Expected content, but none received.\nE assert ('' is not None and '' != '')\n\ntests/verifications/openai_api/test_chat_completion.py:547: AssertionError" }, "teardown": { "duration": 0.0003177514299750328, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-add_product_tool]", "lineno": 471, "outcome": "passed", "keywords": [ "test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-add_product_tool]", "parametrize", "pytestmark", "meta-llama/Llama-3.3-70B-Instruct-Turbo-add_product_tool", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo", "case_id": "add_product_tool" }, "setup": { "duration": 0.07326000090688467, "outcome": "passed" }, "call": { "duration": 2.20385564584285, "outcome": "passed" }, "teardown": { "duration": 0.00035414285957813263, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-get_then_create_event_tool]", "lineno": 471, "outcome": "failed", "keywords": [ "test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-get_then_create_event_tool]", "parametrize", "pytestmark", "meta-llama/Llama-3.3-70B-Instruct-Turbo-get_then_create_event_tool", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo", "case_id": "get_then_create_event_tool" }, "setup": { "duration": 0.10739517118781805, "outcome": "passed" }, "call": { "duration": 3.1176233775913715, "outcome": "failed", "crash": { "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", "lineno": 547, "message": "AssertionError: Expected content, but none received.\nassert ('' is not None and '' != '')" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", "lineno": 547, "message": "AssertionError" } ], "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-3.3-70B-Instruct-Turbo', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'get_then_create_event_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'date': '2025-03-03', ...ents found for 2025-03-03 at 10:00'}\"}, {'response': \"{'response': 'Successfully created new event with id: e_123'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n \n # --- Construct Assistant Message for History ---\n assistant_message_dict = {\"role\": \"assistant\"}\n if accumulated_content:\n assistant_message_dict[\"content\"] = accumulated_content\n if accumulated_tool_calls:\n assistant_message_dict[\"tool_calls\"] = accumulated_tool_calls\n \n messages.append(assistant_message_dict)\n \n # --- Assertions ---\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n assert len(accumulated_tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(accumulated_tool_calls or [])}\"\n )\n \n if num_tool_calls > 0:\n # Use the first accumulated tool call for assertion\n tool_call = accumulated_tool_calls[0]\n assert tool_call[\"function\"][\"name\"] == expected[\"tool_name\"], (\n f\"Expected tool '{expected['tool_name']}', got '{tool_call['function']['name']}'\"\n )\n # Parse the accumulated arguments string for comparison\n actual_arguments = json.loads(tool_call[\"function\"][\"arguments\"])\n assert actual_arguments == expected[\"tool_arguments\"], (\n f\"Expected arguments '{expected['tool_arguments']}', got '{actual_arguments}'\"\n )\n \n # Prepare and append the tool response for the next turn\n tool_response = tool_responses.pop(0)\n messages.append(\n {\n \"role\": \"tool\",\n \"tool_call_id\": tool_call[\"id\"],\n \"content\": tool_response[\"response\"],\n }\n )\n else:\n> assert accumulated_content is not None and accumulated_content != \"\", \"Expected content, but none received.\"\nE AssertionError: Expected content, but none received.\nE assert ('' is not None and '' != '')\n\ntests/verifications/openai_api/test_chat_completion.py:547: AssertionError" }, "teardown": { "duration": 0.0003009289503097534, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-compare_monthly_expense_tool]", "lineno": 471, "outcome": "failed", "keywords": [ "test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-compare_monthly_expense_tool]", "parametrize", "pytestmark", "meta-llama/Llama-3.3-70B-Instruct-Turbo-compare_monthly_expense_tool", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo", "case_id": "compare_monthly_expense_tool" }, "setup": { "duration": 0.07599783595651388, "outcome": "passed" }, "call": { "duration": 2.0640214709565043, "outcome": "failed", "crash": { "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", "lineno": 547, "message": "AssertionError: Expected content, but none received.\nassert ('' is not None and '' != '')" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", "lineno": 547, "message": "AssertionError" } ], "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-3.3-70B-Instruct-Turbo', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'compare_monthly_expense_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'month': 1, 'year': ... 'Total expenses for January 2025: $1000'}\"}, {'response': \"{'response': 'Total expenses for February 2024: $2000'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n \n # --- Construct Assistant Message for History ---\n assistant_message_dict = {\"role\": \"assistant\"}\n if accumulated_content:\n assistant_message_dict[\"content\"] = accumulated_content\n if accumulated_tool_calls:\n assistant_message_dict[\"tool_calls\"] = accumulated_tool_calls\n \n messages.append(assistant_message_dict)\n \n # --- Assertions ---\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n assert len(accumulated_tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(accumulated_tool_calls or [])}\"\n )\n \n if num_tool_calls > 0:\n # Use the first accumulated tool call for assertion\n tool_call = accumulated_tool_calls[0]\n assert tool_call[\"function\"][\"name\"] == expected[\"tool_name\"], (\n f\"Expected tool '{expected['tool_name']}', got '{tool_call['function']['name']}'\"\n )\n # Parse the accumulated arguments string for comparison\n actual_arguments = json.loads(tool_call[\"function\"][\"arguments\"])\n assert actual_arguments == expected[\"tool_arguments\"], (\n f\"Expected arguments '{expected['tool_arguments']}', got '{actual_arguments}'\"\n )\n \n # Prepare and append the tool response for the next turn\n tool_response = tool_responses.pop(0)\n messages.append(\n {\n \"role\": \"tool\",\n \"tool_call_id\": tool_call[\"id\"],\n \"content\": tool_response[\"response\"],\n }\n )\n else:\n> assert accumulated_content is not None and accumulated_content != \"\", \"Expected content, but none received.\"\nE AssertionError: Expected content, but none received.\nE assert ('' is not None and '' != '')\n\ntests/verifications/openai_api/test_chat_completion.py:547: AssertionError" }, "teardown": { "duration": 0.00029294565320014954, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-text_then_weather_tool]", "lineno": 471, "outcome": "failed", "keywords": [ "test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-text_then_weather_tool]", "parametrize", "pytestmark", "meta-llama/Llama-4-Scout-17B-16E-Instruct-text_then_weather_tool", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", "case_id": "text_then_weather_tool" }, "setup": { "duration": 0.07202461268752813, "outcome": "passed" }, "call": { "duration": 0.6093930723145604, "outcome": "failed", "crash": { "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", "lineno": 688, "message": "IndexError: list index out of range" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", "lineno": 506, "message": "" }, { "path": "tests/verifications/openai_api/test_chat_completion.py", "lineno": 688, "message": "IndexError" } ], "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'text_then_weather_tool', 'expected': [{'answer': ['sol'], 'num_tool_calls': 0}, {'num_tool_calls': 1, 'to...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n> accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n\ntests/verifications/openai_api/test_chat_completion.py:506: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\nstream = \n\n def _accumulate_streaming_tool_calls(stream):\n \"\"\"Accumulates tool calls and content from a streaming ChatCompletion response.\"\"\"\n tool_calls_buffer = {}\n current_id = None\n full_content = \"\" # Initialize content accumulator\n # Process streaming chunks\n for chunk in stream:\n> choice = chunk.choices[0]\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:688: IndexError" }, "teardown": { "duration": 0.00030322466045618057, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-weather_tool_then_text]", "lineno": 471, "outcome": "failed", "keywords": [ "test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-weather_tool_then_text]", "parametrize", "pytestmark", "meta-llama/Llama-4-Scout-17B-16E-Instruct-weather_tool_then_text", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", "case_id": "weather_tool_then_text" }, "setup": { "duration": 0.06955079920589924, "outcome": "passed" }, "call": { "duration": 0.4394088052213192, "outcome": "failed", "crash": { "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", "lineno": 688, "message": "IndexError: list index out of range" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", "lineno": 506, "message": "" }, { "path": "tests/verifications/openai_api/test_chat_completion.py", "lineno": 688, "message": "IndexError" } ], "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'weather_tool_then_text', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'location': 'San Francisco...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n> accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n\ntests/verifications/openai_api/test_chat_completion.py:506: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\nstream = \n\n def _accumulate_streaming_tool_calls(stream):\n \"\"\"Accumulates tool calls and content from a streaming ChatCompletion response.\"\"\"\n tool_calls_buffer = {}\n current_id = None\n full_content = \"\" # Initialize content accumulator\n # Process streaming chunks\n for chunk in stream:\n> choice = chunk.choices[0]\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:688: IndexError" }, "teardown": { "duration": 0.00038521457463502884, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-add_product_tool]", "lineno": 471, "outcome": "failed", "keywords": [ "test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-add_product_tool]", "parametrize", "pytestmark", "meta-llama/Llama-4-Scout-17B-16E-Instruct-add_product_tool", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", "case_id": "add_product_tool" }, "setup": { "duration": 0.07154521346092224, "outcome": "passed" }, "call": { "duration": 4.631643522530794, "outcome": "failed", "crash": { "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", "lineno": 688, "message": "IndexError: list index out of range" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", "lineno": 506, "message": "" }, { "path": "tests/verifications/openai_api/test_chat_completion.py", "lineno": 688, "message": "IndexError" } ], "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'add_product_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'inStock': True, 'name': 'Widget...}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': 'Successfully added product with id: 123'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n> accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n\ntests/verifications/openai_api/test_chat_completion.py:506: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\nstream = \n\n def _accumulate_streaming_tool_calls(stream):\n \"\"\"Accumulates tool calls and content from a streaming ChatCompletion response.\"\"\"\n tool_calls_buffer = {}\n current_id = None\n full_content = \"\" # Initialize content accumulator\n # Process streaming chunks\n for chunk in stream:\n> choice = chunk.choices[0]\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:688: IndexError" }, "teardown": { "duration": 0.0003909459337592125, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-get_then_create_event_tool]", "lineno": 471, "outcome": "failed", "keywords": [ "test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-get_then_create_event_tool]", "parametrize", "pytestmark", "meta-llama/Llama-4-Scout-17B-16E-Instruct-get_then_create_event_tool", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", "case_id": "get_then_create_event_tool" }, "setup": { "duration": 0.07234212290495634, "outcome": "passed" }, "call": { "duration": 0.5346909575164318, "outcome": "failed", "crash": { "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", "lineno": 688, "message": "IndexError: list index out of range" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", "lineno": 506, "message": "" }, { "path": "tests/verifications/openai_api/test_chat_completion.py", "lineno": 688, "message": "IndexError" } ], "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'get_then_create_event_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'date': '2025-03-03', ...ents found for 2025-03-03 at 10:00'}\"}, {'response': \"{'response': 'Successfully created new event with id: e_123'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n> accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n\ntests/verifications/openai_api/test_chat_completion.py:506: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\nstream = \n\n def _accumulate_streaming_tool_calls(stream):\n \"\"\"Accumulates tool calls and content from a streaming ChatCompletion response.\"\"\"\n tool_calls_buffer = {}\n current_id = None\n full_content = \"\" # Initialize content accumulator\n # Process streaming chunks\n for chunk in stream:\n> choice = chunk.choices[0]\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:688: IndexError" }, "teardown": { "duration": 0.00038490165024995804, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-compare_monthly_expense_tool]", "lineno": 471, "outcome": "failed", "keywords": [ "test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-compare_monthly_expense_tool]", "parametrize", "pytestmark", "meta-llama/Llama-4-Scout-17B-16E-Instruct-compare_monthly_expense_tool", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", "case_id": "compare_monthly_expense_tool" }, "setup": { "duration": 0.0718430494889617, "outcome": "passed" }, "call": { "duration": 0.57699906360358, "outcome": "failed", "crash": { "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", "lineno": 688, "message": "IndexError: list index out of range" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", "lineno": 506, "message": "" }, { "path": "tests/verifications/openai_api/test_chat_completion.py", "lineno": 688, "message": "IndexError" } ], "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'compare_monthly_expense_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'month': 1, 'year': ... 'Total expenses for January 2025: $1000'}\"}, {'response': \"{'response': 'Total expenses for February 2024: $2000'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n> accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n\ntests/verifications/openai_api/test_chat_completion.py:506: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\nstream = \n\n def _accumulate_streaming_tool_calls(stream):\n \"\"\"Accumulates tool calls and content from a streaming ChatCompletion response.\"\"\"\n tool_calls_buffer = {}\n current_id = None\n full_content = \"\" # Initialize content accumulator\n # Process streaming chunks\n for chunk in stream:\n> choice = chunk.choices[0]\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:688: IndexError" }, "teardown": { "duration": 0.0003002053126692772, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-text_then_weather_tool]", "lineno": 471, "outcome": "failed", "keywords": [ "test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-text_then_weather_tool]", "parametrize", "pytestmark", "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-text_then_weather_tool", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", "case_id": "text_then_weather_tool" }, "setup": { "duration": 0.07102924212813377, "outcome": "passed" }, "call": { "duration": 0.3103123838081956, "outcome": "failed", "crash": { "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", "lineno": 688, "message": "IndexError: list index out of range" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", "lineno": 506, "message": "" }, { "path": "tests/verifications/openai_api/test_chat_completion.py", "lineno": 688, "message": "IndexError" } ], "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'text_then_weather_tool', 'expected': [{'answer': ['sol'], 'num_tool_calls': 0}, {'num_tool_calls': 1, 'to...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n> accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n\ntests/verifications/openai_api/test_chat_completion.py:506: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\nstream = \n\n def _accumulate_streaming_tool_calls(stream):\n \"\"\"Accumulates tool calls and content from a streaming ChatCompletion response.\"\"\"\n tool_calls_buffer = {}\n current_id = None\n full_content = \"\" # Initialize content accumulator\n # Process streaming chunks\n for chunk in stream:\n> choice = chunk.choices[0]\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:688: IndexError" }, "teardown": { "duration": 0.000398833304643631, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-weather_tool_then_text]", "lineno": 471, "outcome": "failed", "keywords": [ "test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-weather_tool_then_text]", "parametrize", "pytestmark", "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-weather_tool_then_text", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", "case_id": "weather_tool_then_text" }, "setup": { "duration": 0.07346561551094055, "outcome": "passed" }, "call": { "duration": 2.515005356632173, "outcome": "failed", "crash": { "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", "lineno": 688, "message": "IndexError: list index out of range" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", "lineno": 506, "message": "" }, { "path": "tests/verifications/openai_api/test_chat_completion.py", "lineno": 688, "message": "IndexError" } ], "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'weather_tool_then_text', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'location': 'San Francisco...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n> accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n\ntests/verifications/openai_api/test_chat_completion.py:506: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\nstream = \n\n def _accumulate_streaming_tool_calls(stream):\n \"\"\"Accumulates tool calls and content from a streaming ChatCompletion response.\"\"\"\n tool_calls_buffer = {}\n current_id = None\n full_content = \"\" # Initialize content accumulator\n # Process streaming chunks\n for chunk in stream:\n> choice = chunk.choices[0]\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:688: IndexError" }, "teardown": { "duration": 0.0003784680739045143, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-add_product_tool]", "lineno": 471, "outcome": "failed", "keywords": [ "test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-add_product_tool]", "parametrize", "pytestmark", "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-add_product_tool", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", "case_id": "add_product_tool" }, "setup": { "duration": 0.07150219846516848, "outcome": "passed" }, "call": { "duration": 0.770132390782237, "outcome": "failed", "crash": { "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", "lineno": 688, "message": "IndexError: list index out of range" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", "lineno": 506, "message": "" }, { "path": "tests/verifications/openai_api/test_chat_completion.py", "lineno": 688, "message": "IndexError" } ], "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'add_product_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'inStock': True, 'name': 'Widget...}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': 'Successfully added product with id: 123'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n> accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n\ntests/verifications/openai_api/test_chat_completion.py:506: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\nstream = \n\n def _accumulate_streaming_tool_calls(stream):\n \"\"\"Accumulates tool calls and content from a streaming ChatCompletion response.\"\"\"\n tool_calls_buffer = {}\n current_id = None\n full_content = \"\" # Initialize content accumulator\n # Process streaming chunks\n for chunk in stream:\n> choice = chunk.choices[0]\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:688: IndexError" }, "teardown": { "duration": 0.00032351352274417877, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-get_then_create_event_tool]", "lineno": 471, "outcome": "failed", "keywords": [ "test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-get_then_create_event_tool]", "parametrize", "pytestmark", "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-get_then_create_event_tool", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", "case_id": "get_then_create_event_tool" }, "setup": { "duration": 0.07132976595312357, "outcome": "passed" }, "call": { "duration": 0.5259293485432863, "outcome": "failed", "crash": { "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", "lineno": 688, "message": "IndexError: list index out of range" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", "lineno": 506, "message": "" }, { "path": "tests/verifications/openai_api/test_chat_completion.py", "lineno": 688, "message": "IndexError" } ], "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'get_then_create_event_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'date': '2025-03-03', ...ents found for 2025-03-03 at 10:00'}\"}, {'response': \"{'response': 'Successfully created new event with id: e_123'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n> accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n\ntests/verifications/openai_api/test_chat_completion.py:506: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\nstream = \n\n def _accumulate_streaming_tool_calls(stream):\n \"\"\"Accumulates tool calls and content from a streaming ChatCompletion response.\"\"\"\n tool_calls_buffer = {}\n current_id = None\n full_content = \"\" # Initialize content accumulator\n # Process streaming chunks\n for chunk in stream:\n> choice = chunk.choices[0]\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:688: IndexError" }, "teardown": { "duration": 0.00037543755024671555, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-compare_monthly_expense_tool]", "lineno": 471, "outcome": "failed", "keywords": [ "test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-compare_monthly_expense_tool]", "parametrize", "pytestmark", "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-compare_monthly_expense_tool", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", "case_id": "compare_monthly_expense_tool" }, "setup": { "duration": 0.07170393783599138, "outcome": "passed" }, "call": { "duration": 0.4805993800982833, "outcome": "failed", "crash": { "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", "lineno": 688, "message": "IndexError: list index out of range" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", "lineno": 506, "message": "" }, { "path": "tests/verifications/openai_api/test_chat_completion.py", "lineno": 688, "message": "IndexError" } ], "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'compare_monthly_expense_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'month': 1, 'year': ... 'Total expenses for January 2025: $1000'}\"}, {'response': \"{'response': 'Total expenses for February 2024: $2000'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n> accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n\ntests/verifications/openai_api/test_chat_completion.py:506: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\nstream = \n\n def _accumulate_streaming_tool_calls(stream):\n \"\"\"Accumulates tool calls and content from a streaming ChatCompletion response.\"\"\"\n tool_calls_buffer = {}\n current_id = None\n full_content = \"\" # Initialize content accumulator\n # Process streaming chunks\n for chunk in stream:\n> choice = chunk.choices[0]\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:688: IndexError" }, "teardown": { "duration": 0.0003568241372704506, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_multi_turn_multiple_images[meta-llama/Llama-3.3-70B-Instruct-Turbo-stream=False]", "lineno": 554, "outcome": "skipped", "keywords": [ "test_chat_multi_turn_multiple_images[meta-llama/Llama-3.3-70B-Instruct-Turbo-stream=False]", "parametrize", "pytestmark", "meta-llama/Llama-3.3-70B-Instruct-Turbo-stream=False", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo", "case_id": "stream=False" }, "setup": { "duration": 0.07332183048129082, "outcome": "passed" }, "call": { "duration": 0.00027661025524139404, "outcome": "skipped", "longrepr": "('/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py', 561, 'Skipped: Skipping test_chat_multi_turn_multiple_images for model meta-llama/Llama-3.3-70B-Instruct-Turbo on provider together based on config.')" }, "teardown": { "duration": 0.00043700821697711945, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_multi_turn_multiple_images[meta-llama/Llama-3.3-70B-Instruct-Turbo-stream=True]", "lineno": 554, "outcome": "skipped", "keywords": [ "test_chat_multi_turn_multiple_images[meta-llama/Llama-3.3-70B-Instruct-Turbo-stream=True]", "parametrize", "pytestmark", "meta-llama/Llama-3.3-70B-Instruct-Turbo-stream=True", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo", "case_id": "stream=True" }, "setup": { "duration": 0.07106236275285482, "outcome": "passed" }, "call": { "duration": 0.0002557719126343727, "outcome": "skipped", "longrepr": "('/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py', 561, 'Skipped: Skipping test_chat_multi_turn_multiple_images for model meta-llama/Llama-3.3-70B-Instruct-Turbo on provider together based on config.')" }, "teardown": { "duration": 0.00020366813987493515, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_multi_turn_multiple_images[meta-llama/Llama-4-Scout-17B-16E-Instruct-stream=False]", "lineno": 554, "outcome": "passed", "keywords": [ "test_chat_multi_turn_multiple_images[meta-llama/Llama-4-Scout-17B-16E-Instruct-stream=False]", "parametrize", "pytestmark", "meta-llama/Llama-4-Scout-17B-16E-Instruct-stream=False", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", "case_id": "stream=False" }, "setup": { "duration": 0.07133481372147799, "outcome": "passed" }, "call": { "duration": 2.875141463242471, "outcome": "passed" }, "teardown": { "duration": 0.00029349327087402344, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_multi_turn_multiple_images[meta-llama/Llama-4-Scout-17B-16E-Instruct-stream=True]", "lineno": 554, "outcome": "failed", "keywords": [ "test_chat_multi_turn_multiple_images[meta-llama/Llama-4-Scout-17B-16E-Instruct-stream=True]", "parametrize", "pytestmark", "meta-llama/Llama-4-Scout-17B-16E-Instruct-stream=True", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", "case_id": "stream=True" }, "setup": { "duration": 0.07148486748337746, "outcome": "passed" }, "call": { "duration": 1.4055311204865575, "outcome": "failed", "crash": { "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", "lineno": 596, "message": "IndexError: list index out of range" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", "lineno": 596, "message": "IndexError" } ], "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\nmulti_image_data = ['data:image/jpeg;base64,/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAIBAQEBAQIBAQECAgICAgQDAgICAgUEBAMEBgUGBgYFBgYGBwkIBgcJBwYGC...6pH9jaTzNv7vfRRXzubfxj9f8Pv8AkTz/AMX/ALbEz5Ly38lfMk/5Z/u64PxhqEZh+z/6rzvn2UUV5EvgPuzy/wAc6p5dt5ccibJpNkkdFFFec27mZ//Z']\nstream = True\n\n @pytest.mark.parametrize(\"stream\", [False, True], ids=[\"stream=False\", \"stream=True\"])\n def test_chat_multi_turn_multiple_images(\n request, openai_client, model, provider, verification_config, multi_image_data, stream\n ):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages_turn1 = [\n {\n \"role\": \"user\",\n \"content\": [\n {\n \"type\": \"image_url\",\n \"image_url\": {\n \"url\": multi_image_data[0],\n },\n },\n {\n \"type\": \"image_url\",\n \"image_url\": {\n \"url\": multi_image_data[1],\n },\n },\n {\n \"type\": \"text\",\n \"text\": \"What furniture is in the first image that is not in the second image?\",\n },\n ],\n },\n ]\n \n # First API call\n response1 = openai_client.chat.completions.create(\n model=model,\n messages=messages_turn1,\n stream=stream,\n )\n if stream:\n message_content1 = \"\"\n for chunk in response1:\n> message_content1 += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:596: IndexError" }, "teardown": { "duration": 0.00031107570976018906, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_multi_turn_multiple_images[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-stream=False]", "lineno": 554, "outcome": "passed", "keywords": [ "test_chat_multi_turn_multiple_images[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-stream=False]", "parametrize", "pytestmark", "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-stream=False", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", "case_id": "stream=False" }, "setup": { "duration": 0.07265154179185629, "outcome": "passed" }, "call": { "duration": 6.755587887018919, "outcome": "passed" }, "teardown": { "duration": 0.00022470485419034958, "outcome": "passed" } }, { "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_multi_turn_multiple_images[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-stream=True]", "lineno": 554, "outcome": "failed", "keywords": [ "test_chat_multi_turn_multiple_images[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-stream=True]", "parametrize", "pytestmark", "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-stream=True", "test_chat_completion.py", "openai_api", "verifications", "tests", "llama-stack", "" ], "metadata": { "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", "case_id": "stream=True" }, "setup": { "duration": 0.07265954371541739, "outcome": "passed" }, "call": { "duration": 1.973216057755053, "outcome": "failed", "crash": { "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", "lineno": 596, "message": "IndexError: list index out of range" }, "traceback": [ { "path": "tests/verifications/openai_api/test_chat_completion.py", "lineno": 596, "message": "IndexError" } ], "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\nmulti_image_data = ['data:image/jpeg;base64,/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAIBAQEBAQIBAQECAgICAgQDAgICAgUEBAMEBgUGBgYFBgYGBwkIBgcJBwYGC...6pH9jaTzNv7vfRRXzubfxj9f8Pv8AkTz/AMX/ALbEz5Ly38lfMk/5Z/u64PxhqEZh+z/6rzvn2UUV5EvgPuzy/wAc6p5dt5ccibJpNkkdFFFec27mZ//Z']\nstream = True\n\n @pytest.mark.parametrize(\"stream\", [False, True], ids=[\"stream=False\", \"stream=True\"])\n def test_chat_multi_turn_multiple_images(\n request, openai_client, model, provider, verification_config, multi_image_data, stream\n ):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages_turn1 = [\n {\n \"role\": \"user\",\n \"content\": [\n {\n \"type\": \"image_url\",\n \"image_url\": {\n \"url\": multi_image_data[0],\n },\n },\n {\n \"type\": \"image_url\",\n \"image_url\": {\n \"url\": multi_image_data[1],\n },\n },\n {\n \"type\": \"text\",\n \"text\": \"What furniture is in the first image that is not in the second image?\",\n },\n ],\n },\n ]\n \n # First API call\n response1 = openai_client.chat.completions.create(\n model=model,\n messages=messages_turn1,\n stream=stream,\n )\n if stream:\n message_content1 = \"\"\n for chunk in response1:\n> message_content1 += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:596: IndexError" }, "teardown": { "duration": 0.0017197001725435257, "outcome": "passed" } } ], "run_timestamp": 1744915514 }