llama-stack-mirror/tests/verifications/test_results/fireworks.json
ehhuang 0ed41aafbf
test: add multi_image test (#1972)
# What does this PR do?
Adds a multi-image chat completion test to the OpenAI API verification suite (recorded in this report as `test_chat_multi_turn_multiple_images`) and updates the committed provider test-result files accordingly.

## Test Plan
pytest tests/verifications/openai_api/test_chat_completion.py --provider openai -k 'test_chat_multiple_images'
2025-04-17 12:51:42 -07:00
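
The JSON below is the committed Fireworks verification report for this suite. Its top-level structure (`created`, `duration`, `exitcode`, `summary`, `collectors`, `tests`) matches what the `pytest-json-report` plugin emits; assuming that plugin is installed and Fireworks credentials are configured (both assumptions, not stated in this file), a report like this one could presumably be regenerated with something along these lines:

```sh
# Sketch only: run the same verification suite against the Fireworks provider
# and write the JSON report next to the other provider result files.
# The --json-report flags belong to the pytest-json-report plugin (assumed here).
pytest tests/verifications/openai_api/test_chat_completion.py \
  --provider fireworks \
  --json-report \
  --json-report-file=tests/verifications/test_results/fireworks.json
```

The `summary` block near the top of the report gives the aggregate pass/skip/fail counts for the run, and each entry under `tests` records the per-test outcome with setup, call, and teardown timings.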


{
"created": 1744918448.686489,
"duration": 254.68238854408264,
"exitcode": 1,
"root": "/home/erichuang/llama-stack",
"environment": {},
"summary": {
"passed": 40,
"skipped": 4,
"failed": 40,
"total": 84,
"collected": 84
},
"collectors": [
{
"nodeid": "",
"outcome": "passed",
"result": [
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py",
"type": "Module"
}
]
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py",
"outcome": "passed",
"result": [
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[accounts/fireworks/models/llama-v3p3-70b-instruct-earth]",
"type": "Function",
"lineno": 95
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[accounts/fireworks/models/llama-v3p3-70b-instruct-saturn]",
"type": "Function",
"lineno": 95
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[accounts/fireworks/models/llama4-scout-instruct-basic-earth]",
"type": "Function",
"lineno": 95
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[accounts/fireworks/models/llama4-scout-instruct-basic-saturn]",
"type": "Function",
"lineno": 95
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[accounts/fireworks/models/llama4-maverick-instruct-basic-earth]",
"type": "Function",
"lineno": 95
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[accounts/fireworks/models/llama4-maverick-instruct-basic-saturn]",
"type": "Function",
"lineno": 95
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[accounts/fireworks/models/llama-v3p3-70b-instruct-earth]",
"type": "Function",
"lineno": 114
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[accounts/fireworks/models/llama-v3p3-70b-instruct-saturn]",
"type": "Function",
"lineno": 114
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[accounts/fireworks/models/llama4-scout-instruct-basic-earth]",
"type": "Function",
"lineno": 114
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[accounts/fireworks/models/llama4-scout-instruct-basic-saturn]",
"type": "Function",
"lineno": 114
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[accounts/fireworks/models/llama4-maverick-instruct-basic-earth]",
"type": "Function",
"lineno": 114
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[accounts/fireworks/models/llama4-maverick-instruct-basic-saturn]",
"type": "Function",
"lineno": 114
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]",
"type": "Function",
"lineno": 138
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[accounts/fireworks/models/llama4-scout-instruct-basic-case0]",
"type": "Function",
"lineno": 138
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]",
"type": "Function",
"lineno": 138
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]",
"type": "Function",
"lineno": 157
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[accounts/fireworks/models/llama4-scout-instruct-basic-case0]",
"type": "Function",
"lineno": 157
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]",
"type": "Function",
"lineno": 157
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[accounts/fireworks/models/llama-v3p3-70b-instruct-calendar]",
"type": "Function",
"lineno": 181
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[accounts/fireworks/models/llama-v3p3-70b-instruct-math]",
"type": "Function",
"lineno": 181
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[accounts/fireworks/models/llama4-scout-instruct-basic-calendar]",
"type": "Function",
"lineno": 181
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[accounts/fireworks/models/llama4-scout-instruct-basic-math]",
"type": "Function",
"lineno": 181
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[accounts/fireworks/models/llama4-maverick-instruct-basic-calendar]",
"type": "Function",
"lineno": 181
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[accounts/fireworks/models/llama4-maverick-instruct-basic-math]",
"type": "Function",
"lineno": 181
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[accounts/fireworks/models/llama-v3p3-70b-instruct-calendar]",
"type": "Function",
"lineno": 204
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[accounts/fireworks/models/llama-v3p3-70b-instruct-math]",
"type": "Function",
"lineno": 204
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[accounts/fireworks/models/llama4-scout-instruct-basic-calendar]",
"type": "Function",
"lineno": 204
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[accounts/fireworks/models/llama4-scout-instruct-basic-math]",
"type": "Function",
"lineno": 204
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[accounts/fireworks/models/llama4-maverick-instruct-basic-calendar]",
"type": "Function",
"lineno": 204
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[accounts/fireworks/models/llama4-maverick-instruct-basic-math]",
"type": "Function",
"lineno": 204
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]",
"type": "Function",
"lineno": 226
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-case0]",
"type": "Function",
"lineno": 226
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]",
"type": "Function",
"lineno": 226
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]",
"type": "Function",
"lineno": 250
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-case0]",
"type": "Function",
"lineno": 250
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]",
"type": "Function",
"lineno": 250
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_required[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]",
"type": "Function",
"lineno": 278
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_required[accounts/fireworks/models/llama4-scout-instruct-basic-case0]",
"type": "Function",
"lineno": 278
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_required[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]",
"type": "Function",
"lineno": 278
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_required[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]",
"type": "Function",
"lineno": 302
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_required[accounts/fireworks/models/llama4-scout-instruct-basic-case0]",
"type": "Function",
"lineno": 302
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_required[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]",
"type": "Function",
"lineno": 302
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_none[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]",
"type": "Function",
"lineno": 329
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_none[accounts/fireworks/models/llama4-scout-instruct-basic-case0]",
"type": "Function",
"lineno": 329
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_none[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]",
"type": "Function",
"lineno": 329
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_none[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]",
"type": "Function",
"lineno": 352
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_none[accounts/fireworks/models/llama4-scout-instruct-basic-case0]",
"type": "Function",
"lineno": 352
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_none[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]",
"type": "Function",
"lineno": 352
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-text_then_weather_tool]",
"type": "Function",
"lineno": 380
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-weather_tool_then_text]",
"type": "Function",
"lineno": 380
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-add_product_tool]",
"type": "Function",
"lineno": 380
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-get_then_create_event_tool]",
"type": "Function",
"lineno": 380
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-compare_monthly_expense_tool]",
"type": "Function",
"lineno": 380
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-text_then_weather_tool]",
"type": "Function",
"lineno": 380
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-weather_tool_then_text]",
"type": "Function",
"lineno": 380
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-add_product_tool]",
"type": "Function",
"lineno": 380
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-get_then_create_event_tool]",
"type": "Function",
"lineno": 380
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-compare_monthly_expense_tool]",
"type": "Function",
"lineno": 380
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-text_then_weather_tool]",
"type": "Function",
"lineno": 380
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-weather_tool_then_text]",
"type": "Function",
"lineno": 380
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-add_product_tool]",
"type": "Function",
"lineno": 380
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-get_then_create_event_tool]",
"type": "Function",
"lineno": 380
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-compare_monthly_expense_tool]",
"type": "Function",
"lineno": 380
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-text_then_weather_tool]",
"type": "Function",
"lineno": 471
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-weather_tool_then_text]",
"type": "Function",
"lineno": 471
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-add_product_tool]",
"type": "Function",
"lineno": 471
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-get_then_create_event_tool]",
"type": "Function",
"lineno": 471
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-compare_monthly_expense_tool]",
"type": "Function",
"lineno": 471
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-text_then_weather_tool]",
"type": "Function",
"lineno": 471
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-weather_tool_then_text]",
"type": "Function",
"lineno": 471
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-add_product_tool]",
"type": "Function",
"lineno": 471
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-get_then_create_event_tool]",
"type": "Function",
"lineno": 471
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-compare_monthly_expense_tool]",
"type": "Function",
"lineno": 471
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-text_then_weather_tool]",
"type": "Function",
"lineno": 471
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-weather_tool_then_text]",
"type": "Function",
"lineno": 471
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-add_product_tool]",
"type": "Function",
"lineno": 471
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-get_then_create_event_tool]",
"type": "Function",
"lineno": 471
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-compare_monthly_expense_tool]",
"type": "Function",
"lineno": 471
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_multi_turn_multiple_images[accounts/fireworks/models/llama-v3p3-70b-instruct-stream=False]",
"type": "Function",
"lineno": 554
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_multi_turn_multiple_images[accounts/fireworks/models/llama-v3p3-70b-instruct-stream=True]",
"type": "Function",
"lineno": 554
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_multi_turn_multiple_images[accounts/fireworks/models/llama4-scout-instruct-basic-stream=False]",
"type": "Function",
"lineno": 554
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_multi_turn_multiple_images[accounts/fireworks/models/llama4-scout-instruct-basic-stream=True]",
"type": "Function",
"lineno": 554
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_multi_turn_multiple_images[accounts/fireworks/models/llama4-maverick-instruct-basic-stream=False]",
"type": "Function",
"lineno": 554
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_multi_turn_multiple_images[accounts/fireworks/models/llama4-maverick-instruct-basic-stream=True]",
"type": "Function",
"lineno": 554
}
]
}
],
"tests": [
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[accounts/fireworks/models/llama-v3p3-70b-instruct-earth]",
"lineno": 95,
"outcome": "passed",
"keywords": [
"test_chat_non_streaming_basic[accounts/fireworks/models/llama-v3p3-70b-instruct-earth]",
"parametrize",
"pytestmark",
"accounts/fireworks/models/llama-v3p3-70b-instruct-earth",
"test_chat_completion.py",
"openai_api",
"verifications",
"tests",
"llama-stack",
""
],
"metadata": {
"model": "accounts/fireworks/models/llama-v3p3-70b-instruct",
"case_id": "earth"
},
"setup": {
"duration": 0.13845239393413067,
"outcome": "passed"
},
"call": {
"duration": 1.3300942620262504,
"outcome": "passed"
},
"teardown": {
"duration": 0.00025453977286815643,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[accounts/fireworks/models/llama-v3p3-70b-instruct-saturn]",
"lineno": 95,
"outcome": "passed",
"keywords": [
"test_chat_non_streaming_basic[accounts/fireworks/models/llama-v3p3-70b-instruct-saturn]",
"parametrize",
"pytestmark",
"accounts/fireworks/models/llama-v3p3-70b-instruct-saturn",
"test_chat_completion.py",
"openai_api",
"verifications",
"tests",
"llama-stack",
""
],
"metadata": {
"model": "accounts/fireworks/models/llama-v3p3-70b-instruct",
"case_id": "saturn"
},
"setup": {
"duration": 0.0806605163961649,
"outcome": "passed"
},
"call": {
"duration": 0.6202042903751135,
"outcome": "passed"
},
"teardown": {
"duration": 0.00026358477771282196,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[accounts/fireworks/models/llama4-scout-instruct-basic-earth]",
"lineno": 95,
"outcome": "passed",
"keywords": [
"test_chat_non_streaming_basic[accounts/fireworks/models/llama4-scout-instruct-basic-earth]",
"parametrize",
"pytestmark",
"accounts/fireworks/models/llama4-scout-instruct-basic-earth",
"test_chat_completion.py",
"openai_api",
"verifications",
"tests",
"llama-stack",
""
],
"metadata": {
"model": "accounts/fireworks/models/llama4-scout-instruct-basic",
"case_id": "earth"
},
"setup": {
"duration": 0.07190297450870275,
"outcome": "passed"
},
"call": {
"duration": 0.7458920907229185,
"outcome": "passed"
},
"teardown": {
"duration": 0.00024067144840955734,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[accounts/fireworks/models/llama4-scout-instruct-basic-saturn]",
"lineno": 95,
"outcome": "passed",
"keywords": [
"test_chat_non_streaming_basic[accounts/fireworks/models/llama4-scout-instruct-basic-saturn]",
"parametrize",
"pytestmark",
"accounts/fireworks/models/llama4-scout-instruct-basic-saturn",
"test_chat_completion.py",
"openai_api",
"verifications",
"tests",
"llama-stack",
""
],
"metadata": {
"model": "accounts/fireworks/models/llama4-scout-instruct-basic",
"case_id": "saturn"
},
"setup": {
"duration": 0.07551384158432484,
"outcome": "passed"
},
"call": {
"duration": 0.6140249809250236,
"outcome": "passed"
},
"teardown": {
"duration": 0.00024476367980241776,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[accounts/fireworks/models/llama4-maverick-instruct-basic-earth]",
"lineno": 95,
"outcome": "passed",
"keywords": [
"test_chat_non_streaming_basic[accounts/fireworks/models/llama4-maverick-instruct-basic-earth]",
"parametrize",
"pytestmark",
"accounts/fireworks/models/llama4-maverick-instruct-basic-earth",
"test_chat_completion.py",
"openai_api",
"verifications",
"tests",
"llama-stack",
""
],
"metadata": {
"model": "accounts/fireworks/models/llama4-maverick-instruct-basic",
"case_id": "earth"
},
"setup": {
"duration": 0.07434738799929619,
"outcome": "passed"
},
"call": {
"duration": 1.6738943997770548,
"outcome": "passed"
},
"teardown": {
"duration": 0.000227426178753376,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[accounts/fireworks/models/llama4-maverick-instruct-basic-saturn]",
"lineno": 95,
"outcome": "passed",
"keywords": [
"test_chat_non_streaming_basic[accounts/fireworks/models/llama4-maverick-instruct-basic-saturn]",
"parametrize",
"pytestmark",
"accounts/fireworks/models/llama4-maverick-instruct-basic-saturn",
"test_chat_completion.py",
"openai_api",
"verifications",
"tests",
"llama-stack",
""
],
"metadata": {
"model": "accounts/fireworks/models/llama4-maverick-instruct-basic",
"case_id": "saturn"
},
"setup": {
"duration": 0.07130288146436214,
"outcome": "passed"
},
"call": {
"duration": 1.337895905598998,
"outcome": "passed"
},
"teardown": {
"duration": 0.00028038304299116135,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[accounts/fireworks/models/llama-v3p3-70b-instruct-earth]",
"lineno": 114,
"outcome": "passed",
"keywords": [
"test_chat_streaming_basic[accounts/fireworks/models/llama-v3p3-70b-instruct-earth]",
"parametrize",
"pytestmark",
"accounts/fireworks/models/llama-v3p3-70b-instruct-earth",
"test_chat_completion.py",
"openai_api",
"verifications",
"tests",
"llama-stack",
""
],
"metadata": {
"model": "accounts/fireworks/models/llama-v3p3-70b-instruct",
"case_id": "earth"
},
"setup": {
"duration": 0.0727478675544262,
"outcome": "passed"
},
"call": {
"duration": 0.7670011632144451,
"outcome": "passed"
},
"teardown": {
"duration": 0.00023174844682216644,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[accounts/fireworks/models/llama-v3p3-70b-instruct-saturn]",
"lineno": 114,
"outcome": "passed",
"keywords": [
"test_chat_streaming_basic[accounts/fireworks/models/llama-v3p3-70b-instruct-saturn]",
"parametrize",
"pytestmark",
"accounts/fireworks/models/llama-v3p3-70b-instruct-saturn",
"test_chat_completion.py",
"openai_api",
"verifications",
"tests",
"llama-stack",
""
],
"metadata": {
"model": "accounts/fireworks/models/llama-v3p3-70b-instruct",
"case_id": "saturn"
},
"setup": {
"duration": 0.07163545861840248,
"outcome": "passed"
},
"call": {
"duration": 0.7582714259624481,
"outcome": "passed"
},
"teardown": {
"duration": 0.00028524454683065414,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[accounts/fireworks/models/llama4-scout-instruct-basic-earth]",
"lineno": 114,
"outcome": "passed",
"keywords": [
"test_chat_streaming_basic[accounts/fireworks/models/llama4-scout-instruct-basic-earth]",
"parametrize",
"pytestmark",
"accounts/fireworks/models/llama4-scout-instruct-basic-earth",
"test_chat_completion.py",
"openai_api",
"verifications",
"tests",
"llama-stack",
""
],
"metadata": {
"model": "accounts/fireworks/models/llama4-scout-instruct-basic",
"case_id": "earth"
},
"setup": {
"duration": 0.08122281823307276,
"outcome": "passed"
},
"call": {
"duration": 0.6061851140111685,
"outcome": "passed"
},
"teardown": {
"duration": 0.0002497304230928421,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[accounts/fireworks/models/llama4-scout-instruct-basic-saturn]",
"lineno": 114,
"outcome": "passed",
"keywords": [
"test_chat_streaming_basic[accounts/fireworks/models/llama4-scout-instruct-basic-saturn]",
"parametrize",
"pytestmark",
"accounts/fireworks/models/llama4-scout-instruct-basic-saturn",
"test_chat_completion.py",
"openai_api",
"verifications",
"tests",
"llama-stack",
""
],
"metadata": {
"model": "accounts/fireworks/models/llama4-scout-instruct-basic",
"case_id": "saturn"
},
"setup": {
"duration": 0.07185561209917068,
"outcome": "passed"
},
"call": {
"duration": 0.7516075978055596,
"outcome": "passed"
},
"teardown": {
"duration": 0.00026526860892772675,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[accounts/fireworks/models/llama4-maverick-instruct-basic-earth]",
"lineno": 114,
"outcome": "passed",
"keywords": [
"test_chat_streaming_basic[accounts/fireworks/models/llama4-maverick-instruct-basic-earth]",
"parametrize",
"pytestmark",
"accounts/fireworks/models/llama4-maverick-instruct-basic-earth",
"test_chat_completion.py",
"openai_api",
"verifications",
"tests",
"llama-stack",
""
],
"metadata": {
"model": "accounts/fireworks/models/llama4-maverick-instruct-basic",
"case_id": "earth"
},
"setup": {
"duration": 0.07012896798551083,
"outcome": "passed"
},
"call": {
"duration": 1.8946502823382616,
"outcome": "passed"
},
"teardown": {
"duration": 0.0002452842891216278,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[accounts/fireworks/models/llama4-maverick-instruct-basic-saturn]",
"lineno": 114,
"outcome": "passed",
"keywords": [
"test_chat_streaming_basic[accounts/fireworks/models/llama4-maverick-instruct-basic-saturn]",
"parametrize",
"pytestmark",
"accounts/fireworks/models/llama4-maverick-instruct-basic-saturn",
"test_chat_completion.py",
"openai_api",
"verifications",
"tests",
"llama-stack",
""
],
"metadata": {
"model": "accounts/fireworks/models/llama4-maverick-instruct-basic",
"case_id": "saturn"
},
"setup": {
"duration": 0.06955648958683014,
"outcome": "passed"
},
"call": {
"duration": 1.0446623722091317,
"outcome": "passed"
},
"teardown": {
"duration": 0.00023738667368888855,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]",
"lineno": 138,
"outcome": "skipped",
"keywords": [
"test_chat_non_streaming_image[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]",
"parametrize",
"pytestmark",
"accounts/fireworks/models/llama-v3p3-70b-instruct-case0",
"test_chat_completion.py",
"openai_api",
"verifications",
"tests",
"llama-stack",
""
],
"metadata": {
"model": "accounts/fireworks/models/llama-v3p3-70b-instruct",
"case_id": "case0"
},
"setup": {
"duration": 0.07077906839549541,
"outcome": "passed"
},
"call": {
"duration": 0.00021365191787481308,
"outcome": "skipped",
"longrepr": "('/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py', 147, 'Skipped: Skipping test_chat_non_streaming_image for model accounts/fireworks/models/llama-v3p3-70b-instruct on provider fireworks based on config.')"
},
"teardown": {
"duration": 0.00018982868641614914,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[accounts/fireworks/models/llama4-scout-instruct-basic-case0]",
"lineno": 138,
"outcome": "passed",
"keywords": [
"test_chat_non_streaming_image[accounts/fireworks/models/llama4-scout-instruct-basic-case0]",
"parametrize",
"pytestmark",
"accounts/fireworks/models/llama4-scout-instruct-basic-case0",
"test_chat_completion.py",
"openai_api",
"verifications",
"tests",
"llama-stack",
""
],
"metadata": {
"model": "accounts/fireworks/models/llama4-scout-instruct-basic",
"case_id": "case0"
},
"setup": {
"duration": 0.07118859142065048,
"outcome": "passed"
},
"call": {
"duration": 4.20654855389148,
"outcome": "passed"
},
"teardown": {
"duration": 0.00023640412837266922,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]",
"lineno": 138,
"outcome": "passed",
"keywords": [
"test_chat_non_streaming_image[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]",
"parametrize",
"pytestmark",
"accounts/fireworks/models/llama4-maverick-instruct-basic-case0",
"test_chat_completion.py",
"openai_api",
"verifications",
"tests",
"llama-stack",
""
],
"metadata": {
"model": "accounts/fireworks/models/llama4-maverick-instruct-basic",
"case_id": "case0"
},
"setup": {
"duration": 0.07351029943674803,
"outcome": "passed"
},
"call": {
"duration": 4.875292049720883,
"outcome": "passed"
},
"teardown": {
"duration": 0.0002571679651737213,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]",
"lineno": 157,
"outcome": "skipped",
"keywords": [
"test_chat_streaming_image[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]",
"parametrize",
"pytestmark",
"accounts/fireworks/models/llama-v3p3-70b-instruct-case0",
"test_chat_completion.py",
"openai_api",
"verifications",
"tests",
"llama-stack",
""
],
"metadata": {
"model": "accounts/fireworks/models/llama-v3p3-70b-instruct",
"case_id": "case0"
},
"setup": {
"duration": 0.07474396284669638,
"outcome": "passed"
},
"call": {
"duration": 0.0002510417252779007,
"outcome": "skipped",
"longrepr": "('/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py', 166, 'Skipped: Skipping test_chat_streaming_image for model accounts/fireworks/models/llama-v3p3-70b-instruct on provider fireworks based on config.')"
},
"teardown": {
"duration": 0.00020200759172439575,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[accounts/fireworks/models/llama4-scout-instruct-basic-case0]",
"lineno": 157,
"outcome": "passed",
"keywords": [
"test_chat_streaming_image[accounts/fireworks/models/llama4-scout-instruct-basic-case0]",
"parametrize",
"pytestmark",
"accounts/fireworks/models/llama4-scout-instruct-basic-case0",
"test_chat_completion.py",
"openai_api",
"verifications",
"tests",
"llama-stack",
""
],
"metadata": {
"model": "accounts/fireworks/models/llama4-scout-instruct-basic",
"case_id": "case0"
},
"setup": {
"duration": 0.07380561903119087,
"outcome": "passed"
},
"call": {
"duration": 2.0082657346501946,
"outcome": "passed"
},
"teardown": {
"duration": 0.0002522030845284462,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]",
"lineno": 157,
"outcome": "passed",
"keywords": [
"test_chat_streaming_image[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]",
"parametrize",
"pytestmark",
"accounts/fireworks/models/llama4-maverick-instruct-basic-case0",
"test_chat_completion.py",
"openai_api",
"verifications",
"tests",
"llama-stack",
""
],
"metadata": {
"model": "accounts/fireworks/models/llama4-maverick-instruct-basic",
"case_id": "case0"
},
"setup": {
"duration": 0.07040839456021786,
"outcome": "passed"
},
"call": {
"duration": 4.871666649356484,
"outcome": "passed"
},
"teardown": {
"duration": 0.0002490682527422905,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[accounts/fireworks/models/llama-v3p3-70b-instruct-calendar]",
"lineno": 181,
"outcome": "passed",
"keywords": [
"test_chat_non_streaming_structured_output[accounts/fireworks/models/llama-v3p3-70b-instruct-calendar]",
"parametrize",
"pytestmark",
"accounts/fireworks/models/llama-v3p3-70b-instruct-calendar",
"test_chat_completion.py",
"openai_api",
"verifications",
"tests",
"llama-stack",
""
],
"metadata": {
"model": "accounts/fireworks/models/llama-v3p3-70b-instruct",
"case_id": "calendar"
},
"setup": {
"duration": 0.07167178671807051,
"outcome": "passed"
},
"call": {
"duration": 0.9903911761939526,
"outcome": "passed"
},
"teardown": {
"duration": 0.0002704570069909096,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[accounts/fireworks/models/llama-v3p3-70b-instruct-math]",
"lineno": 181,
"outcome": "passed",
"keywords": [
"test_chat_non_streaming_structured_output[accounts/fireworks/models/llama-v3p3-70b-instruct-math]",
"parametrize",
"pytestmark",
"accounts/fireworks/models/llama-v3p3-70b-instruct-math",
"test_chat_completion.py",
"openai_api",
"verifications",
"tests",
"llama-stack",
""
],
"metadata": {
"model": "accounts/fireworks/models/llama-v3p3-70b-instruct",
"case_id": "math"
},
"setup": {
"duration": 0.07073096185922623,
"outcome": "passed"
},
"call": {
"duration": 3.9858130905777216,
"outcome": "passed"
},
"teardown": {
"duration": 0.00024665892124176025,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[accounts/fireworks/models/llama4-scout-instruct-basic-calendar]",
"lineno": 181,
"outcome": "passed",
"keywords": [
"test_chat_non_streaming_structured_output[accounts/fireworks/models/llama4-scout-instruct-basic-calendar]",
"parametrize",
"pytestmark",
"accounts/fireworks/models/llama4-scout-instruct-basic-calendar",
"test_chat_completion.py",
"openai_api",
"verifications",
"tests",
"llama-stack",
""
],
"metadata": {
"model": "accounts/fireworks/models/llama4-scout-instruct-basic",
"case_id": "calendar"
},
"setup": {
"duration": 0.07138721086084843,
"outcome": "passed"
},
"call": {
"duration": 1.1312237158417702,
"outcome": "passed"
},
"teardown": {
"duration": 0.00027671270072460175,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[accounts/fireworks/models/llama4-scout-instruct-basic-math]",
"lineno": 181,
"outcome": "passed",
"keywords": [
"test_chat_non_streaming_structured_output[accounts/fireworks/models/llama4-scout-instruct-basic-math]",
"parametrize",
"pytestmark",
"accounts/fireworks/models/llama4-scout-instruct-basic-math",
"test_chat_completion.py",
"openai_api",
"verifications",
"tests",
"llama-stack",
""
],
"metadata": {
"model": "accounts/fireworks/models/llama4-scout-instruct-basic",
"case_id": "math"
},
"setup": {
"duration": 0.08204951789230108,
"outcome": "passed"
},
"call": {
"duration": 2.7500197598710656,
"outcome": "passed"
},
"teardown": {
"duration": 0.00024303700774908066,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[accounts/fireworks/models/llama4-maverick-instruct-basic-calendar]",
"lineno": 181,
"outcome": "passed",
"keywords": [
"test_chat_non_streaming_structured_output[accounts/fireworks/models/llama4-maverick-instruct-basic-calendar]",
"parametrize",
"pytestmark",
"accounts/fireworks/models/llama4-maverick-instruct-basic-calendar",
"test_chat_completion.py",
"openai_api",
"verifications",
"tests",
"llama-stack",
""
],
"metadata": {
"model": "accounts/fireworks/models/llama4-maverick-instruct-basic",
"case_id": "calendar"
},
"setup": {
"duration": 0.07405088562518358,
"outcome": "passed"
},
"call": {
"duration": 1.238045932725072,
"outcome": "passed"
},
"teardown": {
"duration": 0.00024984683841466904,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[accounts/fireworks/models/llama4-maverick-instruct-basic-math]",
"lineno": 181,
"outcome": "passed",
"keywords": [
"test_chat_non_streaming_structured_output[accounts/fireworks/models/llama4-maverick-instruct-basic-math]",
"parametrize",
"pytestmark",
"accounts/fireworks/models/llama4-maverick-instruct-basic-math",
"test_chat_completion.py",
"openai_api",
"verifications",
"tests",
"llama-stack",
""
],
"metadata": {
"model": "accounts/fireworks/models/llama4-maverick-instruct-basic",
"case_id": "math"
},
"setup": {
"duration": 0.07009329181164503,
"outcome": "passed"
},
"call": {
"duration": 3.55908961314708,
"outcome": "passed"
},
"teardown": {
"duration": 0.00026627909392118454,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[accounts/fireworks/models/llama-v3p3-70b-instruct-calendar]",
"lineno": 204,
"outcome": "passed",
"keywords": [
"test_chat_streaming_structured_output[accounts/fireworks/models/llama-v3p3-70b-instruct-calendar]",
"parametrize",
"pytestmark",
"accounts/fireworks/models/llama-v3p3-70b-instruct-calendar",
"test_chat_completion.py",
"openai_api",
"verifications",
"tests",
"llama-stack",
""
],
"metadata": {
"model": "accounts/fireworks/models/llama-v3p3-70b-instruct",
"case_id": "calendar"
},
"setup": {
"duration": 0.07596437353640795,
"outcome": "passed"
},
"call": {
"duration": 1.0093460381031036,
"outcome": "passed"
},
"teardown": {
"duration": 0.0002171723172068596,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[accounts/fireworks/models/llama-v3p3-70b-instruct-math]",
"lineno": 204,
"outcome": "passed",
"keywords": [
"test_chat_streaming_structured_output[accounts/fireworks/models/llama-v3p3-70b-instruct-math]",
"parametrize",
"pytestmark",
"accounts/fireworks/models/llama-v3p3-70b-instruct-math",
"test_chat_completion.py",
"openai_api",
"verifications",
"tests",
"llama-stack",
""
],
"metadata": {
"model": "accounts/fireworks/models/llama-v3p3-70b-instruct",
"case_id": "math"
},
"setup": {
"duration": 0.06995268166065216,
"outcome": "passed"
},
"call": {
"duration": 2.617857910692692,
"outcome": "passed"
},
"teardown": {
"duration": 0.00024063047021627426,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[accounts/fireworks/models/llama4-scout-instruct-basic-calendar]",
"lineno": 204,
"outcome": "passed",
"keywords": [
"test_chat_streaming_structured_output[accounts/fireworks/models/llama4-scout-instruct-basic-calendar]",
"parametrize",
"pytestmark",
"accounts/fireworks/models/llama4-scout-instruct-basic-calendar",
"test_chat_completion.py",
"openai_api",
"verifications",
"tests",
"llama-stack",
""
],
"metadata": {
"model": "accounts/fireworks/models/llama4-scout-instruct-basic",
"case_id": "calendar"
},
"setup": {
"duration": 0.0729895168915391,
"outcome": "passed"
},
"call": {
"duration": 0.9500969992950559,
"outcome": "passed"
},
"teardown": {
"duration": 0.000257221981883049,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[accounts/fireworks/models/llama4-scout-instruct-basic-math]",
"lineno": 204,
"outcome": "passed",
"keywords": [
"test_chat_streaming_structured_output[accounts/fireworks/models/llama4-scout-instruct-basic-math]",
"parametrize",
"pytestmark",
"accounts/fireworks/models/llama4-scout-instruct-basic-math",
"test_chat_completion.py",
"openai_api",
"verifications",
"tests",
"llama-stack",
""
],
"metadata": {
"model": "accounts/fireworks/models/llama4-scout-instruct-basic",
"case_id": "math"
},
"setup": {
"duration": 0.07070339564234018,
"outcome": "passed"
},
"call": {
"duration": 2.6405998673290014,
"outcome": "passed"
},
"teardown": {
"duration": 0.0002397783100605011,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[accounts/fireworks/models/llama4-maverick-instruct-basic-calendar]",
"lineno": 204,
"outcome": "passed",
"keywords": [
"test_chat_streaming_structured_output[accounts/fireworks/models/llama4-maverick-instruct-basic-calendar]",
"parametrize",
"pytestmark",
"accounts/fireworks/models/llama4-maverick-instruct-basic-calendar",
"test_chat_completion.py",
"openai_api",
"verifications",
"tests",
"llama-stack",
""
],
"metadata": {
"model": "accounts/fireworks/models/llama4-maverick-instruct-basic",
"case_id": "calendar"
},
"setup": {
"duration": 0.07140882592648268,
"outcome": "passed"
},
"call": {
"duration": 0.7515814090147614,
"outcome": "passed"
},
"teardown": {
"duration": 0.0002773841843008995,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[accounts/fireworks/models/llama4-maverick-instruct-basic-math]",
"lineno": 204,
"outcome": "passed",
"keywords": [
"test_chat_streaming_structured_output[accounts/fireworks/models/llama4-maverick-instruct-basic-math]",
"parametrize",
"pytestmark",
"accounts/fireworks/models/llama4-maverick-instruct-basic-math",
"test_chat_completion.py",
"openai_api",
"verifications",
"tests",
"llama-stack",
""
],
"metadata": {
"model": "accounts/fireworks/models/llama4-maverick-instruct-basic",
"case_id": "math"
},
"setup": {
"duration": 0.07105506956577301,
"outcome": "passed"
},
"call": {
"duration": 3.091084435582161,
"outcome": "passed"
},
"teardown": {
"duration": 0.0002588946372270584,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]",
"lineno": 226,
"outcome": "failed",
"keywords": [
"test_chat_non_streaming_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]",
"parametrize",
"pytestmark",
"accounts/fireworks/models/llama-v3p3-70b-instruct-case0",
"test_chat_completion.py",
"openai_api",
"verifications",
"tests",
"llama-stack",
""
],
"metadata": {
"model": "accounts/fireworks/models/llama-v3p3-70b-instruct",
"case_id": "case0"
},
"setup": {
"duration": 0.07215945608913898,
"outcome": "passed"
},
"call": {
"duration": 1.13668860681355,
"outcome": "failed",
"crash": {
"path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
"lineno": 245,
"message": "TypeError: object of type 'NoneType' has no len()"
},
"traceback": [
{
"path": "tests/verifications/openai_api/test_chat_completion.py",
"lineno": 245,
"message": "TypeError"
}
],
"longrepr": "request = <FixtureRequest for <Function test_chat_non_streaming_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]>>\nopenai_client = <openai.OpenAI object at 0x7f1acdbd0430>\nmodel = 'accounts/fireworks/models/llama-v3p3-70b-instruct'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_non_streaming_tool_calling(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n stream=False,\n )\n \n assert response.choices[0].message.role == \"assistant\"\n> assert len(response.choices[0].message.tool_calls) > 0\nE TypeError: object of type 'NoneType' has no len()\n\ntests/verifications/openai_api/test_chat_completion.py:245: TypeError"
},
"teardown": {
"duration": 0.0003727646544575691,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-case0]",
"lineno": 226,
"outcome": "failed",
"keywords": [
"test_chat_non_streaming_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-case0]",
"parametrize",
"pytestmark",
"accounts/fireworks/models/llama4-scout-instruct-basic-case0",
"test_chat_completion.py",
"openai_api",
"verifications",
"tests",
"llama-stack",
""
],
"metadata": {
"model": "accounts/fireworks/models/llama4-scout-instruct-basic",
"case_id": "case0"
},
"setup": {
"duration": 0.07085339725017548,
"outcome": "passed"
},
"call": {
"duration": 6.564900263212621,
"outcome": "failed",
"crash": {
"path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
"lineno": 245,
"message": "TypeError: object of type 'NoneType' has no len()"
},
"traceback": [
{
"path": "tests/verifications/openai_api/test_chat_completion.py",
"lineno": 245,
"message": "TypeError"
}
],
"longrepr": "request = <FixtureRequest for <Function test_chat_non_streaming_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-case0]>>\nopenai_client = <openai.OpenAI object at 0x7f1acda3cdf0>\nmodel = 'accounts/fireworks/models/llama4-scout-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_non_streaming_tool_calling(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n stream=False,\n )\n \n assert response.choices[0].message.role == \"assistant\"\n> assert len(response.choices[0].message.tool_calls) > 0\nE TypeError: object of type 'NoneType' has no len()\n\ntests/verifications/openai_api/test_chat_completion.py:245: TypeError"
},
"teardown": {
"duration": 0.00036074407398700714,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]",
"lineno": 226,
"outcome": "failed",
"keywords": [
"test_chat_non_streaming_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]",
"parametrize",
"pytestmark",
"accounts/fireworks/models/llama4-maverick-instruct-basic-case0",
"test_chat_completion.py",
"openai_api",
"verifications",
"tests",
"llama-stack",
""
],
"metadata": {
"model": "accounts/fireworks/models/llama4-maverick-instruct-basic",
"case_id": "case0"
},
"setup": {
"duration": 0.07105840742588043,
"outcome": "passed"
},
"call": {
"duration": 1.9664474660530686,
"outcome": "failed",
"crash": {
"path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
"lineno": 245,
"message": "TypeError: object of type 'NoneType' has no len()"
},
"traceback": [
{
"path": "tests/verifications/openai_api/test_chat_completion.py",
"lineno": 245,
"message": "TypeError"
}
],
"longrepr": "request = <FixtureRequest for <Function test_chat_non_streaming_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]>>\nopenai_client = <openai.OpenAI object at 0x7f1acdb6ee60>\nmodel = 'accounts/fireworks/models/llama4-maverick-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_non_streaming_tool_calling(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n stream=False,\n )\n \n assert response.choices[0].message.role == \"assistant\"\n> assert len(response.choices[0].message.tool_calls) > 0\nE TypeError: object of type 'NoneType' has no len()\n\ntests/verifications/openai_api/test_chat_completion.py:245: TypeError"
},
"teardown": {
"duration": 0.0003125220537185669,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]",
"lineno": 250,
"outcome": "failed",
"keywords": [
"test_chat_streaming_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]",
"parametrize",
"pytestmark",
"accounts/fireworks/models/llama-v3p3-70b-instruct-case0",
"test_chat_completion.py",
"openai_api",
"verifications",
"tests",
"llama-stack",
""
],
"metadata": {
"model": "accounts/fireworks/models/llama-v3p3-70b-instruct",
"case_id": "case0"
},
"setup": {
"duration": 0.07491886802017689,
"outcome": "passed"
},
"call": {
"duration": 1.6239055208861828,
"outcome": "failed",
"crash": {
"path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
"lineno": 269,
"message": "assert 0 == 1\n + where 0 = len([])"
},
"traceback": [
{
"path": "tests/verifications/openai_api/test_chat_completion.py",
"lineno": 269,
"message": "AssertionError"
}
],
"longrepr": "request = <FixtureRequest for <Function test_chat_streaming_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]>>\nopenai_client = <openai.OpenAI object at 0x7f1acda56740>\nmodel = 'accounts/fireworks/models/llama-v3p3-70b-instruct'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_tool_calling(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n stream = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n stream=True,\n )\n \n _, tool_calls_buffer = _accumulate_streaming_tool_calls(stream)\n> assert len(tool_calls_buffer) == 1\nE assert 0 == 1\nE + where 0 = len([])\n\ntests/verifications/openai_api/test_chat_completion.py:269: AssertionError"
},
"teardown": {
"duration": 0.0003996873274445534,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-case0]",
"lineno": 250,
"outcome": "failed",
"keywords": [
"test_chat_streaming_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-case0]",
"parametrize",
"pytestmark",
"accounts/fireworks/models/llama4-scout-instruct-basic-case0",
"test_chat_completion.py",
"openai_api",
"verifications",
"tests",
"llama-stack",
""
],
"metadata": {
"model": "accounts/fireworks/models/llama4-scout-instruct-basic",
"case_id": "case0"
},
"setup": {
"duration": 0.07084537390619516,
"outcome": "passed"
},
"call": {
"duration": 7.175910825841129,
"outcome": "failed",
"crash": {
"path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
"lineno": 269,
"message": "assert 0 == 1\n + where 0 = len([])"
},
"traceback": [
{
"path": "tests/verifications/openai_api/test_chat_completion.py",
"lineno": 269,
"message": "AssertionError"
}
],
"longrepr": "request = <FixtureRequest for <Function test_chat_streaming_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-case0]>>\nopenai_client = <openai.OpenAI object at 0x7f1acdb51360>\nmodel = 'accounts/fireworks/models/llama4-scout-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_tool_calling(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n stream = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n stream=True,\n )\n \n _, tool_calls_buffer = _accumulate_streaming_tool_calls(stream)\n> assert len(tool_calls_buffer) == 1\nE assert 0 == 1\nE + where 0 = len([])\n\ntests/verifications/openai_api/test_chat_completion.py:269: AssertionError"
},
"teardown": {
"duration": 0.0003013862296938896,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]",
"lineno": 250,
"outcome": "failed",
"keywords": [
"test_chat_streaming_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]",
"parametrize",
"pytestmark",
"accounts/fireworks/models/llama4-maverick-instruct-basic-case0",
"test_chat_completion.py",
"openai_api",
"verifications",
"tests",
"llama-stack",
""
],
"metadata": {
"model": "accounts/fireworks/models/llama4-maverick-instruct-basic",
"case_id": "case0"
},
"setup": {
"duration": 0.07152015157043934,
"outcome": "passed"
},
"call": {
"duration": 9.749054622836411,
"outcome": "failed",
"crash": {
"path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
"lineno": 269,
"message": "assert 0 == 1\n + where 0 = len([])"
},
"traceback": [
{
"path": "tests/verifications/openai_api/test_chat_completion.py",
"lineno": 269,
"message": "AssertionError"
}
],
"longrepr": "request = <FixtureRequest for <Function test_chat_streaming_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]>>\nopenai_client = <openai.OpenAI object at 0x7f1acda32bc0>\nmodel = 'accounts/fireworks/models/llama4-maverick-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_tool_calling(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n stream = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n stream=True,\n )\n \n _, tool_calls_buffer = _accumulate_streaming_tool_calls(stream)\n> assert len(tool_calls_buffer) == 1\nE assert 0 == 1\nE + where 0 = len([])\n\ntests/verifications/openai_api/test_chat_completion.py:269: AssertionError"
},
"teardown": {
"duration": 0.0002990690991282463,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_required[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]",
"lineno": 278,
"outcome": "passed",
"keywords": [
"test_chat_non_streaming_tool_choice_required[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]",
"parametrize",
"pytestmark",
"accounts/fireworks/models/llama-v3p3-70b-instruct-case0",
"test_chat_completion.py",
"openai_api",
"verifications",
"tests",
"llama-stack",
""
],
"metadata": {
"model": "accounts/fireworks/models/llama-v3p3-70b-instruct",
"case_id": "case0"
},
"setup": {
"duration": 0.07075500208884478,
"outcome": "passed"
},
"call": {
"duration": 0.9870151281356812,
"outcome": "passed"
},
"teardown": {
"duration": 0.00022785458713769913,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_required[accounts/fireworks/models/llama4-scout-instruct-basic-case0]",
"lineno": 278,
"outcome": "failed",
"keywords": [
"test_chat_non_streaming_tool_choice_required[accounts/fireworks/models/llama4-scout-instruct-basic-case0]",
"parametrize",
"pytestmark",
"accounts/fireworks/models/llama4-scout-instruct-basic-case0",
"test_chat_completion.py",
"openai_api",
"verifications",
"tests",
"llama-stack",
""
],
"metadata": {
"model": "accounts/fireworks/models/llama4-scout-instruct-basic",
"case_id": "case0"
},
"setup": {
"duration": 0.0698307491838932,
"outcome": "passed"
},
"call": {
"duration": 4.061793921515346,
"outcome": "failed",
"crash": {
"path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
"lineno": 298,
"message": "TypeError: object of type 'NoneType' has no len()"
},
"traceback": [
{
"path": "tests/verifications/openai_api/test_chat_completion.py",
"lineno": 298,
"message": "TypeError"
}
],
"longrepr": "request = <FixtureRequest for <Function test_chat_non_streaming_tool_choice_required[accounts/fireworks/models/llama4-scout-instruct-basic-case0]>>\nopenai_client = <openai.OpenAI object at 0x7f1acdb678e0>\nmodel = 'accounts/fireworks/models/llama4-scout-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"], # Reusing existing case for now\n ids=case_id_generator,\n )\n def test_chat_non_streaming_tool_choice_required(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n tool_choice=\"required\", # Force tool call\n stream=False,\n )\n \n assert response.choices[0].message.role == \"assistant\"\n> assert len(response.choices[0].message.tool_calls) > 0, \"Expected tool call when tool_choice='required'\"\nE TypeError: object of type 'NoneType' has no len()\n\ntests/verifications/openai_api/test_chat_completion.py:298: TypeError"
},
"teardown": {
"duration": 0.00028742197901010513,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_required[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]",
"lineno": 278,
"outcome": "failed",
"keywords": [
"test_chat_non_streaming_tool_choice_required[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]",
"parametrize",
"pytestmark",
"accounts/fireworks/models/llama4-maverick-instruct-basic-case0",
"test_chat_completion.py",
"openai_api",
"verifications",
"tests",
"llama-stack",
""
],
"metadata": {
"model": "accounts/fireworks/models/llama4-maverick-instruct-basic",
"case_id": "case0"
},
"setup": {
"duration": 0.07069965451955795,
"outcome": "passed"
},
"call": {
"duration": 24.973835667595267,
"outcome": "failed",
"crash": {
"path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
"lineno": 298,
"message": "TypeError: object of type 'NoneType' has no len()"
},
"traceback": [
{
"path": "tests/verifications/openai_api/test_chat_completion.py",
"lineno": 298,
"message": "TypeError"
}
],
"longrepr": "request = <FixtureRequest for <Function test_chat_non_streaming_tool_choice_required[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]>>\nopenai_client = <openai.OpenAI object at 0x7f1acdab3430>\nmodel = 'accounts/fireworks/models/llama4-maverick-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"], # Reusing existing case for now\n ids=case_id_generator,\n )\n def test_chat_non_streaming_tool_choice_required(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n tool_choice=\"required\", # Force tool call\n stream=False,\n )\n \n assert response.choices[0].message.role == \"assistant\"\n> assert len(response.choices[0].message.tool_calls) > 0, \"Expected tool call when tool_choice='required'\"\nE TypeError: object of type 'NoneType' has no len()\n\ntests/verifications/openai_api/test_chat_completion.py:298: TypeError"
},
"teardown": {
"duration": 0.00034868158400058746,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_required[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]",
"lineno": 302,
"outcome": "passed",
"keywords": [
"test_chat_streaming_tool_choice_required[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]",
"parametrize",
"pytestmark",
"accounts/fireworks/models/llama-v3p3-70b-instruct-case0",
"test_chat_completion.py",
"openai_api",
"verifications",
"tests",
"llama-stack",
""
],
"metadata": {
"model": "accounts/fireworks/models/llama-v3p3-70b-instruct",
"case_id": "case0"
},
"setup": {
"duration": 0.07031871005892754,
"outcome": "passed"
},
"call": {
"duration": 0.7874777475371957,
"outcome": "passed"
},
"teardown": {
"duration": 0.00027067307382822037,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_required[accounts/fireworks/models/llama4-scout-instruct-basic-case0]",
"lineno": 302,
"outcome": "failed",
"keywords": [
"test_chat_streaming_tool_choice_required[accounts/fireworks/models/llama4-scout-instruct-basic-case0]",
"parametrize",
"pytestmark",
"accounts/fireworks/models/llama4-scout-instruct-basic-case0",
"test_chat_completion.py",
"openai_api",
"verifications",
"tests",
"llama-stack",
""
],
"metadata": {
"model": "accounts/fireworks/models/llama4-scout-instruct-basic",
"case_id": "case0"
},
"setup": {
"duration": 0.07194838207215071,
"outcome": "passed"
},
"call": {
"duration": 5.034253670834005,
"outcome": "failed",
"crash": {
"path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
"lineno": 323,
"message": "AssertionError: Expected tool call when tool_choice='required'\nassert 0 > 0\n + where 0 = len([])"
},
"traceback": [
{
"path": "tests/verifications/openai_api/test_chat_completion.py",
"lineno": 323,
"message": "AssertionError"
}
],
"longrepr": "request = <FixtureRequest for <Function test_chat_streaming_tool_choice_required[accounts/fireworks/models/llama4-scout-instruct-basic-case0]>>\nopenai_client = <openai.OpenAI object at 0x7f1acda29390>\nmodel = 'accounts/fireworks/models/llama4-scout-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"], # Reusing existing case for now\n ids=case_id_generator,\n )\n def test_chat_streaming_tool_choice_required(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n stream = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n tool_choice=\"required\", # Force tool call\n stream=True,\n )\n \n _, tool_calls_buffer = _accumulate_streaming_tool_calls(stream)\n \n> assert len(tool_calls_buffer) > 0, \"Expected tool call when tool_choice='required'\"\nE AssertionError: Expected tool call when tool_choice='required'\nE assert 0 > 0\nE + where 0 = len([])\n\ntests/verifications/openai_api/test_chat_completion.py:323: AssertionError"
},
"teardown": {
"duration": 0.00030618347227573395,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_required[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]",
"lineno": 302,
"outcome": "failed",
"keywords": [
"test_chat_streaming_tool_choice_required[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]",
"parametrize",
"pytestmark",
"accounts/fireworks/models/llama4-maverick-instruct-basic-case0",
"test_chat_completion.py",
"openai_api",
"verifications",
"tests",
"llama-stack",
""
],
"metadata": {
"model": "accounts/fireworks/models/llama4-maverick-instruct-basic",
"case_id": "case0"
},
"setup": {
"duration": 0.07107715681195259,
"outcome": "passed"
},
"call": {
"duration": 6.841737313196063,
"outcome": "failed",
"crash": {
"path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
"lineno": 323,
"message": "AssertionError: Expected tool call when tool_choice='required'\nassert 0 > 0\n + where 0 = len([])"
},
"traceback": [
{
"path": "tests/verifications/openai_api/test_chat_completion.py",
"lineno": 323,
"message": "AssertionError"
}
],
"longrepr": "request = <FixtureRequest for <Function test_chat_streaming_tool_choice_required[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]>>\nopenai_client = <openai.OpenAI object at 0x7f1acdab73d0>\nmodel = 'accounts/fireworks/models/llama4-maverick-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"], # Reusing existing case for now\n ids=case_id_generator,\n )\n def test_chat_streaming_tool_choice_required(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n stream = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n tool_choice=\"required\", # Force tool call\n stream=True,\n )\n \n _, tool_calls_buffer = _accumulate_streaming_tool_calls(stream)\n \n> assert len(tool_calls_buffer) > 0, \"Expected tool call when tool_choice='required'\"\nE AssertionError: Expected tool call when tool_choice='required'\nE assert 0 > 0\nE + where 0 = len([])\n\ntests/verifications/openai_api/test_chat_completion.py:323: AssertionError"
},
"teardown": {
"duration": 0.0003354279324412346,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_none[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]",
"lineno": 329,
"outcome": "passed",
"keywords": [
"test_chat_non_streaming_tool_choice_none[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]",
"parametrize",
"pytestmark",
"accounts/fireworks/models/llama-v3p3-70b-instruct-case0",
"test_chat_completion.py",
"openai_api",
"verifications",
"tests",
"llama-stack",
""
],
"metadata": {
"model": "accounts/fireworks/models/llama-v3p3-70b-instruct",
"case_id": "case0"
},
"setup": {
"duration": 0.0726231737062335,
"outcome": "passed"
},
"call": {
"duration": 0.7659661257639527,
"outcome": "passed"
},
"teardown": {
"duration": 0.0003337552770972252,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_none[accounts/fireworks/models/llama4-scout-instruct-basic-case0]",
"lineno": 329,
"outcome": "passed",
"keywords": [
"test_chat_non_streaming_tool_choice_none[accounts/fireworks/models/llama4-scout-instruct-basic-case0]",
"parametrize",
"pytestmark",
"accounts/fireworks/models/llama4-scout-instruct-basic-case0",
"test_chat_completion.py",
"openai_api",
"verifications",
"tests",
"llama-stack",
""
],
"metadata": {
"model": "accounts/fireworks/models/llama4-scout-instruct-basic",
"case_id": "case0"
},
"setup": {
"duration": 0.09297824744135141,
"outcome": "passed"
},
"call": {
"duration": 3.257608976215124,
"outcome": "passed"
},
"teardown": {
"duration": 0.00022768322378396988,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_none[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]",
"lineno": 329,
"outcome": "passed",
"keywords": [
"test_chat_non_streaming_tool_choice_none[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]",
"parametrize",
"pytestmark",
"accounts/fireworks/models/llama4-maverick-instruct-basic-case0",
"test_chat_completion.py",
"openai_api",
"verifications",
"tests",
"llama-stack",
""
],
"metadata": {
"model": "accounts/fireworks/models/llama4-maverick-instruct-basic",
"case_id": "case0"
},
"setup": {
"duration": 0.0726541867479682,
"outcome": "passed"
},
"call": {
"duration": 4.5413802824914455,
"outcome": "passed"
},
"teardown": {
"duration": 0.00026340410113334656,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_none[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]",
"lineno": 352,
"outcome": "passed",
"keywords": [
"test_chat_streaming_tool_choice_none[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]",
"parametrize",
"pytestmark",
"accounts/fireworks/models/llama-v3p3-70b-instruct-case0",
"test_chat_completion.py",
"openai_api",
"verifications",
"tests",
"llama-stack",
""
],
"metadata": {
"model": "accounts/fireworks/models/llama-v3p3-70b-instruct",
"case_id": "case0"
},
"setup": {
"duration": 0.07666508108377457,
"outcome": "passed"
},
"call": {
"duration": 0.5535151390358806,
"outcome": "passed"
},
"teardown": {
"duration": 0.0003251638263463974,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_none[accounts/fireworks/models/llama4-scout-instruct-basic-case0]",
"lineno": 352,
"outcome": "passed",
"keywords": [
"test_chat_streaming_tool_choice_none[accounts/fireworks/models/llama4-scout-instruct-basic-case0]",
"parametrize",
"pytestmark",
"accounts/fireworks/models/llama4-scout-instruct-basic-case0",
"test_chat_completion.py",
"openai_api",
"verifications",
"tests",
"llama-stack",
""
],
"metadata": {
"model": "accounts/fireworks/models/llama4-scout-instruct-basic",
"case_id": "case0"
},
"setup": {
"duration": 0.09550460614264011,
"outcome": "passed"
},
"call": {
"duration": 1.171110725030303,
"outcome": "passed"
},
"teardown": {
"duration": 0.0002604629844427109,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_none[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]",
"lineno": 352,
"outcome": "passed",
"keywords": [
"test_chat_streaming_tool_choice_none[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]",
"parametrize",
"pytestmark",
"accounts/fireworks/models/llama4-maverick-instruct-basic-case0",
"test_chat_completion.py",
"openai_api",
"verifications",
"tests",
"llama-stack",
""
],
"metadata": {
"model": "accounts/fireworks/models/llama4-maverick-instruct-basic",
"case_id": "case0"
},
"setup": {
"duration": 0.07114547491073608,
"outcome": "passed"
},
"call": {
"duration": 27.369331603869796,
"outcome": "passed"
},
"teardown": {
"duration": 0.00023956969380378723,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-text_then_weather_tool]",
"lineno": 380,
"outcome": "failed",
"keywords": [
"test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-text_then_weather_tool]",
"parametrize",
"pytestmark",
"accounts/fireworks/models/llama-v3p3-70b-instruct-text_then_weather_tool",
"test_chat_completion.py",
"openai_api",
"verifications",
"tests",
"llama-stack",
""
],
"metadata": {
"model": "accounts/fireworks/models/llama-v3p3-70b-instruct",
"case_id": "text_then_weather_tool"
},
"setup": {
"duration": 0.07612851448357105,
"outcome": "passed"
},
"call": {
"duration": 2.10164753254503,
"outcome": "failed",
"crash": {
"path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
"lineno": 467,
"message": "AssertionError: Expected one of ['sol'] in content, but got: 'I cannot perform this task as it requires additional functionality that is not available in the given functions.'\nassert False\n + where False = any(<generator object test_chat_non_streaming_multi_turn_tool_calling.<locals>.<genexpr> at 0x7f1acda87ca0>)"
},
"traceback": [
{
"path": "tests/verifications/openai_api/test_chat_completion.py",
"lineno": 467,
"message": "AssertionError"
}
],
"longrepr": "request = <FixtureRequest for <Function test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-text_then_weather_tool]>>\nopenai_client = <openai.OpenAI object at 0x7f1acda57190>\nmodel = 'accounts/fireworks/models/llama-v3p3-70b-instruct'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'text_then_weather_tool', 'expected': [{'answer': ['sol'], 'num_tool_calls': 0}, {'num_tool_calls': 1, 'to...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_non_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\"\n Test cases for multi-turn tool calling.\n Tool calls are asserted.\n Tool responses are provided in the test case.\n Final response is asserted.\n \"\"\"\n \n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n # Create a copy of the messages list to avoid modifying the original\n messages = []\n tools = case[\"input\"][\"tools\"]\n # Use deepcopy to prevent modification across runs/parametrization\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n # keep going until either\n # 1. we have messages to test in multi-turn\n # 2. 
no messages but last message is tool response\n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n # do not take new messages if last message is tool response\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n # Ensure new_messages is a list of message objects\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n # If it's a single message object, add it directly\n messages.append(new_messages)\n \n # --- API Call ---\n response = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=False,\n )\n \n # --- Process Response ---\n assistant_message = response.choices[0].message\n messages.append(assistant_message.model_dump(exclude_unset=True))\n \n assert assistant_message.role == \"assistant\"\n \n # Get the expected result data\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n # --- Assertions based on expected result ---\n assert len(assistant_message.tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(assistant_message.tool_calls or [])}\"\n )\n \n if num_tool_calls > 0:\n tool_call = assistant_message.tool_calls[0]\n assert tool_call.function.name == expected[\"tool_name\"], (\n f\"Expected tool '{expected['tool_name']}', got '{tool_call.function.name}'\"\n )\n # Parse the JSON string arguments before comparing\n actual_arguments = json.loads(tool_call.function.arguments)\n assert actual_arguments == expected[\"tool_arguments\"], (\n f\"Expected arguments '{expected['tool_arguments']}', got '{actual_arguments}'\"\n )\n \n # Prepare and append the tool response for the next turn\n tool_response = tool_responses.pop(0)\n messages.append(\n {\n \"role\": \"tool\",\n \"tool_call_id\": tool_call.id,\n \"content\": tool_response[\"response\"],\n }\n )\n else:\n assert assistant_message.content is not None, \"Expected content, but none received.\"\n expected_answers = expected[\"answer\"] # This is now a list\n content_lower = assistant_message.content.lower()\n> assert any(ans.lower() in content_lower for ans in expected_answers), (\n f\"Expected one of {expected_answers} in content, but got: '{assistant_message.content}'\"\n )\nE AssertionError: Expected one of ['sol'] in content, but got: 'I cannot perform this task as it requires additional functionality that is not available in the given functions.'\nE assert False\nE + where False = any(<generator object test_chat_non_streaming_multi_turn_tool_calling.<locals>.<genexpr> at 0x7f1acda87ca0>)\n\ntests/verifications/openai_api/test_chat_completion.py:467: AssertionError"
},
"teardown": {
"duration": 0.00030514132231473923,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-weather_tool_then_text]",
"lineno": 380,
"outcome": "failed",
"keywords": [
"test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-weather_tool_then_text]",
"parametrize",
"pytestmark",
"accounts/fireworks/models/llama-v3p3-70b-instruct-weather_tool_then_text",
"test_chat_completion.py",
"openai_api",
"verifications",
"tests",
"llama-stack",
""
],
"metadata": {
"model": "accounts/fireworks/models/llama-v3p3-70b-instruct",
"case_id": "weather_tool_then_text"
},
"setup": {
"duration": 0.07009781803935766,
"outcome": "passed"
},
"call": {
"duration": 2.49614445772022,
"outcome": "failed",
"crash": {
"path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
"lineno": 439,
"message": "AssertionError: Expected 1 tool calls, but got 0\nassert 0 == 1\n + where 0 = len((None or []))\n + where None = ChatCompletionMessage(content='{\"type\": \"function\", \"name\": \"get_weather\", \"parameters\": {\"location\": \"San Francisco, CA\"}}', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None).tool_calls"
},
"traceback": [
{
"path": "tests/verifications/openai_api/test_chat_completion.py",
"lineno": 439,
"message": "AssertionError"
}
],
"longrepr": "request = <FixtureRequest for <Function test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-weather_tool_then_text]>>\nopenai_client = <openai.OpenAI object at 0x7f1acdb50490>\nmodel = 'accounts/fireworks/models/llama-v3p3-70b-instruct'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'weather_tool_then_text', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'location': 'San Francisco...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_non_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\"\n Test cases for multi-turn tool calling.\n Tool calls are asserted.\n Tool responses are provided in the test case.\n Final response is asserted.\n \"\"\"\n \n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n # Create a copy of the messages list to avoid modifying the original\n messages = []\n tools = case[\"input\"][\"tools\"]\n # Use deepcopy to prevent modification across runs/parametrization\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n # keep going until either\n # 1. we have messages to test in multi-turn\n # 2. 
no messages but last message is tool response\n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n # do not take new messages if last message is tool response\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n # Ensure new_messages is a list of message objects\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n # If it's a single message object, add it directly\n messages.append(new_messages)\n \n # --- API Call ---\n response = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=False,\n )\n \n # --- Process Response ---\n assistant_message = response.choices[0].message\n messages.append(assistant_message.model_dump(exclude_unset=True))\n \n assert assistant_message.role == \"assistant\"\n \n # Get the expected result data\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n # --- Assertions based on expected result ---\n> assert len(assistant_message.tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(assistant_message.tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len((None or []))\nE + where None = ChatCompletionMessage(content='{\"type\": \"function\", \"name\": \"get_weather\", \"parameters\": {\"location\": \"San Francisco, CA\"}}', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None).tool_calls\n\ntests/verifications/openai_api/test_chat_completion.py:439: AssertionError"
},
"teardown": {
"duration": 0.00035297591239213943,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-add_product_tool]",
"lineno": 380,
"outcome": "failed",
"keywords": [
"test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-add_product_tool]",
"parametrize",
"pytestmark",
"accounts/fireworks/models/llama-v3p3-70b-instruct-add_product_tool",
"test_chat_completion.py",
"openai_api",
"verifications",
"tests",
"llama-stack",
""
],
"metadata": {
"model": "accounts/fireworks/models/llama-v3p3-70b-instruct",
"case_id": "add_product_tool"
},
"setup": {
"duration": 0.0719120567664504,
"outcome": "passed"
},
"call": {
"duration": 1.181352874264121,
"outcome": "failed",
"crash": {
"path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
"lineno": 439,
"message": "AssertionError: Expected 1 tool calls, but got 0\nassert 0 == 1\n + where 0 = len((None or []))\n + where None = ChatCompletionMessage(content='{\"type\": \"function\", \"name\": \"addProduct\", \"parameters\": {\"name\": \"Widget\", \"price\": \"19.99\", \"inStock\": \"true\", \"tags\": \"[\\\\\"new\\\\\", \\\\\"sale\\\\\"]\"}}', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None).tool_calls"
},
"traceback": [
{
"path": "tests/verifications/openai_api/test_chat_completion.py",
"lineno": 439,
"message": "AssertionError"
}
],
"longrepr": "request = <FixtureRequest for <Function test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-add_product_tool]>>\nopenai_client = <openai.OpenAI object at 0x7f1acdc0c550>\nmodel = 'accounts/fireworks/models/llama-v3p3-70b-instruct'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'add_product_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'inStock': True, 'name': 'Widget...}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': 'Successfully added product with id: 123'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_non_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\"\n Test cases for multi-turn tool calling.\n Tool calls are asserted.\n Tool responses are provided in the test case.\n Final response is asserted.\n \"\"\"\n \n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n # Create a copy of the messages list to avoid modifying the original\n messages = []\n tools = case[\"input\"][\"tools\"]\n # Use deepcopy to prevent modification across runs/parametrization\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n # keep going until either\n # 1. we have messages to test in multi-turn\n # 2. 
no messages but last message is tool response\n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n # do not take new messages if last message is tool response\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n # Ensure new_messages is a list of message objects\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n # If it's a single message object, add it directly\n messages.append(new_messages)\n \n # --- API Call ---\n response = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=False,\n )\n \n # --- Process Response ---\n assistant_message = response.choices[0].message\n messages.append(assistant_message.model_dump(exclude_unset=True))\n \n assert assistant_message.role == \"assistant\"\n \n # Get the expected result data\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n # --- Assertions based on expected result ---\n> assert len(assistant_message.tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(assistant_message.tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len((None or []))\nE + where None = ChatCompletionMessage(content='{\"type\": \"function\", \"name\": \"addProduct\", \"parameters\": {\"name\": \"Widget\", \"price\": \"19.99\", \"inStock\": \"true\", \"tags\": \"[\\\\\"new\\\\\", \\\\\"sale\\\\\"]\"}}', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None).tool_calls\n\ntests/verifications/openai_api/test_chat_completion.py:439: AssertionError"
},
"teardown": {
"duration": 0.000303901731967926,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-get_then_create_event_tool]",
"lineno": 380,
"outcome": "failed",
"keywords": [
"test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-get_then_create_event_tool]",
"parametrize",
"pytestmark",
"accounts/fireworks/models/llama-v3p3-70b-instruct-get_then_create_event_tool",
"test_chat_completion.py",
"openai_api",
"verifications",
"tests",
"llama-stack",
""
],
"metadata": {
"model": "accounts/fireworks/models/llama-v3p3-70b-instruct",
"case_id": "get_then_create_event_tool"
},
"setup": {
"duration": 0.07158921286463737,
"outcome": "passed"
},
"call": {
"duration": 3.7202864307910204,
"outcome": "failed",
"crash": {
"path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
"lineno": 439,
"message": "AssertionError: Expected 1 tool calls, but got 0\nassert 0 == 1\n + where 0 = len((None or []))\n + where None = ChatCompletionMessage(content='{\"type\": \"function\", \"name\": \"get_event\", \"parameters\": {\"date\": \"2025-03-03\", \"time\": \"10:00\"}}', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None).tool_calls"
},
"traceback": [
{
"path": "tests/verifications/openai_api/test_chat_completion.py",
"lineno": 439,
"message": "AssertionError"
}
],
"longrepr": "request = <FixtureRequest for <Function test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-get_then_create_event_tool]>>\nopenai_client = <openai.OpenAI object at 0x7f1acdae22f0>\nmodel = 'accounts/fireworks/models/llama-v3p3-70b-instruct'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'get_then_create_event_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'date': '2025-03-03', ...ents found for 2025-03-03 at 10:00'}\"}, {'response': \"{'response': 'Successfully created new event with id: e_123'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_non_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\"\n Test cases for multi-turn tool calling.\n Tool calls are asserted.\n Tool responses are provided in the test case.\n Final response is asserted.\n \"\"\"\n \n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n # Create a copy of the messages list to avoid modifying the original\n messages = []\n tools = case[\"input\"][\"tools\"]\n # Use deepcopy to prevent modification across runs/parametrization\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n # keep going until either\n # 1. we have messages to test in multi-turn\n # 2. 
no messages but last message is tool response\n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n # do not take new messages if last message is tool response\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n # Ensure new_messages is a list of message objects\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n # If it's a single message object, add it directly\n messages.append(new_messages)\n \n # --- API Call ---\n response = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=False,\n )\n \n # --- Process Response ---\n assistant_message = response.choices[0].message\n messages.append(assistant_message.model_dump(exclude_unset=True))\n \n assert assistant_message.role == \"assistant\"\n \n # Get the expected result data\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n # --- Assertions based on expected result ---\n> assert len(assistant_message.tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(assistant_message.tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len((None or []))\nE + where None = ChatCompletionMessage(content='{\"type\": \"function\", \"name\": \"get_event\", \"parameters\": {\"date\": \"2025-03-03\", \"time\": \"10:00\"}}', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None).tool_calls\n\ntests/verifications/openai_api/test_chat_completion.py:439: AssertionError"
},
"teardown": {
"duration": 0.0003700554370880127,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-compare_monthly_expense_tool]",
"lineno": 380,
"outcome": "failed",
"keywords": [
"test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-compare_monthly_expense_tool]",
"parametrize",
"pytestmark",
"accounts/fireworks/models/llama-v3p3-70b-instruct-compare_monthly_expense_tool",
"test_chat_completion.py",
"openai_api",
"verifications",
"tests",
"llama-stack",
""
],
"metadata": {
"model": "accounts/fireworks/models/llama-v3p3-70b-instruct",
"case_id": "compare_monthly_expense_tool"
},
"setup": {
"duration": 0.07388217654079199,
"outcome": "passed"
},
"call": {
"duration": 0.6030126195400953,
"outcome": "failed",
"crash": {
"path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
"lineno": 439,
"message": "AssertionError: Expected 1 tool calls, but got 0\nassert 0 == 1\n + where 0 = len((None or []))\n + where None = ChatCompletionMessage(content='{\"type\": \"function\", \"name\": \"getMonthlyExpenseSummary\", \"parameters\": {\"month\": \"1\", \"year\": \"2025\"}}', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None).tool_calls"
},
"traceback": [
{
"path": "tests/verifications/openai_api/test_chat_completion.py",
"lineno": 439,
"message": "AssertionError"
}
],
"longrepr": "request = <FixtureRequest for <Function test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-compare_monthly_expense_tool]>>\nopenai_client = <openai.OpenAI object at 0x7f1acdca8670>\nmodel = 'accounts/fireworks/models/llama-v3p3-70b-instruct'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'compare_monthly_expense_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'month': 1, 'year': ... 'Total expenses for January 2025: $1000'}\"}, {'response': \"{'response': 'Total expenses for February 2024: $2000'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_non_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\"\n Test cases for multi-turn tool calling.\n Tool calls are asserted.\n Tool responses are provided in the test case.\n Final response is asserted.\n \"\"\"\n \n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n # Create a copy of the messages list to avoid modifying the original\n messages = []\n tools = case[\"input\"][\"tools\"]\n # Use deepcopy to prevent modification across runs/parametrization\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n # keep going until either\n # 1. we have messages to test in multi-turn\n # 2. 
no messages but last message is tool response\n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n # do not take new messages if last message is tool response\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n # Ensure new_messages is a list of message objects\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n # If it's a single message object, add it directly\n messages.append(new_messages)\n \n # --- API Call ---\n response = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=False,\n )\n \n # --- Process Response ---\n assistant_message = response.choices[0].message\n messages.append(assistant_message.model_dump(exclude_unset=True))\n \n assert assistant_message.role == \"assistant\"\n \n # Get the expected result data\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n # --- Assertions based on expected result ---\n> assert len(assistant_message.tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(assistant_message.tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len((None or []))\nE + where None = ChatCompletionMessage(content='{\"type\": \"function\", \"name\": \"getMonthlyExpenseSummary\", \"parameters\": {\"month\": \"1\", \"year\": \"2025\"}}', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None).tool_calls\n\ntests/verifications/openai_api/test_chat_completion.py:439: AssertionError"
},
"teardown": {
"duration": 0.0003188345581293106,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-text_then_weather_tool]",
"lineno": 380,
"outcome": "failed",
"keywords": [
"test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-text_then_weather_tool]",
"parametrize",
"pytestmark",
"accounts/fireworks/models/llama4-scout-instruct-basic-text_then_weather_tool",
"test_chat_completion.py",
"openai_api",
"verifications",
"tests",
"llama-stack",
""
],
"metadata": {
"model": "accounts/fireworks/models/llama4-scout-instruct-basic",
"case_id": "text_then_weather_tool"
},
"setup": {
"duration": 0.07314795535057783,
"outcome": "passed"
},
"call": {
"duration": 1.0849075820297003,
"outcome": "failed",
"crash": {
"path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
"lineno": 467,
"message": "AssertionError: Expected one of ['sol'] in content, but got: '{\"name\": \"get_weather\", \"parameters\": {\"description\": \"Get the current weather\", \"parameters\": {\"type\": \"object\", \"properties\": {\"location\": {\"description\": \"The city and state (both required). e.g. San Francisco, CA.\", \"type\": \"string\"}}}}'\nassert False\n + where False = any(<generator object test_chat_non_streaming_multi_turn_tool_calling.<locals>.<genexpr> at 0x7f1acdad8970>)"
},
"traceback": [
{
"path": "tests/verifications/openai_api/test_chat_completion.py",
"lineno": 467,
"message": "AssertionError"
}
],
"longrepr": "request = <FixtureRequest for <Function test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-text_then_weather_tool]>>\nopenai_client = <openai.OpenAI object at 0x7f1acda560e0>\nmodel = 'accounts/fireworks/models/llama4-scout-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'text_then_weather_tool', 'expected': [{'answer': ['sol'], 'num_tool_calls': 0}, {'num_tool_calls': 1, 'to...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_non_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\"\n Test cases for multi-turn tool calling.\n Tool calls are asserted.\n Tool responses are provided in the test case.\n Final response is asserted.\n \"\"\"\n \n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n # Create a copy of the messages list to avoid modifying the original\n messages = []\n tools = case[\"input\"][\"tools\"]\n # Use deepcopy to prevent modification across runs/parametrization\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n # keep going until either\n # 1. we have messages to test in multi-turn\n # 2. 
no messages but last message is tool response\n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n # do not take new messages if last message is tool response\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n # Ensure new_messages is a list of message objects\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n # If it's a single message object, add it directly\n messages.append(new_messages)\n \n # --- API Call ---\n response = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=False,\n )\n \n # --- Process Response ---\n assistant_message = response.choices[0].message\n messages.append(assistant_message.model_dump(exclude_unset=True))\n \n assert assistant_message.role == \"assistant\"\n \n # Get the expected result data\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n # --- Assertions based on expected result ---\n assert len(assistant_message.tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(assistant_message.tool_calls or [])}\"\n )\n \n if num_tool_calls > 0:\n tool_call = assistant_message.tool_calls[0]\n assert tool_call.function.name == expected[\"tool_name\"], (\n f\"Expected tool '{expected['tool_name']}', got '{tool_call.function.name}'\"\n )\n # Parse the JSON string arguments before comparing\n actual_arguments = json.loads(tool_call.function.arguments)\n assert actual_arguments == expected[\"tool_arguments\"], (\n f\"Expected arguments '{expected['tool_arguments']}', got '{actual_arguments}'\"\n )\n \n # Prepare and append the tool response for the next turn\n tool_response = tool_responses.pop(0)\n messages.append(\n {\n \"role\": \"tool\",\n \"tool_call_id\": tool_call.id,\n \"content\": tool_response[\"response\"],\n }\n )\n else:\n assert assistant_message.content is not None, \"Expected content, but none received.\"\n expected_answers = expected[\"answer\"] # This is now a list\n content_lower = assistant_message.content.lower()\n> assert any(ans.lower() in content_lower for ans in expected_answers), (\n f\"Expected one of {expected_answers} in content, but got: '{assistant_message.content}'\"\n )\nE AssertionError: Expected one of ['sol'] in content, but got: '{\"name\": \"get_weather\", \"parameters\": {\"description\": \"Get the current weather\", \"parameters\": {\"type\": \"object\", \"properties\": {\"location\": {\"description\": \"The city and state (both required). e.g. San Francisco, CA.\", \"type\": \"string\"}}}}'\nE assert False\nE + where False = any(<generator object test_chat_non_streaming_multi_turn_tool_calling.<locals>.<genexpr> at 0x7f1acdad8970>)\n\ntests/verifications/openai_api/test_chat_completion.py:467: AssertionError"
},
"teardown": {
"duration": 0.00032442156225442886,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-weather_tool_then_text]",
"lineno": 380,
"outcome": "failed",
"keywords": [
"test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-weather_tool_then_text]",
"parametrize",
"pytestmark",
"accounts/fireworks/models/llama4-scout-instruct-basic-weather_tool_then_text",
"test_chat_completion.py",
"openai_api",
"verifications",
"tests",
"llama-stack",
""
],
"metadata": {
"model": "accounts/fireworks/models/llama4-scout-instruct-basic",
"case_id": "weather_tool_then_text"
},
"setup": {
"duration": 0.07257637288421392,
"outcome": "passed"
},
"call": {
"duration": 1.1364115234464407,
"outcome": "failed",
"crash": {
"path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
"lineno": 439,
"message": "AssertionError: Expected 1 tool calls, but got 0\nassert 0 == 1\n + where 0 = len((None or []))\n + where None = ChatCompletionMessage(content='{\"name\": \"get_weather\", \"parameters\": {\"description\": \"Get the current weather\", \"parameters\": {\"type\": \"object\", \"properties\": {\"location\": {\"description\": \"The city and state (both required)\", \"type\": \"string\"}}}, \"required\": [\"location\"]}}', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None).tool_calls"
},
"traceback": [
{
"path": "tests/verifications/openai_api/test_chat_completion.py",
"lineno": 439,
"message": "AssertionError"
}
],
"longrepr": "request = <FixtureRequest for <Function test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-weather_tool_then_text]>>\nopenai_client = <openai.OpenAI object at 0x7f1acda30c70>\nmodel = 'accounts/fireworks/models/llama4-scout-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'weather_tool_then_text', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'location': 'San Francisco...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_non_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\"\n Test cases for multi-turn tool calling.\n Tool calls are asserted.\n Tool responses are provided in the test case.\n Final response is asserted.\n \"\"\"\n \n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n # Create a copy of the messages list to avoid modifying the original\n messages = []\n tools = case[\"input\"][\"tools\"]\n # Use deepcopy to prevent modification across runs/parametrization\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n # keep going until either\n # 1. we have messages to test in multi-turn\n # 2. 
no messages but last message is tool response\n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n # do not take new messages if last message is tool response\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n # Ensure new_messages is a list of message objects\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n # If it's a single message object, add it directly\n messages.append(new_messages)\n \n # --- API Call ---\n response = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=False,\n )\n \n # --- Process Response ---\n assistant_message = response.choices[0].message\n messages.append(assistant_message.model_dump(exclude_unset=True))\n \n assert assistant_message.role == \"assistant\"\n \n # Get the expected result data\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n # --- Assertions based on expected result ---\n> assert len(assistant_message.tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(assistant_message.tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len((None or []))\nE + where None = ChatCompletionMessage(content='{\"name\": \"get_weather\", \"parameters\": {\"description\": \"Get the current weather\", \"parameters\": {\"type\": \"object\", \"properties\": {\"location\": {\"description\": \"The city and state (both required)\", \"type\": \"string\"}}}, \"required\": [\"location\"]}}', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None).tool_calls\n\ntests/verifications/openai_api/test_chat_completion.py:439: AssertionError"
},
"teardown": {
"duration": 0.0003107702359557152,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-add_product_tool]",
"lineno": 380,
"outcome": "failed",
"keywords": [
"test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-add_product_tool]",
"parametrize",
"pytestmark",
"accounts/fireworks/models/llama4-scout-instruct-basic-add_product_tool",
"test_chat_completion.py",
"openai_api",
"verifications",
"tests",
"llama-stack",
""
],
"metadata": {
"model": "accounts/fireworks/models/llama4-scout-instruct-basic",
"case_id": "add_product_tool"
},
"setup": {
"duration": 0.0716616166755557,
"outcome": "passed"
},
"call": {
"duration": 1.6755285635590553,
"outcome": "failed",
"crash": {
"path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
"lineno": 439,
"message": "AssertionError: Expected 1 tool calls, but got 0\nassert 0 == 1\n + where 0 = len((None or []))\n + where None = ChatCompletionMessage(content='{\"name\": \"addProduct\", \"parameters\": {\"name\": {\"type\": \"string\", \"value\": \"Widget\"}, \"description\": {\"type\": \"string\", \"value\": \"Name of the product\"}, \"price\": {\"type\": \"number\", \"value\": 19.99}, \"inStock\": {\"type\": \"boolean\", \"value\": true}, \"tags\": {\"type\": \"array\", \"value\": [\"new\", \"sale\"]}}}', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None).tool_calls"
},
"traceback": [
{
"path": "tests/verifications/openai_api/test_chat_completion.py",
"lineno": 439,
"message": "AssertionError"
}
],
"longrepr": "request = <FixtureRequest for <Function test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-add_product_tool]>>\nopenai_client = <openai.OpenAI object at 0x7f1acdb6f850>\nmodel = 'accounts/fireworks/models/llama4-scout-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'add_product_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'inStock': True, 'name': 'Widget...}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': 'Successfully added product with id: 123'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_non_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\"\n Test cases for multi-turn tool calling.\n Tool calls are asserted.\n Tool responses are provided in the test case.\n Final response is asserted.\n \"\"\"\n \n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n # Create a copy of the messages list to avoid modifying the original\n messages = []\n tools = case[\"input\"][\"tools\"]\n # Use deepcopy to prevent modification across runs/parametrization\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n # keep going until either\n # 1. we have messages to test in multi-turn\n # 2. 
no messages but last message is tool response\n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n # do not take new messages if last message is tool response\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n # Ensure new_messages is a list of message objects\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n # If it's a single message object, add it directly\n messages.append(new_messages)\n \n # --- API Call ---\n response = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=False,\n )\n \n # --- Process Response ---\n assistant_message = response.choices[0].message\n messages.append(assistant_message.model_dump(exclude_unset=True))\n \n assert assistant_message.role == \"assistant\"\n \n # Get the expected result data\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n # --- Assertions based on expected result ---\n> assert len(assistant_message.tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(assistant_message.tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len((None or []))\nE + where None = ChatCompletionMessage(content='{\"name\": \"addProduct\", \"parameters\": {\"name\": {\"type\": \"string\", \"value\": \"Widget\"}, \"description\": {\"type\": \"string\", \"value\": \"Name of the product\"}, \"price\": {\"type\": \"number\", \"value\": 19.99}, \"inStock\": {\"type\": \"boolean\", \"value\": true}, \"tags\": {\"type\": \"array\", \"value\": [\"new\", \"sale\"]}}}', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None).tool_calls\n\ntests/verifications/openai_api/test_chat_completion.py:439: AssertionError"
},
"teardown": {
"duration": 0.0003323536366224289,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-get_then_create_event_tool]",
"lineno": 380,
"outcome": "failed",
"keywords": [
"test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-get_then_create_event_tool]",
"parametrize",
"pytestmark",
"accounts/fireworks/models/llama4-scout-instruct-basic-get_then_create_event_tool",
"test_chat_completion.py",
"openai_api",
"verifications",
"tests",
"llama-stack",
""
],
"metadata": {
"model": "accounts/fireworks/models/llama4-scout-instruct-basic",
"case_id": "get_then_create_event_tool"
},
"setup": {
"duration": 0.07031949236989021,
"outcome": "passed"
},
"call": {
"duration": 2.363899651914835,
"outcome": "failed",
"crash": {
"path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
"lineno": 439,
"message": "AssertionError: Expected 1 tool calls, but got 0\nassert 0 == 1\n + where 0 = len((None or []))\n + where None = ChatCompletionMessage(content='{\"name\": \"get_event\", \"parameters\": {\"date\": {\"date\": \"March 3rd\"}, \"time\": {\"time\": \"10 am\"}}}assistant\\n\\n{\"name\": \"get_event\", \"parameters\": {\"date\": {\"date\": \"2025-03-03\"}, \"time\": {\"time\": \"10:00\"}}}assistant\\n\\n{\"name\": \"get_event\", \"parameters\": {\"date\": {\"date\": \"2025-03-03\"}, \"time\": {\"time\": \"10:00\"}}}assistant\\n\\n{\"name\": \"get_event\", \"parameters\": {\"date\": {\"date\": \"2025-03-03\"}, \"time\": {\"time\": \"10:00\"}}}assistant\\n\\nThe function provided is not sufficient for me to answer the question.assistant\\n\\n{\"name\": \"get_event\", \"parameters\": {\"date\": {\"date\": \"2025-03-03\"}, \"time\": {\"time\": \"10:00\"}}}assistant\\n\\nThe function provided is not sufficient for me to answer the question.', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None).tool_calls"
},
"traceback": [
{
"path": "tests/verifications/openai_api/test_chat_completion.py",
"lineno": 439,
"message": "AssertionError"
}
],
"longrepr": "request = <FixtureRequest for <Function test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-get_then_create_event_tool]>>\nopenai_client = <openai.OpenAI object at 0x7f1acda3dff0>\nmodel = 'accounts/fireworks/models/llama4-scout-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'get_then_create_event_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'date': '2025-03-03', ...ents found for 2025-03-03 at 10:00'}\"}, {'response': \"{'response': 'Successfully created new event with id: e_123'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_non_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\"\n Test cases for multi-turn tool calling.\n Tool calls are asserted.\n Tool responses are provided in the test case.\n Final response is asserted.\n \"\"\"\n \n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n # Create a copy of the messages list to avoid modifying the original\n messages = []\n tools = case[\"input\"][\"tools\"]\n # Use deepcopy to prevent modification across runs/parametrization\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n # keep going until either\n # 1. we have messages to test in multi-turn\n # 2. 
no messages but last message is tool response\n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n # do not take new messages if last message is tool response\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n # Ensure new_messages is a list of message objects\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n # If it's a single message object, add it directly\n messages.append(new_messages)\n \n # --- API Call ---\n response = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=False,\n )\n \n # --- Process Response ---\n assistant_message = response.choices[0].message\n messages.append(assistant_message.model_dump(exclude_unset=True))\n \n assert assistant_message.role == \"assistant\"\n \n # Get the expected result data\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n # --- Assertions based on expected result ---\n> assert len(assistant_message.tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(assistant_message.tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len((None or []))\nE + where None = ChatCompletionMessage(content='{\"name\": \"get_event\", \"parameters\": {\"date\": {\"date\": \"March 3rd\"}, \"time\": {\"time\": \"10 am\"}}}assistant\\n\\n{\"name\": \"get_event\", \"parameters\": {\"date\": {\"date\": \"2025-03-03\"}, \"time\": {\"time\": \"10:00\"}}}assistant\\n\\n{\"name\": \"get_event\", \"parameters\": {\"date\": {\"date\": \"2025-03-03\"}, \"time\": {\"time\": \"10:00\"}}}assistant\\n\\n{\"name\": \"get_event\", \"parameters\": {\"date\": {\"date\": \"2025-03-03\"}, \"time\": {\"time\": \"10:00\"}}}assistant\\n\\nThe function provided is not sufficient for me to answer the question.assistant\\n\\n{\"name\": \"get_event\", \"parameters\": {\"date\": {\"date\": \"2025-03-03\"}, \"time\": {\"time\": \"10:00\"}}}assistant\\n\\nThe function provided is not sufficient for me to answer the question.', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None).tool_calls\n\ntests/verifications/openai_api/test_chat_completion.py:439: AssertionError"
},
"teardown": {
"duration": 0.0003245687112212181,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-compare_monthly_expense_tool]",
"lineno": 380,
"outcome": "failed",
"keywords": [
"test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-compare_monthly_expense_tool]",
"parametrize",
"pytestmark",
"accounts/fireworks/models/llama4-scout-instruct-basic-compare_monthly_expense_tool",
"test_chat_completion.py",
"openai_api",
"verifications",
"tests",
"llama-stack",
""
],
"metadata": {
"model": "accounts/fireworks/models/llama4-scout-instruct-basic",
"case_id": "compare_monthly_expense_tool"
},
"setup": {
"duration": 0.07069017831236124,
"outcome": "passed"
},
"call": {
"duration": 1.8757586162537336,
"outcome": "failed",
"crash": {
"path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
"lineno": 439,
"message": "AssertionError: Expected 1 tool calls, but got 0\nassert 0 == 1\n + where 0 = len((None or []))\n + where None = ChatCompletionMessage(content='{\"name\": \"getMonthlyExpenseSummary\", \"parameters\": {\"month\": {\"description\": \"Month of the year (1-12)\", \"type\": \"integer\"}, \"year\": {\"description\": \"Year\", \"type\": \"integer\"}}}assistant\\n\\n{\"name\": \"getMonthlyExpenseSummary\", \"parameters\": {\"month\": {\"description\": \"Month of the year (1-12)\", \"type\": \"integer\"}, \"year\": {\"description\": \"Year\", \"type\": \"integer\"}}}assistant\\n\\n{\"name\": \"getMonthlyExpenseSummary\", \"parameters\": {\"month\": {\"description\": \"Month of the year (1-12)\", \"type\": \"integer\", \"value\": 1}, \"year\": {\"description\": \"Year\", \"type\": \"integer\", \"value\": 2025}}}', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None).tool_calls"
},
"traceback": [
{
"path": "tests/verifications/openai_api/test_chat_completion.py",
"lineno": 439,
"message": "AssertionError"
}
],
"longrepr": "request = <FixtureRequest for <Function test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-compare_monthly_expense_tool]>>\nopenai_client = <openai.OpenAI object at 0x7f1acda3d5a0>\nmodel = 'accounts/fireworks/models/llama4-scout-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'compare_monthly_expense_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'month': 1, 'year': ... 'Total expenses for January 2025: $1000'}\"}, {'response': \"{'response': 'Total expenses for February 2024: $2000'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_non_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\"\n Test cases for multi-turn tool calling.\n Tool calls are asserted.\n Tool responses are provided in the test case.\n Final response is asserted.\n \"\"\"\n \n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n # Create a copy of the messages list to avoid modifying the original\n messages = []\n tools = case[\"input\"][\"tools\"]\n # Use deepcopy to prevent modification across runs/parametrization\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n # keep going until either\n # 1. we have messages to test in multi-turn\n # 2. 
no messages but last message is tool response\n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n # do not take new messages if last message is tool response\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n # Ensure new_messages is a list of message objects\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n # If it's a single message object, add it directly\n messages.append(new_messages)\n \n # --- API Call ---\n response = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=False,\n )\n \n # --- Process Response ---\n assistant_message = response.choices[0].message\n messages.append(assistant_message.model_dump(exclude_unset=True))\n \n assert assistant_message.role == \"assistant\"\n \n # Get the expected result data\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n # --- Assertions based on expected result ---\n> assert len(assistant_message.tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(assistant_message.tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len((None or []))\nE + where None = ChatCompletionMessage(content='{\"name\": \"getMonthlyExpenseSummary\", \"parameters\": {\"month\": {\"description\": \"Month of the year (1-12)\", \"type\": \"integer\"}, \"year\": {\"description\": \"Year\", \"type\": \"integer\"}}}assistant\\n\\n{\"name\": \"getMonthlyExpenseSummary\", \"parameters\": {\"month\": {\"description\": \"Month of the year (1-12)\", \"type\": \"integer\"}, \"year\": {\"description\": \"Year\", \"type\": \"integer\"}}}assistant\\n\\n{\"name\": \"getMonthlyExpenseSummary\", \"parameters\": {\"month\": {\"description\": \"Month of the year (1-12)\", \"type\": \"integer\", \"value\": 1}, \"year\": {\"description\": \"Year\", \"type\": \"integer\", \"value\": 2025}}}', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None).tool_calls\n\ntests/verifications/openai_api/test_chat_completion.py:439: AssertionError"
},
"teardown": {
"duration": 0.00030215736478567123,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-text_then_weather_tool]",
"lineno": 380,
"outcome": "failed",
"keywords": [
"test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-text_then_weather_tool]",
"parametrize",
"pytestmark",
"accounts/fireworks/models/llama4-maverick-instruct-basic-text_then_weather_tool",
"test_chat_completion.py",
"openai_api",
"verifications",
"tests",
"llama-stack",
""
],
"metadata": {
"model": "accounts/fireworks/models/llama4-maverick-instruct-basic",
"case_id": "text_then_weather_tool"
},
"setup": {
"duration": 0.07024750486016273,
"outcome": "passed"
},
"call": {
"duration": 2.9532439298927784,
"outcome": "failed",
"crash": {
"path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
"lineno": 467,
"message": "AssertionError: Expected one of ['sol'] in content, but got: 'Since there's no function defined to directly answer \"What's the name of the Sun in latin?\", I'll assume there's a general knowledge or information retrieval function available. Let's call it \"get_general_knowledge\". \n \n Here is a potential JSON response for a function call:\n \n {\"name\": \"get_general_knowledge\", \"parameters\": {\"query\": \"Latin name of the Sun\"}} \n \n However, the exact function and parameter names might vary based on the actual function definitions available. If we consider the given function \"get_weather\" and its parameters, it doesn't fit the prompt. Therefore, based on a hypothetical \"get_general_knowledge\" function, the response is provided. \n \n If the actual available functions were listed, a more accurate response could be provided. \n \n For the sake of the given prompt and assuming the presence of a \"get_general_knowledge\" function, the response is:\n \n {\"name\": \"get_general_knowledge\", \"parameters\": {\"query\": \"Latin name of the Sun\"}}'\nassert False\n + where False = any(<generator object test_chat_non_streaming_multi_turn_tool_calling.<locals>.<genexpr> at 0x7f1acd9d54d0>)"
},
"traceback": [
{
"path": "tests/verifications/openai_api/test_chat_completion.py",
"lineno": 467,
"message": "AssertionError"
}
],
"longrepr": "request = <FixtureRequest for <Function test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-text_then_weather_tool]>>\nopenai_client = <openai.OpenAI object at 0x7f1acda3e230>\nmodel = 'accounts/fireworks/models/llama4-maverick-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'text_then_weather_tool', 'expected': [{'answer': ['sol'], 'num_tool_calls': 0}, {'num_tool_calls': 1, 'to...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_non_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\"\n Test cases for multi-turn tool calling.\n Tool calls are asserted.\n Tool responses are provided in the test case.\n Final response is asserted.\n \"\"\"\n \n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n # Create a copy of the messages list to avoid modifying the original\n messages = []\n tools = case[\"input\"][\"tools\"]\n # Use deepcopy to prevent modification across runs/parametrization\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n # keep going until either\n # 1. we have messages to test in multi-turn\n # 2. 
no messages but last message is tool response\n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n # do not take new messages if last message is tool response\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n # Ensure new_messages is a list of message objects\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n # If it's a single message object, add it directly\n messages.append(new_messages)\n \n # --- API Call ---\n response = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=False,\n )\n \n # --- Process Response ---\n assistant_message = response.choices[0].message\n messages.append(assistant_message.model_dump(exclude_unset=True))\n \n assert assistant_message.role == \"assistant\"\n \n # Get the expected result data\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n # --- Assertions based on expected result ---\n assert len(assistant_message.tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(assistant_message.tool_calls or [])}\"\n )\n \n if num_tool_calls > 0:\n tool_call = assistant_message.tool_calls[0]\n assert tool_call.function.name == expected[\"tool_name\"], (\n f\"Expected tool '{expected['tool_name']}', got '{tool_call.function.name}'\"\n )\n # Parse the JSON string arguments before comparing\n actual_arguments = json.loads(tool_call.function.arguments)\n assert actual_arguments == expected[\"tool_arguments\"], (\n f\"Expected arguments '{expected['tool_arguments']}', got '{actual_arguments}'\"\n )\n \n # Prepare and append the tool response for the next turn\n tool_response = tool_responses.pop(0)\n messages.append(\n {\n \"role\": \"tool\",\n \"tool_call_id\": tool_call.id,\n \"content\": tool_response[\"response\"],\n }\n )\n else:\n assert assistant_message.content is not None, \"Expected content, but none received.\"\n expected_answers = expected[\"answer\"] # This is now a list\n content_lower = assistant_message.content.lower()\n> assert any(ans.lower() in content_lower for ans in expected_answers), (\n f\"Expected one of {expected_answers} in content, but got: '{assistant_message.content}'\"\n )\nE AssertionError: Expected one of ['sol'] in content, but got: 'Since there's no function defined to directly answer \"What's the name of the Sun in latin?\", I'll assume there's a general knowledge or information retrieval function available. Let's call it \"get_general_knowledge\". \nE \nE Here is a potential JSON response for a function call:\nE \nE {\"name\": \"get_general_knowledge\", \"parameters\": {\"query\": \"Latin name of the Sun\"}} \nE \nE However, the exact function and parameter names might vary based on the actual function definitions available. If we consider the given function \"get_weather\" and its parameters, it doesn't fit the prompt. Therefore, based on a hypothetical \"get_general_knowledge\" function, the response is provided. \nE \nE If the actual available functions were listed, a more accurate response could be provided. 
\nE \nE For the sake of the given prompt and assuming the presence of a \"get_general_knowledge\" function, the response is:\nE \nE {\"name\": \"get_general_knowledge\", \"parameters\": {\"query\": \"Latin name of the Sun\"}}'\nE assert False\nE + where False = any(<generator object test_chat_non_streaming_multi_turn_tool_calling.<locals>.<genexpr> at 0x7f1acd9d54d0>)\n\ntests/verifications/openai_api/test_chat_completion.py:467: AssertionError"
},
"teardown": {
"duration": 0.00038253143429756165,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-weather_tool_then_text]",
"lineno": 380,
"outcome": "failed",
"keywords": [
"test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-weather_tool_then_text]",
"parametrize",
"pytestmark",
"accounts/fireworks/models/llama4-maverick-instruct-basic-weather_tool_then_text",
"test_chat_completion.py",
"openai_api",
"verifications",
"tests",
"llama-stack",
""
],
"metadata": {
"model": "accounts/fireworks/models/llama4-maverick-instruct-basic",
"case_id": "weather_tool_then_text"
},
"setup": {
"duration": 0.07193771284073591,
"outcome": "passed"
},
"call": {
"duration": 0.9909431086853147,
"outcome": "failed",
"crash": {
"path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
"lineno": 439,
"message": "AssertionError: Expected 1 tool calls, but got 0\nassert 0 == 1\n + where 0 = len((None or []))\n + where None = ChatCompletionMessage(content='{\"name\": \"get_weather\", \"parameters\": {\"location\": \"San Francisco, CA\"}}', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None).tool_calls"
},
"traceback": [
{
"path": "tests/verifications/openai_api/test_chat_completion.py",
"lineno": 439,
"message": "AssertionError"
}
],
"longrepr": "request = <FixtureRequest for <Function test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-weather_tool_then_text]>>\nopenai_client = <openai.OpenAI object at 0x7f1acdb91570>\nmodel = 'accounts/fireworks/models/llama4-maverick-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'weather_tool_then_text', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'location': 'San Francisco...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_non_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\"\n Test cases for multi-turn tool calling.\n Tool calls are asserted.\n Tool responses are provided in the test case.\n Final response is asserted.\n \"\"\"\n \n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n # Create a copy of the messages list to avoid modifying the original\n messages = []\n tools = case[\"input\"][\"tools\"]\n # Use deepcopy to prevent modification across runs/parametrization\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n # keep going until either\n # 1. we have messages to test in multi-turn\n # 2. 
no messages but last message is tool response\n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n # do not take new messages if last message is tool response\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n # Ensure new_messages is a list of message objects\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n # If it's a single message object, add it directly\n messages.append(new_messages)\n \n # --- API Call ---\n response = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=False,\n )\n \n # --- Process Response ---\n assistant_message = response.choices[0].message\n messages.append(assistant_message.model_dump(exclude_unset=True))\n \n assert assistant_message.role == \"assistant\"\n \n # Get the expected result data\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n # --- Assertions based on expected result ---\n> assert len(assistant_message.tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(assistant_message.tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len((None or []))\nE + where None = ChatCompletionMessage(content='{\"name\": \"get_weather\", \"parameters\": {\"location\": \"San Francisco, CA\"}}', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None).tool_calls\n\ntests/verifications/openai_api/test_chat_completion.py:439: AssertionError"
},
"teardown": {
"duration": 0.0003658318892121315,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-add_product_tool]",
"lineno": 380,
"outcome": "failed",
"keywords": [
"test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-add_product_tool]",
"parametrize",
"pytestmark",
"accounts/fireworks/models/llama4-maverick-instruct-basic-add_product_tool",
"test_chat_completion.py",
"openai_api",
"verifications",
"tests",
"llama-stack",
""
],
"metadata": {
"model": "accounts/fireworks/models/llama4-maverick-instruct-basic",
"case_id": "add_product_tool"
},
"setup": {
"duration": 0.0702557684853673,
"outcome": "passed"
},
"call": {
"duration": 0.8836336443200707,
"outcome": "failed",
"crash": {
"path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
"lineno": 439,
"message": "AssertionError: Expected 1 tool calls, but got 0\nassert 0 == 1\n + where 0 = len((None or []))\n + where None = ChatCompletionMessage(content='{\"name\": \"addProduct\", \"parameters\": {\"name\": \"Widget\", \"price\": 19.99, \"inStock\": true, \"tags\": [\"new\", \"sale\"]}}', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None).tool_calls"
},
"traceback": [
{
"path": "tests/verifications/openai_api/test_chat_completion.py",
"lineno": 439,
"message": "AssertionError"
}
],
"longrepr": "request = <FixtureRequest for <Function test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-add_product_tool]>>\nopenai_client = <openai.OpenAI object at 0x7f1acdbec2e0>\nmodel = 'accounts/fireworks/models/llama4-maverick-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'add_product_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'inStock': True, 'name': 'Widget...}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': 'Successfully added product with id: 123'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_non_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\"\n Test cases for multi-turn tool calling.\n Tool calls are asserted.\n Tool responses are provided in the test case.\n Final response is asserted.\n \"\"\"\n \n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n # Create a copy of the messages list to avoid modifying the original\n messages = []\n tools = case[\"input\"][\"tools\"]\n # Use deepcopy to prevent modification across runs/parametrization\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n # keep going until either\n # 1. we have messages to test in multi-turn\n # 2. 
no messages but last message is tool response\n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n # do not take new messages if last message is tool response\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n # Ensure new_messages is a list of message objects\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n # If it's a single message object, add it directly\n messages.append(new_messages)\n \n # --- API Call ---\n response = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=False,\n )\n \n # --- Process Response ---\n assistant_message = response.choices[0].message\n messages.append(assistant_message.model_dump(exclude_unset=True))\n \n assert assistant_message.role == \"assistant\"\n \n # Get the expected result data\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n # --- Assertions based on expected result ---\n> assert len(assistant_message.tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(assistant_message.tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len((None or []))\nE + where None = ChatCompletionMessage(content='{\"name\": \"addProduct\", \"parameters\": {\"name\": \"Widget\", \"price\": 19.99, \"inStock\": true, \"tags\": [\"new\", \"sale\"]}}', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None).tool_calls\n\ntests/verifications/openai_api/test_chat_completion.py:439: AssertionError"
},
"teardown": {
"duration": 0.00036840979009866714,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-get_then_create_event_tool]",
"lineno": 380,
"outcome": "failed",
"keywords": [
"test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-get_then_create_event_tool]",
"parametrize",
"pytestmark",
"accounts/fireworks/models/llama4-maverick-instruct-basic-get_then_create_event_tool",
"test_chat_completion.py",
"openai_api",
"verifications",
"tests",
"llama-stack",
""
],
"metadata": {
"model": "accounts/fireworks/models/llama4-maverick-instruct-basic",
"case_id": "get_then_create_event_tool"
},
"setup": {
"duration": 0.07019469328224659,
"outcome": "passed"
},
"call": {
"duration": 7.394101745449007,
"outcome": "failed",
"crash": {
"path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
"lineno": 439,
"message": "AssertionError: Expected 1 tool calls, but got 0\nassert 0 == 1\n + where 0 = len((None or []))\n + where None = ChatCompletionMessage(content='{\"type\": \"function\", \"name\": \"get_event\", \"parameters\": {\"date\": \"2025-03-03\", \"time\": \"10:00\"}} \"\\n\\n{\"type\": \"function\", \"name\": \"get_event\", \"parameters\": {\"date\": \"2025-03-03\", \"time\": \"10:00\"}}assistant\\n\\n{\"type\": \"function\", \"name\": \"get_event\", \"parameters\": {\"date\": \"2025-03-03\", \"time\": \"10:00\"}}assistant\\n\\nimport json\\noutput = json.loads(\"{\\\\\"type\\\\\": \\\\\"function\\\\\", \\\\\"name\\\\\": \\\\\"get_event\\\\\", \\\\\"parameters\\\\\": {\\\\\"date\\\\\": \\\\\"2025-03-03\\\\\", \\\\\"time\\\\\": \\\\\"10:00\\\\\"}}\")\\nprint(output)assistant\\n\\nimport json\\n\\n# Define the input data\\ndata = \\'[{\"type\": \"function\", \"name\": \"create_event\", \"parameters\": {\"name\": \"Meeting\", \"date\": \"2025-03-03\", \"time\": \"10:00\", \"location\": \"Conference Room\", \"participants\": [\"John\", \"Jane\"]}}, {\"type\": \"function\", \"name\": \"get_event\", \"parameters\": {\"date\": \"2025-03-03\", \"time\": \"10:00\"}}]\\'\\n\\n# Parse the JSON string into a Python object\\njson_data = json.loads(data)\\n\\n# Print the parsed data\\nprint(json_data)assistant\\n\\nimport json\\n\\ndef check_meeting(data):\\n # Parse the JSON string into a Python object\\n json_data = json.loads(data)\\n\\n # Check if there is a meeting on March 3rd at 10 am\\n for item in json_data:\\n if item[\\'type\\'] == \\'function\\' and item[\\'name\\'] == \\'get_event\\':\\n date = item[\\'parameters\\'][\\'date\\']\\n time = item[\\'parameters\\'][\\'time\\']\\n # Assuming you have a function to check if there is a meeting at the given date and time\\n # For simplicity, let\\'s assume the function is called \\'has_meeting\\'\\n # if has_meeting(date, time):\\n # return \\'Yes\\'\\n # else:\\n # return \\'No\\'\\n return \\'Yes\\' # For demonstration purposes, let\\'s assume there is a meeting\\n\\n return \\'No\\'\\n\\ndata = \\'[{\"type\": \"function\", \"name\": \"create_event\", \"parameters\": {\"name\": \"Meeting\", \"date\": \"2025-03-03\", \"time\": \"10:00\", \"location\": \"Conference Room\", \"participants\": [\"John\", \"Jane\"]}}, {\"type\": \"function\", \"name\": \"get_event\", \"parameters\": {\"date\": \"2025-03-03\", \"time\": \"10:00\"}}]\\'\\nprint(check_meeting(data))assistant\\n\\nYes.', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None).tool_calls"
},
"traceback": [
{
"path": "tests/verifications/openai_api/test_chat_completion.py",
"lineno": 439,
"message": "AssertionError"
}
],
"longrepr": "request = <FixtureRequest for <Function test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-get_then_create_event_tool]>>\nopenai_client = <openai.OpenAI object at 0x7f1acdd76110>\nmodel = 'accounts/fireworks/models/llama4-maverick-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'get_then_create_event_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'date': '2025-03-03', ...ents found for 2025-03-03 at 10:00'}\"}, {'response': \"{'response': 'Successfully created new event with id: e_123'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_non_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\"\n Test cases for multi-turn tool calling.\n Tool calls are asserted.\n Tool responses are provided in the test case.\n Final response is asserted.\n \"\"\"\n \n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n # Create a copy of the messages list to avoid modifying the original\n messages = []\n tools = case[\"input\"][\"tools\"]\n # Use deepcopy to prevent modification across runs/parametrization\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n # keep going until either\n # 1. we have messages to test in multi-turn\n # 2. 
no messages but last message is tool response\n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n # do not take new messages if last message is tool response\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n # Ensure new_messages is a list of message objects\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n # If it's a single message object, add it directly\n messages.append(new_messages)\n \n # --- API Call ---\n response = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=False,\n )\n \n # --- Process Response ---\n assistant_message = response.choices[0].message\n messages.append(assistant_message.model_dump(exclude_unset=True))\n \n assert assistant_message.role == \"assistant\"\n \n # Get the expected result data\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n # --- Assertions based on expected result ---\n> assert len(assistant_message.tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(assistant_message.tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len((None or []))\nE + where None = ChatCompletionMessage(content='{\"type\": \"function\", \"name\": \"get_event\", \"parameters\": {\"date\": \"2025-03-03\", \"time\": \"10:00\"}} \"\\n\\n{\"type\": \"function\", \"name\": \"get_event\", \"parameters\": {\"date\": \"2025-03-03\", \"time\": \"10:00\"}}assistant\\n\\n{\"type\": \"function\", \"name\": \"get_event\", \"parameters\": {\"date\": \"2025-03-03\", \"time\": \"10:00\"}}assistant\\n\\nimport json\\noutput = json.loads(\"{\\\\\"type\\\\\": \\\\\"function\\\\\", \\\\\"name\\\\\": \\\\\"get_event\\\\\", \\\\\"parameters\\\\\": {\\\\\"date\\\\\": \\\\\"2025-03-03\\\\\", \\\\\"time\\\\\": \\\\\"10:00\\\\\"}}\")\\nprint(output)assistant\\n\\nimport json\\n\\n# Define the input data\\ndata = \\'[{\"type\": \"function\", \"name\": \"create_event\", \"parameters\": {\"name\": \"Meeting\", \"date\": \"2025-03-03\", \"time\": \"10:00\", \"location\": \"Conference Room\", \"participants\": [\"John\", \"Jane\"]}}, {\"type\": \"function\", \"name\": \"get_event\", \"parameters\": {\"date\": \"2025-03-03\", \"time\": \"10:00\"}}]\\'\\n\\n# Parse the JSON string into a Python object\\njson_data = json.loads(data)\\n\\n# Print the parsed data\\nprint(json_data)assistant\\n\\nimport json\\n\\ndef check_meeting(data):\\n # Parse the JSON string into a Python object\\n json_data = json.loads(data)\\n\\n # Check if there is a meeting on March 3rd at 10 am\\n for item in json_data:\\n if item[\\'type\\'] == \\'function\\' and item[\\'name\\'] == \\'get_event\\':\\n date = item[\\'parameters\\'][\\'date\\']\\n time = item[\\'parameters\\'][\\'time\\']\\n # Assuming you have a function to check if there is a meeting at the given date and time\\n # For simplicity, let\\'s assume the function is called \\'has_meeting\\'\\n # if has_meeting(date, time):\\n # return \\'Yes\\'\\n # else:\\n # return \\'No\\'\\n return \\'Yes\\' # For demonstration purposes, let\\'s assume there is a meeting\\n\\n return \\'No\\'\\n\\ndata = \\'[{\"type\": \"function\", \"name\": \"create_event\", \"parameters\": {\"name\": \"Meeting\", \"date\": \"2025-03-03\", \"time\": \"10:00\", \"location\": \"Conference Room\", \"participants\": [\"John\", \"Jane\"]}}, {\"type\": \"function\", \"name\": 
\"get_event\", \"parameters\": {\"date\": \"2025-03-03\", \"time\": \"10:00\"}}]\\'\\nprint(check_meeting(data))assistant\\n\\nYes.', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None).tool_calls\n\ntests/verifications/openai_api/test_chat_completion.py:439: AssertionError"
},
"teardown": {
"duration": 0.0003475993871688843,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-compare_monthly_expense_tool]",
"lineno": 380,
"outcome": "failed",
"keywords": [
"test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-compare_monthly_expense_tool]",
"parametrize",
"pytestmark",
"accounts/fireworks/models/llama4-maverick-instruct-basic-compare_monthly_expense_tool",
"test_chat_completion.py",
"openai_api",
"verifications",
"tests",
"llama-stack",
""
],
"metadata": {
"model": "accounts/fireworks/models/llama4-maverick-instruct-basic",
"case_id": "compare_monthly_expense_tool"
},
"setup": {
"duration": 0.07140176557004452,
"outcome": "passed"
},
"call": {
"duration": 1.5649437978863716,
"outcome": "failed",
"crash": {
"path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
"lineno": 439,
"message": "AssertionError: Expected 1 tool calls, but got 0\nassert 0 == 1\n + where 0 = len((None or []))\n + where None = ChatCompletionMessage(content='{\"name\": \"getMonthlyExpenseSummary\", \"parameters\": {\"month\": 1, \"year\": 2024}}\"\" \"\" \" \"\"\"\"\"\"\"\"\"\"\"\"\" \"\" \"\"\" \"}\",\"\" \" \"}\",\"\" \" \"}\",\"\" \" \"{\" \"name\" \": \"getMonthlyExpenseSummary\", \"parameters\": {\"month\": 1, \"year\": 2024}}\"', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None).tool_calls"
},
"traceback": [
{
"path": "tests/verifications/openai_api/test_chat_completion.py",
"lineno": 439,
"message": "AssertionError"
}
],
"longrepr": "request = <FixtureRequest for <Function test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-compare_monthly_expense_tool]>>\nopenai_client = <openai.OpenAI object at 0x7f1acd9b4640>\nmodel = 'accounts/fireworks/models/llama4-maverick-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'compare_monthly_expense_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'month': 1, 'year': ... 'Total expenses for January 2025: $1000'}\"}, {'response': \"{'response': 'Total expenses for February 2024: $2000'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_non_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\"\n Test cases for multi-turn tool calling.\n Tool calls are asserted.\n Tool responses are provided in the test case.\n Final response is asserted.\n \"\"\"\n \n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n # Create a copy of the messages list to avoid modifying the original\n messages = []\n tools = case[\"input\"][\"tools\"]\n # Use deepcopy to prevent modification across runs/parametrization\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n # keep going until either\n # 1. we have messages to test in multi-turn\n # 2. 
no messages but last message is tool response\n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n # do not take new messages if last message is tool response\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n # Ensure new_messages is a list of message objects\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n # If it's a single message object, add it directly\n messages.append(new_messages)\n \n # --- API Call ---\n response = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=False,\n )\n \n # --- Process Response ---\n assistant_message = response.choices[0].message\n messages.append(assistant_message.model_dump(exclude_unset=True))\n \n assert assistant_message.role == \"assistant\"\n \n # Get the expected result data\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n # --- Assertions based on expected result ---\n> assert len(assistant_message.tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(assistant_message.tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len((None or []))\nE + where None = ChatCompletionMessage(content='{\"name\": \"getMonthlyExpenseSummary\", \"parameters\": {\"month\": 1, \"year\": 2024}}\"\" \"\" \" \"\"\"\"\"\"\"\"\"\"\"\"\" \"\" \"\"\" \"}\",\"\" \" \"}\",\"\" \" \"}\",\"\" \" \"{\" \"name\" \": \"getMonthlyExpenseSummary\", \"parameters\": {\"month\": 1, \"year\": 2024}}\"', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None).tool_calls\n\ntests/verifications/openai_api/test_chat_completion.py:439: AssertionError"
},
"teardown": {
"duration": 0.00034684035927057266,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-text_then_weather_tool]",
"lineno": 471,
"outcome": "failed",
"keywords": [
"test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-text_then_weather_tool]",
"parametrize",
"pytestmark",
"accounts/fireworks/models/llama-v3p3-70b-instruct-text_then_weather_tool",
"test_chat_completion.py",
"openai_api",
"verifications",
"tests",
"llama-stack",
""
],
"metadata": {
"model": "accounts/fireworks/models/llama-v3p3-70b-instruct",
"case_id": "text_then_weather_tool"
},
"setup": {
"duration": 0.07161083538085222,
"outcome": "passed"
},
"call": {
"duration": 0.972024847753346,
"outcome": "failed",
"crash": {
"path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
"lineno": 550,
"message": "AssertionError: Expected one of ['sol'] in content, but got: 'I cannot perform this task as it requires additional functionality that is not available in the given functions.'\nassert False\n + where False = any(<generator object test_chat_streaming_multi_turn_tool_calling.<locals>.<genexpr> at 0x7f1acd9d4510>)"
},
"traceback": [
{
"path": "tests/verifications/openai_api/test_chat_completion.py",
"lineno": 550,
"message": "AssertionError"
}
],
"longrepr": "request = <FixtureRequest for <Function test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-text_then_weather_tool]>>\nopenai_client = <openai.OpenAI object at 0x7f1acdab0c10>\nmodel = 'accounts/fireworks/models/llama-v3p3-70b-instruct'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'text_then_weather_tool', 'expected': [{'answer': ['sol'], 'num_tool_calls': 0}, {'num_tool_calls': 1, 'to...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n \n # --- Construct Assistant Message for History ---\n assistant_message_dict = {\"role\": \"assistant\"}\n if accumulated_content:\n assistant_message_dict[\"content\"] = accumulated_content\n if accumulated_tool_calls:\n assistant_message_dict[\"tool_calls\"] = accumulated_tool_calls\n \n messages.append(assistant_message_dict)\n \n # --- Assertions ---\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n assert len(accumulated_tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(accumulated_tool_calls or [])}\"\n )\n \n if num_tool_calls > 0:\n # Use the first accumulated tool call for assertion\n tool_call = accumulated_tool_calls[0]\n assert tool_call[\"function\"][\"name\"] == expected[\"tool_name\"], (\n f\"Expected tool '{expected['tool_name']}', got '{tool_call['function']['name']}'\"\n )\n # Parse the accumulated arguments string for comparison\n actual_arguments = json.loads(tool_call[\"function\"][\"arguments\"])\n assert actual_arguments == expected[\"tool_arguments\"], (\n f\"Expected arguments '{expected['tool_arguments']}', got '{actual_arguments}'\"\n )\n \n # Prepare and append the tool response for the next turn\n tool_response = tool_responses.pop(0)\n messages.append(\n {\n \"role\": \"tool\",\n 
\"tool_call_id\": tool_call[\"id\"],\n \"content\": tool_response[\"response\"],\n }\n )\n else:\n assert accumulated_content is not None and accumulated_content != \"\", \"Expected content, but none received.\"\n expected_answers = expected[\"answer\"]\n content_lower = accumulated_content.lower()\n> assert any(ans.lower() in content_lower for ans in expected_answers), (\n f\"Expected one of {expected_answers} in content, but got: '{accumulated_content}'\"\n )\nE AssertionError: Expected one of ['sol'] in content, but got: 'I cannot perform this task as it requires additional functionality that is not available in the given functions.'\nE assert False\nE + where False = any(<generator object test_chat_streaming_multi_turn_tool_calling.<locals>.<genexpr> at 0x7f1acd9d4510>)\n\ntests/verifications/openai_api/test_chat_completion.py:550: AssertionError"
},
"teardown": {
"duration": 0.0003080591559410095,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-weather_tool_then_text]",
"lineno": 471,
"outcome": "failed",
"keywords": [
"test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-weather_tool_then_text]",
"parametrize",
"pytestmark",
"accounts/fireworks/models/llama-v3p3-70b-instruct-weather_tool_then_text",
"test_chat_completion.py",
"openai_api",
"verifications",
"tests",
"llama-stack",
""
],
"metadata": {
"model": "accounts/fireworks/models/llama-v3p3-70b-instruct",
"case_id": "weather_tool_then_text"
},
"setup": {
"duration": 0.07267874106764793,
"outcome": "passed"
},
"call": {
"duration": 0.632216920144856,
"outcome": "failed",
"crash": {
"path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
"lineno": 521,
"message": "AssertionError: Expected 1 tool calls, but got 0\nassert 0 == 1\n + where 0 = len(([] or []))"
},
"traceback": [
{
"path": "tests/verifications/openai_api/test_chat_completion.py",
"lineno": 521,
"message": "AssertionError"
}
],
"longrepr": "request = <FixtureRequest for <Function test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-weather_tool_then_text]>>\nopenai_client = <openai.OpenAI object at 0x7f1acdbfbc70>\nmodel = 'accounts/fireworks/models/llama-v3p3-70b-instruct'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'weather_tool_then_text', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'location': 'San Francisco...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n \n # --- Construct Assistant Message for History ---\n assistant_message_dict = {\"role\": \"assistant\"}\n if accumulated_content:\n assistant_message_dict[\"content\"] = accumulated_content\n if accumulated_tool_calls:\n assistant_message_dict[\"tool_calls\"] = accumulated_tool_calls\n \n messages.append(assistant_message_dict)\n \n # --- Assertions ---\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n> assert len(accumulated_tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(accumulated_tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len(([] or []))\n\ntests/verifications/openai_api/test_chat_completion.py:521: AssertionError"
},
"teardown": {
"duration": 0.0003350367769598961,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-add_product_tool]",
"lineno": 471,
"outcome": "failed",
"keywords": [
"test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-add_product_tool]",
"parametrize",
"pytestmark",
"accounts/fireworks/models/llama-v3p3-70b-instruct-add_product_tool",
"test_chat_completion.py",
"openai_api",
"verifications",
"tests",
"llama-stack",
""
],
"metadata": {
"model": "accounts/fireworks/models/llama-v3p3-70b-instruct",
"case_id": "add_product_tool"
},
"setup": {
"duration": 0.0707720061764121,
"outcome": "passed"
},
"call": {
"duration": 0.9429405080154538,
"outcome": "failed",
"crash": {
"path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
"lineno": 521,
"message": "AssertionError: Expected 1 tool calls, but got 0\nassert 0 == 1\n + where 0 = len(([] or []))"
},
"traceback": [
{
"path": "tests/verifications/openai_api/test_chat_completion.py",
"lineno": 521,
"message": "AssertionError"
}
],
"longrepr": "request = <FixtureRequest for <Function test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-add_product_tool]>>\nopenai_client = <openai.OpenAI object at 0x7f1acdac0130>\nmodel = 'accounts/fireworks/models/llama-v3p3-70b-instruct'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'add_product_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'inStock': True, 'name': 'Widget...}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': 'Successfully added product with id: 123'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n \n # --- Construct Assistant Message for History ---\n assistant_message_dict = {\"role\": \"assistant\"}\n if accumulated_content:\n assistant_message_dict[\"content\"] = accumulated_content\n if accumulated_tool_calls:\n assistant_message_dict[\"tool_calls\"] = accumulated_tool_calls\n \n messages.append(assistant_message_dict)\n \n # --- Assertions ---\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n> assert len(accumulated_tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(accumulated_tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len(([] or []))\n\ntests/verifications/openai_api/test_chat_completion.py:521: AssertionError"
},
"teardown": {
"duration": 0.0002858620136976242,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-get_then_create_event_tool]",
"lineno": 471,
"outcome": "failed",
"keywords": [
"test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-get_then_create_event_tool]",
"parametrize",
"pytestmark",
"accounts/fireworks/models/llama-v3p3-70b-instruct-get_then_create_event_tool",
"test_chat_completion.py",
"openai_api",
"verifications",
"tests",
"llama-stack",
""
],
"metadata": {
"model": "accounts/fireworks/models/llama-v3p3-70b-instruct",
"case_id": "get_then_create_event_tool"
},
"setup": {
"duration": 0.06923680566251278,
"outcome": "passed"
},
"call": {
"duration": 0.7107308339327574,
"outcome": "failed",
"crash": {
"path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
"lineno": 521,
"message": "AssertionError: Expected 1 tool calls, but got 0\nassert 0 == 1\n + where 0 = len(([] or []))"
},
"traceback": [
{
"path": "tests/verifications/openai_api/test_chat_completion.py",
"lineno": 521,
"message": "AssertionError"
}
],
"longrepr": "request = <FixtureRequest for <Function test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-get_then_create_event_tool]>>\nopenai_client = <openai.OpenAI object at 0x7f1acdaaeb60>\nmodel = 'accounts/fireworks/models/llama-v3p3-70b-instruct'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'get_then_create_event_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'date': '2025-03-03', ...ents found for 2025-03-03 at 10:00'}\"}, {'response': \"{'response': 'Successfully created new event with id: e_123'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n \n # --- Construct Assistant Message for History ---\n assistant_message_dict = {\"role\": \"assistant\"}\n if accumulated_content:\n assistant_message_dict[\"content\"] = accumulated_content\n if accumulated_tool_calls:\n assistant_message_dict[\"tool_calls\"] = accumulated_tool_calls\n \n messages.append(assistant_message_dict)\n \n # --- Assertions ---\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n> assert len(accumulated_tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(accumulated_tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len(([] or []))\n\ntests/verifications/openai_api/test_chat_completion.py:521: AssertionError"
},
"teardown": {
"duration": 0.0003181472420692444,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-compare_monthly_expense_tool]",
"lineno": 471,
"outcome": "failed",
"keywords": [
"test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-compare_monthly_expense_tool]",
"parametrize",
"pytestmark",
"accounts/fireworks/models/llama-v3p3-70b-instruct-compare_monthly_expense_tool",
"test_chat_completion.py",
"openai_api",
"verifications",
"tests",
"llama-stack",
""
],
"metadata": {
"model": "accounts/fireworks/models/llama-v3p3-70b-instruct",
"case_id": "compare_monthly_expense_tool"
},
"setup": {
"duration": 0.07021687645465136,
"outcome": "passed"
},
"call": {
"duration": 0.7717038569971919,
"outcome": "failed",
"crash": {
"path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
"lineno": 521,
"message": "AssertionError: Expected 1 tool calls, but got 0\nassert 0 == 1\n + where 0 = len(([] or []))"
},
"traceback": [
{
"path": "tests/verifications/openai_api/test_chat_completion.py",
"lineno": 521,
"message": "AssertionError"
}
],
"longrepr": "request = <FixtureRequest for <Function test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-compare_monthly_expense_tool]>>\nopenai_client = <openai.OpenAI object at 0x7f1acdbd04f0>\nmodel = 'accounts/fireworks/models/llama-v3p3-70b-instruct'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'compare_monthly_expense_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'month': 1, 'year': ... 'Total expenses for January 2025: $1000'}\"}, {'response': \"{'response': 'Total expenses for February 2024: $2000'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n \n # --- Construct Assistant Message for History ---\n assistant_message_dict = {\"role\": \"assistant\"}\n if accumulated_content:\n assistant_message_dict[\"content\"] = accumulated_content\n if accumulated_tool_calls:\n assistant_message_dict[\"tool_calls\"] = accumulated_tool_calls\n \n messages.append(assistant_message_dict)\n \n # --- Assertions ---\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n> assert len(accumulated_tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(accumulated_tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len(([] or []))\n\ntests/verifications/openai_api/test_chat_completion.py:521: AssertionError"
},
"teardown": {
"duration": 0.00030398648232221603,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-text_then_weather_tool]",
"lineno": 471,
"outcome": "failed",
"keywords": [
"test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-text_then_weather_tool]",
"parametrize",
"pytestmark",
"accounts/fireworks/models/llama4-scout-instruct-basic-text_then_weather_tool",
"test_chat_completion.py",
"openai_api",
"verifications",
"tests",
"llama-stack",
""
],
"metadata": {
"model": "accounts/fireworks/models/llama4-scout-instruct-basic",
"case_id": "text_then_weather_tool"
},
"setup": {
"duration": 0.07320436742156744,
"outcome": "passed"
},
"call": {
"duration": 1.2869794629514217,
"outcome": "failed",
"crash": {
"path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
"lineno": 550,
"message": "AssertionError: Expected one of ['sol'] in content, but got: '{\"name\": \"get_weather\", \"parameters\": {\"description\": \"Get the current weather\", \"parameters\": {\"type\": \"object\", \"properties\": {\"location\": {\"description\": \"The city and state (both required) (e.g. San Francisco, CA.\", \"type\": \"string\"}}}, \"required\": [\"location\"]}}'\nassert False\n + where False = any(<generator object test_chat_streaming_multi_turn_tool_calling.<locals>.<genexpr> at 0x7f1acd9b8e40>)"
},
"traceback": [
{
"path": "tests/verifications/openai_api/test_chat_completion.py",
"lineno": 550,
"message": "AssertionError"
}
],
"longrepr": "request = <FixtureRequest for <Function test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-text_then_weather_tool]>>\nopenai_client = <openai.OpenAI object at 0x7f1acda57a60>\nmodel = 'accounts/fireworks/models/llama4-scout-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'text_then_weather_tool', 'expected': [{'answer': ['sol'], 'num_tool_calls': 0}, {'num_tool_calls': 1, 'to...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n \n # --- Construct Assistant Message for History ---\n assistant_message_dict = {\"role\": \"assistant\"}\n if accumulated_content:\n assistant_message_dict[\"content\"] = accumulated_content\n if accumulated_tool_calls:\n assistant_message_dict[\"tool_calls\"] = accumulated_tool_calls\n \n messages.append(assistant_message_dict)\n \n # --- Assertions ---\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n assert len(accumulated_tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(accumulated_tool_calls or [])}\"\n )\n \n if num_tool_calls > 0:\n # Use the first accumulated tool call for assertion\n tool_call = accumulated_tool_calls[0]\n assert tool_call[\"function\"][\"name\"] == expected[\"tool_name\"], (\n f\"Expected tool '{expected['tool_name']}', got '{tool_call['function']['name']}'\"\n )\n # Parse the accumulated arguments string for comparison\n actual_arguments = json.loads(tool_call[\"function\"][\"arguments\"])\n assert actual_arguments == expected[\"tool_arguments\"], (\n f\"Expected arguments '{expected['tool_arguments']}', got '{actual_arguments}'\"\n )\n \n # Prepare and append the tool response for the next turn\n tool_response = tool_responses.pop(0)\n messages.append(\n {\n \"role\": \"tool\",\n 
\"tool_call_id\": tool_call[\"id\"],\n \"content\": tool_response[\"response\"],\n }\n )\n else:\n assert accumulated_content is not None and accumulated_content != \"\", \"Expected content, but none received.\"\n expected_answers = expected[\"answer\"]\n content_lower = accumulated_content.lower()\n> assert any(ans.lower() in content_lower for ans in expected_answers), (\n f\"Expected one of {expected_answers} in content, but got: '{accumulated_content}'\"\n )\nE AssertionError: Expected one of ['sol'] in content, but got: '{\"name\": \"get_weather\", \"parameters\": {\"description\": \"Get the current weather\", \"parameters\": {\"type\": \"object\", \"properties\": {\"location\": {\"description\": \"The city and state (both required) (e.g. San Francisco, CA.\", \"type\": \"string\"}}}, \"required\": [\"location\"]}}'\nE assert False\nE + where False = any(<generator object test_chat_streaming_multi_turn_tool_calling.<locals>.<genexpr> at 0x7f1acd9b8e40>)\n\ntests/verifications/openai_api/test_chat_completion.py:550: AssertionError"
},
"teardown": {
"duration": 0.0003076540306210518,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-weather_tool_then_text]",
"lineno": 471,
"outcome": "failed",
"keywords": [
"test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-weather_tool_then_text]",
"parametrize",
"pytestmark",
"accounts/fireworks/models/llama4-scout-instruct-basic-weather_tool_then_text",
"test_chat_completion.py",
"openai_api",
"verifications",
"tests",
"llama-stack",
""
],
"metadata": {
"model": "accounts/fireworks/models/llama4-scout-instruct-basic",
"case_id": "weather_tool_then_text"
},
"setup": {
"duration": 0.0732570867985487,
"outcome": "passed"
},
"call": {
"duration": 0.9204158475622535,
"outcome": "failed",
"crash": {
"path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
"lineno": 521,
"message": "AssertionError: Expected 1 tool calls, but got 0\nassert 0 == 1\n + where 0 = len(([] or []))"
},
"traceback": [
{
"path": "tests/verifications/openai_api/test_chat_completion.py",
"lineno": 521,
"message": "AssertionError"
}
],
"longrepr": "request = <FixtureRequest for <Function test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-weather_tool_then_text]>>\nopenai_client = <openai.OpenAI object at 0x7f1acdaaf1c0>\nmodel = 'accounts/fireworks/models/llama4-scout-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'weather_tool_then_text', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'location': 'San Francisco...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n \n # --- Construct Assistant Message for History ---\n assistant_message_dict = {\"role\": \"assistant\"}\n if accumulated_content:\n assistant_message_dict[\"content\"] = accumulated_content\n if accumulated_tool_calls:\n assistant_message_dict[\"tool_calls\"] = accumulated_tool_calls\n \n messages.append(assistant_message_dict)\n \n # --- Assertions ---\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n> assert len(accumulated_tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(accumulated_tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len(([] or []))\n\ntests/verifications/openai_api/test_chat_completion.py:521: AssertionError"
},
"teardown": {
"duration": 0.000310627743601799,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-add_product_tool]",
"lineno": 471,
"outcome": "failed",
"keywords": [
"test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-add_product_tool]",
"parametrize",
"pytestmark",
"accounts/fireworks/models/llama4-scout-instruct-basic-add_product_tool",
"test_chat_completion.py",
"openai_api",
"verifications",
"tests",
"llama-stack",
""
],
"metadata": {
"model": "accounts/fireworks/models/llama4-scout-instruct-basic",
"case_id": "add_product_tool"
},
"setup": {
"duration": 0.07232664246112108,
"outcome": "passed"
},
"call": {
"duration": 3.829266043379903,
"outcome": "failed",
"crash": {
"path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
"lineno": 521,
"message": "AssertionError: Expected 1 tool calls, but got 0\nassert 0 == 1\n + where 0 = len(([] or []))"
},
"traceback": [
{
"path": "tests/verifications/openai_api/test_chat_completion.py",
"lineno": 521,
"message": "AssertionError"
}
],
"longrepr": "request = <FixtureRequest for <Function test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-add_product_tool]>>\nopenai_client = <openai.OpenAI object at 0x7f1acdbbc220>\nmodel = 'accounts/fireworks/models/llama4-scout-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'add_product_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'inStock': True, 'name': 'Widget...}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': 'Successfully added product with id: 123'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n \n # --- Construct Assistant Message for History ---\n assistant_message_dict = {\"role\": \"assistant\"}\n if accumulated_content:\n assistant_message_dict[\"content\"] = accumulated_content\n if accumulated_tool_calls:\n assistant_message_dict[\"tool_calls\"] = accumulated_tool_calls\n \n messages.append(assistant_message_dict)\n \n # --- Assertions ---\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n> assert len(accumulated_tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(accumulated_tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len(([] or []))\n\ntests/verifications/openai_api/test_chat_completion.py:521: AssertionError"
},
"teardown": {
"duration": 0.00034091807901859283,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-get_then_create_event_tool]",
"lineno": 471,
"outcome": "failed",
"keywords": [
"test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-get_then_create_event_tool]",
"parametrize",
"pytestmark",
"accounts/fireworks/models/llama4-scout-instruct-basic-get_then_create_event_tool",
"test_chat_completion.py",
"openai_api",
"verifications",
"tests",
"llama-stack",
""
],
"metadata": {
"model": "accounts/fireworks/models/llama4-scout-instruct-basic",
"case_id": "get_then_create_event_tool"
},
"setup": {
"duration": 0.07045515719801188,
"outcome": "passed"
},
"call": {
"duration": 6.550140863284469,
"outcome": "failed",
"crash": {
"path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
"lineno": 521,
"message": "AssertionError: Expected 1 tool calls, but got 0\nassert 0 == 1\n + where 0 = len(([] or []))"
},
"traceback": [
{
"path": "tests/verifications/openai_api/test_chat_completion.py",
"lineno": 521,
"message": "AssertionError"
}
],
"longrepr": "request = <FixtureRequest for <Function test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-get_then_create_event_tool]>>\nopenai_client = <openai.OpenAI object at 0x7f1acdc0d3f0>\nmodel = 'accounts/fireworks/models/llama4-scout-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'get_then_create_event_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'date': '2025-03-03', ...ents found for 2025-03-03 at 10:00'}\"}, {'response': \"{'response': 'Successfully created new event with id: e_123'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n \n # --- Construct Assistant Message for History ---\n assistant_message_dict = {\"role\": \"assistant\"}\n if accumulated_content:\n assistant_message_dict[\"content\"] = accumulated_content\n if accumulated_tool_calls:\n assistant_message_dict[\"tool_calls\"] = accumulated_tool_calls\n \n messages.append(assistant_message_dict)\n \n # --- Assertions ---\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n> assert len(accumulated_tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(accumulated_tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len(([] or []))\n\ntests/verifications/openai_api/test_chat_completion.py:521: AssertionError"
},
"teardown": {
"duration": 0.0003092316910624504,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-compare_monthly_expense_tool]",
"lineno": 471,
"outcome": "failed",
"keywords": [
"test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-compare_monthly_expense_tool]",
"parametrize",
"pytestmark",
"accounts/fireworks/models/llama4-scout-instruct-basic-compare_monthly_expense_tool",
"test_chat_completion.py",
"openai_api",
"verifications",
"tests",
"llama-stack",
""
],
"metadata": {
"model": "accounts/fireworks/models/llama4-scout-instruct-basic",
"case_id": "compare_monthly_expense_tool"
},
"setup": {
"duration": 0.07400601450353861,
"outcome": "passed"
},
"call": {
"duration": 3.142588397487998,
"outcome": "failed",
"crash": {
"path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
"lineno": 521,
"message": "AssertionError: Expected 1 tool calls, but got 0\nassert 0 == 1\n + where 0 = len(([] or []))"
},
"traceback": [
{
"path": "tests/verifications/openai_api/test_chat_completion.py",
"lineno": 521,
"message": "AssertionError"
}
],
"longrepr": "request = <FixtureRequest for <Function test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-compare_monthly_expense_tool]>>\nopenai_client = <openai.OpenAI object at 0x7f1acdb52ce0>\nmodel = 'accounts/fireworks/models/llama4-scout-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'compare_monthly_expense_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'month': 1, 'year': ... 'Total expenses for January 2025: $1000'}\"}, {'response': \"{'response': 'Total expenses for February 2024: $2000'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n \n # --- Construct Assistant Message for History ---\n assistant_message_dict = {\"role\": \"assistant\"}\n if accumulated_content:\n assistant_message_dict[\"content\"] = accumulated_content\n if accumulated_tool_calls:\n assistant_message_dict[\"tool_calls\"] = accumulated_tool_calls\n \n messages.append(assistant_message_dict)\n \n # --- Assertions ---\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n> assert len(accumulated_tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(accumulated_tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len(([] or []))\n\ntests/verifications/openai_api/test_chat_completion.py:521: AssertionError"
},
"teardown": {
"duration": 0.0003124792128801346,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-text_then_weather_tool]",
"lineno": 471,
"outcome": "failed",
"keywords": [
"test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-text_then_weather_tool]",
"parametrize",
"pytestmark",
"accounts/fireworks/models/llama4-maverick-instruct-basic-text_then_weather_tool",
"test_chat_completion.py",
"openai_api",
"verifications",
"tests",
"llama-stack",
""
],
"metadata": {
"model": "accounts/fireworks/models/llama4-maverick-instruct-basic",
"case_id": "text_then_weather_tool"
},
"setup": {
"duration": 0.07049713470041752,
"outcome": "passed"
},
"call": {
"duration": 4.074657499790192,
"outcome": "failed",
"crash": {
"path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
"lineno": 550,
"message": "AssertionError: Expected one of ['sol'] in content, but got: 'Since the provided text describes a JSON schema for a function call to get the weather, and the prompt asks for the name of the Sun in Latin, we need to identify a suitable function that can provide this information. However, the given schema is for a \"get_weather\" function, which doesn't directly relate to the question about the Sun's name in Latin.\n \n Assuming there's another function available that can provide information about celestial bodies or their names in different languages, we might look for something like \"get_celestial_body_info\" or a similar function.\n \n However, based on the given format and the information provided, it seems there's an implication that we should directly provide a response in the specified JSON format for a hypothetical or related function. Let's assume a function named \"get_celestial_body_name\" that takes parameters like \"body\" and \"language\".\n \n Given the constraint of the format and assuming a function that fits, we might construct a response like:\n \n ```json\n {\n \"name\": \"get_celestial_body_name\",\n \"parameters\": {\n \"body\": \"Sun\",\n \"language\": \"Latin\"\n }\n }\n ```\n \n This response implies the existence of a function \"get_celestial_body_name\" that can take the name of a celestial body and a language as input and return the name of the celestial body in that language. \n \n So, the response is:\n {\"name\": \"get_celestial_body_name\", \"parameters\": {\"body\": \"Sun\", \"language\": \"Latin\"}}'\nassert False\n + where False = any(<generator object test_chat_streaming_multi_turn_tool_calling.<locals>.<genexpr> at 0x7f1acdaba030>)"
},
"traceback": [
{
"path": "tests/verifications/openai_api/test_chat_completion.py",
"lineno": 550,
"message": "AssertionError"
}
],
"longrepr": "request = <FixtureRequest for <Function test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-text_then_weather_tool]>>\nopenai_client = <openai.OpenAI object at 0x7f1acda32d70>\nmodel = 'accounts/fireworks/models/llama4-maverick-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'text_then_weather_tool', 'expected': [{'answer': ['sol'], 'num_tool_calls': 0}, {'num_tool_calls': 1, 'to...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n \n # --- Construct Assistant Message for History ---\n assistant_message_dict = {\"role\": \"assistant\"}\n if accumulated_content:\n assistant_message_dict[\"content\"] = accumulated_content\n if accumulated_tool_calls:\n assistant_message_dict[\"tool_calls\"] = accumulated_tool_calls\n \n messages.append(assistant_message_dict)\n \n # --- Assertions ---\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n assert len(accumulated_tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(accumulated_tool_calls or [])}\"\n )\n \n if num_tool_calls > 0:\n # Use the first accumulated tool call for assertion\n tool_call = accumulated_tool_calls[0]\n assert tool_call[\"function\"][\"name\"] == expected[\"tool_name\"], (\n f\"Expected tool '{expected['tool_name']}', got '{tool_call['function']['name']}'\"\n )\n # Parse the accumulated arguments string for comparison\n actual_arguments = json.loads(tool_call[\"function\"][\"arguments\"])\n assert actual_arguments == expected[\"tool_arguments\"], (\n f\"Expected arguments '{expected['tool_arguments']}', got '{actual_arguments}'\"\n )\n \n # Prepare and append the tool response for the next turn\n tool_response = tool_responses.pop(0)\n messages.append(\n {\n \"role\": 
\"tool\",\n \"tool_call_id\": tool_call[\"id\"],\n \"content\": tool_response[\"response\"],\n }\n )\n else:\n assert accumulated_content is not None and accumulated_content != \"\", \"Expected content, but none received.\"\n expected_answers = expected[\"answer\"]\n content_lower = accumulated_content.lower()\n> assert any(ans.lower() in content_lower for ans in expected_answers), (\n f\"Expected one of {expected_answers} in content, but got: '{accumulated_content}'\"\n )\nE AssertionError: Expected one of ['sol'] in content, but got: 'Since the provided text describes a JSON schema for a function call to get the weather, and the prompt asks for the name of the Sun in Latin, we need to identify a suitable function that can provide this information. However, the given schema is for a \"get_weather\" function, which doesn't directly relate to the question about the Sun's name in Latin.\nE \nE Assuming there's another function available that can provide information about celestial bodies or their names in different languages, we might look for something like \"get_celestial_body_info\" or a similar function.\nE \nE However, based on the given format and the information provided, it seems there's an implication that we should directly provide a response in the specified JSON format for a hypothetical or related function. Let's assume a function named \"get_celestial_body_name\" that takes parameters like \"body\" and \"language\".\nE \nE Given the constraint of the format and assuming a function that fits, we might construct a response like:\nE \nE ```json\nE {\nE \"name\": \"get_celestial_body_name\",\nE \"parameters\": {\nE \"body\": \"Sun\",\nE \"language\": \"Latin\"\nE }\nE }\nE ```\nE \nE This response implies the existence of a function \"get_celestial_body_name\" that can take the name of a celestial body and a language as input and return the name of the celestial body in that language. \nE \nE So, the response is:\nE {\"name\": \"get_celestial_body_name\", \"parameters\": {\"body\": \"Sun\", \"language\": \"Latin\"}}'\nE assert False\nE + where False = any(<generator object test_chat_streaming_multi_turn_tool_calling.<locals>.<genexpr> at 0x7f1acdaba030>)\n\ntests/verifications/openai_api/test_chat_completion.py:550: AssertionError"
},
"teardown": {
"duration": 0.00031174439936876297,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-weather_tool_then_text]",
"lineno": 471,
"outcome": "failed",
"keywords": [
"test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-weather_tool_then_text]",
"parametrize",
"pytestmark",
"accounts/fireworks/models/llama4-maverick-instruct-basic-weather_tool_then_text",
"test_chat_completion.py",
"openai_api",
"verifications",
"tests",
"llama-stack",
""
],
"metadata": {
"model": "accounts/fireworks/models/llama4-maverick-instruct-basic",
"case_id": "weather_tool_then_text"
},
"setup": {
"duration": 0.07156828418374062,
"outcome": "passed"
},
"call": {
"duration": 0.6585372854024172,
"outcome": "failed",
"crash": {
"path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
"lineno": 521,
"message": "AssertionError: Expected 1 tool calls, but got 0\nassert 0 == 1\n + where 0 = len(([] or []))"
},
"traceback": [
{
"path": "tests/verifications/openai_api/test_chat_completion.py",
"lineno": 521,
"message": "AssertionError"
}
],
"longrepr": "request = <FixtureRequest for <Function test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-weather_tool_then_text]>>\nopenai_client = <openai.OpenAI object at 0x7f1acdb6cca0>\nmodel = 'accounts/fireworks/models/llama4-maverick-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'weather_tool_then_text', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'location': 'San Francisco...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n \n # --- Construct Assistant Message for History ---\n assistant_message_dict = {\"role\": \"assistant\"}\n if accumulated_content:\n assistant_message_dict[\"content\"] = accumulated_content\n if accumulated_tool_calls:\n assistant_message_dict[\"tool_calls\"] = accumulated_tool_calls\n \n messages.append(assistant_message_dict)\n \n # --- Assertions ---\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n> assert len(accumulated_tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(accumulated_tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len(([] or []))\n\ntests/verifications/openai_api/test_chat_completion.py:521: AssertionError"
},
"teardown": {
"duration": 0.0003233151510357857,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-add_product_tool]",
"lineno": 471,
"outcome": "failed",
"keywords": [
"test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-add_product_tool]",
"parametrize",
"pytestmark",
"accounts/fireworks/models/llama4-maverick-instruct-basic-add_product_tool",
"test_chat_completion.py",
"openai_api",
"verifications",
"tests",
"llama-stack",
""
],
"metadata": {
"model": "accounts/fireworks/models/llama4-maverick-instruct-basic",
"case_id": "add_product_tool"
},
"setup": {
"duration": 0.07135927956551313,
"outcome": "passed"
},
"call": {
"duration": 1.0483367526903749,
"outcome": "failed",
"crash": {
"path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
"lineno": 521,
"message": "AssertionError: Expected 1 tool calls, but got 0\nassert 0 == 1\n + where 0 = len(([] or []))"
},
"traceback": [
{
"path": "tests/verifications/openai_api/test_chat_completion.py",
"lineno": 521,
"message": "AssertionError"
}
],
"longrepr": "request = <FixtureRequest for <Function test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-add_product_tool]>>\nopenai_client = <openai.OpenAI object at 0x7f1acda577c0>\nmodel = 'accounts/fireworks/models/llama4-maverick-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'add_product_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'inStock': True, 'name': 'Widget...}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': 'Successfully added product with id: 123'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n \n # --- Construct Assistant Message for History ---\n assistant_message_dict = {\"role\": \"assistant\"}\n if accumulated_content:\n assistant_message_dict[\"content\"] = accumulated_content\n if accumulated_tool_calls:\n assistant_message_dict[\"tool_calls\"] = accumulated_tool_calls\n \n messages.append(assistant_message_dict)\n \n # --- Assertions ---\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n> assert len(accumulated_tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(accumulated_tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len(([] or []))\n\ntests/verifications/openai_api/test_chat_completion.py:521: AssertionError"
},
"teardown": {
"duration": 0.00028971116989851,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-get_then_create_event_tool]",
"lineno": 471,
"outcome": "failed",
"keywords": [
"test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-get_then_create_event_tool]",
"parametrize",
"pytestmark",
"accounts/fireworks/models/llama4-maverick-instruct-basic-get_then_create_event_tool",
"test_chat_completion.py",
"openai_api",
"verifications",
"tests",
"llama-stack",
""
],
"metadata": {
"model": "accounts/fireworks/models/llama4-maverick-instruct-basic",
"case_id": "get_then_create_event_tool"
},
"setup": {
"duration": 0.07051362749189138,
"outcome": "passed"
},
"call": {
"duration": 4.592376064509153,
"outcome": "failed",
"crash": {
"path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
"lineno": 521,
"message": "AssertionError: Expected 1 tool calls, but got 0\nassert 0 == 1\n + where 0 = len(([] or []))"
},
"traceback": [
{
"path": "tests/verifications/openai_api/test_chat_completion.py",
"lineno": 521,
"message": "AssertionError"
}
],
"longrepr": "request = <FixtureRequest for <Function test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-get_then_create_event_tool]>>\nopenai_client = <openai.OpenAI object at 0x7f1acd9f5f30>\nmodel = 'accounts/fireworks/models/llama4-maverick-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'get_then_create_event_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'date': '2025-03-03', ...ents found for 2025-03-03 at 10:00'}\"}, {'response': \"{'response': 'Successfully created new event with id: e_123'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n \n # --- Construct Assistant Message for History ---\n assistant_message_dict = {\"role\": \"assistant\"}\n if accumulated_content:\n assistant_message_dict[\"content\"] = accumulated_content\n if accumulated_tool_calls:\n assistant_message_dict[\"tool_calls\"] = accumulated_tool_calls\n \n messages.append(assistant_message_dict)\n \n # --- Assertions ---\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n> assert len(accumulated_tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(accumulated_tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len(([] or []))\n\ntests/verifications/openai_api/test_chat_completion.py:521: AssertionError"
},
"teardown": {
"duration": 0.00029074493795633316,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-compare_monthly_expense_tool]",
"lineno": 471,
"outcome": "failed",
"keywords": [
"test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-compare_monthly_expense_tool]",
"parametrize",
"pytestmark",
"accounts/fireworks/models/llama4-maverick-instruct-basic-compare_monthly_expense_tool",
"test_chat_completion.py",
"openai_api",
"verifications",
"tests",
"llama-stack",
""
],
"metadata": {
"model": "accounts/fireworks/models/llama4-maverick-instruct-basic",
"case_id": "compare_monthly_expense_tool"
},
"setup": {
"duration": 0.07347700279206038,
"outcome": "passed"
},
"call": {
"duration": 1.5335856154561043,
"outcome": "failed",
"crash": {
"path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
"lineno": 521,
"message": "AssertionError: Expected 1 tool calls, but got 0\nassert 0 == 1\n + where 0 = len(([] or []))"
},
"traceback": [
{
"path": "tests/verifications/openai_api/test_chat_completion.py",
"lineno": 521,
"message": "AssertionError"
}
],
"longrepr": "request = <FixtureRequest for <Function test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-compare_monthly_expense_tool]>>\nopenai_client = <openai.OpenAI object at 0x7f1acdbd1360>\nmodel = 'accounts/fireworks/models/llama4-maverick-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'compare_monthly_expense_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'month': 1, 'year': ... 'Total expenses for January 2025: $1000'}\"}, {'response': \"{'response': 'Total expenses for February 2024: $2000'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n \n # --- Construct Assistant Message for History ---\n assistant_message_dict = {\"role\": \"assistant\"}\n if accumulated_content:\n assistant_message_dict[\"content\"] = accumulated_content\n if accumulated_tool_calls:\n assistant_message_dict[\"tool_calls\"] = accumulated_tool_calls\n \n messages.append(assistant_message_dict)\n \n # --- Assertions ---\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n> assert len(accumulated_tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(accumulated_tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len(([] or []))\n\ntests/verifications/openai_api/test_chat_completion.py:521: AssertionError"
},
"teardown": {
"duration": 0.0003180811181664467,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_multi_turn_multiple_images[accounts/fireworks/models/llama-v3p3-70b-instruct-stream=False]",
"lineno": 554,
"outcome": "skipped",
"keywords": [
"test_chat_multi_turn_multiple_images[accounts/fireworks/models/llama-v3p3-70b-instruct-stream=False]",
"parametrize",
"pytestmark",
"accounts/fireworks/models/llama-v3p3-70b-instruct-stream=False",
"test_chat_completion.py",
"openai_api",
"verifications",
"tests",
"llama-stack",
""
],
"metadata": {
"model": "accounts/fireworks/models/llama-v3p3-70b-instruct",
"case_id": "stream=False"
},
"setup": {
"duration": 0.07250582799315453,
"outcome": "passed"
},
"call": {
"duration": 0.00022417306900024414,
"outcome": "skipped",
"longrepr": "('/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py', 561, 'Skipped: Skipping test_chat_multi_turn_multiple_images for model accounts/fireworks/models/llama-v3p3-70b-instruct on provider fireworks based on config.')"
},
"teardown": {
"duration": 0.0036543207243084908,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_multi_turn_multiple_images[accounts/fireworks/models/llama-v3p3-70b-instruct-stream=True]",
"lineno": 554,
"outcome": "skipped",
"keywords": [
"test_chat_multi_turn_multiple_images[accounts/fireworks/models/llama-v3p3-70b-instruct-stream=True]",
"parametrize",
"pytestmark",
"accounts/fireworks/models/llama-v3p3-70b-instruct-stream=True",
"test_chat_completion.py",
"openai_api",
"verifications",
"tests",
"llama-stack",
""
],
"metadata": {
"model": "accounts/fireworks/models/llama-v3p3-70b-instruct",
"case_id": "stream=True"
},
"setup": {
"duration": 0.07320290431380272,
"outcome": "passed"
},
"call": {
"duration": 0.0002203313633799553,
"outcome": "skipped",
"longrepr": "('/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py', 561, 'Skipped: Skipping test_chat_multi_turn_multiple_images for model accounts/fireworks/models/llama-v3p3-70b-instruct on provider fireworks based on config.')"
},
"teardown": {
"duration": 0.00035103876143693924,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_multi_turn_multiple_images[accounts/fireworks/models/llama4-scout-instruct-basic-stream=False]",
"lineno": 554,
"outcome": "passed",
"keywords": [
"test_chat_multi_turn_multiple_images[accounts/fireworks/models/llama4-scout-instruct-basic-stream=False]",
"parametrize",
"pytestmark",
"accounts/fireworks/models/llama4-scout-instruct-basic-stream=False",
"test_chat_completion.py",
"openai_api",
"verifications",
"tests",
"llama-stack",
""
],
"metadata": {
"model": "accounts/fireworks/models/llama4-scout-instruct-basic",
"case_id": "stream=False"
},
"setup": {
"duration": 0.07001570798456669,
"outcome": "passed"
},
"call": {
"duration": 6.779760396108031,
"outcome": "passed"
},
"teardown": {
"duration": 0.00023057777434587479,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_multi_turn_multiple_images[accounts/fireworks/models/llama4-scout-instruct-basic-stream=True]",
"lineno": 554,
"outcome": "passed",
"keywords": [
"test_chat_multi_turn_multiple_images[accounts/fireworks/models/llama4-scout-instruct-basic-stream=True]",
"parametrize",
"pytestmark",
"accounts/fireworks/models/llama4-scout-instruct-basic-stream=True",
"test_chat_completion.py",
"openai_api",
"verifications",
"tests",
"llama-stack",
""
],
"metadata": {
"model": "accounts/fireworks/models/llama4-scout-instruct-basic",
"case_id": "stream=True"
},
"setup": {
"duration": 0.07039657514542341,
"outcome": "passed"
},
"call": {
"duration": 4.335017805919051,
"outcome": "passed"
},
"teardown": {
"duration": 0.00023656059056520462,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_multi_turn_multiple_images[accounts/fireworks/models/llama4-maverick-instruct-basic-stream=False]",
"lineno": 554,
"outcome": "passed",
"keywords": [
"test_chat_multi_turn_multiple_images[accounts/fireworks/models/llama4-maverick-instruct-basic-stream=False]",
"parametrize",
"pytestmark",
"accounts/fireworks/models/llama4-maverick-instruct-basic-stream=False",
"test_chat_completion.py",
"openai_api",
"verifications",
"tests",
"llama-stack",
""
],
"metadata": {
"model": "accounts/fireworks/models/llama4-maverick-instruct-basic",
"case_id": "stream=False"
},
"setup": {
"duration": 0.07107001543045044,
"outcome": "passed"
},
"call": {
"duration": 5.857806807383895,
"outcome": "passed"
},
"teardown": {
"duration": 0.00028312671929597855,
"outcome": "passed"
}
},
{
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_multi_turn_multiple_images[accounts/fireworks/models/llama4-maverick-instruct-basic-stream=True]",
"lineno": 554,
"outcome": "passed",
"keywords": [
"test_chat_multi_turn_multiple_images[accounts/fireworks/models/llama4-maverick-instruct-basic-stream=True]",
"parametrize",
"pytestmark",
"accounts/fireworks/models/llama4-maverick-instruct-basic-stream=True",
"test_chat_completion.py",
"openai_api",
"verifications",
"tests",
"llama-stack",
""
],
"metadata": {
"model": "accounts/fireworks/models/llama4-maverick-instruct-basic",
"case_id": "stream=True"
},
"setup": {
"duration": 0.07257402781397104,
"outcome": "passed"
},
"call": {
"duration": 5.412369452416897,
"outcome": "passed"
},
"teardown": {
"duration": 0.0018147435039281845,
"outcome": "passed"
}
}
],
"run_timestamp": 1744918193
}