mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-12-30 20:20:01 +00:00
# What does this PR do? ## Test Plan # What does this PR do? ## Test Plan # What does this PR do? ## Test Plan # What does this PR do? ## Test Plan
1420 lines
62 KiB
JSON
1420 lines
62 KiB
JSON
{
|
|
"created": 1744264304.064288,
|
|
"duration": 42.470197916030884,
|
|
"exitcode": 1,
|
|
"root": "/Users/erichuang/projects/llama-stack",
|
|
"environment": {},
|
|
"summary": {
|
|
"passed": 21,
|
|
"failed": 10,
|
|
"skipped": 2,
|
|
"total": 33,
|
|
"collected": 33
|
|
},
|
|
"collectors": [
|
|
{
|
|
"nodeid": "",
|
|
"outcome": "passed",
|
|
"result": [
|
|
{
|
|
"nodeid": "tests/verifications/openai_api/test_chat_completion.py",
|
|
"type": "Module"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"nodeid": "tests/verifications/openai_api/test_chat_completion.py",
|
|
"outcome": "passed",
|
|
"result": [
|
|
{
|
|
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-earth]",
|
|
"type": "Function",
|
|
"lineno": 72
|
|
},
|
|
{
|
|
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-saturn]",
|
|
"type": "Function",
|
|
"lineno": 72
|
|
},
|
|
{
|
|
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-earth]",
|
|
"type": "Function",
|
|
"lineno": 72
|
|
},
|
|
{
|
|
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-saturn]",
|
|
"type": "Function",
|
|
"lineno": 72
|
|
},
|
|
{
|
|
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-earth]",
|
|
"type": "Function",
|
|
"lineno": 72
|
|
},
|
|
{
|
|
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-saturn]",
|
|
"type": "Function",
|
|
"lineno": 72
|
|
},
|
|
{
|
|
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-earth]",
|
|
"type": "Function",
|
|
"lineno": 91
|
|
},
|
|
{
|
|
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-saturn]",
|
|
"type": "Function",
|
|
"lineno": 91
|
|
},
|
|
{
|
|
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-earth]",
|
|
"type": "Function",
|
|
"lineno": 91
|
|
},
|
|
{
|
|
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-saturn]",
|
|
"type": "Function",
|
|
"lineno": 91
|
|
},
|
|
{
|
|
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-earth]",
|
|
"type": "Function",
|
|
"lineno": 91
|
|
},
|
|
{
|
|
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-saturn]",
|
|
"type": "Function",
|
|
"lineno": 91
|
|
},
|
|
{
|
|
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]",
|
|
"type": "Function",
|
|
"lineno": 115
|
|
},
|
|
{
|
|
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]",
|
|
"type": "Function",
|
|
"lineno": 115
|
|
},
|
|
{
|
|
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]",
|
|
"type": "Function",
|
|
"lineno": 115
|
|
},
|
|
{
|
|
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]",
|
|
"type": "Function",
|
|
"lineno": 134
|
|
},
|
|
{
|
|
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]",
|
|
"type": "Function",
|
|
"lineno": 134
|
|
},
|
|
{
|
|
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]",
|
|
"type": "Function",
|
|
"lineno": 134
|
|
},
|
|
{
|
|
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-calendar]",
|
|
"type": "Function",
|
|
"lineno": 158
|
|
},
|
|
{
|
|
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-math]",
|
|
"type": "Function",
|
|
"lineno": 158
|
|
},
|
|
{
|
|
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-calendar]",
|
|
"type": "Function",
|
|
"lineno": 158
|
|
},
|
|
{
|
|
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-math]",
|
|
"type": "Function",
|
|
"lineno": 158
|
|
},
|
|
{
|
|
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-calendar]",
|
|
"type": "Function",
|
|
"lineno": 158
|
|
},
|
|
{
|
|
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-math]",
|
|
"type": "Function",
|
|
"lineno": 158
|
|
},
|
|
{
|
|
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-calendar]",
|
|
"type": "Function",
|
|
"lineno": 181
|
|
},
|
|
{
|
|
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-math]",
|
|
"type": "Function",
|
|
"lineno": 181
|
|
},
|
|
{
|
|
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-calendar]",
|
|
"type": "Function",
|
|
"lineno": 181
|
|
},
|
|
{
|
|
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-math]",
|
|
"type": "Function",
|
|
"lineno": 181
|
|
},
|
|
{
|
|
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-calendar]",
|
|
"type": "Function",
|
|
"lineno": 181
|
|
},
|
|
{
|
|
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-math]",
|
|
"type": "Function",
|
|
"lineno": 181
|
|
},
|
|
{
|
|
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]",
|
|
"type": "Function",
|
|
"lineno": 203
|
|
},
|
|
{
|
|
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]",
|
|
"type": "Function",
|
|
"lineno": 203
|
|
},
|
|
{
|
|
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]",
|
|
"type": "Function",
|
|
"lineno": 203
|
|
}
|
|
]
|
|
}
|
|
],
|
|
"tests": [
|
|
{
|
|
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-earth]",
|
|
"lineno": 72,
|
|
"outcome": "passed",
|
|
"keywords": [
|
|
"test_chat_non_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-earth]",
|
|
"parametrize",
|
|
"pytestmark",
|
|
"meta-llama/Llama-3.3-70B-Instruct-Turbo-earth",
|
|
"test_chat_completion.py",
|
|
"openai_api",
|
|
"verifications",
|
|
"tests",
|
|
"llama-stack",
|
|
""
|
|
],
|
|
"metadata": {
|
|
"model": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
|
|
"case_id": "earth"
|
|
},
|
|
"setup": {
|
|
"duration": 0.06113254197407514,
|
|
"outcome": "passed"
|
|
},
|
|
"call": {
|
|
"duration": 1.0720349580515176,
|
|
"outcome": "passed"
|
|
},
|
|
"teardown": {
|
|
"duration": 0.00015966698992997408,
|
|
"outcome": "passed"
|
|
}
|
|
},
|
|
{
|
|
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-saturn]",
|
|
"lineno": 72,
|
|
"outcome": "passed",
|
|
"keywords": [
|
|
"test_chat_non_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-saturn]",
|
|
"parametrize",
|
|
"pytestmark",
|
|
"meta-llama/Llama-3.3-70B-Instruct-Turbo-saturn",
|
|
"test_chat_completion.py",
|
|
"openai_api",
|
|
"verifications",
|
|
"tests",
|
|
"llama-stack",
|
|
""
|
|
],
|
|
"metadata": {
|
|
"model": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
|
|
"case_id": "saturn"
|
|
},
|
|
"setup": {
|
|
"duration": 0.006908083101734519,
|
|
"outcome": "passed"
|
|
},
|
|
"call": {
|
|
"duration": 0.5013210839824751,
|
|
"outcome": "passed"
|
|
},
|
|
"teardown": {
|
|
"duration": 0.0005375830223783851,
|
|
"outcome": "passed"
|
|
}
|
|
},
|
|
{
|
|
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-earth]",
|
|
"lineno": 72,
|
|
"outcome": "passed",
|
|
"keywords": [
|
|
"test_chat_non_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-earth]",
|
|
"parametrize",
|
|
"pytestmark",
|
|
"meta-llama/Llama-4-Scout-17B-16E-Instruct-earth",
|
|
"test_chat_completion.py",
|
|
"openai_api",
|
|
"verifications",
|
|
"tests",
|
|
"llama-stack",
|
|
""
|
|
],
|
|
"metadata": {
|
|
"model": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
|
|
"case_id": "earth"
|
|
},
|
|
"setup": {
|
|
"duration": 0.006910792086273432,
|
|
"outcome": "passed"
|
|
},
|
|
"call": {
|
|
"duration": 0.5142245410243049,
|
|
"outcome": "passed"
|
|
},
|
|
"teardown": {
|
|
"duration": 0.0004069580463692546,
|
|
"outcome": "passed"
|
|
}
|
|
},
|
|
{
|
|
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-saturn]",
|
|
"lineno": 72,
|
|
"outcome": "passed",
|
|
"keywords": [
|
|
"test_chat_non_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-saturn]",
|
|
"parametrize",
|
|
"pytestmark",
|
|
"meta-llama/Llama-4-Scout-17B-16E-Instruct-saturn",
|
|
"test_chat_completion.py",
|
|
"openai_api",
|
|
"verifications",
|
|
"tests",
|
|
"llama-stack",
|
|
""
|
|
],
|
|
"metadata": {
|
|
"model": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
|
|
"case_id": "saturn"
|
|
},
|
|
"setup": {
|
|
"duration": 0.009730000048875809,
|
|
"outcome": "passed"
|
|
},
|
|
"call": {
|
|
"duration": 0.40133179200347513,
|
|
"outcome": "passed"
|
|
},
|
|
"teardown": {
|
|
"duration": 0.0004558749496936798,
|
|
"outcome": "passed"
|
|
}
|
|
},
|
|
{
|
|
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-earth]",
|
|
"lineno": 72,
|
|
"outcome": "passed",
|
|
"keywords": [
|
|
"test_chat_non_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-earth]",
|
|
"parametrize",
|
|
"pytestmark",
|
|
"meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-earth",
|
|
"test_chat_completion.py",
|
|
"openai_api",
|
|
"verifications",
|
|
"tests",
|
|
"llama-stack",
|
|
""
|
|
],
|
|
"metadata": {
|
|
"model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
|
|
"case_id": "earth"
|
|
},
|
|
"setup": {
|
|
"duration": 0.008247417048551142,
|
|
"outcome": "passed"
|
|
},
|
|
"call": {
|
|
"duration": 0.7914331250358373,
|
|
"outcome": "passed"
|
|
},
|
|
"teardown": {
|
|
"duration": 0.00020262505859136581,
|
|
"outcome": "passed"
|
|
}
|
|
},
|
|
{
|
|
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-saturn]",
|
|
"lineno": 72,
|
|
"outcome": "passed",
|
|
"keywords": [
|
|
"test_chat_non_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-saturn]",
|
|
"parametrize",
|
|
"pytestmark",
|
|
"meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-saturn",
|
|
"test_chat_completion.py",
|
|
"openai_api",
|
|
"verifications",
|
|
"tests",
|
|
"llama-stack",
|
|
""
|
|
],
|
|
"metadata": {
|
|
"model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
|
|
"case_id": "saturn"
|
|
},
|
|
"setup": {
|
|
"duration": 0.00922900007572025,
|
|
"outcome": "passed"
|
|
},
|
|
"call": {
|
|
"duration": 1.2742049579974264,
|
|
"outcome": "passed"
|
|
},
|
|
"teardown": {
|
|
"duration": 0.000688415952026844,
|
|
"outcome": "passed"
|
|
}
|
|
},
|
|
{
|
|
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-earth]",
|
|
"lineno": 91,
|
|
"outcome": "passed",
|
|
"keywords": [
|
|
"test_chat_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-earth]",
|
|
"parametrize",
|
|
"pytestmark",
|
|
"meta-llama/Llama-3.3-70B-Instruct-Turbo-earth",
|
|
"test_chat_completion.py",
|
|
"openai_api",
|
|
"verifications",
|
|
"tests",
|
|
"llama-stack",
|
|
""
|
|
],
|
|
"metadata": {
|
|
"model": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
|
|
"case_id": "earth"
|
|
},
|
|
"setup": {
|
|
"duration": 0.006949124974198639,
|
|
"outcome": "passed"
|
|
},
|
|
"call": {
|
|
"duration": 0.4681705000111833,
|
|
"outcome": "passed"
|
|
},
|
|
"teardown": {
|
|
"duration": 0.00017795804888010025,
|
|
"outcome": "passed"
|
|
}
|
|
},
|
|
{
|
|
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-saturn]",
|
|
"lineno": 91,
|
|
"outcome": "passed",
|
|
"keywords": [
|
|
"test_chat_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-saturn]",
|
|
"parametrize",
|
|
"pytestmark",
|
|
"meta-llama/Llama-3.3-70B-Instruct-Turbo-saturn",
|
|
"test_chat_completion.py",
|
|
"openai_api",
|
|
"verifications",
|
|
"tests",
|
|
"llama-stack",
|
|
""
|
|
],
|
|
"metadata": {
|
|
"model": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
|
|
"case_id": "saturn"
|
|
},
|
|
"setup": {
|
|
"duration": 0.008564374991692603,
|
|
"outcome": "passed"
|
|
},
|
|
"call": {
|
|
"duration": 1.7430362500017509,
|
|
"outcome": "passed"
|
|
},
|
|
"teardown": {
|
|
"duration": 0.00015312491450458765,
|
|
"outcome": "passed"
|
|
}
|
|
},
|
|
{
|
|
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-earth]",
|
|
"lineno": 91,
|
|
"outcome": "failed",
|
|
"keywords": [
|
|
"test_chat_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-earth]",
|
|
"parametrize",
|
|
"pytestmark",
|
|
"meta-llama/Llama-4-Scout-17B-16E-Instruct-earth",
|
|
"test_chat_completion.py",
|
|
"openai_api",
|
|
"verifications",
|
|
"tests",
|
|
"llama-stack",
|
|
""
|
|
],
|
|
"metadata": {
|
|
"model": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
|
|
"case_id": "earth"
|
|
},
|
|
"setup": {
|
|
"duration": 0.007404124946333468,
|
|
"outcome": "passed"
|
|
},
|
|
"call": {
|
|
"duration": 0.515926624997519,
|
|
"outcome": "failed",
|
|
"crash": {
|
|
"path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
|
|
"lineno": 109,
|
|
"message": "IndexError: list index out of range"
|
|
},
|
|
"traceback": [
|
|
{
|
|
"path": "tests/verifications/openai_api/test_chat_completion.py",
|
|
"lineno": 109,
|
|
"message": "IndexError"
|
|
}
|
|
],
|
|
"longrepr": "request = <FixtureRequest for <Function test_chat_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-earth]>>\nopenai_client = <openai.OpenAI object at 0x1059ecfa0>\nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'case_id': 'earth', 'input': {'messages': [{'content': 'Which planet do humans live on?', 'role': 'user'}]}, 'output': 'Earth'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_basic\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_basic(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:109: IndexError"
|
|
},
|
|
"teardown": {
|
|
"duration": 0.0002389999572187662,
|
|
"outcome": "passed"
|
|
}
|
|
},
|
|
{
|
|
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-saturn]",
|
|
"lineno": 91,
|
|
"outcome": "failed",
|
|
"keywords": [
|
|
"test_chat_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-saturn]",
|
|
"parametrize",
|
|
"pytestmark",
|
|
"meta-llama/Llama-4-Scout-17B-16E-Instruct-saturn",
|
|
"test_chat_completion.py",
|
|
"openai_api",
|
|
"verifications",
|
|
"tests",
|
|
"llama-stack",
|
|
""
|
|
],
|
|
"metadata": {
|
|
"model": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
|
|
"case_id": "saturn"
|
|
},
|
|
"setup": {
|
|
"duration": 0.0071305419551208615,
|
|
"outcome": "passed"
|
|
},
|
|
"call": {
|
|
"duration": 0.37054662499576807,
|
|
"outcome": "failed",
|
|
"crash": {
|
|
"path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
|
|
"lineno": 109,
|
|
"message": "IndexError: list index out of range"
|
|
},
|
|
"traceback": [
|
|
{
|
|
"path": "tests/verifications/openai_api/test_chat_completion.py",
|
|
"lineno": 109,
|
|
"message": "IndexError"
|
|
}
|
|
],
|
|
"longrepr": "request = <FixtureRequest for <Function test_chat_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-saturn]>>\nopenai_client = <openai.OpenAI object at 0x105917430>\nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'case_id': 'saturn', 'input': {'messages': [{'content': 'Which planet has rings around it with a name starting with letter S?', 'role': 'user'}]}, 'output': 'Saturn'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_basic\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_basic(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:109: IndexError"
|
|
},
|
|
"teardown": {
|
|
"duration": 0.0006014580139890313,
|
|
"outcome": "passed"
|
|
}
|
|
},
|
|
{
|
|
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-earth]",
|
|
"lineno": 91,
|
|
"outcome": "failed",
|
|
"keywords": [
|
|
"test_chat_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-earth]",
|
|
"parametrize",
|
|
"pytestmark",
|
|
"meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-earth",
|
|
"test_chat_completion.py",
|
|
"openai_api",
|
|
"verifications",
|
|
"tests",
|
|
"llama-stack",
|
|
""
|
|
],
|
|
"metadata": {
|
|
"model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
|
|
"case_id": "earth"
|
|
},
|
|
"setup": {
|
|
"duration": 0.007489709067158401,
|
|
"outcome": "passed"
|
|
},
|
|
"call": {
|
|
"duration": 0.7767745839664713,
|
|
"outcome": "failed",
|
|
"crash": {
|
|
"path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
|
|
"lineno": 109,
|
|
"message": "IndexError: list index out of range"
|
|
},
|
|
"traceback": [
|
|
{
|
|
"path": "tests/verifications/openai_api/test_chat_completion.py",
|
|
"lineno": 109,
|
|
"message": "IndexError"
|
|
}
|
|
],
|
|
"longrepr": "request = <FixtureRequest for <Function test_chat_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-earth]>>\nopenai_client = <openai.OpenAI object at 0x1072015a0>\nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'case_id': 'earth', 'input': {'messages': [{'content': 'Which planet do humans live on?', 'role': 'user'}]}, 'output': 'Earth'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_basic\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_basic(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:109: IndexError"
|
|
},
|
|
"teardown": {
|
|
"duration": 0.00025491707492619753,
|
|
"outcome": "passed"
|
|
}
|
|
},
|
|
{
|
|
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-saturn]",
|
|
"lineno": 91,
|
|
"outcome": "failed",
|
|
"keywords": [
|
|
"test_chat_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-saturn]",
|
|
"parametrize",
|
|
"pytestmark",
|
|
"meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-saturn",
|
|
"test_chat_completion.py",
|
|
"openai_api",
|
|
"verifications",
|
|
"tests",
|
|
"llama-stack",
|
|
""
|
|
],
|
|
"metadata": {
|
|
"model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
|
|
"case_id": "saturn"
|
|
},
|
|
"setup": {
|
|
"duration": 0.006736499955877662,
|
|
"outcome": "passed"
|
|
},
|
|
"call": {
|
|
"duration": 0.43948554201051593,
|
|
"outcome": "failed",
|
|
"crash": {
|
|
"path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
|
|
"lineno": 109,
|
|
"message": "IndexError: list index out of range"
|
|
},
|
|
"traceback": [
|
|
{
|
|
"path": "tests/verifications/openai_api/test_chat_completion.py",
|
|
"lineno": 109,
|
|
"message": "IndexError"
|
|
}
|
|
],
|
|
"longrepr": "request = <FixtureRequest for <Function test_chat_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-saturn]>>\nopenai_client = <openai.OpenAI object at 0x10726a620>\nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'case_id': 'saturn', 'input': {'messages': [{'content': 'Which planet has rings around it with a name starting with letter S?', 'role': 'user'}]}, 'output': 'Saturn'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_basic\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_basic(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:109: IndexError"
|
|
},
|
|
"teardown": {
|
|
"duration": 0.0002264160430058837,
|
|
"outcome": "passed"
|
|
}
|
|
},
|
|
{
|
|
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]",
|
|
"lineno": 115,
|
|
"outcome": "skipped",
|
|
"keywords": [
|
|
"test_chat_non_streaming_image[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]",
|
|
"parametrize",
|
|
"pytestmark",
|
|
"meta-llama/Llama-3.3-70B-Instruct-Turbo-case0",
|
|
"test_chat_completion.py",
|
|
"openai_api",
|
|
"verifications",
|
|
"tests",
|
|
"llama-stack",
|
|
""
|
|
],
|
|
"metadata": {
|
|
"model": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
|
|
"case_id": "case0"
|
|
},
|
|
"setup": {
|
|
"duration": 0.007171708042733371,
|
|
"outcome": "passed"
|
|
},
|
|
"call": {
|
|
"duration": 0.00013554200995713472,
|
|
"outcome": "skipped",
|
|
"longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py', 124, 'Skipped: Skipping test_chat_non_streaming_image for model meta-llama/Llama-3.3-70B-Instruct-Turbo on provider together based on config.')"
|
|
},
|
|
"teardown": {
|
|
"duration": 0.0001235839445143938,
|
|
"outcome": "passed"
|
|
}
|
|
},
|
|
{
|
|
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]",
|
|
"lineno": 115,
|
|
"outcome": "passed",
|
|
"keywords": [
|
|
"test_chat_non_streaming_image[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]",
|
|
"parametrize",
|
|
"pytestmark",
|
|
"meta-llama/Llama-4-Scout-17B-16E-Instruct-case0",
|
|
"test_chat_completion.py",
|
|
"openai_api",
|
|
"verifications",
|
|
"tests",
|
|
"llama-stack",
|
|
""
|
|
],
|
|
"metadata": {
|
|
"model": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
|
|
"case_id": "case0"
|
|
},
|
|
"setup": {
|
|
"duration": 0.008639499894343317,
|
|
"outcome": "passed"
|
|
},
|
|
"call": {
|
|
"duration": 1.4001279999502003,
|
|
"outcome": "passed"
|
|
},
|
|
"teardown": {
|
|
"duration": 0.00014812499284744263,
|
|
"outcome": "passed"
|
|
}
|
|
},
|
|
{
|
|
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]",
|
|
"lineno": 115,
|
|
"outcome": "passed",
|
|
"keywords": [
|
|
"test_chat_non_streaming_image[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]",
|
|
"parametrize",
|
|
"pytestmark",
|
|
"meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0",
|
|
"test_chat_completion.py",
|
|
"openai_api",
|
|
"verifications",
|
|
"tests",
|
|
"llama-stack",
|
|
""
|
|
],
|
|
"metadata": {
|
|
"model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
|
|
"case_id": "case0"
|
|
},
|
|
"setup": {
|
|
"duration": 0.015450250008143485,
|
|
"outcome": "passed"
|
|
},
|
|
"call": {
|
|
"duration": 3.3522649579681456,
|
|
"outcome": "passed"
|
|
},
|
|
"teardown": {
|
|
"duration": 0.00041629199404269457,
|
|
"outcome": "passed"
|
|
}
|
|
},
|
|
{
|
|
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]",
|
|
"lineno": 134,
|
|
"outcome": "skipped",
|
|
"keywords": [
|
|
"test_chat_streaming_image[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]",
|
|
"parametrize",
|
|
"pytestmark",
|
|
"meta-llama/Llama-3.3-70B-Instruct-Turbo-case0",
|
|
"test_chat_completion.py",
|
|
"openai_api",
|
|
"verifications",
|
|
"tests",
|
|
"llama-stack",
|
|
""
|
|
],
|
|
"metadata": {
|
|
"model": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
|
|
"case_id": "case0"
|
|
},
|
|
"setup": {
|
|
"duration": 0.007634000037796795,
|
|
"outcome": "passed"
|
|
},
|
|
"call": {
|
|
"duration": 0.0001563339028507471,
|
|
"outcome": "skipped",
|
|
"longrepr": "('/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py', 143, 'Skipped: Skipping test_chat_streaming_image for model meta-llama/Llama-3.3-70B-Instruct-Turbo on provider together based on config.')"
|
|
},
|
|
"teardown": {
|
|
"duration": 0.0001324999611824751,
|
|
"outcome": "passed"
|
|
}
|
|
},
|
|
{
|
|
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]",
|
|
"lineno": 134,
|
|
"outcome": "failed",
|
|
"keywords": [
|
|
"test_chat_streaming_image[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]",
|
|
"parametrize",
|
|
"pytestmark",
|
|
"meta-llama/Llama-4-Scout-17B-16E-Instruct-case0",
|
|
"test_chat_completion.py",
|
|
"openai_api",
|
|
"verifications",
|
|
"tests",
|
|
"llama-stack",
|
|
""
|
|
],
|
|
"metadata": {
|
|
"model": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
|
|
"case_id": "case0"
|
|
},
|
|
"setup": {
|
|
"duration": 0.007050334010273218,
|
|
"outcome": "passed"
|
|
},
|
|
"call": {
|
|
"duration": 1.7063317500287667,
|
|
"outcome": "failed",
|
|
"crash": {
|
|
"path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
|
|
"lineno": 152,
|
|
"message": "IndexError: list index out of range"
|
|
},
|
|
"traceback": [
|
|
{
|
|
"path": "tests/verifications/openai_api/test_chat_completion.py",
|
|
"lineno": 152,
|
|
"message": "IndexError"
|
|
}
|
|
],
|
|
"longrepr": "request = <FixtureRequest for <Function test_chat_streaming_image[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]>>\nopenai_client = <openai.OpenAI object at 0x10725e800>\nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'input': {'messages': [{'content': [{'text': 'What is in this image?', 'type': 'text'}, {'image_url': {...}, 'type': 'image_url'}], 'role': 'user'}]}, 'output': 'llama'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_image\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_image(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:152: IndexError"
|
|
},
|
|
"teardown": {
|
|
"duration": 0.0002109999768435955,
|
|
"outcome": "passed"
|
|
}
|
|
},
|
|
{
|
|
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]",
|
|
"lineno": 134,
|
|
"outcome": "failed",
|
|
"keywords": [
|
|
"test_chat_streaming_image[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]",
|
|
"parametrize",
|
|
"pytestmark",
|
|
"meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0",
|
|
"test_chat_completion.py",
|
|
"openai_api",
|
|
"verifications",
|
|
"tests",
|
|
"llama-stack",
|
|
""
|
|
],
|
|
"metadata": {
|
|
"model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
|
|
"case_id": "case0"
|
|
},
|
|
"setup": {
|
|
"duration": 0.006729208980686963,
|
|
"outcome": "passed"
|
|
},
|
|
"call": {
|
|
"duration": 3.829621708020568,
|
|
"outcome": "failed",
|
|
"crash": {
|
|
"path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
|
|
"lineno": 152,
|
|
"message": "IndexError: list index out of range"
|
|
},
|
|
"traceback": [
|
|
{
|
|
"path": "tests/verifications/openai_api/test_chat_completion.py",
|
|
"lineno": 152,
|
|
"message": "IndexError"
|
|
}
|
|
],
|
|
"longrepr": "request = <FixtureRequest for <Function test_chat_streaming_image[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]>>\nopenai_client = <openai.OpenAI object at 0x10729bc10>\nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'input': {'messages': [{'content': [{'text': 'What is in this image?', 'type': 'text'}, {'image_url': {...}, 'type': 'image_url'}], 'role': 'user'}]}, 'output': 'llama'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_image\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_image(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:152: IndexError"
|
|
},
|
|
"teardown": {
|
|
"duration": 0.0002882500411942601,
|
|
"outcome": "passed"
|
|
}
|
|
},
|
|
{
|
|
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-calendar]",
|
|
"lineno": 158,
|
|
"outcome": "passed",
|
|
"keywords": [
|
|
"test_chat_non_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-calendar]",
|
|
"parametrize",
|
|
"pytestmark",
|
|
"meta-llama/Llama-3.3-70B-Instruct-Turbo-calendar",
|
|
"test_chat_completion.py",
|
|
"openai_api",
|
|
"verifications",
|
|
"tests",
|
|
"llama-stack",
|
|
""
|
|
],
|
|
"metadata": {
|
|
"model": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
|
|
"case_id": "calendar"
|
|
},
|
|
"setup": {
|
|
"duration": 0.007713916013017297,
|
|
"outcome": "passed"
|
|
},
|
|
"call": {
|
|
"duration": 2.48285808309447,
|
|
"outcome": "passed"
|
|
},
|
|
"teardown": {
|
|
"duration": 0.00020350003615021706,
|
|
"outcome": "passed"
|
|
}
|
|
},
|
|
{
|
|
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-math]",
|
|
"lineno": 158,
|
|
"outcome": "passed",
|
|
"keywords": [
|
|
"test_chat_non_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-math]",
|
|
"parametrize",
|
|
"pytestmark",
|
|
"meta-llama/Llama-3.3-70B-Instruct-Turbo-math",
|
|
"test_chat_completion.py",
|
|
"openai_api",
|
|
"verifications",
|
|
"tests",
|
|
"llama-stack",
|
|
""
|
|
],
|
|
"metadata": {
|
|
"model": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
|
|
"case_id": "math"
|
|
},
|
|
"setup": {
|
|
"duration": 0.010098082944750786,
|
|
"outcome": "passed"
|
|
},
|
|
"call": {
|
|
"duration": 1.6994713749736547,
|
|
"outcome": "passed"
|
|
},
|
|
"teardown": {
|
|
"duration": 0.00014512497000396252,
|
|
"outcome": "passed"
|
|
}
|
|
},
|
|
{
|
|
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-calendar]",
|
|
"lineno": 158,
|
|
"outcome": "passed",
|
|
"keywords": [
|
|
"test_chat_non_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-calendar]",
|
|
"parametrize",
|
|
"pytestmark",
|
|
"meta-llama/Llama-4-Scout-17B-16E-Instruct-calendar",
|
|
"test_chat_completion.py",
|
|
"openai_api",
|
|
"verifications",
|
|
"tests",
|
|
"llama-stack",
|
|
""
|
|
],
|
|
"metadata": {
|
|
"model": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
|
|
"case_id": "calendar"
|
|
},
|
|
"setup": {
|
|
"duration": 0.006934792036190629,
|
|
"outcome": "passed"
|
|
},
|
|
"call": {
|
|
"duration": 1.277176082949154,
|
|
"outcome": "passed"
|
|
},
|
|
"teardown": {
|
|
"duration": 0.0004985419800505042,
|
|
"outcome": "passed"
|
|
}
|
|
},
|
|
{
|
|
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-math]",
|
|
"lineno": 158,
|
|
"outcome": "passed",
|
|
"keywords": [
|
|
"test_chat_non_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-math]",
|
|
"parametrize",
|
|
"pytestmark",
|
|
"meta-llama/Llama-4-Scout-17B-16E-Instruct-math",
|
|
"test_chat_completion.py",
|
|
"openai_api",
|
|
"verifications",
|
|
"tests",
|
|
"llama-stack",
|
|
""
|
|
],
|
|
"metadata": {
|
|
"model": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
|
|
"case_id": "math"
|
|
},
|
|
"setup": {
|
|
"duration": 0.012558708898723125,
|
|
"outcome": "passed"
|
|
},
|
|
"call": {
|
|
"duration": 2.442075416096486,
|
|
"outcome": "passed"
|
|
},
|
|
"teardown": {
|
|
"duration": 0.0003505420172587037,
|
|
"outcome": "passed"
|
|
}
|
|
},
|
|
{
|
|
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-calendar]",
|
|
"lineno": 158,
|
|
"outcome": "passed",
|
|
"keywords": [
|
|
"test_chat_non_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-calendar]",
|
|
"parametrize",
|
|
"pytestmark",
|
|
"meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-calendar",
|
|
"test_chat_completion.py",
|
|
"openai_api",
|
|
"verifications",
|
|
"tests",
|
|
"llama-stack",
|
|
""
|
|
],
|
|
"metadata": {
|
|
"model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
|
|
"case_id": "calendar"
|
|
},
|
|
"setup": {
|
|
"duration": 0.012642999994568527,
|
|
"outcome": "passed"
|
|
},
|
|
"call": {
|
|
"duration": 0.9305703329155222,
|
|
"outcome": "passed"
|
|
},
|
|
"teardown": {
|
|
"duration": 0.00016004196368157864,
|
|
"outcome": "passed"
|
|
}
|
|
},
|
|
{
|
|
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-math]",
|
|
"lineno": 158,
|
|
"outcome": "passed",
|
|
"keywords": [
|
|
"test_chat_non_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-math]",
|
|
"parametrize",
|
|
"pytestmark",
|
|
"meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-math",
|
|
"test_chat_completion.py",
|
|
"openai_api",
|
|
"verifications",
|
|
"tests",
|
|
"llama-stack",
|
|
""
|
|
],
|
|
"metadata": {
|
|
"model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
|
|
"case_id": "math"
|
|
},
|
|
"setup": {
|
|
"duration": 0.008792415959760547,
|
|
"outcome": "passed"
|
|
},
|
|
"call": {
|
|
"duration": 2.194098167004995,
|
|
"outcome": "passed"
|
|
},
|
|
"teardown": {
|
|
"duration": 0.0003667499404400587,
|
|
"outcome": "passed"
|
|
}
|
|
},
|
|
{
|
|
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-calendar]",
|
|
"lineno": 181,
|
|
"outcome": "passed",
|
|
"keywords": [
|
|
"test_chat_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-calendar]",
|
|
"parametrize",
|
|
"pytestmark",
|
|
"meta-llama/Llama-3.3-70B-Instruct-Turbo-calendar",
|
|
"test_chat_completion.py",
|
|
"openai_api",
|
|
"verifications",
|
|
"tests",
|
|
"llama-stack",
|
|
""
|
|
],
|
|
"metadata": {
|
|
"model": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
|
|
"case_id": "calendar"
|
|
},
|
|
"setup": {
|
|
"duration": 0.01219504198525101,
|
|
"outcome": "passed"
|
|
},
|
|
"call": {
|
|
"duration": 2.045097667025402,
|
|
"outcome": "passed"
|
|
},
|
|
"teardown": {
|
|
"duration": 0.00029958400409668684,
|
|
"outcome": "passed"
|
|
}
|
|
},
|
|
{
|
|
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-math]",
|
|
"lineno": 181,
|
|
"outcome": "passed",
|
|
"keywords": [
|
|
"test_chat_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-math]",
|
|
"parametrize",
|
|
"pytestmark",
|
|
"meta-llama/Llama-3.3-70B-Instruct-Turbo-math",
|
|
"test_chat_completion.py",
|
|
"openai_api",
|
|
"verifications",
|
|
"tests",
|
|
"llama-stack",
|
|
""
|
|
],
|
|
"metadata": {
|
|
"model": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
|
|
"case_id": "math"
|
|
},
|
|
"setup": {
|
|
"duration": 0.014203459024429321,
|
|
"outcome": "passed"
|
|
},
|
|
"call": {
|
|
"duration": 1.3079068749211729,
|
|
"outcome": "passed"
|
|
},
|
|
"teardown": {
|
|
"duration": 0.0001914579188451171,
|
|
"outcome": "passed"
|
|
}
|
|
},
|
|
{
|
|
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-calendar]",
|
|
"lineno": 181,
|
|
"outcome": "failed",
|
|
"keywords": [
|
|
"test_chat_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-calendar]",
|
|
"parametrize",
|
|
"pytestmark",
|
|
"meta-llama/Llama-4-Scout-17B-16E-Instruct-calendar",
|
|
"test_chat_completion.py",
|
|
"openai_api",
|
|
"verifications",
|
|
"tests",
|
|
"llama-stack",
|
|
""
|
|
],
|
|
"metadata": {
|
|
"model": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
|
|
"case_id": "calendar"
|
|
},
|
|
"setup": {
|
|
"duration": 0.04714570892974734,
|
|
"outcome": "passed"
|
|
},
|
|
"call": {
|
|
"duration": 0.44743770791683346,
|
|
"outcome": "failed",
|
|
"crash": {
|
|
"path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
|
|
"lineno": 200,
|
|
"message": "IndexError: list index out of range"
|
|
},
|
|
"traceback": [
|
|
{
|
|
"path": "tests/verifications/openai_api/test_chat_completion.py",
|
|
"lineno": 200,
|
|
"message": "IndexError"
|
|
}
|
|
],
|
|
"longrepr": "request = <FixtureRequest for <Function test_chat_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-calendar]>>\nopenai_client = <openai.OpenAI object at 0x1072f7520>\nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'case_id': 'calendar', 'input': {'messages': [{'content': 'Extract the event information.', 'role': 'system'}, {'cont...articipants'], 'title': 'CalendarEvent', 'type': 'object'}}, 'type': 'json_schema'}}, 'output': 'valid_calendar_event'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_structured_output\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_structured_output(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n response_format=case[\"input\"][\"response_format\"],\n stream=True,\n )\n maybe_json_content = \"\"\n for chunk in response:\n> maybe_json_content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:200: IndexError"
|
|
},
|
|
"teardown": {
|
|
"duration": 0.00022199994418770075,
|
|
"outcome": "passed"
|
|
}
|
|
},
|
|
{
|
|
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-math]",
|
|
"lineno": 181,
|
|
"outcome": "failed",
|
|
"keywords": [
|
|
"test_chat_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-math]",
|
|
"parametrize",
|
|
"pytestmark",
|
|
"meta-llama/Llama-4-Scout-17B-16E-Instruct-math",
|
|
"test_chat_completion.py",
|
|
"openai_api",
|
|
"verifications",
|
|
"tests",
|
|
"llama-stack",
|
|
""
|
|
],
|
|
"metadata": {
|
|
"model": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
|
|
"case_id": "math"
|
|
},
|
|
"setup": {
|
|
"duration": 0.012237709015607834,
|
|
"outcome": "passed"
|
|
},
|
|
"call": {
|
|
"duration": 3.180020791012794,
|
|
"outcome": "failed",
|
|
"crash": {
|
|
"path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
|
|
"lineno": 200,
|
|
"message": "IndexError: list index out of range"
|
|
},
|
|
"traceback": [
|
|
{
|
|
"path": "tests/verifications/openai_api/test_chat_completion.py",
|
|
"lineno": 200,
|
|
"message": "IndexError"
|
|
}
|
|
],
|
|
"longrepr": "request = <FixtureRequest for <Function test_chat_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-math]>>\nopenai_client = <openai.OpenAI object at 0x105916e00>\nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'case_id': 'math', 'input': {'messages': [{'content': 'You are a helpful math tutor. Guide the user through the solut... ['steps', 'final_answer'], 'title': 'MathReasoning', ...}}, 'type': 'json_schema'}}, 'output': 'valid_math_reasoning'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_structured_output\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_structured_output(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n response_format=case[\"input\"][\"response_format\"],\n stream=True,\n )\n maybe_json_content = \"\"\n for chunk in response:\n> maybe_json_content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:200: IndexError"
|
|
},
|
|
"teardown": {
|
|
"duration": 0.000273333047516644,
|
|
"outcome": "passed"
|
|
}
|
|
},
|
|
{
|
|
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-calendar]",
|
|
"lineno": 181,
|
|
"outcome": "failed",
|
|
"keywords": [
|
|
"test_chat_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-calendar]",
|
|
"parametrize",
|
|
"pytestmark",
|
|
"meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-calendar",
|
|
"test_chat_completion.py",
|
|
"openai_api",
|
|
"verifications",
|
|
"tests",
|
|
"llama-stack",
|
|
""
|
|
],
|
|
"metadata": {
|
|
"model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
|
|
"case_id": "calendar"
|
|
},
|
|
"setup": {
|
|
"duration": 0.013312208000570536,
|
|
"outcome": "passed"
|
|
},
|
|
"call": {
|
|
"duration": 0.4110311249969527,
|
|
"outcome": "failed",
|
|
"crash": {
|
|
"path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
|
|
"lineno": 200,
|
|
"message": "IndexError: list index out of range"
|
|
},
|
|
"traceback": [
|
|
{
|
|
"path": "tests/verifications/openai_api/test_chat_completion.py",
|
|
"lineno": 200,
|
|
"message": "IndexError"
|
|
}
|
|
],
|
|
"longrepr": "request = <FixtureRequest for <Function test_chat_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-calendar]>>\nopenai_client = <openai.OpenAI object at 0x1072983d0>\nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'case_id': 'calendar', 'input': {'messages': [{'content': 'Extract the event information.', 'role': 'system'}, {'cont...articipants'], 'title': 'CalendarEvent', 'type': 'object'}}, 'type': 'json_schema'}}, 'output': 'valid_calendar_event'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_structured_output\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_structured_output(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n response_format=case[\"input\"][\"response_format\"],\n stream=True,\n )\n maybe_json_content = \"\"\n for chunk in response:\n> maybe_json_content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:200: IndexError"
|
|
},
|
|
"teardown": {
|
|
"duration": 0.00022975006140768528,
|
|
"outcome": "passed"
|
|
}
|
|
},
|
|
{
|
|
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-math]",
|
|
"lineno": 181,
|
|
"outcome": "failed",
|
|
"keywords": [
|
|
"test_chat_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-math]",
|
|
"parametrize",
|
|
"pytestmark",
|
|
"meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-math",
|
|
"test_chat_completion.py",
|
|
"openai_api",
|
|
"verifications",
|
|
"tests",
|
|
"llama-stack",
|
|
""
|
|
],
|
|
"metadata": {
|
|
"model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
|
|
"case_id": "math"
|
|
},
|
|
"setup": {
|
|
"duration": 0.006676917080767453,
|
|
"outcome": "passed"
|
|
},
|
|
"call": {
|
|
"duration": 2.316411833046004,
|
|
"outcome": "failed",
|
|
"crash": {
|
|
"path": "/Users/erichuang/projects/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
|
|
"lineno": 200,
|
|
"message": "IndexError: list index out of range"
|
|
},
|
|
"traceback": [
|
|
{
|
|
"path": "tests/verifications/openai_api/test_chat_completion.py",
|
|
"lineno": 200,
|
|
"message": "IndexError"
|
|
}
|
|
],
|
|
"longrepr": "request = <FixtureRequest for <Function test_chat_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-math]>>\nopenai_client = <openai.OpenAI object at 0x107280880>\nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...el_display_names': {'gpt-4o': 'gpt-4o', 'gpt-4o-mini': 'gpt-4o-mini'}, 'models': ['gpt-4o', 'gpt-4o-mini'], ...}, ...}}\ncase = {'case_id': 'math', 'input': {'messages': [{'content': 'You are a helpful math tutor. Guide the user through the solut... ['steps', 'final_answer'], 'title': 'MathReasoning', ...}}, 'type': 'json_schema'}}, 'output': 'valid_math_reasoning'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_structured_output\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_structured_output(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n response_format=case[\"input\"][\"response_format\"],\n stream=True,\n )\n maybe_json_content = \"\"\n for chunk in response:\n> maybe_json_content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:200: IndexError"
|
|
},
|
|
"teardown": {
|
|
"duration": 0.000245374976657331,
|
|
"outcome": "passed"
|
|
}
|
|
},
|
|
{
|
|
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]",
|
|
"lineno": 203,
|
|
"outcome": "passed",
|
|
"keywords": [
|
|
"test_chat_non_streaming_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]",
|
|
"parametrize",
|
|
"pytestmark",
|
|
"meta-llama/Llama-3.3-70B-Instruct-Turbo-case0",
|
|
"test_chat_completion.py",
|
|
"openai_api",
|
|
"verifications",
|
|
"tests",
|
|
"llama-stack",
|
|
""
|
|
],
|
|
"metadata": {
|
|
"model": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
|
|
"case_id": "case0"
|
|
},
|
|
"setup": {
|
|
"duration": 0.007064500008709729,
|
|
"outcome": "passed"
|
|
},
|
|
"call": {
|
|
"duration": 0.606806542025879,
|
|
"outcome": "passed"
|
|
},
|
|
"teardown": {
|
|
"duration": 0.00046320806723088026,
|
|
"outcome": "passed"
|
|
}
|
|
},
|
|
{
|
|
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]",
|
|
"lineno": 203,
|
|
"outcome": "passed",
|
|
"keywords": [
|
|
"test_chat_non_streaming_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]",
|
|
"parametrize",
|
|
"pytestmark",
|
|
"meta-llama/Llama-4-Scout-17B-16E-Instruct-case0",
|
|
"test_chat_completion.py",
|
|
"openai_api",
|
|
"verifications",
|
|
"tests",
|
|
"llama-stack",
|
|
""
|
|
],
|
|
"metadata": {
|
|
"model": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
|
|
"case_id": "case0"
|
|
},
|
|
"setup": {
|
|
"duration": 0.009071375010535121,
|
|
"outcome": "passed"
|
|
},
|
|
"call": {
|
|
"duration": 0.41908070899080485,
|
|
"outcome": "passed"
|
|
},
|
|
"teardown": {
|
|
"duration": 0.00026074994821101427,
|
|
"outcome": "passed"
|
|
}
|
|
},
|
|
{
|
|
"nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]",
|
|
"lineno": 203,
|
|
"outcome": "passed",
|
|
"keywords": [
|
|
"test_chat_non_streaming_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]",
|
|
"parametrize",
|
|
"pytestmark",
|
|
"meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0",
|
|
"test_chat_completion.py",
|
|
"openai_api",
|
|
"verifications",
|
|
"tests",
|
|
"llama-stack",
|
|
""
|
|
],
|
|
"metadata": {
|
|
"model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
|
|
"case_id": "case0"
|
|
},
|
|
"setup": {
|
|
"duration": 0.0068333749659359455,
|
|
"outcome": "passed"
|
|
},
|
|
"call": {
|
|
"duration": 0.8904451669659466,
|
|
"outcome": "passed"
|
|
},
|
|
"teardown": {
|
|
"duration": 0.0005833340110257268,
|
|
"outcome": "passed"
|
|
}
|
|
}
|
|
]
|
|
}
|