mirror of
				https://github.com/meta-llama/llama-stack.git
				synced 2025-10-24 16:57:21 +00:00 
			
		
		
		
	# What does this PR do? ## Test Plan pytest tests/verifications/openai_api/test_chat_completion.py --provider openai -k 'test_chat_multiple_images'
		
			
				
	
	
		
			3821 lines
		
	
	
	
		
			254 KiB
		
	
	
	
		
			JSON
		
	
	
	
	
	
			
		
		
	
	
			3821 lines
		
	
	
	
		
			254 KiB
		
	
	
	
		
			JSON
		
	
	
	
	
	
| {
 | |
|   "created": 1744918192.9299376,
 | |
|   "duration": 126.91354608535767,
 | |
|   "exitcode": 1,
 | |
|   "root": "/home/erichuang/llama-stack",
 | |
|   "environment": {},
 | |
|   "summary": {
 | |
|     "passed": 40,
 | |
|     "failed": 40,
 | |
|     "skipped": 4,
 | |
|     "total": 84,
 | |
|     "collected": 84
 | |
|   },
 | |
|   "collectors": [
 | |
|     {
 | |
|       "nodeid": "",
 | |
|       "outcome": "passed",
 | |
|       "result": [
 | |
|         {
 | |
|           "nodeid": "tests/verifications/openai_api/test_chat_completion.py",
 | |
|           "type": "Module"
 | |
|         }
 | |
|       ]
 | |
|     },
 | |
|     {
 | |
|       "nodeid": "tests/verifications/openai_api/test_chat_completion.py",
 | |
|       "outcome": "passed",
 | |
|       "result": [
 | |
|         {
 | |
|           "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-earth]",
 | |
|           "type": "Function",
 | |
|           "lineno": 95
 | |
|         },
 | |
|         {
 | |
|           "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-saturn]",
 | |
|           "type": "Function",
 | |
|           "lineno": 95
 | |
|         },
 | |
|         {
 | |
|           "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-earth]",
 | |
|           "type": "Function",
 | |
|           "lineno": 95
 | |
|         },
 | |
|         {
 | |
|           "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-saturn]",
 | |
|           "type": "Function",
 | |
|           "lineno": 95
 | |
|         },
 | |
|         {
 | |
|           "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-earth]",
 | |
|           "type": "Function",
 | |
|           "lineno": 95
 | |
|         },
 | |
|         {
 | |
|           "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-saturn]",
 | |
|           "type": "Function",
 | |
|           "lineno": 95
 | |
|         },
 | |
|         {
 | |
|           "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-earth]",
 | |
|           "type": "Function",
 | |
|           "lineno": 114
 | |
|         },
 | |
|         {
 | |
|           "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-saturn]",
 | |
|           "type": "Function",
 | |
|           "lineno": 114
 | |
|         },
 | |
|         {
 | |
|           "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-earth]",
 | |
|           "type": "Function",
 | |
|           "lineno": 114
 | |
|         },
 | |
|         {
 | |
|           "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-saturn]",
 | |
|           "type": "Function",
 | |
|           "lineno": 114
 | |
|         },
 | |
|         {
 | |
|           "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-earth]",
 | |
|           "type": "Function",
 | |
|           "lineno": 114
 | |
|         },
 | |
|         {
 | |
|           "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-saturn]",
 | |
|           "type": "Function",
 | |
|           "lineno": 114
 | |
|         },
 | |
|         {
 | |
|           "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]",
 | |
|           "type": "Function",
 | |
|           "lineno": 138
 | |
|         },
 | |
|         {
 | |
|           "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]",
 | |
|           "type": "Function",
 | |
|           "lineno": 138
 | |
|         },
 | |
|         {
 | |
|           "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]",
 | |
|           "type": "Function",
 | |
|           "lineno": 138
 | |
|         },
 | |
|         {
 | |
|           "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]",
 | |
|           "type": "Function",
 | |
|           "lineno": 157
 | |
|         },
 | |
|         {
 | |
|           "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]",
 | |
|           "type": "Function",
 | |
|           "lineno": 157
 | |
|         },
 | |
|         {
 | |
|           "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]",
 | |
|           "type": "Function",
 | |
|           "lineno": 157
 | |
|         },
 | |
|         {
 | |
|           "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-calendar]",
 | |
|           "type": "Function",
 | |
|           "lineno": 181
 | |
|         },
 | |
|         {
 | |
|           "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-math]",
 | |
|           "type": "Function",
 | |
|           "lineno": 181
 | |
|         },
 | |
|         {
 | |
|           "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-calendar]",
 | |
|           "type": "Function",
 | |
|           "lineno": 181
 | |
|         },
 | |
|         {
 | |
|           "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-math]",
 | |
|           "type": "Function",
 | |
|           "lineno": 181
 | |
|         },
 | |
|         {
 | |
|           "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-calendar]",
 | |
|           "type": "Function",
 | |
|           "lineno": 181
 | |
|         },
 | |
|         {
 | |
|           "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-math]",
 | |
|           "type": "Function",
 | |
|           "lineno": 181
 | |
|         },
 | |
|         {
 | |
|           "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-calendar]",
 | |
|           "type": "Function",
 | |
|           "lineno": 204
 | |
|         },
 | |
|         {
 | |
|           "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-math]",
 | |
|           "type": "Function",
 | |
|           "lineno": 204
 | |
|         },
 | |
|         {
 | |
|           "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-calendar]",
 | |
|           "type": "Function",
 | |
|           "lineno": 204
 | |
|         },
 | |
|         {
 | |
|           "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-math]",
 | |
|           "type": "Function",
 | |
|           "lineno": 204
 | |
|         },
 | |
|         {
 | |
|           "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-calendar]",
 | |
|           "type": "Function",
 | |
|           "lineno": 204
 | |
|         },
 | |
|         {
 | |
|           "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-math]",
 | |
|           "type": "Function",
 | |
|           "lineno": 204
 | |
|         },
 | |
|         {
 | |
|           "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]",
 | |
|           "type": "Function",
 | |
|           "lineno": 226
 | |
|         },
 | |
|         {
 | |
|           "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]",
 | |
|           "type": "Function",
 | |
|           "lineno": 226
 | |
|         },
 | |
|         {
 | |
|           "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]",
 | |
|           "type": "Function",
 | |
|           "lineno": 226
 | |
|         },
 | |
|         {
 | |
|           "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]",
 | |
|           "type": "Function",
 | |
|           "lineno": 250
 | |
|         },
 | |
|         {
 | |
|           "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]",
 | |
|           "type": "Function",
 | |
|           "lineno": 250
 | |
|         },
 | |
|         {
 | |
|           "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]",
 | |
|           "type": "Function",
 | |
|           "lineno": 250
 | |
|         },
 | |
|         {
 | |
|           "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_required[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]",
 | |
|           "type": "Function",
 | |
|           "lineno": 278
 | |
|         },
 | |
|         {
 | |
|           "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_required[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]",
 | |
|           "type": "Function",
 | |
|           "lineno": 278
 | |
|         },
 | |
|         {
 | |
|           "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_required[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]",
 | |
|           "type": "Function",
 | |
|           "lineno": 278
 | |
|         },
 | |
|         {
 | |
|           "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_required[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]",
 | |
|           "type": "Function",
 | |
|           "lineno": 302
 | |
|         },
 | |
|         {
 | |
|           "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_required[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]",
 | |
|           "type": "Function",
 | |
|           "lineno": 302
 | |
|         },
 | |
|         {
 | |
|           "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_required[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]",
 | |
|           "type": "Function",
 | |
|           "lineno": 302
 | |
|         },
 | |
|         {
 | |
|           "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_none[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]",
 | |
|           "type": "Function",
 | |
|           "lineno": 329
 | |
|         },
 | |
|         {
 | |
|           "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_none[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]",
 | |
|           "type": "Function",
 | |
|           "lineno": 329
 | |
|         },
 | |
|         {
 | |
|           "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_none[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]",
 | |
|           "type": "Function",
 | |
|           "lineno": 329
 | |
|         },
 | |
|         {
 | |
|           "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_none[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]",
 | |
|           "type": "Function",
 | |
|           "lineno": 352
 | |
|         },
 | |
|         {
 | |
|           "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_none[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]",
 | |
|           "type": "Function",
 | |
|           "lineno": 352
 | |
|         },
 | |
|         {
 | |
|           "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_none[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]",
 | |
|           "type": "Function",
 | |
|           "lineno": 352
 | |
|         },
 | |
|         {
 | |
|           "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-text_then_weather_tool]",
 | |
|           "type": "Function",
 | |
|           "lineno": 380
 | |
|         },
 | |
|         {
 | |
|           "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-weather_tool_then_text]",
 | |
|           "type": "Function",
 | |
|           "lineno": 380
 | |
|         },
 | |
|         {
 | |
|           "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-add_product_tool]",
 | |
|           "type": "Function",
 | |
|           "lineno": 380
 | |
|         },
 | |
|         {
 | |
|           "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-get_then_create_event_tool]",
 | |
|           "type": "Function",
 | |
|           "lineno": 380
 | |
|         },
 | |
|         {
 | |
|           "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-compare_monthly_expense_tool]",
 | |
|           "type": "Function",
 | |
|           "lineno": 380
 | |
|         },
 | |
|         {
 | |
|           "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-text_then_weather_tool]",
 | |
|           "type": "Function",
 | |
|           "lineno": 380
 | |
|         },
 | |
|         {
 | |
|           "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-weather_tool_then_text]",
 | |
|           "type": "Function",
 | |
|           "lineno": 380
 | |
|         },
 | |
|         {
 | |
|           "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-add_product_tool]",
 | |
|           "type": "Function",
 | |
|           "lineno": 380
 | |
|         },
 | |
|         {
 | |
|           "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-get_then_create_event_tool]",
 | |
|           "type": "Function",
 | |
|           "lineno": 380
 | |
|         },
 | |
|         {
 | |
|           "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-compare_monthly_expense_tool]",
 | |
|           "type": "Function",
 | |
|           "lineno": 380
 | |
|         },
 | |
|         {
 | |
|           "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-text_then_weather_tool]",
 | |
|           "type": "Function",
 | |
|           "lineno": 380
 | |
|         },
 | |
|         {
 | |
|           "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-weather_tool_then_text]",
 | |
|           "type": "Function",
 | |
|           "lineno": 380
 | |
|         },
 | |
|         {
 | |
|           "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-add_product_tool]",
 | |
|           "type": "Function",
 | |
|           "lineno": 380
 | |
|         },
 | |
|         {
 | |
|           "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-get_then_create_event_tool]",
 | |
|           "type": "Function",
 | |
|           "lineno": 380
 | |
|         },
 | |
|         {
 | |
|           "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-compare_monthly_expense_tool]",
 | |
|           "type": "Function",
 | |
|           "lineno": 380
 | |
|         },
 | |
|         {
 | |
|           "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-text_then_weather_tool]",
 | |
|           "type": "Function",
 | |
|           "lineno": 471
 | |
|         },
 | |
|         {
 | |
|           "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-weather_tool_then_text]",
 | |
|           "type": "Function",
 | |
|           "lineno": 471
 | |
|         },
 | |
|         {
 | |
|           "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-add_product_tool]",
 | |
|           "type": "Function",
 | |
|           "lineno": 471
 | |
|         },
 | |
|         {
 | |
|           "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-get_then_create_event_tool]",
 | |
|           "type": "Function",
 | |
|           "lineno": 471
 | |
|         },
 | |
|         {
 | |
|           "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-compare_monthly_expense_tool]",
 | |
|           "type": "Function",
 | |
|           "lineno": 471
 | |
|         },
 | |
|         {
 | |
|           "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-text_then_weather_tool]",
 | |
|           "type": "Function",
 | |
|           "lineno": 471
 | |
|         },
 | |
|         {
 | |
|           "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-weather_tool_then_text]",
 | |
|           "type": "Function",
 | |
|           "lineno": 471
 | |
|         },
 | |
|         {
 | |
|           "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-add_product_tool]",
 | |
|           "type": "Function",
 | |
|           "lineno": 471
 | |
|         },
 | |
|         {
 | |
|           "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-get_then_create_event_tool]",
 | |
|           "type": "Function",
 | |
|           "lineno": 471
 | |
|         },
 | |
|         {
 | |
|           "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-compare_monthly_expense_tool]",
 | |
|           "type": "Function",
 | |
|           "lineno": 471
 | |
|         },
 | |
|         {
 | |
|           "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-text_then_weather_tool]",
 | |
|           "type": "Function",
 | |
|           "lineno": 471
 | |
|         },
 | |
|         {
 | |
|           "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-weather_tool_then_text]",
 | |
|           "type": "Function",
 | |
|           "lineno": 471
 | |
|         },
 | |
|         {
 | |
|           "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-add_product_tool]",
 | |
|           "type": "Function",
 | |
|           "lineno": 471
 | |
|         },
 | |
|         {
 | |
|           "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-get_then_create_event_tool]",
 | |
|           "type": "Function",
 | |
|           "lineno": 471
 | |
|         },
 | |
|         {
 | |
|           "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-compare_monthly_expense_tool]",
 | |
|           "type": "Function",
 | |
|           "lineno": 471
 | |
|         },
 | |
|         {
 | |
|           "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_multi_turn_multiple_images[meta-llama/Llama-3.3-70B-Instruct-Turbo-stream=False]",
 | |
|           "type": "Function",
 | |
|           "lineno": 554
 | |
|         },
 | |
|         {
 | |
|           "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_multi_turn_multiple_images[meta-llama/Llama-3.3-70B-Instruct-Turbo-stream=True]",
 | |
|           "type": "Function",
 | |
|           "lineno": 554
 | |
|         },
 | |
|         {
 | |
|           "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_multi_turn_multiple_images[meta-llama/Llama-4-Scout-17B-16E-Instruct-stream=False]",
 | |
|           "type": "Function",
 | |
|           "lineno": 554
 | |
|         },
 | |
|         {
 | |
|           "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_multi_turn_multiple_images[meta-llama/Llama-4-Scout-17B-16E-Instruct-stream=True]",
 | |
|           "type": "Function",
 | |
|           "lineno": 554
 | |
|         },
 | |
|         {
 | |
|           "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_multi_turn_multiple_images[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-stream=False]",
 | |
|           "type": "Function",
 | |
|           "lineno": 554
 | |
|         },
 | |
|         {
 | |
|           "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_multi_turn_multiple_images[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-stream=True]",
 | |
|           "type": "Function",
 | |
|           "lineno": 554
 | |
|         }
 | |
|       ]
 | |
|     }
 | |
|   ],
 | |
|   "tests": [
 | |
|     {
 | |
|       "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-earth]",
 | |
|       "lineno": 95,
 | |
|       "outcome": "passed",
 | |
|       "keywords": [
 | |
|         "test_chat_non_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-earth]",
 | |
|         "parametrize",
 | |
|         "pytestmark",
 | |
|         "meta-llama/Llama-3.3-70B-Instruct-Turbo-earth",
 | |
|         "test_chat_completion.py",
 | |
|         "openai_api",
 | |
|         "verifications",
 | |
|         "tests",
 | |
|         "llama-stack",
 | |
|         ""
 | |
|       ],
 | |
|       "metadata": {
 | |
|         "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
 | |
|         "case_id": "earth"
 | |
|       },
 | |
|       "setup": {
 | |
|         "duration": 0.11939296405762434,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "call": {
 | |
|         "duration": 0.6422080835327506,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "teardown": {
 | |
|         "duration": 0.0002934802323579788,
 | |
|         "outcome": "passed"
 | |
|       }
 | |
|     },
 | |
|     {
 | |
|       "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-saturn]",
 | |
|       "lineno": 95,
 | |
|       "outcome": "passed",
 | |
|       "keywords": [
 | |
|         "test_chat_non_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-saturn]",
 | |
|         "parametrize",
 | |
|         "pytestmark",
 | |
|         "meta-llama/Llama-3.3-70B-Instruct-Turbo-saturn",
 | |
|         "test_chat_completion.py",
 | |
|         "openai_api",
 | |
|         "verifications",
 | |
|         "tests",
 | |
|         "llama-stack",
 | |
|         ""
 | |
|       ],
 | |
|       "metadata": {
 | |
|         "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
 | |
|         "case_id": "saturn"
 | |
|       },
 | |
|       "setup": {
 | |
|         "duration": 0.07340026367455721,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "call": {
 | |
|         "duration": 0.6134521719068289,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "teardown": {
 | |
|         "duration": 0.00031049735844135284,
 | |
|         "outcome": "passed"
 | |
|       }
 | |
|     },
 | |
|     {
 | |
|       "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-earth]",
 | |
|       "lineno": 95,
 | |
|       "outcome": "passed",
 | |
|       "keywords": [
 | |
|         "test_chat_non_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-earth]",
 | |
|         "parametrize",
 | |
|         "pytestmark",
 | |
|         "meta-llama/Llama-4-Scout-17B-16E-Instruct-earth",
 | |
|         "test_chat_completion.py",
 | |
|         "openai_api",
 | |
|         "verifications",
 | |
|         "tests",
 | |
|         "llama-stack",
 | |
|         ""
 | |
|       ],
 | |
|       "metadata": {
 | |
|         "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
 | |
|         "case_id": "earth"
 | |
|       },
 | |
|       "setup": {
 | |
|         "duration": 0.07351398840546608,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "call": {
 | |
|         "duration": 0.898847377859056,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "teardown": {
 | |
|         "duration": 0.0002735760062932968,
 | |
|         "outcome": "passed"
 | |
|       }
 | |
|     },
 | |
|     {
 | |
|       "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-saturn]",
 | |
|       "lineno": 95,
 | |
|       "outcome": "passed",
 | |
|       "keywords": [
 | |
|         "test_chat_non_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-saturn]",
 | |
|         "parametrize",
 | |
|         "pytestmark",
 | |
|         "meta-llama/Llama-4-Scout-17B-16E-Instruct-saturn",
 | |
|         "test_chat_completion.py",
 | |
|         "openai_api",
 | |
|         "verifications",
 | |
|         "tests",
 | |
|         "llama-stack",
 | |
|         ""
 | |
|       ],
 | |
|       "metadata": {
 | |
|         "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
 | |
|         "case_id": "saturn"
 | |
|       },
 | |
|       "setup": {
 | |
|         "duration": 0.08612977154552937,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "call": {
 | |
|         "duration": 0.6511319326236844,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "teardown": {
 | |
|         "duration": 0.0003559151664376259,
 | |
|         "outcome": "passed"
 | |
|       }
 | |
|     },
 | |
|     {
 | |
|       "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-earth]",
 | |
|       "lineno": 95,
 | |
|       "outcome": "passed",
 | |
|       "keywords": [
 | |
|         "test_chat_non_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-earth]",
 | |
|         "parametrize",
 | |
|         "pytestmark",
 | |
|         "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-earth",
 | |
|         "test_chat_completion.py",
 | |
|         "openai_api",
 | |
|         "verifications",
 | |
|         "tests",
 | |
|         "llama-stack",
 | |
|         ""
 | |
|       ],
 | |
|       "metadata": {
 | |
|         "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
 | |
|         "case_id": "earth"
 | |
|       },
 | |
|       "setup": {
 | |
|         "duration": 0.08106738794595003,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "call": {
 | |
|         "duration": 1.206272155046463,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "teardown": {
 | |
|         "duration": 0.0003584325313568115,
 | |
|         "outcome": "passed"
 | |
|       }
 | |
|     },
 | |
|     {
 | |
|       "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-saturn]",
 | |
|       "lineno": 95,
 | |
|       "outcome": "passed",
 | |
|       "keywords": [
 | |
|         "test_chat_non_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-saturn]",
 | |
|         "parametrize",
 | |
|         "pytestmark",
 | |
|         "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-saturn",
 | |
|         "test_chat_completion.py",
 | |
|         "openai_api",
 | |
|         "verifications",
 | |
|         "tests",
 | |
|         "llama-stack",
 | |
|         ""
 | |
|       ],
 | |
|       "metadata": {
 | |
|         "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
 | |
|         "case_id": "saturn"
 | |
|       },
 | |
|       "setup": {
 | |
|         "duration": 0.0796442786231637,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "call": {
 | |
|         "duration": 0.4815350500866771,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "teardown": {
 | |
|         "duration": 0.00025806669145822525,
 | |
|         "outcome": "passed"
 | |
|       }
 | |
|     },
 | |
|     {
 | |
|       "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-earth]",
 | |
|       "lineno": 114,
 | |
|       "outcome": "passed",
 | |
|       "keywords": [
 | |
|         "test_chat_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-earth]",
 | |
|         "parametrize",
 | |
|         "pytestmark",
 | |
|         "meta-llama/Llama-3.3-70B-Instruct-Turbo-earth",
 | |
|         "test_chat_completion.py",
 | |
|         "openai_api",
 | |
|         "verifications",
 | |
|         "tests",
 | |
|         "llama-stack",
 | |
|         ""
 | |
|       ],
 | |
|       "metadata": {
 | |
|         "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
 | |
|         "case_id": "earth"
 | |
|       },
 | |
|       "setup": {
 | |
|         "duration": 0.07231954019516706,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "call": {
 | |
|         "duration": 1.1521263290196657,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "teardown": {
 | |
|         "duration": 0.00032721273601055145,
 | |
|         "outcome": "passed"
 | |
|       }
 | |
|     },
 | |
|     {
 | |
|       "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-saturn]",
 | |
|       "lineno": 114,
 | |
|       "outcome": "passed",
 | |
|       "keywords": [
 | |
|         "test_chat_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-saturn]",
 | |
|         "parametrize",
 | |
|         "pytestmark",
 | |
|         "meta-llama/Llama-3.3-70B-Instruct-Turbo-saturn",
 | |
|         "test_chat_completion.py",
 | |
|         "openai_api",
 | |
|         "verifications",
 | |
|         "tests",
 | |
|         "llama-stack",
 | |
|         ""
 | |
|       ],
 | |
|       "metadata": {
 | |
|         "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
 | |
|         "case_id": "saturn"
 | |
|       },
 | |
|       "setup": {
 | |
|         "duration": 0.07364387530833483,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "call": {
 | |
|         "duration": 1.0600289879366755,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "teardown": {
 | |
|         "duration": 0.00028987880796194077,
 | |
|         "outcome": "passed"
 | |
|       }
 | |
|     },
 | |
|     {
 | |
|       "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-earth]",
 | |
|       "lineno": 114,
 | |
|       "outcome": "failed",
 | |
|       "keywords": [
 | |
|         "test_chat_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-earth]",
 | |
|         "parametrize",
 | |
|         "pytestmark",
 | |
|         "meta-llama/Llama-4-Scout-17B-16E-Instruct-earth",
 | |
|         "test_chat_completion.py",
 | |
|         "openai_api",
 | |
|         "verifications",
 | |
|         "tests",
 | |
|         "llama-stack",
 | |
|         ""
 | |
|       ],
 | |
|       "metadata": {
 | |
|         "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
 | |
|         "case_id": "earth"
 | |
|       },
 | |
|       "setup": {
 | |
|         "duration": 0.07162868417799473,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "call": {
 | |
|         "duration": 0.2930005770176649,
 | |
|         "outcome": "failed",
 | |
|         "crash": {
 | |
|           "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
 | |
|           "lineno": 132,
 | |
|           "message": "IndexError: list index out of range"
 | |
|         },
 | |
|         "traceback": [
 | |
|           {
 | |
|             "path": "tests/verifications/openai_api/test_chat_completion.py",
 | |
|             "lineno": 132,
 | |
|             "message": "IndexError"
 | |
|           }
 | |
|         ],
 | |
|         "longrepr": "request = <FixtureRequest for <Function test_chat_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-earth]>>\nopenai_client = <openai.OpenAI object at 0x7f42743e7760>\nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'earth', 'input': {'messages': [{'content': 'Which planet do humans live on?', 'role': 'user'}]}, 'output': 'Earth'}\n\n    @pytest.mark.parametrize(\n        \"case\",\n        chat_completion_test_cases[\"test_chat_basic\"][\"test_params\"][\"case\"],\n        ids=case_id_generator,\n    )\n    def test_chat_streaming_basic(request, openai_client, model, provider, verification_config, case):\n        test_name_base = get_base_test_name(request)\n        if should_skip_test(verification_config, provider, model, test_name_base):\n            pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n    \n        response = openai_client.chat.completions.create(\n            model=model,\n            messages=case[\"input\"][\"messages\"],\n            stream=True,\n        )\n        content = \"\"\n        for chunk in response:\n>           content += chunk.choices[0].delta.content or \"\"\nE           IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:132: IndexError"
 | |
|       },
 | |
|       "teardown": {
 | |
|         "duration": 0.0004123607650399208,
 | |
|         "outcome": "passed"
 | |
|       }
 | |
|     },
 | |
|     {
 | |
|       "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-saturn]",
 | |
|       "lineno": 114,
 | |
|       "outcome": "failed",
 | |
|       "keywords": [
 | |
|         "test_chat_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-saturn]",
 | |
|         "parametrize",
 | |
|         "pytestmark",
 | |
|         "meta-llama/Llama-4-Scout-17B-16E-Instruct-saturn",
 | |
|         "test_chat_completion.py",
 | |
|         "openai_api",
 | |
|         "verifications",
 | |
|         "tests",
 | |
|         "llama-stack",
 | |
|         ""
 | |
|       ],
 | |
|       "metadata": {
 | |
|         "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
 | |
|         "case_id": "saturn"
 | |
|       },
 | |
|       "setup": {
 | |
|         "duration": 0.07553945016115904,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "call": {
 | |
|         "duration": 0.4265708066523075,
 | |
|         "outcome": "failed",
 | |
|         "crash": {
 | |
|           "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
 | |
|           "lineno": 132,
 | |
|           "message": "IndexError: list index out of range"
 | |
|         },
 | |
|         "traceback": [
 | |
|           {
 | |
|             "path": "tests/verifications/openai_api/test_chat_completion.py",
 | |
|             "lineno": 132,
 | |
|             "message": "IndexError"
 | |
|           }
 | |
|         ],
 | |
|         "longrepr": "request = <FixtureRequest for <Function test_chat_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-saturn]>>\nopenai_client = <openai.OpenAI object at 0x7f42742571f0>\nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'saturn', 'input': {'messages': [{'content': 'Which planet has rings around it with a name starting with letter S?', 'role': 'user'}]}, 'output': 'Saturn'}\n\n    @pytest.mark.parametrize(\n        \"case\",\n        chat_completion_test_cases[\"test_chat_basic\"][\"test_params\"][\"case\"],\n        ids=case_id_generator,\n    )\n    def test_chat_streaming_basic(request, openai_client, model, provider, verification_config, case):\n        test_name_base = get_base_test_name(request)\n        if should_skip_test(verification_config, provider, model, test_name_base):\n            pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n    \n        response = openai_client.chat.completions.create(\n            model=model,\n            messages=case[\"input\"][\"messages\"],\n            stream=True,\n        )\n        content = \"\"\n        for chunk in response:\n>           content += chunk.choices[0].delta.content or \"\"\nE           IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:132: IndexError"
 | |
|       },
 | |
|       "teardown": {
 | |
|         "duration": 0.0003767991438508034,
 | |
|         "outcome": "passed"
 | |
|       }
 | |
|     },
 | |
|     {
 | |
|       "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-earth]",
 | |
|       "lineno": 114,
 | |
|       "outcome": "failed",
 | |
|       "keywords": [
 | |
|         "test_chat_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-earth]",
 | |
|         "parametrize",
 | |
|         "pytestmark",
 | |
|         "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-earth",
 | |
|         "test_chat_completion.py",
 | |
|         "openai_api",
 | |
|         "verifications",
 | |
|         "tests",
 | |
|         "llama-stack",
 | |
|         ""
 | |
|       ],
 | |
|       "metadata": {
 | |
|         "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
 | |
|         "case_id": "earth"
 | |
|       },
 | |
|       "setup": {
 | |
|         "duration": 0.07143466174602509,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "call": {
 | |
|         "duration": 1.0281891459599137,
 | |
|         "outcome": "failed",
 | |
|         "crash": {
 | |
|           "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
 | |
|           "lineno": 132,
 | |
|           "message": "IndexError: list index out of range"
 | |
|         },
 | |
|         "traceback": [
 | |
|           {
 | |
|             "path": "tests/verifications/openai_api/test_chat_completion.py",
 | |
|             "lineno": 132,
 | |
|             "message": "IndexError"
 | |
|           }
 | |
|         ],
 | |
|         "longrepr": "request = <FixtureRequest for <Function test_chat_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-earth]>>\nopenai_client = <openai.OpenAI object at 0x7f4274278310>\nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'earth', 'input': {'messages': [{'content': 'Which planet do humans live on?', 'role': 'user'}]}, 'output': 'Earth'}\n\n    @pytest.mark.parametrize(\n        \"case\",\n        chat_completion_test_cases[\"test_chat_basic\"][\"test_params\"][\"case\"],\n        ids=case_id_generator,\n    )\n    def test_chat_streaming_basic(request, openai_client, model, provider, verification_config, case):\n        test_name_base = get_base_test_name(request)\n        if should_skip_test(verification_config, provider, model, test_name_base):\n            pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n    \n        response = openai_client.chat.completions.create(\n            model=model,\n            messages=case[\"input\"][\"messages\"],\n            stream=True,\n        )\n        content = \"\"\n        for chunk in response:\n>           content += chunk.choices[0].delta.content or \"\"\nE           IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:132: IndexError"
 | |
|       },
 | |
|       "teardown": {
 | |
|         "duration": 0.0003773234784603119,
 | |
|         "outcome": "passed"
 | |
|       }
 | |
|     },
 | |
|     {
 | |
|       "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-saturn]",
 | |
|       "lineno": 114,
 | |
|       "outcome": "failed",
 | |
|       "keywords": [
 | |
|         "test_chat_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-saturn]",
 | |
|         "parametrize",
 | |
|         "pytestmark",
 | |
|         "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-saturn",
 | |
|         "test_chat_completion.py",
 | |
|         "openai_api",
 | |
|         "verifications",
 | |
|         "tests",
 | |
|         "llama-stack",
 | |
|         ""
 | |
|       ],
 | |
|       "metadata": {
 | |
|         "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
 | |
|         "case_id": "saturn"
 | |
|       },
 | |
|       "setup": {
 | |
|         "duration": 0.07092289440333843,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "call": {
 | |
|         "duration": 0.4124102909117937,
 | |
|         "outcome": "failed",
 | |
|         "crash": {
 | |
|           "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
 | |
|           "lineno": 132,
 | |
|           "message": "IndexError: list index out of range"
 | |
|         },
 | |
|         "traceback": [
 | |
|           {
 | |
|             "path": "tests/verifications/openai_api/test_chat_completion.py",
 | |
|             "lineno": 132,
 | |
|             "message": "IndexError"
 | |
|           }
 | |
|         ],
 | |
|         "longrepr": "request = <FixtureRequest for <Function test_chat_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-saturn]>>\nopenai_client = <openai.OpenAI object at 0x7f42743e7310>\nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'saturn', 'input': {'messages': [{'content': 'Which planet has rings around it with a name starting with letter S?', 'role': 'user'}]}, 'output': 'Saturn'}\n\n    @pytest.mark.parametrize(\n        \"case\",\n        chat_completion_test_cases[\"test_chat_basic\"][\"test_params\"][\"case\"],\n        ids=case_id_generator,\n    )\n    def test_chat_streaming_basic(request, openai_client, model, provider, verification_config, case):\n        test_name_base = get_base_test_name(request)\n        if should_skip_test(verification_config, provider, model, test_name_base):\n            pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n    \n        response = openai_client.chat.completions.create(\n            model=model,\n            messages=case[\"input\"][\"messages\"],\n            stream=True,\n        )\n        content = \"\"\n        for chunk in response:\n>           content += chunk.choices[0].delta.content or \"\"\nE           IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:132: IndexError"
 | |
|       },
 | |
|       "teardown": {
 | |
|         "duration": 0.0003204820677638054,
 | |
|         "outcome": "passed"
 | |
|       }
 | |
|     },
 | |
|     {
 | |
|       "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]",
 | |
|       "lineno": 138,
 | |
|       "outcome": "skipped",
 | |
|       "keywords": [
 | |
|         "test_chat_non_streaming_image[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]",
 | |
|         "parametrize",
 | |
|         "pytestmark",
 | |
|         "meta-llama/Llama-3.3-70B-Instruct-Turbo-case0",
 | |
|         "test_chat_completion.py",
 | |
|         "openai_api",
 | |
|         "verifications",
 | |
|         "tests",
 | |
|         "llama-stack",
 | |
|         ""
 | |
|       ],
 | |
|       "metadata": {
 | |
|         "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
 | |
|         "case_id": "case0"
 | |
|       },
 | |
|       "setup": {
 | |
|         "duration": 0.07159135863184929,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "call": {
 | |
|         "duration": 0.0002104705199599266,
 | |
|         "outcome": "skipped",
 | |
|         "longrepr": "('/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py', 147, 'Skipped: Skipping test_chat_non_streaming_image for model meta-llama/Llama-3.3-70B-Instruct-Turbo on provider together based on config.')"
 | |
|       },
 | |
|       "teardown": {
 | |
|         "duration": 0.0003354400396347046,
 | |
|         "outcome": "passed"
 | |
|       }
 | |
|     },
 | |
|     {
 | |
|       "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]",
 | |
|       "lineno": 138,
 | |
|       "outcome": "passed",
 | |
|       "keywords": [
 | |
|         "test_chat_non_streaming_image[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]",
 | |
|         "parametrize",
 | |
|         "pytestmark",
 | |
|         "meta-llama/Llama-4-Scout-17B-16E-Instruct-case0",
 | |
|         "test_chat_completion.py",
 | |
|         "openai_api",
 | |
|         "verifications",
 | |
|         "tests",
 | |
|         "llama-stack",
 | |
|         ""
 | |
|       ],
 | |
|       "metadata": {
 | |
|         "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
 | |
|         "case_id": "case0"
 | |
|       },
 | |
|       "setup": {
 | |
|         "duration": 0.0744061404839158,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "call": {
 | |
|         "duration": 2.2864254424348474,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "teardown": {
 | |
|         "duration": 0.000246487557888031,
 | |
|         "outcome": "passed"
 | |
|       }
 | |
|     },
 | |
|     {
 | |
|       "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]",
 | |
|       "lineno": 138,
 | |
|       "outcome": "passed",
 | |
|       "keywords": [
 | |
|         "test_chat_non_streaming_image[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]",
 | |
|         "parametrize",
 | |
|         "pytestmark",
 | |
|         "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0",
 | |
|         "test_chat_completion.py",
 | |
|         "openai_api",
 | |
|         "verifications",
 | |
|         "tests",
 | |
|         "llama-stack",
 | |
|         ""
 | |
|       ],
 | |
|       "metadata": {
 | |
|         "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
 | |
|         "case_id": "case0"
 | |
|       },
 | |
|       "setup": {
 | |
|         "duration": 0.07066962588578463,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "call": {
 | |
|         "duration": 4.47614302393049,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "teardown": {
 | |
|         "duration": 0.00034836214035749435,
 | |
|         "outcome": "passed"
 | |
|       }
 | |
|     },
 | |
|     {
 | |
|       "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]",
 | |
|       "lineno": 157,
 | |
|       "outcome": "skipped",
 | |
|       "keywords": [
 | |
|         "test_chat_streaming_image[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]",
 | |
|         "parametrize",
 | |
|         "pytestmark",
 | |
|         "meta-llama/Llama-3.3-70B-Instruct-Turbo-case0",
 | |
|         "test_chat_completion.py",
 | |
|         "openai_api",
 | |
|         "verifications",
 | |
|         "tests",
 | |
|         "llama-stack",
 | |
|         ""
 | |
|       ],
 | |
|       "metadata": {
 | |
|         "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
 | |
|         "case_id": "case0"
 | |
|       },
 | |
|       "setup": {
 | |
|         "duration": 0.09739464800804853,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "call": {
 | |
|         "duration": 0.0003191335126757622,
 | |
|         "outcome": "skipped",
 | |
|         "longrepr": "('/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py', 166, 'Skipped: Skipping test_chat_streaming_image for model meta-llama/Llama-3.3-70B-Instruct-Turbo on provider together based on config.')"
 | |
|       },
 | |
|       "teardown": {
 | |
|         "duration": 0.00026350561529397964,
 | |
|         "outcome": "passed"
 | |
|       }
 | |
|     },
 | |
|     {
 | |
|       "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]",
 | |
|       "lineno": 157,
 | |
|       "outcome": "failed",
 | |
|       "keywords": [
 | |
|         "test_chat_streaming_image[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]",
 | |
|         "parametrize",
 | |
|         "pytestmark",
 | |
|         "meta-llama/Llama-4-Scout-17B-16E-Instruct-case0",
 | |
|         "test_chat_completion.py",
 | |
|         "openai_api",
 | |
|         "verifications",
 | |
|         "tests",
 | |
|         "llama-stack",
 | |
|         ""
 | |
|       ],
 | |
|       "metadata": {
 | |
|         "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
 | |
|         "case_id": "case0"
 | |
|       },
 | |
|       "setup": {
 | |
|         "duration": 0.10561292432248592,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "call": {
 | |
|         "duration": 2.6175378002226353,
 | |
|         "outcome": "failed",
 | |
|         "crash": {
 | |
|           "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
 | |
|           "lineno": 175,
 | |
|           "message": "IndexError: list index out of range"
 | |
|         },
 | |
|         "traceback": [
 | |
|           {
 | |
|             "path": "tests/verifications/openai_api/test_chat_completion.py",
 | |
|             "lineno": 175,
 | |
|             "message": "IndexError"
 | |
|           }
 | |
|         ],
 | |
|         "longrepr": "request = <FixtureRequest for <Function test_chat_streaming_image[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]>>\nopenai_client = <openai.OpenAI object at 0x7f427415f430>\nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': [{'text': 'What is in this image?', 'type': 'text'}, {'image_url': {...}, 'type': 'image_url'}], 'role': 'user'}]}, 'output': 'llama'}\n\n    @pytest.mark.parametrize(\n        \"case\",\n        chat_completion_test_cases[\"test_chat_image\"][\"test_params\"][\"case\"],\n        ids=case_id_generator,\n    )\n    def test_chat_streaming_image(request, openai_client, model, provider, verification_config, case):\n        test_name_base = get_base_test_name(request)\n        if should_skip_test(verification_config, provider, model, test_name_base):\n            pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n    \n        response = openai_client.chat.completions.create(\n            model=model,\n            messages=case[\"input\"][\"messages\"],\n            stream=True,\n        )\n        content = \"\"\n        for chunk in response:\n>           content += chunk.choices[0].delta.content or \"\"\nE           IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:175: IndexError"
 | |
|       },
 | |
|       "teardown": {
 | |
|         "duration": 0.0003682933747768402,
 | |
|         "outcome": "passed"
 | |
|       }
 | |
|     },
 | |
|     {
 | |
|       "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]",
 | |
|       "lineno": 157,
 | |
|       "outcome": "failed",
 | |
|       "keywords": [
 | |
|         "test_chat_streaming_image[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]",
 | |
|         "parametrize",
 | |
|         "pytestmark",
 | |
|         "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0",
 | |
|         "test_chat_completion.py",
 | |
|         "openai_api",
 | |
|         "verifications",
 | |
|         "tests",
 | |
|         "llama-stack",
 | |
|         ""
 | |
|       ],
 | |
|       "metadata": {
 | |
|         "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
 | |
|         "case_id": "case0"
 | |
|       },
 | |
|       "setup": {
 | |
|         "duration": 0.07195662055164576,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "call": {
 | |
|         "duration": 3.2985631534829736,
 | |
|         "outcome": "failed",
 | |
|         "crash": {
 | |
|           "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
 | |
|           "lineno": 175,
 | |
|           "message": "IndexError: list index out of range"
 | |
|         },
 | |
|         "traceback": [
 | |
|           {
 | |
|             "path": "tests/verifications/openai_api/test_chat_completion.py",
 | |
|             "lineno": 175,
 | |
|             "message": "IndexError"
 | |
|           }
 | |
|         ],
 | |
|         "longrepr": "request = <FixtureRequest for <Function test_chat_streaming_image[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]>>\nopenai_client = <openai.OpenAI object at 0x7f42741c7550>\nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': [{'text': 'What is in this image?', 'type': 'text'}, {'image_url': {...}, 'type': 'image_url'}], 'role': 'user'}]}, 'output': 'llama'}\n\n    @pytest.mark.parametrize(\n        \"case\",\n        chat_completion_test_cases[\"test_chat_image\"][\"test_params\"][\"case\"],\n        ids=case_id_generator,\n    )\n    def test_chat_streaming_image(request, openai_client, model, provider, verification_config, case):\n        test_name_base = get_base_test_name(request)\n        if should_skip_test(verification_config, provider, model, test_name_base):\n            pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n    \n        response = openai_client.chat.completions.create(\n            model=model,\n            messages=case[\"input\"][\"messages\"],\n            stream=True,\n        )\n        content = \"\"\n        for chunk in response:\n>           content += chunk.choices[0].delta.content or \"\"\nE           IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:175: IndexError"
 | |
|       },
 | |
|       "teardown": {
 | |
|         "duration": 0.0003777453675866127,
 | |
|         "outcome": "passed"
 | |
|       }
 | |
|     },
 | |
|     {
 | |
|       "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-calendar]",
 | |
|       "lineno": 181,
 | |
|       "outcome": "passed",
 | |
|       "keywords": [
 | |
|         "test_chat_non_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-calendar]",
 | |
|         "parametrize",
 | |
|         "pytestmark",
 | |
|         "meta-llama/Llama-3.3-70B-Instruct-Turbo-calendar",
 | |
|         "test_chat_completion.py",
 | |
|         "openai_api",
 | |
|         "verifications",
 | |
|         "tests",
 | |
|         "llama-stack",
 | |
|         ""
 | |
|       ],
 | |
|       "metadata": {
 | |
|         "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
 | |
|         "case_id": "calendar"
 | |
|       },
 | |
|       "setup": {
 | |
|         "duration": 0.0733196372166276,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "call": {
 | |
|         "duration": 0.40959454514086246,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "teardown": {
 | |
|         "duration": 0.00029125437140464783,
 | |
|         "outcome": "passed"
 | |
|       }
 | |
|     },
 | |
|     {
 | |
|       "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-math]",
 | |
|       "lineno": 181,
 | |
|       "outcome": "passed",
 | |
|       "keywords": [
 | |
|         "test_chat_non_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-math]",
 | |
|         "parametrize",
 | |
|         "pytestmark",
 | |
|         "meta-llama/Llama-3.3-70B-Instruct-Turbo-math",
 | |
|         "test_chat_completion.py",
 | |
|         "openai_api",
 | |
|         "verifications",
 | |
|         "tests",
 | |
|         "llama-stack",
 | |
|         ""
 | |
|       ],
 | |
|       "metadata": {
 | |
|         "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
 | |
|         "case_id": "math"
 | |
|       },
 | |
|       "setup": {
 | |
|         "duration": 0.07248916011303663,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "call": {
 | |
|         "duration": 3.498455540277064,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "teardown": {
 | |
|         "duration": 0.00023921672254800797,
 | |
|         "outcome": "passed"
 | |
|       }
 | |
|     },
 | |
|     {
 | |
|       "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-calendar]",
 | |
|       "lineno": 181,
 | |
|       "outcome": "passed",
 | |
|       "keywords": [
 | |
|         "test_chat_non_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-calendar]",
 | |
|         "parametrize",
 | |
|         "pytestmark",
 | |
|         "meta-llama/Llama-4-Scout-17B-16E-Instruct-calendar",
 | |
|         "test_chat_completion.py",
 | |
|         "openai_api",
 | |
|         "verifications",
 | |
|         "tests",
 | |
|         "llama-stack",
 | |
|         ""
 | |
|       ],
 | |
|       "metadata": {
 | |
|         "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
 | |
|         "case_id": "calendar"
 | |
|       },
 | |
|       "setup": {
 | |
|         "duration": 0.07911352813243866,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "call": {
 | |
|         "duration": 0.6717434097081423,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "teardown": {
 | |
|         "duration": 0.00025916099548339844,
 | |
|         "outcome": "passed"
 | |
|       }
 | |
|     },
 | |
|     {
 | |
|       "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-math]",
 | |
|       "lineno": 181,
 | |
|       "outcome": "passed",
 | |
|       "keywords": [
 | |
|         "test_chat_non_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-math]",
 | |
|         "parametrize",
 | |
|         "pytestmark",
 | |
|         "meta-llama/Llama-4-Scout-17B-16E-Instruct-math",
 | |
|         "test_chat_completion.py",
 | |
|         "openai_api",
 | |
|         "verifications",
 | |
|         "tests",
 | |
|         "llama-stack",
 | |
|         ""
 | |
|       ],
 | |
|       "metadata": {
 | |
|         "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
 | |
|         "case_id": "math"
 | |
|       },
 | |
|       "setup": {
 | |
|         "duration": 0.07156322989612818,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "call": {
 | |
|         "duration": 3.698870756663382,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "teardown": {
 | |
|         "duration": 0.0002654632553458214,
 | |
|         "outcome": "passed"
 | |
|       }
 | |
|     },
 | |
|     {
 | |
|       "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-calendar]",
 | |
|       "lineno": 181,
 | |
|       "outcome": "passed",
 | |
|       "keywords": [
 | |
|         "test_chat_non_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-calendar]",
 | |
|         "parametrize",
 | |
|         "pytestmark",
 | |
|         "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-calendar",
 | |
|         "test_chat_completion.py",
 | |
|         "openai_api",
 | |
|         "verifications",
 | |
|         "tests",
 | |
|         "llama-stack",
 | |
|         ""
 | |
|       ],
 | |
|       "metadata": {
 | |
|         "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
 | |
|         "case_id": "calendar"
 | |
|       },
 | |
|       "setup": {
 | |
|         "duration": 0.07457748707383871,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "call": {
 | |
|         "duration": 0.8891718471422791,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "teardown": {
 | |
|         "duration": 0.0002395138144493103,
 | |
|         "outcome": "passed"
 | |
|       }
 | |
|     },
 | |
|     {
 | |
|       "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-math]",
 | |
|       "lineno": 181,
 | |
|       "outcome": "passed",
 | |
|       "keywords": [
 | |
|         "test_chat_non_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-math]",
 | |
|         "parametrize",
 | |
|         "pytestmark",
 | |
|         "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-math",
 | |
|         "test_chat_completion.py",
 | |
|         "openai_api",
 | |
|         "verifications",
 | |
|         "tests",
 | |
|         "llama-stack",
 | |
|         ""
 | |
|       ],
 | |
|       "metadata": {
 | |
|         "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
 | |
|         "case_id": "math"
 | |
|       },
 | |
|       "setup": {
 | |
|         "duration": 0.07155069429427385,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "call": {
 | |
|         "duration": 3.276700599119067,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "teardown": {
 | |
|         "duration": 0.0002568913623690605,
 | |
|         "outcome": "passed"
 | |
|       }
 | |
|     },
 | |
|     {
 | |
|       "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-calendar]",
 | |
|       "lineno": 204,
 | |
|       "outcome": "passed",
 | |
|       "keywords": [
 | |
|         "test_chat_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-calendar]",
 | |
|         "parametrize",
 | |
|         "pytestmark",
 | |
|         "meta-llama/Llama-3.3-70B-Instruct-Turbo-calendar",
 | |
|         "test_chat_completion.py",
 | |
|         "openai_api",
 | |
|         "verifications",
 | |
|         "tests",
 | |
|         "llama-stack",
 | |
|         ""
 | |
|       ],
 | |
|       "metadata": {
 | |
|         "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
 | |
|         "case_id": "calendar"
 | |
|       },
 | |
|       "setup": {
 | |
|         "duration": 0.07365360390394926,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "call": {
 | |
|         "duration": 0.7638470390811563,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "teardown": {
 | |
|         "duration": 0.00027653202414512634,
 | |
|         "outcome": "passed"
 | |
|       }
 | |
|     },
 | |
|     {
 | |
|       "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-math]",
 | |
|       "lineno": 204,
 | |
|       "outcome": "passed",
 | |
|       "keywords": [
 | |
|         "test_chat_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-math]",
 | |
|         "parametrize",
 | |
|         "pytestmark",
 | |
|         "meta-llama/Llama-3.3-70B-Instruct-Turbo-math",
 | |
|         "test_chat_completion.py",
 | |
|         "openai_api",
 | |
|         "verifications",
 | |
|         "tests",
 | |
|         "llama-stack",
 | |
|         ""
 | |
|       ],
 | |
|       "metadata": {
 | |
|         "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
 | |
|         "case_id": "math"
 | |
|       },
 | |
|       "setup": {
 | |
|         "duration": 0.07424602191895247,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "call": {
 | |
|         "duration": 3.622116087935865,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "teardown": {
 | |
|         "duration": 0.0002861013635993004,
 | |
|         "outcome": "passed"
 | |
|       }
 | |
|     },
 | |
|     {
 | |
|       "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-calendar]",
 | |
|       "lineno": 204,
 | |
|       "outcome": "failed",
 | |
|       "keywords": [
 | |
|         "test_chat_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-calendar]",
 | |
|         "parametrize",
 | |
|         "pytestmark",
 | |
|         "meta-llama/Llama-4-Scout-17B-16E-Instruct-calendar",
 | |
|         "test_chat_completion.py",
 | |
|         "openai_api",
 | |
|         "verifications",
 | |
|         "tests",
 | |
|         "llama-stack",
 | |
|         ""
 | |
|       ],
 | |
|       "metadata": {
 | |
|         "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
 | |
|         "case_id": "calendar"
 | |
|       },
 | |
|       "setup": {
 | |
|         "duration": 0.07192372716963291,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "call": {
 | |
|         "duration": 0.5049019353464246,
 | |
|         "outcome": "failed",
 | |
|         "crash": {
 | |
|           "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
 | |
|           "lineno": 223,
 | |
|           "message": "IndexError: list index out of range"
 | |
|         },
 | |
|         "traceback": [
 | |
|           {
 | |
|             "path": "tests/verifications/openai_api/test_chat_completion.py",
 | |
|             "lineno": 223,
 | |
|             "message": "IndexError"
 | |
|           }
 | |
|         ],
 | |
|         "longrepr": "request = <FixtureRequest for <Function test_chat_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-calendar]>>\nopenai_client = <openai.OpenAI object at 0x7f4274178c10>\nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'calendar', 'input': {'messages': [{'content': 'Extract the event information.', 'role': 'system'}, {'cont...articipants'], 'title': 'CalendarEvent', 'type': 'object'}}, 'type': 'json_schema'}}, 'output': 'valid_calendar_event'}\n\n    @pytest.mark.parametrize(\n        \"case\",\n        chat_completion_test_cases[\"test_chat_structured_output\"][\"test_params\"][\"case\"],\n        ids=case_id_generator,\n    )\n    def test_chat_streaming_structured_output(request, openai_client, model, provider, verification_config, case):\n        test_name_base = get_base_test_name(request)\n        if should_skip_test(verification_config, provider, model, test_name_base):\n            pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n    \n        response = openai_client.chat.completions.create(\n            model=model,\n            messages=case[\"input\"][\"messages\"],\n            response_format=case[\"input\"][\"response_format\"],\n            stream=True,\n        )\n        maybe_json_content = \"\"\n        for chunk in response:\n>           maybe_json_content += chunk.choices[0].delta.content or \"\"\nE           IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:223: IndexError"
 | |
|       },
 | |
|       "teardown": {
 | |
|         "duration": 0.00036794692277908325,
 | |
|         "outcome": "passed"
 | |
|       }
 | |
|     },
 | |
|     {
 | |
|       "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-math]",
 | |
|       "lineno": 204,
 | |
|       "outcome": "failed",
 | |
|       "keywords": [
 | |
|         "test_chat_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-math]",
 | |
|         "parametrize",
 | |
|         "pytestmark",
 | |
|         "meta-llama/Llama-4-Scout-17B-16E-Instruct-math",
 | |
|         "test_chat_completion.py",
 | |
|         "openai_api",
 | |
|         "verifications",
 | |
|         "tests",
 | |
|         "llama-stack",
 | |
|         ""
 | |
|       ],
 | |
|       "metadata": {
 | |
|         "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
 | |
|         "case_id": "math"
 | |
|       },
 | |
|       "setup": {
 | |
|         "duration": 0.07304532174021006,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "call": {
 | |
|         "duration": 2.961389934644103,
 | |
|         "outcome": "failed",
 | |
|         "crash": {
 | |
|           "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
 | |
|           "lineno": 223,
 | |
|           "message": "IndexError: list index out of range"
 | |
|         },
 | |
|         "traceback": [
 | |
|           {
 | |
|             "path": "tests/verifications/openai_api/test_chat_completion.py",
 | |
|             "lineno": 223,
 | |
|             "message": "IndexError"
 | |
|           }
 | |
|         ],
 | |
|         "longrepr": "request = <FixtureRequest for <Function test_chat_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-math]>>\nopenai_client = <openai.OpenAI object at 0x7f42741786d0>\nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'math', 'input': {'messages': [{'content': 'You are a helpful math tutor. Guide the user through the solut... ['steps', 'final_answer'], 'title': 'MathReasoning', ...}}, 'type': 'json_schema'}}, 'output': 'valid_math_reasoning'}\n\n    @pytest.mark.parametrize(\n        \"case\",\n        chat_completion_test_cases[\"test_chat_structured_output\"][\"test_params\"][\"case\"],\n        ids=case_id_generator,\n    )\n    def test_chat_streaming_structured_output(request, openai_client, model, provider, verification_config, case):\n        test_name_base = get_base_test_name(request)\n        if should_skip_test(verification_config, provider, model, test_name_base):\n            pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n    \n        response = openai_client.chat.completions.create(\n            model=model,\n            messages=case[\"input\"][\"messages\"],\n            response_format=case[\"input\"][\"response_format\"],\n            stream=True,\n        )\n        maybe_json_content = \"\"\n        for chunk in response:\n>           maybe_json_content += chunk.choices[0].delta.content or \"\"\nE           IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:223: IndexError"
 | |
|       },
 | |
|       "teardown": {
 | |
|         "duration": 0.0003312695771455765,
 | |
|         "outcome": "passed"
 | |
|       }
 | |
|     },
 | |
|     {
 | |
|       "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-calendar]",
 | |
|       "lineno": 204,
 | |
|       "outcome": "failed",
 | |
|       "keywords": [
 | |
|         "test_chat_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-calendar]",
 | |
|         "parametrize",
 | |
|         "pytestmark",
 | |
|         "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-calendar",
 | |
|         "test_chat_completion.py",
 | |
|         "openai_api",
 | |
|         "verifications",
 | |
|         "tests",
 | |
|         "llama-stack",
 | |
|         ""
 | |
|       ],
 | |
|       "metadata": {
 | |
|         "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
 | |
|         "case_id": "calendar"
 | |
|       },
 | |
|       "setup": {
 | |
|         "duration": 0.07350922282785177,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "call": {
 | |
|         "duration": 0.6764275450259447,
 | |
|         "outcome": "failed",
 | |
|         "crash": {
 | |
|           "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
 | |
|           "lineno": 223,
 | |
|           "message": "IndexError: list index out of range"
 | |
|         },
 | |
|         "traceback": [
 | |
|           {
 | |
|             "path": "tests/verifications/openai_api/test_chat_completion.py",
 | |
|             "lineno": 223,
 | |
|             "message": "IndexError"
 | |
|           }
 | |
|         ],
 | |
|         "longrepr": "request = <FixtureRequest for <Function test_chat_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-calendar]>>\nopenai_client = <openai.OpenAI object at 0x7f427420ff40>\nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'calendar', 'input': {'messages': [{'content': 'Extract the event information.', 'role': 'system'}, {'cont...articipants'], 'title': 'CalendarEvent', 'type': 'object'}}, 'type': 'json_schema'}}, 'output': 'valid_calendar_event'}\n\n    @pytest.mark.parametrize(\n        \"case\",\n        chat_completion_test_cases[\"test_chat_structured_output\"][\"test_params\"][\"case\"],\n        ids=case_id_generator,\n    )\n    def test_chat_streaming_structured_output(request, openai_client, model, provider, verification_config, case):\n        test_name_base = get_base_test_name(request)\n        if should_skip_test(verification_config, provider, model, test_name_base):\n            pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n    \n        response = openai_client.chat.completions.create(\n            model=model,\n            messages=case[\"input\"][\"messages\"],\n            response_format=case[\"input\"][\"response_format\"],\n            stream=True,\n        )\n        maybe_json_content = \"\"\n        for chunk in response:\n>           maybe_json_content += chunk.choices[0].delta.content or \"\"\nE           IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:223: IndexError"
 | |
|       },
 | |
|       "teardown": {
 | |
|         "duration": 0.0003826189786195755,
 | |
|         "outcome": "passed"
 | |
|       }
 | |
|     },
 | |
|     {
 | |
|       "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-math]",
 | |
|       "lineno": 204,
 | |
|       "outcome": "failed",
 | |
|       "keywords": [
 | |
|         "test_chat_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-math]",
 | |
|         "parametrize",
 | |
|         "pytestmark",
 | |
|         "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-math",
 | |
|         "test_chat_completion.py",
 | |
|         "openai_api",
 | |
|         "verifications",
 | |
|         "tests",
 | |
|         "llama-stack",
 | |
|         ""
 | |
|       ],
 | |
|       "metadata": {
 | |
|         "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
 | |
|         "case_id": "math"
 | |
|       },
 | |
|       "setup": {
 | |
|         "duration": 0.07295230869203806,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "call": {
 | |
|         "duration": 10.689278944395483,
 | |
|         "outcome": "failed",
 | |
|         "crash": {
 | |
|           "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
 | |
|           "lineno": 223,
 | |
|           "message": "IndexError: list index out of range"
 | |
|         },
 | |
|         "traceback": [
 | |
|           {
 | |
|             "path": "tests/verifications/openai_api/test_chat_completion.py",
 | |
|             "lineno": 223,
 | |
|             "message": "IndexError"
 | |
|           }
 | |
|         ],
 | |
|         "longrepr": "request = <FixtureRequest for <Function test_chat_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-math]>>\nopenai_client = <openai.OpenAI object at 0x7f427415eb60>\nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'math', 'input': {'messages': [{'content': 'You are a helpful math tutor. Guide the user through the solut... ['steps', 'final_answer'], 'title': 'MathReasoning', ...}}, 'type': 'json_schema'}}, 'output': 'valid_math_reasoning'}\n\n    @pytest.mark.parametrize(\n        \"case\",\n        chat_completion_test_cases[\"test_chat_structured_output\"][\"test_params\"][\"case\"],\n        ids=case_id_generator,\n    )\n    def test_chat_streaming_structured_output(request, openai_client, model, provider, verification_config, case):\n        test_name_base = get_base_test_name(request)\n        if should_skip_test(verification_config, provider, model, test_name_base):\n            pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n    \n        response = openai_client.chat.completions.create(\n            model=model,\n            messages=case[\"input\"][\"messages\"],\n            response_format=case[\"input\"][\"response_format\"],\n            stream=True,\n        )\n        maybe_json_content = \"\"\n        for chunk in response:\n>           maybe_json_content += chunk.choices[0].delta.content or \"\"\nE           IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:223: IndexError"
 | |
|       },
 | |
|       "teardown": {
 | |
|         "duration": 0.0004014279693365097,
 | |
|         "outcome": "passed"
 | |
|       }
 | |
|     },
 | |
|     {
 | |
|       "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]",
 | |
|       "lineno": 226,
 | |
|       "outcome": "passed",
 | |
|       "keywords": [
 | |
|         "test_chat_non_streaming_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]",
 | |
|         "parametrize",
 | |
|         "pytestmark",
 | |
|         "meta-llama/Llama-3.3-70B-Instruct-Turbo-case0",
 | |
|         "test_chat_completion.py",
 | |
|         "openai_api",
 | |
|         "verifications",
 | |
|         "tests",
 | |
|         "llama-stack",
 | |
|         ""
 | |
|       ],
 | |
|       "metadata": {
 | |
|         "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
 | |
|         "case_id": "case0"
 | |
|       },
 | |
|       "setup": {
 | |
|         "duration": 0.09202722646296024,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "call": {
 | |
|         "duration": 0.8140280386433005,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "teardown": {
 | |
|         "duration": 0.0003595082089304924,
 | |
|         "outcome": "passed"
 | |
|       }
 | |
|     },
 | |
|     {
 | |
|       "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]",
 | |
|       "lineno": 226,
 | |
|       "outcome": "passed",
 | |
|       "keywords": [
 | |
|         "test_chat_non_streaming_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]",
 | |
|         "parametrize",
 | |
|         "pytestmark",
 | |
|         "meta-llama/Llama-4-Scout-17B-16E-Instruct-case0",
 | |
|         "test_chat_completion.py",
 | |
|         "openai_api",
 | |
|         "verifications",
 | |
|         "tests",
 | |
|         "llama-stack",
 | |
|         ""
 | |
|       ],
 | |
|       "metadata": {
 | |
|         "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
 | |
|         "case_id": "case0"
 | |
|       },
 | |
|       "setup": {
 | |
|         "duration": 0.09484888892620802,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "call": {
 | |
|         "duration": 0.3706049248576164,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "teardown": {
 | |
|         "duration": 0.0003290809690952301,
 | |
|         "outcome": "passed"
 | |
|       }
 | |
|     },
 | |
|     {
 | |
|       "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]",
 | |
|       "lineno": 226,
 | |
|       "outcome": "passed",
 | |
|       "keywords": [
 | |
|         "test_chat_non_streaming_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]",
 | |
|         "parametrize",
 | |
|         "pytestmark",
 | |
|         "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0",
 | |
|         "test_chat_completion.py",
 | |
|         "openai_api",
 | |
|         "verifications",
 | |
|         "tests",
 | |
|         "llama-stack",
 | |
|         ""
 | |
|       ],
 | |
|       "metadata": {
 | |
|         "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
 | |
|         "case_id": "case0"
 | |
|       },
 | |
|       "setup": {
 | |
|         "duration": 0.10521113499999046,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "call": {
 | |
|         "duration": 0.36842701490968466,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "teardown": {
 | |
|         "duration": 0.00031410157680511475,
 | |
|         "outcome": "passed"
 | |
|       }
 | |
|     },
 | |
|     {
 | |
|       "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]",
 | |
|       "lineno": 250,
 | |
|       "outcome": "passed",
 | |
|       "keywords": [
 | |
|         "test_chat_streaming_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]",
 | |
|         "parametrize",
 | |
|         "pytestmark",
 | |
|         "meta-llama/Llama-3.3-70B-Instruct-Turbo-case0",
 | |
|         "test_chat_completion.py",
 | |
|         "openai_api",
 | |
|         "verifications",
 | |
|         "tests",
 | |
|         "llama-stack",
 | |
|         ""
 | |
|       ],
 | |
|       "metadata": {
 | |
|         "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
 | |
|         "case_id": "case0"
 | |
|       },
 | |
|       "setup": {
 | |
|         "duration": 0.10422383341938257,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "call": {
 | |
|         "duration": 0.6454980997368693,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "teardown": {
 | |
|         "duration": 0.0002997415140271187,
 | |
|         "outcome": "passed"
 | |
|       }
 | |
|     },
 | |
|     {
 | |
|       "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]",
 | |
|       "lineno": 250,
 | |
|       "outcome": "failed",
 | |
|       "keywords": [
 | |
|         "test_chat_streaming_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]",
 | |
|         "parametrize",
 | |
|         "pytestmark",
 | |
|         "meta-llama/Llama-4-Scout-17B-16E-Instruct-case0",
 | |
|         "test_chat_completion.py",
 | |
|         "openai_api",
 | |
|         "verifications",
 | |
|         "tests",
 | |
|         "llama-stack",
 | |
|         ""
 | |
|       ],
 | |
|       "metadata": {
 | |
|         "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
 | |
|         "case_id": "case0"
 | |
|       },
 | |
|       "setup": {
 | |
|         "duration": 0.09408890828490257,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "call": {
 | |
|         "duration": 0.36066764686256647,
 | |
|         "outcome": "failed",
 | |
|         "crash": {
 | |
|           "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
 | |
|           "lineno": 688,
 | |
|           "message": "IndexError: list index out of range"
 | |
|         },
 | |
|         "traceback": [
 | |
|           {
 | |
|             "path": "tests/verifications/openai_api/test_chat_completion.py",
 | |
|             "lineno": 268,
 | |
|             "message": ""
 | |
|           },
 | |
|           {
 | |
|             "path": "tests/verifications/openai_api/test_chat_completion.py",
 | |
|             "lineno": 688,
 | |
|             "message": "IndexError"
 | |
|           }
 | |
|         ],
 | |
|         "longrepr": "request = <FixtureRequest for <Function test_chat_streaming_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]>>\nopenai_client = <openai.OpenAI object at 0x7f42741c44f0>\nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n    @pytest.mark.parametrize(\n        \"case\",\n        chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"],\n        ids=case_id_generator,\n    )\n    def test_chat_streaming_tool_calling(request, openai_client, model, provider, verification_config, case):\n        test_name_base = get_base_test_name(request)\n        if should_skip_test(verification_config, provider, model, test_name_base):\n            pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n    \n        stream = openai_client.chat.completions.create(\n            model=model,\n            messages=case[\"input\"][\"messages\"],\n            tools=case[\"input\"][\"tools\"],\n            stream=True,\n        )\n    \n>       _, tool_calls_buffer = _accumulate_streaming_tool_calls(stream)\n\ntests/verifications/openai_api/test_chat_completion.py:268: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\nstream = <openai.Stream object at 0x7f4274268760>\n\n    def _accumulate_streaming_tool_calls(stream):\n        \"\"\"Accumulates tool calls and content from a streaming ChatCompletion response.\"\"\"\n        tool_calls_buffer = {}\n        current_id = None\n        full_content = \"\"  # Initialize content accumulator\n        # Process streaming chunks\n        for chunk in stream:\n>           choice = chunk.choices[0]\nE           IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:688: IndexError"
 | |
|       },
 | |
|       "teardown": {
 | |
|         "duration": 0.00035039614886045456,
 | |
|         "outcome": "passed"
 | |
|       }
 | |
|     },
 | |
|     {
 | |
|       "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]",
 | |
|       "lineno": 250,
 | |
|       "outcome": "failed",
 | |
|       "keywords": [
 | |
|         "test_chat_streaming_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]",
 | |
|         "parametrize",
 | |
|         "pytestmark",
 | |
|         "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0",
 | |
|         "test_chat_completion.py",
 | |
|         "openai_api",
 | |
|         "verifications",
 | |
|         "tests",
 | |
|         "llama-stack",
 | |
|         ""
 | |
|       ],
 | |
|       "metadata": {
 | |
|         "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
 | |
|         "case_id": "case0"
 | |
|       },
 | |
|       "setup": {
 | |
|         "duration": 0.07232134602963924,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "call": {
 | |
|         "duration": 0.4706049496307969,
 | |
|         "outcome": "failed",
 | |
|         "crash": {
 | |
|           "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
 | |
|           "lineno": 688,
 | |
|           "message": "IndexError: list index out of range"
 | |
|         },
 | |
|         "traceback": [
 | |
|           {
 | |
|             "path": "tests/verifications/openai_api/test_chat_completion.py",
 | |
|             "lineno": 268,
 | |
|             "message": ""
 | |
|           },
 | |
|           {
 | |
|             "path": "tests/verifications/openai_api/test_chat_completion.py",
 | |
|             "lineno": 688,
 | |
|             "message": "IndexError"
 | |
|           }
 | |
|         ],
 | |
|         "longrepr": "request = <FixtureRequest for <Function test_chat_streaming_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]>>\nopenai_client = <openai.OpenAI object at 0x7f427417ee60>\nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n    @pytest.mark.parametrize(\n        \"case\",\n        chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"],\n        ids=case_id_generator,\n    )\n    def test_chat_streaming_tool_calling(request, openai_client, model, provider, verification_config, case):\n        test_name_base = get_base_test_name(request)\n        if should_skip_test(verification_config, provider, model, test_name_base):\n            pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n    \n        stream = openai_client.chat.completions.create(\n            model=model,\n            messages=case[\"input\"][\"messages\"],\n            tools=case[\"input\"][\"tools\"],\n            stream=True,\n        )\n    \n>       _, tool_calls_buffer = _accumulate_streaming_tool_calls(stream)\n\ntests/verifications/openai_api/test_chat_completion.py:268: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\nstream = <openai.Stream object at 0x7f427416d960>\n\n    def _accumulate_streaming_tool_calls(stream):\n        \"\"\"Accumulates tool calls and content from a streaming ChatCompletion response.\"\"\"\n        tool_calls_buffer = {}\n        current_id = None\n        full_content = \"\"  # Initialize content accumulator\n        # Process streaming chunks\n        for chunk in stream:\n>           choice = chunk.choices[0]\nE           IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:688: IndexError"
 | |
|       },
 | |
|       "teardown": {
 | |
|         "duration": 0.00039384420961141586,
 | |
|         "outcome": "passed"
 | |
|       }
 | |
|     },
 | |
|     {
 | |
|       "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_required[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]",
 | |
|       "lineno": 278,
 | |
|       "outcome": "passed",
 | |
|       "keywords": [
 | |
|         "test_chat_non_streaming_tool_choice_required[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]",
 | |
|         "parametrize",
 | |
|         "pytestmark",
 | |
|         "meta-llama/Llama-3.3-70B-Instruct-Turbo-case0",
 | |
|         "test_chat_completion.py",
 | |
|         "openai_api",
 | |
|         "verifications",
 | |
|         "tests",
 | |
|         "llama-stack",
 | |
|         ""
 | |
|       ],
 | |
|       "metadata": {
 | |
|         "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
 | |
|         "case_id": "case0"
 | |
|       },
 | |
|       "setup": {
 | |
|         "duration": 0.07465469185262918,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "call": {
 | |
|         "duration": 0.4374591317027807,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "teardown": {
 | |
|         "duration": 0.0003099888563156128,
 | |
|         "outcome": "passed"
 | |
|       }
 | |
|     },
 | |
|     {
 | |
|       "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_required[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]",
 | |
|       "lineno": 278,
 | |
|       "outcome": "passed",
 | |
|       "keywords": [
 | |
|         "test_chat_non_streaming_tool_choice_required[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]",
 | |
|         "parametrize",
 | |
|         "pytestmark",
 | |
|         "meta-llama/Llama-4-Scout-17B-16E-Instruct-case0",
 | |
|         "test_chat_completion.py",
 | |
|         "openai_api",
 | |
|         "verifications",
 | |
|         "tests",
 | |
|         "llama-stack",
 | |
|         ""
 | |
|       ],
 | |
|       "metadata": {
 | |
|         "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
 | |
|         "case_id": "case0"
 | |
|       },
 | |
|       "setup": {
 | |
|         "duration": 0.07351493183523417,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "call": {
 | |
|         "duration": 0.4368853671476245,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "teardown": {
 | |
|         "duration": 0.00026369933038949966,
 | |
|         "outcome": "passed"
 | |
|       }
 | |
|     },
 | |
|     {
 | |
|       "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_required[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]",
 | |
|       "lineno": 278,
 | |
|       "outcome": "passed",
 | |
|       "keywords": [
 | |
|         "test_chat_non_streaming_tool_choice_required[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]",
 | |
|         "parametrize",
 | |
|         "pytestmark",
 | |
|         "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0",
 | |
|         "test_chat_completion.py",
 | |
|         "openai_api",
 | |
|         "verifications",
 | |
|         "tests",
 | |
|         "llama-stack",
 | |
|         ""
 | |
|       ],
 | |
|       "metadata": {
 | |
|         "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
 | |
|         "case_id": "case0"
 | |
|       },
 | |
|       "setup": {
 | |
|         "duration": 0.07258845027536154,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "call": {
 | |
|         "duration": 0.940508272498846,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "teardown": {
 | |
|         "duration": 0.00032961275428533554,
 | |
|         "outcome": "passed"
 | |
|       }
 | |
|     },
 | |
|     {
 | |
|       "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_required[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]",
 | |
|       "lineno": 302,
 | |
|       "outcome": "passed",
 | |
|       "keywords": [
 | |
|         "test_chat_streaming_tool_choice_required[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]",
 | |
|         "parametrize",
 | |
|         "pytestmark",
 | |
|         "meta-llama/Llama-3.3-70B-Instruct-Turbo-case0",
 | |
|         "test_chat_completion.py",
 | |
|         "openai_api",
 | |
|         "verifications",
 | |
|         "tests",
 | |
|         "llama-stack",
 | |
|         ""
 | |
|       ],
 | |
|       "metadata": {
 | |
|         "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
 | |
|         "case_id": "case0"
 | |
|       },
 | |
|       "setup": {
 | |
|         "duration": 0.07273276895284653,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "call": {
 | |
|         "duration": 0.6150273764505982,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "teardown": {
 | |
|         "duration": 0.0002876110374927521,
 | |
|         "outcome": "passed"
 | |
|       }
 | |
|     },
 | |
|     {
 | |
|       "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_required[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]",
 | |
|       "lineno": 302,
 | |
|       "outcome": "failed",
 | |
|       "keywords": [
 | |
|         "test_chat_streaming_tool_choice_required[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]",
 | |
|         "parametrize",
 | |
|         "pytestmark",
 | |
|         "meta-llama/Llama-4-Scout-17B-16E-Instruct-case0",
 | |
|         "test_chat_completion.py",
 | |
|         "openai_api",
 | |
|         "verifications",
 | |
|         "tests",
 | |
|         "llama-stack",
 | |
|         ""
 | |
|       ],
 | |
|       "metadata": {
 | |
|         "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
 | |
|         "case_id": "case0"
 | |
|       },
 | |
|       "setup": {
 | |
|         "duration": 0.07505382597446442,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "call": {
 | |
|         "duration": 0.5026597818359733,
 | |
|         "outcome": "failed",
 | |
|         "crash": {
 | |
|           "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
 | |
|           "lineno": 688,
 | |
|           "message": "IndexError: list index out of range"
 | |
|         },
 | |
|         "traceback": [
 | |
|           {
 | |
|             "path": "tests/verifications/openai_api/test_chat_completion.py",
 | |
|             "lineno": 321,
 | |
|             "message": ""
 | |
|           },
 | |
|           {
 | |
|             "path": "tests/verifications/openai_api/test_chat_completion.py",
 | |
|             "lineno": 688,
 | |
|             "message": "IndexError"
 | |
|           }
 | |
|         ],
 | |
|         "longrepr": "request = <FixtureRequest for <Function test_chat_streaming_tool_choice_required[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]>>\nopenai_client = <openai.OpenAI object at 0x7f42742aa050>\nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n    @pytest.mark.parametrize(\n        \"case\",\n        chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"],  # Reusing existing case for now\n        ids=case_id_generator,\n    )\n    def test_chat_streaming_tool_choice_required(request, openai_client, model, provider, verification_config, case):\n        test_name_base = get_base_test_name(request)\n        if should_skip_test(verification_config, provider, model, test_name_base):\n            pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n    \n        stream = openai_client.chat.completions.create(\n            model=model,\n            messages=case[\"input\"][\"messages\"],\n            tools=case[\"input\"][\"tools\"],\n            tool_choice=\"required\",  # Force tool call\n            stream=True,\n        )\n    \n>       _, tool_calls_buffer = _accumulate_streaming_tool_calls(stream)\n\ntests/verifications/openai_api/test_chat_completion.py:321: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\nstream = <openai.Stream object at 0x7f42741e9810>\n\n    def _accumulate_streaming_tool_calls(stream):\n        \"\"\"Accumulates tool calls and content from a streaming ChatCompletion response.\"\"\"\n        tool_calls_buffer = {}\n        current_id = None\n        full_content = \"\"  # Initialize content accumulator\n        # Process streaming chunks\n        for chunk in stream:\n>           choice = chunk.choices[0]\nE           IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:688: IndexError"
 | |
|       },
 | |
|       "teardown": {
 | |
|         "duration": 0.0003487151116132736,
 | |
|         "outcome": "passed"
 | |
|       }
 | |
|     },
 | |
|     {
 | |
|       "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_required[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]",
 | |
|       "lineno": 302,
 | |
|       "outcome": "failed",
 | |
|       "keywords": [
 | |
|         "test_chat_streaming_tool_choice_required[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]",
 | |
|         "parametrize",
 | |
|         "pytestmark",
 | |
|         "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0",
 | |
|         "test_chat_completion.py",
 | |
|         "openai_api",
 | |
|         "verifications",
 | |
|         "tests",
 | |
|         "llama-stack",
 | |
|         ""
 | |
|       ],
 | |
|       "metadata": {
 | |
|         "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
 | |
|         "case_id": "case0"
 | |
|       },
 | |
|       "setup": {
 | |
|         "duration": 0.07343385275453329,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "call": {
 | |
|         "duration": 0.720921658910811,
 | |
|         "outcome": "failed",
 | |
|         "crash": {
 | |
|           "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
 | |
|           "lineno": 688,
 | |
|           "message": "IndexError: list index out of range"
 | |
|         },
 | |
|         "traceback": [
 | |
|           {
 | |
|             "path": "tests/verifications/openai_api/test_chat_completion.py",
 | |
|             "lineno": 321,
 | |
|             "message": ""
 | |
|           },
 | |
|           {
 | |
|             "path": "tests/verifications/openai_api/test_chat_completion.py",
 | |
|             "lineno": 688,
 | |
|             "message": "IndexError"
 | |
|           }
 | |
|         ],
 | |
|         "longrepr": "request = <FixtureRequest for <Function test_chat_streaming_tool_choice_required[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]>>\nopenai_client = <openai.OpenAI object at 0x7f427416dab0>\nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n    @pytest.mark.parametrize(\n        \"case\",\n        chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"],  # Reusing existing case for now\n        ids=case_id_generator,\n    )\n    def test_chat_streaming_tool_choice_required(request, openai_client, model, provider, verification_config, case):\n        test_name_base = get_base_test_name(request)\n        if should_skip_test(verification_config, provider, model, test_name_base):\n            pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n    \n        stream = openai_client.chat.completions.create(\n            model=model,\n            messages=case[\"input\"][\"messages\"],\n            tools=case[\"input\"][\"tools\"],\n            tool_choice=\"required\",  # Force tool call\n            stream=True,\n        )\n    \n>       _, tool_calls_buffer = _accumulate_streaming_tool_calls(stream)\n\ntests/verifications/openai_api/test_chat_completion.py:321: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\nstream = <openai.Stream object at 0x7f427447c340>\n\n    def _accumulate_streaming_tool_calls(stream):\n        \"\"\"Accumulates tool calls and content from a streaming ChatCompletion response.\"\"\"\n        tool_calls_buffer = {}\n        current_id = None\n        full_content = \"\"  # Initialize content accumulator\n        # Process streaming chunks\n        for chunk in stream:\n>           choice = chunk.choices[0]\nE           IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:688: IndexError"
 | |
|       },
 | |
|       "teardown": {
 | |
|         "duration": 0.0004109758883714676,
 | |
|         "outcome": "passed"
 | |
|       }
 | |
|     },
 | |
|     {
 | |
|       "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_none[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]",
 | |
|       "lineno": 329,
 | |
|       "outcome": "failed",
 | |
|       "keywords": [
 | |
|         "test_chat_non_streaming_tool_choice_none[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]",
 | |
|         "parametrize",
 | |
|         "pytestmark",
 | |
|         "meta-llama/Llama-3.3-70B-Instruct-Turbo-case0",
 | |
|         "test_chat_completion.py",
 | |
|         "openai_api",
 | |
|         "verifications",
 | |
|         "tests",
 | |
|         "llama-stack",
 | |
|         ""
 | |
|       ],
 | |
|       "metadata": {
 | |
|         "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
 | |
|         "case_id": "case0"
 | |
|       },
 | |
|       "setup": {
 | |
|         "duration": 0.07189673464745283,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "call": {
 | |
|         "duration": 0.403152690269053,
 | |
|         "outcome": "failed",
 | |
|         "crash": {
 | |
|           "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
 | |
|           "lineno": 349,
 | |
|           "message": "AssertionError: Expected no tool calls when tool_choice='none'\nassert [ChatCompletionMessageToolCall(id='call_xx4eg2o4wladhs7i0gy8d2cb', function=Function(arguments='{\"location\":\"San Francisco, USA\"}', name='get_weather'), type='function', index=0)] is None\n +  where [ChatCompletionMessageToolCall(id='call_xx4eg2o4wladhs7i0gy8d2cb', function=Function(arguments='{\"location\":\"San Francisco, USA\"}', name='get_weather'), type='function', index=0)] = ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_xx4eg2o4wladhs7i0gy8d2cb', function=Function(arguments='{\"location\":\"San Francisco, USA\"}', name='get_weather'), type='function', index=0)]).tool_calls\n +    where ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_xx4eg2o4wladhs7i0gy8d2cb', function=Function(arguments='{\"location\":\"San Francisco, USA\"}', name='get_weather'), type='function', index=0)]) = Choice(finish_reason='tool_calls', index=0, logprobs=None, message=ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_xx4eg2o4wladhs7i0gy8d2cb', function=Function(arguments='{\"location\":\"San Francisco, USA\"}', name='get_weather'), type='function', index=0)]), seed=4867562177231181000).message"
 | |
|         },
 | |
|         "traceback": [
 | |
|           {
 | |
|             "path": "tests/verifications/openai_api/test_chat_completion.py",
 | |
|             "lineno": 349,
 | |
|             "message": "AssertionError"
 | |
|           }
 | |
|         ],
 | |
|         "longrepr": "request = <FixtureRequest for <Function test_chat_non_streaming_tool_choice_none[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]>>\nopenai_client = <openai.OpenAI object at 0x7f42741eb670>\nmodel = 'meta-llama/Llama-3.3-70B-Instruct-Turbo', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n    @pytest.mark.parametrize(\n        \"case\",\n        chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"],  # Reusing existing case for now\n        ids=case_id_generator,\n    )\n    def test_chat_non_streaming_tool_choice_none(request, openai_client, model, provider, verification_config, case):\n        test_name_base = get_base_test_name(request)\n        if should_skip_test(verification_config, provider, model, test_name_base):\n            pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n    \n        response = openai_client.chat.completions.create(\n            model=model,\n            messages=case[\"input\"][\"messages\"],\n            tools=case[\"input\"][\"tools\"],\n            tool_choice=\"none\",\n            stream=False,\n        )\n    \n        assert response.choices[0].message.role == \"assistant\"\n>       assert response.choices[0].message.tool_calls is None, \"Expected no tool calls when tool_choice='none'\"\nE       AssertionError: Expected no tool calls when tool_choice='none'\nE       assert [ChatCompletionMessageToolCall(id='call_xx4eg2o4wladhs7i0gy8d2cb', function=Function(arguments='{\"location\":\"San Francisco, USA\"}', name='get_weather'), type='function', index=0)] is None\nE        +  where [ChatCompletionMessageToolCall(id='call_xx4eg2o4wladhs7i0gy8d2cb', function=Function(arguments='{\"location\":\"San Francisco, USA\"}', name='get_weather'), type='function', index=0)] = ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_xx4eg2o4wladhs7i0gy8d2cb', function=Function(arguments='{\"location\":\"San Francisco, USA\"}', name='get_weather'), type='function', index=0)]).tool_calls\nE        +    where ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_xx4eg2o4wladhs7i0gy8d2cb', function=Function(arguments='{\"location\":\"San Francisco, USA\"}', name='get_weather'), type='function', index=0)]) = Choice(finish_reason='tool_calls', index=0, logprobs=None, message=ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_xx4eg2o4wladhs7i0gy8d2cb', function=Function(arguments='{\"location\":\"San Francisco, USA\"}', name='get_weather'), type='function', index=0)]), seed=4867562177231181000).message\n\ntests/verifications/openai_api/test_chat_completion.py:349: AssertionError"
 | |
|       },
 | |
|       "teardown": {
 | |
|         "duration": 0.00037758704274892807,
 | |
|         "outcome": "passed"
 | |
|       }
 | |
|     },
 | |
|     {
 | |
|       "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_none[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]",
 | |
|       "lineno": 329,
 | |
|       "outcome": "failed",
 | |
|       "keywords": [
 | |
|         "test_chat_non_streaming_tool_choice_none[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]",
 | |
|         "parametrize",
 | |
|         "pytestmark",
 | |
|         "meta-llama/Llama-4-Scout-17B-16E-Instruct-case0",
 | |
|         "test_chat_completion.py",
 | |
|         "openai_api",
 | |
|         "verifications",
 | |
|         "tests",
 | |
|         "llama-stack",
 | |
|         ""
 | |
|       ],
 | |
|       "metadata": {
 | |
|         "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
 | |
|         "case_id": "case0"
 | |
|       },
 | |
|       "setup": {
 | |
|         "duration": 0.07282305508852005,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "call": {
 | |
|         "duration": 0.4538485202938318,
 | |
|         "outcome": "failed",
 | |
|         "crash": {
 | |
|           "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
 | |
|           "lineno": 349,
 | |
|           "message": "AssertionError: Expected no tool calls when tool_choice='none'\nassert [ChatCompletionMessageToolCall(id='call_6gehr7flf4gaqu65prmi1pca', function=Function(arguments='{\"location\":\"San Francisco\"}', name='get_weather'), type='function', index=0)] is None\n +  where [ChatCompletionMessageToolCall(id='call_6gehr7flf4gaqu65prmi1pca', function=Function(arguments='{\"location\":\"San Francisco\"}', name='get_weather'), type='function', index=0)] = ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_6gehr7flf4gaqu65prmi1pca', function=Function(arguments='{\"location\":\"San Francisco\"}', name='get_weather'), type='function', index=0)]).tool_calls\n +    where ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_6gehr7flf4gaqu65prmi1pca', function=Function(arguments='{\"location\":\"San Francisco\"}', name='get_weather'), type='function', index=0)]) = Choice(finish_reason='tool_calls', index=0, logprobs=None, message=ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_6gehr7flf4gaqu65prmi1pca', function=Function(arguments='{\"location\":\"San Francisco\"}', name='get_weather'), type='function', index=0)]), seed=None).message"
 | |
|         },
 | |
|         "traceback": [
 | |
|           {
 | |
|             "path": "tests/verifications/openai_api/test_chat_completion.py",
 | |
|             "lineno": 349,
 | |
|             "message": "AssertionError"
 | |
|           }
 | |
|         ],
 | |
|         "longrepr": "request = <FixtureRequest for <Function test_chat_non_streaming_tool_choice_none[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]>>\nopenai_client = <openai.OpenAI object at 0x7f4274247160>\nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n    @pytest.mark.parametrize(\n        \"case\",\n        chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"],  # Reusing existing case for now\n        ids=case_id_generator,\n    )\n    def test_chat_non_streaming_tool_choice_none(request, openai_client, model, provider, verification_config, case):\n        test_name_base = get_base_test_name(request)\n        if should_skip_test(verification_config, provider, model, test_name_base):\n            pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n    \n        response = openai_client.chat.completions.create(\n            model=model,\n            messages=case[\"input\"][\"messages\"],\n            tools=case[\"input\"][\"tools\"],\n            tool_choice=\"none\",\n            stream=False,\n        )\n    \n        assert response.choices[0].message.role == \"assistant\"\n>       assert response.choices[0].message.tool_calls is None, \"Expected no tool calls when tool_choice='none'\"\nE       AssertionError: Expected no tool calls when tool_choice='none'\nE       assert [ChatCompletionMessageToolCall(id='call_6gehr7flf4gaqu65prmi1pca', function=Function(arguments='{\"location\":\"San Francisco\"}', name='get_weather'), type='function', index=0)] is None\nE        +  where [ChatCompletionMessageToolCall(id='call_6gehr7flf4gaqu65prmi1pca', function=Function(arguments='{\"location\":\"San Francisco\"}', name='get_weather'), type='function', index=0)] = ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_6gehr7flf4gaqu65prmi1pca', function=Function(arguments='{\"location\":\"San Francisco\"}', name='get_weather'), type='function', index=0)]).tool_calls\nE        +    where ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_6gehr7flf4gaqu65prmi1pca', function=Function(arguments='{\"location\":\"San Francisco\"}', name='get_weather'), type='function', index=0)]) = Choice(finish_reason='tool_calls', index=0, logprobs=None, message=ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_6gehr7flf4gaqu65prmi1pca', function=Function(arguments='{\"location\":\"San Francisco\"}', name='get_weather'), type='function', index=0)]), seed=None).message\n\ntests/verifications/openai_api/test_chat_completion.py:349: AssertionError"
 | |
|       },
 | |
|       "teardown": {
 | |
|         "duration": 0.0003799665719270706,
 | |
|         "outcome": "passed"
 | |
|       }
 | |
|     },
 | |
|     {
 | |
|       "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_none[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]",
 | |
|       "lineno": 329,
 | |
|       "outcome": "failed",
 | |
|       "keywords": [
 | |
|         "test_chat_non_streaming_tool_choice_none[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]",
 | |
|         "parametrize",
 | |
|         "pytestmark",
 | |
|         "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0",
 | |
|         "test_chat_completion.py",
 | |
|         "openai_api",
 | |
|         "verifications",
 | |
|         "tests",
 | |
|         "llama-stack",
 | |
|         ""
 | |
|       ],
 | |
|       "metadata": {
 | |
|         "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
 | |
|         "case_id": "case0"
 | |
|       },
 | |
|       "setup": {
 | |
|         "duration": 0.07050042506307364,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "call": {
 | |
|         "duration": 0.3740060832351446,
 | |
|         "outcome": "failed",
 | |
|         "crash": {
 | |
|           "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
 | |
|           "lineno": 349,
 | |
|           "message": "AssertionError: Expected no tool calls when tool_choice='none'\nassert [ChatCompletionMessageToolCall(id='call_ngwnt1xmgxipkswdhdepisni', function=Function(arguments='{\"location\":\"San Francisco\"}', name='get_weather'), type='function', index=0)] is None\n +  where [ChatCompletionMessageToolCall(id='call_ngwnt1xmgxipkswdhdepisni', function=Function(arguments='{\"location\":\"San Francisco\"}', name='get_weather'), type='function', index=0)] = ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_ngwnt1xmgxipkswdhdepisni', function=Function(arguments='{\"location\":\"San Francisco\"}', name='get_weather'), type='function', index=0)]).tool_calls\n +    where ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_ngwnt1xmgxipkswdhdepisni', function=Function(arguments='{\"location\":\"San Francisco\"}', name='get_weather'), type='function', index=0)]) = Choice(finish_reason='tool_calls', index=0, logprobs=None, message=ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_ngwnt1xmgxipkswdhdepisni', function=Function(arguments='{\"location\":\"San Francisco\"}', name='get_weather'), type='function', index=0)]), seed=None).message"
 | |
|         },
 | |
|         "traceback": [
 | |
|           {
 | |
|             "path": "tests/verifications/openai_api/test_chat_completion.py",
 | |
|             "lineno": 349,
 | |
|             "message": "AssertionError"
 | |
|           }
 | |
|         ],
 | |
|         "longrepr": "request = <FixtureRequest for <Function test_chat_non_streaming_tool_choice_none[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]>>\nopenai_client = <openai.OpenAI object at 0x7f42742f3220>\nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n    @pytest.mark.parametrize(\n        \"case\",\n        chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"],  # Reusing existing case for now\n        ids=case_id_generator,\n    )\n    def test_chat_non_streaming_tool_choice_none(request, openai_client, model, provider, verification_config, case):\n        test_name_base = get_base_test_name(request)\n        if should_skip_test(verification_config, provider, model, test_name_base):\n            pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n    \n        response = openai_client.chat.completions.create(\n            model=model,\n            messages=case[\"input\"][\"messages\"],\n            tools=case[\"input\"][\"tools\"],\n            tool_choice=\"none\",\n            stream=False,\n        )\n    \n        assert response.choices[0].message.role == \"assistant\"\n>       assert response.choices[0].message.tool_calls is None, \"Expected no tool calls when tool_choice='none'\"\nE       AssertionError: Expected no tool calls when tool_choice='none'\nE       assert [ChatCompletionMessageToolCall(id='call_ngwnt1xmgxipkswdhdepisni', function=Function(arguments='{\"location\":\"San Francisco\"}', name='get_weather'), type='function', index=0)] is None\nE        +  where [ChatCompletionMessageToolCall(id='call_ngwnt1xmgxipkswdhdepisni', function=Function(arguments='{\"location\":\"San Francisco\"}', name='get_weather'), type='function', index=0)] = ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_ngwnt1xmgxipkswdhdepisni', function=Function(arguments='{\"location\":\"San Francisco\"}', name='get_weather'), type='function', index=0)]).tool_calls\nE        +    where ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_ngwnt1xmgxipkswdhdepisni', function=Function(arguments='{\"location\":\"San Francisco\"}', name='get_weather'), type='function', index=0)]) = Choice(finish_reason='tool_calls', index=0, logprobs=None, message=ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_ngwnt1xmgxipkswdhdepisni', function=Function(arguments='{\"location\":\"San Francisco\"}', name='get_weather'), type='function', index=0)]), seed=None).message\n\ntests/verifications/openai_api/test_chat_completion.py:349: AssertionError"
 | |
|       },
 | |
|       "teardown": {
 | |
|         "duration": 0.0003066370263695717,
 | |
|         "outcome": "passed"
 | |
|       }
 | |
|     },
 | |
|     {
 | |
|       "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_none[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]",
 | |
|       "lineno": 352,
 | |
|       "outcome": "failed",
 | |
|       "keywords": [
 | |
|         "test_chat_streaming_tool_choice_none[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]",
 | |
|         "parametrize",
 | |
|         "pytestmark",
 | |
|         "meta-llama/Llama-3.3-70B-Instruct-Turbo-case0",
 | |
|         "test_chat_completion.py",
 | |
|         "openai_api",
 | |
|         "verifications",
 | |
|         "tests",
 | |
|         "llama-stack",
 | |
|         ""
 | |
|       ],
 | |
|       "metadata": {
 | |
|         "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
 | |
|         "case_id": "case0"
 | |
|       },
 | |
|       "setup": {
 | |
|         "duration": 0.06983672920614481,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "call": {
 | |
|         "duration": 0.6774894064292312,
 | |
|         "outcome": "failed",
 | |
|         "crash": {
 | |
|           "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
 | |
|           "lineno": 376,
 | |
|           "message": "AssertionError: Expected no tool call chunks when tool_choice='none'\nassert not [ChoiceDeltaToolCall(index=0, id='call_emdpbpvm77rqbzz66arrzv5w', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')]\n +  where [ChoiceDeltaToolCall(index=0, id='call_emdpbpvm77rqbzz66arrzv5w', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')] = ChoiceDelta(content=None, function_call=None, refusal=None, role=None, tool_calls=[ChoiceDeltaToolCall(index=0, id='call_emdpbpvm77rqbzz66arrzv5w', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')]).tool_calls"
 | |
|         },
 | |
|         "traceback": [
 | |
|           {
 | |
|             "path": "tests/verifications/openai_api/test_chat_completion.py",
 | |
|             "lineno": 376,
 | |
|             "message": "AssertionError"
 | |
|           }
 | |
|         ],
 | |
|         "longrepr": "request = <FixtureRequest for <Function test_chat_streaming_tool_choice_none[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]>>\nopenai_client = <openai.OpenAI object at 0x7f427430d480>\nmodel = 'meta-llama/Llama-3.3-70B-Instruct-Turbo', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n    @pytest.mark.parametrize(\n        \"case\",\n        chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"],  # Reusing existing case for now\n        ids=case_id_generator,\n    )\n    def test_chat_streaming_tool_choice_none(request, openai_client, model, provider, verification_config, case):\n        test_name_base = get_base_test_name(request)\n        if should_skip_test(verification_config, provider, model, test_name_base):\n            pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n    \n        stream = openai_client.chat.completions.create(\n            model=model,\n            messages=case[\"input\"][\"messages\"],\n            tools=case[\"input\"][\"tools\"],\n            tool_choice=\"none\",\n            stream=True,\n        )\n    \n        content = \"\"\n        for chunk in stream:\n            delta = chunk.choices[0].delta\n            if delta.content:\n                content += delta.content\n>           assert not delta.tool_calls, \"Expected no tool call chunks when tool_choice='none'\"\nE           AssertionError: Expected no tool call chunks when tool_choice='none'\nE           assert not [ChoiceDeltaToolCall(index=0, id='call_emdpbpvm77rqbzz66arrzv5w', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')]\nE            +  where [ChoiceDeltaToolCall(index=0, id='call_emdpbpvm77rqbzz66arrzv5w', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')] = ChoiceDelta(content=None, function_call=None, refusal=None, role=None, tool_calls=[ChoiceDeltaToolCall(index=0, id='call_emdpbpvm77rqbzz66arrzv5w', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')]).tool_calls\n\ntests/verifications/openai_api/test_chat_completion.py:376: AssertionError"
 | |
|       },
 | |
|       "teardown": {
 | |
|         "duration": 0.0003580348566174507,
 | |
|         "outcome": "passed"
 | |
|       }
 | |
|     },
 | |
|     {
 | |
|       "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_none[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]",
 | |
|       "lineno": 352,
 | |
|       "outcome": "failed",
 | |
|       "keywords": [
 | |
|         "test_chat_streaming_tool_choice_none[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]",
 | |
|         "parametrize",
 | |
|         "pytestmark",
 | |
|         "meta-llama/Llama-4-Scout-17B-16E-Instruct-case0",
 | |
|         "test_chat_completion.py",
 | |
|         "openai_api",
 | |
|         "verifications",
 | |
|         "tests",
 | |
|         "llama-stack",
 | |
|         ""
 | |
|       ],
 | |
|       "metadata": {
 | |
|         "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
 | |
|         "case_id": "case0"
 | |
|       },
 | |
|       "setup": {
 | |
|         "duration": 0.07331710867583752,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "call": {
 | |
|         "duration": 0.38044120091944933,
 | |
|         "outcome": "failed",
 | |
|         "crash": {
 | |
|           "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
 | |
|           "lineno": 376,
 | |
|           "message": "AssertionError: Expected no tool call chunks when tool_choice='none'\nassert not [ChoiceDeltaToolCall(index=0, id='call_g85q6ysacljgjczgq8r30tjv', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')]\n +  where [ChoiceDeltaToolCall(index=0, id='call_g85q6ysacljgjczgq8r30tjv', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')] = ChoiceDelta(content=None, function_call=None, refusal=None, role=None, tool_calls=[ChoiceDeltaToolCall(index=0, id='call_g85q6ysacljgjczgq8r30tjv', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')]).tool_calls"
 | |
|         },
 | |
|         "traceback": [
 | |
|           {
 | |
|             "path": "tests/verifications/openai_api/test_chat_completion.py",
 | |
|             "lineno": 376,
 | |
|             "message": "AssertionError"
 | |
|           }
 | |
|         ],
 | |
|         "longrepr": "request = <FixtureRequest for <Function test_chat_streaming_tool_choice_none[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]>>\nopenai_client = <openai.OpenAI object at 0x7f42745f3970>\nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n    @pytest.mark.parametrize(\n        \"case\",\n        chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"],  # Reusing existing case for now\n        ids=case_id_generator,\n    )\n    def test_chat_streaming_tool_choice_none(request, openai_client, model, provider, verification_config, case):\n        test_name_base = get_base_test_name(request)\n        if should_skip_test(verification_config, provider, model, test_name_base):\n            pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n    \n        stream = openai_client.chat.completions.create(\n            model=model,\n            messages=case[\"input\"][\"messages\"],\n            tools=case[\"input\"][\"tools\"],\n            tool_choice=\"none\",\n            stream=True,\n        )\n    \n        content = \"\"\n        for chunk in stream:\n            delta = chunk.choices[0].delta\n            if delta.content:\n                content += delta.content\n>           assert not delta.tool_calls, \"Expected no tool call chunks when tool_choice='none'\"\nE           AssertionError: Expected no tool call chunks when tool_choice='none'\nE           assert not [ChoiceDeltaToolCall(index=0, id='call_g85q6ysacljgjczgq8r30tjv', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')]\nE            +  where [ChoiceDeltaToolCall(index=0, id='call_g85q6ysacljgjczgq8r30tjv', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')] = ChoiceDelta(content=None, function_call=None, refusal=None, role=None, tool_calls=[ChoiceDeltaToolCall(index=0, id='call_g85q6ysacljgjczgq8r30tjv', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')]).tool_calls\n\ntests/verifications/openai_api/test_chat_completion.py:376: AssertionError"
 | |
|       },
 | |
|       "teardown": {
 | |
|         "duration": 0.0003765234723687172,
 | |
|         "outcome": "passed"
 | |
|       }
 | |
|     },
 | |
|     {
 | |
|       "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_none[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]",
 | |
|       "lineno": 352,
 | |
|       "outcome": "failed",
 | |
|       "keywords": [
 | |
|         "test_chat_streaming_tool_choice_none[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]",
 | |
|         "parametrize",
 | |
|         "pytestmark",
 | |
|         "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0",
 | |
|         "test_chat_completion.py",
 | |
|         "openai_api",
 | |
|         "verifications",
 | |
|         "tests",
 | |
|         "llama-stack",
 | |
|         ""
 | |
|       ],
 | |
|       "metadata": {
 | |
|         "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
 | |
|         "case_id": "case0"
 | |
|       },
 | |
|       "setup": {
 | |
|         "duration": 0.07194581907242537,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "call": {
 | |
|         "duration": 0.37374384608119726,
 | |
|         "outcome": "failed",
 | |
|         "crash": {
 | |
|           "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
 | |
|           "lineno": 376,
 | |
|           "message": "AssertionError: Expected no tool call chunks when tool_choice='none'\nassert not [ChoiceDeltaToolCall(index=0, id='call_zq6x10vfu9pkxme6pm9zxouk', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')]\n +  where [ChoiceDeltaToolCall(index=0, id='call_zq6x10vfu9pkxme6pm9zxouk', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')] = ChoiceDelta(content=None, function_call=None, refusal=None, role=None, tool_calls=[ChoiceDeltaToolCall(index=0, id='call_zq6x10vfu9pkxme6pm9zxouk', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')]).tool_calls"
 | |
|         },
 | |
|         "traceback": [
 | |
|           {
 | |
|             "path": "tests/verifications/openai_api/test_chat_completion.py",
 | |
|             "lineno": 376,
 | |
|             "message": "AssertionError"
 | |
|           }
 | |
|         ],
 | |
|         "longrepr": "request = <FixtureRequest for <Function test_chat_streaming_tool_choice_none[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]>>\nopenai_client = <openai.OpenAI object at 0x7f42741c4520>\nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n    @pytest.mark.parametrize(\n        \"case\",\n        chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"],  # Reusing existing case for now\n        ids=case_id_generator,\n    )\n    def test_chat_streaming_tool_choice_none(request, openai_client, model, provider, verification_config, case):\n        test_name_base = get_base_test_name(request)\n        if should_skip_test(verification_config, provider, model, test_name_base):\n            pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n    \n        stream = openai_client.chat.completions.create(\n            model=model,\n            messages=case[\"input\"][\"messages\"],\n            tools=case[\"input\"][\"tools\"],\n            tool_choice=\"none\",\n            stream=True,\n        )\n    \n        content = \"\"\n        for chunk in stream:\n            delta = chunk.choices[0].delta\n            if delta.content:\n                content += delta.content\n>           assert not delta.tool_calls, \"Expected no tool call chunks when tool_choice='none'\"\nE           AssertionError: Expected no tool call chunks when tool_choice='none'\nE           assert not [ChoiceDeltaToolCall(index=0, id='call_zq6x10vfu9pkxme6pm9zxouk', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')]\nE            +  where [ChoiceDeltaToolCall(index=0, id='call_zq6x10vfu9pkxme6pm9zxouk', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')] = ChoiceDelta(content=None, function_call=None, refusal=None, role=None, tool_calls=[ChoiceDeltaToolCall(index=0, id='call_zq6x10vfu9pkxme6pm9zxouk', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')]).tool_calls\n\ntests/verifications/openai_api/test_chat_completion.py:376: AssertionError"
 | |
|       },
 | |
|       "teardown": {
 | |
|         "duration": 0.0003813542425632477,
 | |
|         "outcome": "passed"
 | |
|       }
 | |
|     },
 | |
|     {
 | |
|       "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-text_then_weather_tool]",
 | |
|       "lineno": 380,
 | |
|       "outcome": "failed",
 | |
|       "keywords": [
 | |
|         "test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-text_then_weather_tool]",
 | |
|         "parametrize",
 | |
|         "pytestmark",
 | |
|         "meta-llama/Llama-3.3-70B-Instruct-Turbo-text_then_weather_tool",
 | |
|         "test_chat_completion.py",
 | |
|         "openai_api",
 | |
|         "verifications",
 | |
|         "tests",
 | |
|         "llama-stack",
 | |
|         ""
 | |
|       ],
 | |
|       "metadata": {
 | |
|         "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
 | |
|         "case_id": "text_then_weather_tool"
 | |
|       },
 | |
|       "setup": {
 | |
|         "duration": 0.07330320309847593,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "call": {
 | |
|         "duration": 0.4314677305519581,
 | |
|         "outcome": "failed",
 | |
|         "crash": {
 | |
|           "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
 | |
|           "lineno": 439,
 | |
|           "message": "AssertionError: Expected 0 tool calls, but got 1\nassert 1 == 0\n +  where 1 = len(([ChatCompletionMessageToolCall(id='call_l05cckdk5mooai2iyfucg4s8', function=Function(arguments='{\"location\":\"San Francisco, CA\"}', name='get_weather'), type='function', index=0)]))\n +    where [ChatCompletionMessageToolCall(id='call_l05cckdk5mooai2iyfucg4s8', function=Function(arguments='{\"location\":\"San Francisco, CA\"}', name='get_weather'), type='function', index=0)] = ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_l05cckdk5mooai2iyfucg4s8', function=Function(arguments='{\"location\":\"San Francisco, CA\"}', name='get_weather'), type='function', index=0)]).tool_calls"
 | |
|         },
 | |
|         "traceback": [
 | |
|           {
 | |
|             "path": "tests/verifications/openai_api/test_chat_completion.py",
 | |
|             "lineno": 439,
 | |
|             "message": "AssertionError"
 | |
|           }
 | |
|         ],
 | |
|         "longrepr": "request = <FixtureRequest for <Function test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-text_then_weather_tool]>>\nopenai_client = <openai.OpenAI object at 0x7f4274148ca0>\nmodel = 'meta-llama/Llama-3.3-70B-Instruct-Turbo', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'text_then_weather_tool', 'expected': [{'answer': ['sol'], 'num_tool_calls': 0}, {'num_tool_calls': 1, 'to...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n    @pytest.mark.parametrize(\n        \"case\",\n        chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n        ids=case_id_generator,\n    )\n    def test_chat_non_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n        \"\"\"\n        Test cases for multi-turn tool calling.\n        Tool calls are asserted.\n        Tool responses are provided in the test case.\n        Final response is asserted.\n        \"\"\"\n    \n        test_name_base = get_base_test_name(request)\n        if should_skip_test(verification_config, provider, model, test_name_base):\n            pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n    \n        # Create a copy of the messages list to avoid modifying the original\n        messages = []\n        tools = case[\"input\"][\"tools\"]\n        # Use deepcopy to prevent modification across runs/parametrization\n        expected_results = copy.deepcopy(case[\"expected\"])\n        tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n        input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n    \n        # keep going until either\n        # 1. we have messages to test in multi-turn\n        # 2. no messages but last message is tool response\n        while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n            # do not take new messages if last message is tool response\n            if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n                new_messages = input_messages_turns.pop(0)\n                # Ensure new_messages is a list of message objects\n                if isinstance(new_messages, list):\n                    messages.extend(new_messages)\n                else:\n                    # If it's a single message object, add it directly\n                    messages.append(new_messages)\n    \n            # --- API Call ---\n            response = openai_client.chat.completions.create(\n                model=model,\n                messages=messages,\n                tools=tools,\n                stream=False,\n            )\n    \n            # --- Process Response ---\n            assistant_message = response.choices[0].message\n            messages.append(assistant_message.model_dump(exclude_unset=True))\n    \n            assert assistant_message.role == \"assistant\"\n    \n            # Get the expected result data\n            expected = expected_results.pop(0)\n            num_tool_calls = expected[\"num_tool_calls\"]\n    \n            # --- Assertions based on expected result ---\n>           assert len(assistant_message.tool_calls or []) == num_tool_calls, (\n                f\"Expected {num_tool_calls} tool calls, but got {len(assistant_message.tool_calls or [])}\"\n            )\nE           AssertionError: Expected 0 tool calls, but got 1\nE           assert 1 == 0\nE            +  where 1 = len(([ChatCompletionMessageToolCall(id='call_l05cckdk5mooai2iyfucg4s8', function=Function(arguments='{\"location\":\"San Francisco, CA\"}', name='get_weather'), type='function', index=0)]))\nE            +    where [ChatCompletionMessageToolCall(id='call_l05cckdk5mooai2iyfucg4s8', function=Function(arguments='{\"location\":\"San Francisco, CA\"}', name='get_weather'), type='function', index=0)] = ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_l05cckdk5mooai2iyfucg4s8', function=Function(arguments='{\"location\":\"San Francisco, CA\"}', name='get_weather'), type='function', index=0)]).tool_calls\n\ntests/verifications/openai_api/test_chat_completion.py:439: AssertionError"
 | |
|       },
 | |
|       "teardown": {
 | |
|         "duration": 0.00040314625948667526,
 | |
|         "outcome": "passed"
 | |
|       }
 | |
|     },
 | |
|     {
 | |
|       "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-weather_tool_then_text]",
 | |
|       "lineno": 380,
 | |
|       "outcome": "passed",
 | |
|       "keywords": [
 | |
|         "test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-weather_tool_then_text]",
 | |
|         "parametrize",
 | |
|         "pytestmark",
 | |
|         "meta-llama/Llama-3.3-70B-Instruct-Turbo-weather_tool_then_text",
 | |
|         "test_chat_completion.py",
 | |
|         "openai_api",
 | |
|         "verifications",
 | |
|         "tests",
 | |
|         "llama-stack",
 | |
|         ""
 | |
|       ],
 | |
|       "metadata": {
 | |
|         "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
 | |
|         "case_id": "weather_tool_then_text"
 | |
|       },
 | |
|       "setup": {
 | |
|         "duration": 0.07405277714133263,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "call": {
 | |
|         "duration": 0.8350177155807614,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "teardown": {
 | |
|         "duration": 0.00023361947387456894,
 | |
|         "outcome": "passed"
 | |
|       }
 | |
|     },
 | |
|     {
 | |
|       "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-add_product_tool]",
 | |
|       "lineno": 380,
 | |
|       "outcome": "passed",
 | |
|       "keywords": [
 | |
|         "test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-add_product_tool]",
 | |
|         "parametrize",
 | |
|         "pytestmark",
 | |
|         "meta-llama/Llama-3.3-70B-Instruct-Turbo-add_product_tool",
 | |
|         "test_chat_completion.py",
 | |
|         "openai_api",
 | |
|         "verifications",
 | |
|         "tests",
 | |
|         "llama-stack",
 | |
|         ""
 | |
|       ],
 | |
|       "metadata": {
 | |
|         "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
 | |
|         "case_id": "add_product_tool"
 | |
|       },
 | |
|       "setup": {
 | |
|         "duration": 0.07361320778727531,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "call": {
 | |
|         "duration": 1.0619212854653597,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "teardown": {
 | |
|         "duration": 0.0002395985648036003,
 | |
|         "outcome": "passed"
 | |
|       }
 | |
|     },
 | |
|     {
 | |
|       "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-get_then_create_event_tool]",
 | |
|       "lineno": 380,
 | |
|       "outcome": "passed",
 | |
|       "keywords": [
 | |
|         "test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-get_then_create_event_tool]",
 | |
|         "parametrize",
 | |
|         "pytestmark",
 | |
|         "meta-llama/Llama-3.3-70B-Instruct-Turbo-get_then_create_event_tool",
 | |
|         "test_chat_completion.py",
 | |
|         "openai_api",
 | |
|         "verifications",
 | |
|         "tests",
 | |
|         "llama-stack",
 | |
|         ""
 | |
|       ],
 | |
|       "metadata": {
 | |
|         "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
 | |
|         "case_id": "get_then_create_event_tool"
 | |
|       },
 | |
|       "setup": {
 | |
|         "duration": 0.07290417980402708,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "call": {
 | |
|         "duration": 4.241749887354672,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "teardown": {
 | |
|         "duration": 0.00027841050177812576,
 | |
|         "outcome": "passed"
 | |
|       }
 | |
|     },
 | |
|     {
 | |
|       "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-compare_monthly_expense_tool]",
 | |
|       "lineno": 380,
 | |
|       "outcome": "passed",
 | |
|       "keywords": [
 | |
|         "test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-compare_monthly_expense_tool]",
 | |
|         "parametrize",
 | |
|         "pytestmark",
 | |
|         "meta-llama/Llama-3.3-70B-Instruct-Turbo-compare_monthly_expense_tool",
 | |
|         "test_chat_completion.py",
 | |
|         "openai_api",
 | |
|         "verifications",
 | |
|         "tests",
 | |
|         "llama-stack",
 | |
|         ""
 | |
|       ],
 | |
|       "metadata": {
 | |
|         "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
 | |
|         "case_id": "compare_monthly_expense_tool"
 | |
|       },
 | |
|       "setup": {
 | |
|         "duration": 0.07301546633243561,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "call": {
 | |
|         "duration": 2.0520667918026447,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "teardown": {
 | |
|         "duration": 0.0002469858154654503,
 | |
|         "outcome": "passed"
 | |
|       }
 | |
|     },
 | |
|     {
 | |
|       "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-text_then_weather_tool]",
 | |
|       "lineno": 380,
 | |
|       "outcome": "failed",
 | |
|       "keywords": [
 | |
|         "test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-text_then_weather_tool]",
 | |
|         "parametrize",
 | |
|         "pytestmark",
 | |
|         "meta-llama/Llama-4-Scout-17B-16E-Instruct-text_then_weather_tool",
 | |
|         "test_chat_completion.py",
 | |
|         "openai_api",
 | |
|         "verifications",
 | |
|         "tests",
 | |
|         "llama-stack",
 | |
|         ""
 | |
|       ],
 | |
|       "metadata": {
 | |
|         "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
 | |
|         "case_id": "text_then_weather_tool"
 | |
|       },
 | |
|       "setup": {
 | |
|         "duration": 0.07405530381947756,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "call": {
 | |
|         "duration": 0.48041669093072414,
 | |
|         "outcome": "failed",
 | |
|         "crash": {
 | |
|           "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
 | |
|           "lineno": 467,
 | |
|           "message": "AssertionError: Expected one of ['sol'] in content, but got: 'I am not able to complete this task as it falls outside of the scope of the functions I have been given.'\nassert False\n +  where False = any(<generator object test_chat_non_streaming_multi_turn_tool_calling.<locals>.<genexpr> at 0x7f4274057610>)"
 | |
|         },
 | |
|         "traceback": [
 | |
|           {
 | |
|             "path": "tests/verifications/openai_api/test_chat_completion.py",
 | |
|             "lineno": 467,
 | |
|             "message": "AssertionError"
 | |
|           }
 | |
|         ],
 | |
|         "longrepr": "request = <FixtureRequest for <Function test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-text_then_weather_tool]>>\nopenai_client = <openai.OpenAI object at 0x7f42740f7700>\nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'text_then_weather_tool', 'expected': [{'answer': ['sol'], 'num_tool_calls': 0}, {'num_tool_calls': 1, 'to...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n    @pytest.mark.parametrize(\n        \"case\",\n        chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n        ids=case_id_generator,\n    )\n    def test_chat_non_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n        \"\"\"\n        Test cases for multi-turn tool calling.\n        Tool calls are asserted.\n        Tool responses are provided in the test case.\n        Final response is asserted.\n        \"\"\"\n    \n        test_name_base = get_base_test_name(request)\n        if should_skip_test(verification_config, provider, model, test_name_base):\n            pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n    \n        # Create a copy of the messages list to avoid modifying the original\n        messages = []\n        tools = case[\"input\"][\"tools\"]\n        # Use deepcopy to prevent modification across runs/parametrization\n        expected_results = copy.deepcopy(case[\"expected\"])\n        tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n        input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n    \n        # keep going until either\n        # 1. we have messages to test in multi-turn\n        # 2. no messages but last message is tool response\n        while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n            # do not take new messages if last message is tool response\n            if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n                new_messages = input_messages_turns.pop(0)\n                # Ensure new_messages is a list of message objects\n                if isinstance(new_messages, list):\n                    messages.extend(new_messages)\n                else:\n                    # If it's a single message object, add it directly\n                    messages.append(new_messages)\n    \n            # --- API Call ---\n            response = openai_client.chat.completions.create(\n                model=model,\n                messages=messages,\n                tools=tools,\n                stream=False,\n            )\n    \n            # --- Process Response ---\n            assistant_message = response.choices[0].message\n            messages.append(assistant_message.model_dump(exclude_unset=True))\n    \n            assert assistant_message.role == \"assistant\"\n    \n            # Get the expected result data\n            expected = expected_results.pop(0)\n            num_tool_calls = expected[\"num_tool_calls\"]\n    \n            # --- Assertions based on expected result ---\n            assert len(assistant_message.tool_calls or []) == num_tool_calls, (\n                f\"Expected {num_tool_calls} tool calls, but got {len(assistant_message.tool_calls or [])}\"\n            )\n    \n            if num_tool_calls > 0:\n                tool_call = assistant_message.tool_calls[0]\n                assert tool_call.function.name == expected[\"tool_name\"], (\n                    f\"Expected tool '{expected['tool_name']}', got '{tool_call.function.name}'\"\n                )\n                # Parse the JSON string arguments before comparing\n                actual_arguments = json.loads(tool_call.function.arguments)\n                assert actual_arguments == expected[\"tool_arguments\"], (\n                    f\"Expected arguments '{expected['tool_arguments']}', got '{actual_arguments}'\"\n                )\n    \n                # Prepare and append the tool response for the next turn\n                tool_response = tool_responses.pop(0)\n                messages.append(\n                    {\n                        \"role\": \"tool\",\n                        \"tool_call_id\": tool_call.id,\n                        \"content\": tool_response[\"response\"],\n                    }\n                )\n            else:\n                assert assistant_message.content is not None, \"Expected content, but none received.\"\n                expected_answers = expected[\"answer\"]  # This is now a list\n                content_lower = assistant_message.content.lower()\n>               assert any(ans.lower() in content_lower for ans in expected_answers), (\n                    f\"Expected one of {expected_answers} in content, but got: '{assistant_message.content}'\"\n                )\nE               AssertionError: Expected one of ['sol'] in content, but got: 'I am not able to complete this task as it falls outside of the scope of the functions I have been given.'\nE               assert False\nE                +  where False = any(<generator object test_chat_non_streaming_multi_turn_tool_calling.<locals>.<genexpr> at 0x7f4274057610>)\n\ntests/verifications/openai_api/test_chat_completion.py:467: AssertionError"
 | |
|       },
 | |
|       "teardown": {
 | |
|         "duration": 0.00035319291055202484,
 | |
|         "outcome": "passed"
 | |
|       }
 | |
|     },
 | |
|     {
 | |
|       "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-weather_tool_then_text]",
 | |
|       "lineno": 380,
 | |
|       "outcome": "passed",
 | |
|       "keywords": [
 | |
|         "test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-weather_tool_then_text]",
 | |
|         "parametrize",
 | |
|         "pytestmark",
 | |
|         "meta-llama/Llama-4-Scout-17B-16E-Instruct-weather_tool_then_text",
 | |
|         "test_chat_completion.py",
 | |
|         "openai_api",
 | |
|         "verifications",
 | |
|         "tests",
 | |
|         "llama-stack",
 | |
|         ""
 | |
|       ],
 | |
|       "metadata": {
 | |
|         "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
 | |
|         "case_id": "weather_tool_then_text"
 | |
|       },
 | |
|       "setup": {
 | |
|         "duration": 0.0724497502669692,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "call": {
 | |
|         "duration": 0.832760401070118,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "teardown": {
 | |
|         "duration": 0.00026283878833055496,
 | |
|         "outcome": "passed"
 | |
|       }
 | |
|     },
 | |
|     {
 | |
|       "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-add_product_tool]",
 | |
|       "lineno": 380,
 | |
|       "outcome": "passed",
 | |
|       "keywords": [
 | |
|         "test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-add_product_tool]",
 | |
|         "parametrize",
 | |
|         "pytestmark",
 | |
|         "meta-llama/Llama-4-Scout-17B-16E-Instruct-add_product_tool",
 | |
|         "test_chat_completion.py",
 | |
|         "openai_api",
 | |
|         "verifications",
 | |
|         "tests",
 | |
|         "llama-stack",
 | |
|         ""
 | |
|       ],
 | |
|       "metadata": {
 | |
|         "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
 | |
|         "case_id": "add_product_tool"
 | |
|       },
 | |
|       "setup": {
 | |
|         "duration": 0.07180811651051044,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "call": {
 | |
|         "duration": 1.4359142612665892,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "teardown": {
 | |
|         "duration": 0.0002761436626315117,
 | |
|         "outcome": "passed"
 | |
|       }
 | |
|     },
 | |
|     {
 | |
|       "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-get_then_create_event_tool]",
 | |
|       "lineno": 380,
 | |
|       "outcome": "passed",
 | |
|       "keywords": [
 | |
|         "test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-get_then_create_event_tool]",
 | |
|         "parametrize",
 | |
|         "pytestmark",
 | |
|         "meta-llama/Llama-4-Scout-17B-16E-Instruct-get_then_create_event_tool",
 | |
|         "test_chat_completion.py",
 | |
|         "openai_api",
 | |
|         "verifications",
 | |
|         "tests",
 | |
|         "llama-stack",
 | |
|         ""
 | |
|       ],
 | |
|       "metadata": {
 | |
|         "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
 | |
|         "case_id": "get_then_create_event_tool"
 | |
|       },
 | |
|       "setup": {
 | |
|         "duration": 0.07503274269402027,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "call": {
 | |
|         "duration": 1.909641013480723,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "teardown": {
 | |
|         "duration": 0.0002613905817270279,
 | |
|         "outcome": "passed"
 | |
|       }
 | |
|     },
 | |
|     {
 | |
|       "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-compare_monthly_expense_tool]",
 | |
|       "lineno": 380,
 | |
|       "outcome": "passed",
 | |
|       "keywords": [
 | |
|         "test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-compare_monthly_expense_tool]",
 | |
|         "parametrize",
 | |
|         "pytestmark",
 | |
|         "meta-llama/Llama-4-Scout-17B-16E-Instruct-compare_monthly_expense_tool",
 | |
|         "test_chat_completion.py",
 | |
|         "openai_api",
 | |
|         "verifications",
 | |
|         "tests",
 | |
|         "llama-stack",
 | |
|         ""
 | |
|       ],
 | |
|       "metadata": {
 | |
|         "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
 | |
|         "case_id": "compare_monthly_expense_tool"
 | |
|       },
 | |
|       "setup": {
 | |
|         "duration": 0.07153380755335093,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "call": {
 | |
|         "duration": 2.695867782458663,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "teardown": {
 | |
|         "duration": 0.00032124295830726624,
 | |
|         "outcome": "passed"
 | |
|       }
 | |
|     },
 | |
|     {
 | |
|       "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-text_then_weather_tool]",
 | |
|       "lineno": 380,
 | |
|       "outcome": "failed",
 | |
|       "keywords": [
 | |
|         "test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-text_then_weather_tool]",
 | |
|         "parametrize",
 | |
|         "pytestmark",
 | |
|         "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-text_then_weather_tool",
 | |
|         "test_chat_completion.py",
 | |
|         "openai_api",
 | |
|         "verifications",
 | |
|         "tests",
 | |
|         "llama-stack",
 | |
|         ""
 | |
|       ],
 | |
|       "metadata": {
 | |
|         "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
 | |
|         "case_id": "text_then_weather_tool"
 | |
|       },
 | |
|       "setup": {
 | |
|         "duration": 0.07275318540632725,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "call": {
 | |
|         "duration": 0.34551760647445917,
 | |
|         "outcome": "failed",
 | |
|         "crash": {
 | |
|           "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
 | |
|           "lineno": 467,
 | |
|           "message": "AssertionError: Expected one of ['sol'] in content, but got: '{\"name\": null, \"parameters\": null}'\nassert False\n +  where False = any(<generator object test_chat_non_streaming_multi_turn_tool_calling.<locals>.<genexpr> at 0x7f42742dd4d0>)"
 | |
|         },
 | |
|         "traceback": [
 | |
|           {
 | |
|             "path": "tests/verifications/openai_api/test_chat_completion.py",
 | |
|             "lineno": 467,
 | |
|             "message": "AssertionError"
 | |
|           }
 | |
|         ],
 | |
|         "longrepr": "request = <FixtureRequest for <Function test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-text_then_weather_tool]>>\nopenai_client = <openai.OpenAI object at 0x7f427414b970>\nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'text_then_weather_tool', 'expected': [{'answer': ['sol'], 'num_tool_calls': 0}, {'num_tool_calls': 1, 'to...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n    @pytest.mark.parametrize(\n        \"case\",\n        chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n        ids=case_id_generator,\n    )\n    def test_chat_non_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n        \"\"\"\n        Test cases for multi-turn tool calling.\n        Tool calls are asserted.\n        Tool responses are provided in the test case.\n        Final response is asserted.\n        \"\"\"\n    \n        test_name_base = get_base_test_name(request)\n        if should_skip_test(verification_config, provider, model, test_name_base):\n            pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n    \n        # Create a copy of the messages list to avoid modifying the original\n        messages = []\n        tools = case[\"input\"][\"tools\"]\n        # Use deepcopy to prevent modification across runs/parametrization\n        expected_results = copy.deepcopy(case[\"expected\"])\n        tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n        input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n    \n        # keep going until either\n        # 1. we have messages to test in multi-turn\n        # 2. no messages but last message is tool response\n        while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n            # do not take new messages if last message is tool response\n            if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n                new_messages = input_messages_turns.pop(0)\n                # Ensure new_messages is a list of message objects\n                if isinstance(new_messages, list):\n                    messages.extend(new_messages)\n                else:\n                    # If it's a single message object, add it directly\n                    messages.append(new_messages)\n    \n            # --- API Call ---\n            response = openai_client.chat.completions.create(\n                model=model,\n                messages=messages,\n                tools=tools,\n                stream=False,\n            )\n    \n            # --- Process Response ---\n            assistant_message = response.choices[0].message\n            messages.append(assistant_message.model_dump(exclude_unset=True))\n    \n            assert assistant_message.role == \"assistant\"\n    \n            # Get the expected result data\n            expected = expected_results.pop(0)\n            num_tool_calls = expected[\"num_tool_calls\"]\n    \n            # --- Assertions based on expected result ---\n            assert len(assistant_message.tool_calls or []) == num_tool_calls, (\n                f\"Expected {num_tool_calls} tool calls, but got {len(assistant_message.tool_calls or [])}\"\n            )\n    \n            if num_tool_calls > 0:\n                tool_call = assistant_message.tool_calls[0]\n                assert tool_call.function.name == expected[\"tool_name\"], (\n                    f\"Expected tool '{expected['tool_name']}', got '{tool_call.function.name}'\"\n                )\n                # Parse the JSON string arguments before comparing\n                actual_arguments = json.loads(tool_call.function.arguments)\n                assert actual_arguments == expected[\"tool_arguments\"], (\n                    f\"Expected arguments '{expected['tool_arguments']}', got '{actual_arguments}'\"\n                )\n    \n                # Prepare and append the tool response for the next turn\n                tool_response = tool_responses.pop(0)\n                messages.append(\n                    {\n                        \"role\": \"tool\",\n                        \"tool_call_id\": tool_call.id,\n                        \"content\": tool_response[\"response\"],\n                    }\n                )\n            else:\n                assert assistant_message.content is not None, \"Expected content, but none received.\"\n                expected_answers = expected[\"answer\"]  # This is now a list\n                content_lower = assistant_message.content.lower()\n>               assert any(ans.lower() in content_lower for ans in expected_answers), (\n                    f\"Expected one of {expected_answers} in content, but got: '{assistant_message.content}'\"\n                )\nE               AssertionError: Expected one of ['sol'] in content, but got: '{\"name\": null, \"parameters\": null}'\nE               assert False\nE                +  where False = any(<generator object test_chat_non_streaming_multi_turn_tool_calling.<locals>.<genexpr> at 0x7f42742dd4d0>)\n\ntests/verifications/openai_api/test_chat_completion.py:467: AssertionError"
 | |
|       },
 | |
|       "teardown": {
 | |
|         "duration": 0.0003842068836092949,
 | |
|         "outcome": "passed"
 | |
|       }
 | |
|     },
 | |
|     {
 | |
|       "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-weather_tool_then_text]",
 | |
|       "lineno": 380,
 | |
|       "outcome": "passed",
 | |
|       "keywords": [
 | |
|         "test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-weather_tool_then_text]",
 | |
|         "parametrize",
 | |
|         "pytestmark",
 | |
|         "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-weather_tool_then_text",
 | |
|         "test_chat_completion.py",
 | |
|         "openai_api",
 | |
|         "verifications",
 | |
|         "tests",
 | |
|         "llama-stack",
 | |
|         ""
 | |
|       ],
 | |
|       "metadata": {
 | |
|         "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
 | |
|         "case_id": "weather_tool_then_text"
 | |
|       },
 | |
|       "setup": {
 | |
|         "duration": 0.07281951513141394,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "call": {
 | |
|         "duration": 1.008104412816465,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "teardown": {
 | |
|         "duration": 0.00026233773678541183,
 | |
|         "outcome": "passed"
 | |
|       }
 | |
|     },
 | |
|     {
 | |
|       "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-add_product_tool]",
 | |
|       "lineno": 380,
 | |
|       "outcome": "passed",
 | |
|       "keywords": [
 | |
|         "test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-add_product_tool]",
 | |
|         "parametrize",
 | |
|         "pytestmark",
 | |
|         "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-add_product_tool",
 | |
|         "test_chat_completion.py",
 | |
|         "openai_api",
 | |
|         "verifications",
 | |
|         "tests",
 | |
|         "llama-stack",
 | |
|         ""
 | |
|       ],
 | |
|       "metadata": {
 | |
|         "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
 | |
|         "case_id": "add_product_tool"
 | |
|       },
 | |
|       "setup": {
 | |
|         "duration": 0.07155719958245754,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "call": {
 | |
|         "duration": 2.3485742239281535,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "teardown": {
 | |
|         "duration": 0.0002629430964589119,
 | |
|         "outcome": "passed"
 | |
|       }
 | |
|     },
 | |
|     {
 | |
|       "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-get_then_create_event_tool]",
 | |
|       "lineno": 380,
 | |
|       "outcome": "failed",
 | |
|       "keywords": [
 | |
|         "test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-get_then_create_event_tool]",
 | |
|         "parametrize",
 | |
|         "pytestmark",
 | |
|         "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-get_then_create_event_tool",
 | |
|         "test_chat_completion.py",
 | |
|         "openai_api",
 | |
|         "verifications",
 | |
|         "tests",
 | |
|         "llama-stack",
 | |
|         ""
 | |
|       ],
 | |
|       "metadata": {
 | |
|         "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
 | |
|         "case_id": "get_then_create_event_tool"
 | |
|       },
 | |
|       "setup": {
 | |
|         "duration": 0.07251190021634102,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "call": {
 | |
|         "duration": 2.9882029946893454,
 | |
|         "outcome": "failed",
 | |
|         "crash": {
 | |
|           "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
 | |
|           "lineno": 450,
 | |
|           "message": "AssertionError: Expected arguments '{'name': 'Team Building', 'date': '2025-03-03', 'time': '10:00', 'location': 'Main Conference Room', 'participants': ['Alice', 'Bob', 'Charlie']}', got '{'date': '\"2025-03-03\"', 'location': '\"Main Conference Room\"', 'name': '\"Team Building\"', 'participants': ['Alice', 'Bob', 'Charlie'], 'time': '\"10:00\"'}'\nassert {'date': '\"20...harlie'], ...} == {'date': '202...harlie'], ...}\n  \n  Omitting 1 identical items, use -vv to show\n  Differing items:\n  {'date': '\"2025-03-03\"'} != {'date': '2025-03-03'}\n  {'name': '\"Team Building\"'} != {'name': 'Team Building'}\n  {'time': '\"10:00\"'} != {'time': '10:00'}\n  {'location': '\"Main Conference Room\"'} != {'location': 'Main Conference Room'}...\n  \n  ...Full output truncated (21 lines hidden), use '-vv' to show"
 | |
|         },
 | |
|         "traceback": [
 | |
|           {
 | |
|             "path": "tests/verifications/openai_api/test_chat_completion.py",
 | |
|             "lineno": 450,
 | |
|             "message": "AssertionError"
 | |
|           }
 | |
|         ],
 | |
|         "longrepr": "request = <FixtureRequest for <Function test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-get_then_create_event_tool]>>\nopenai_client = <openai.OpenAI object at 0x7f4274027af0>\nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'get_then_create_event_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'date': '2025-03-03', ...ents found for 2025-03-03 at 10:00'}\"}, {'response': \"{'response': 'Successfully created new event with id: e_123'}\"}]}\n\n    @pytest.mark.parametrize(\n        \"case\",\n        chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n        ids=case_id_generator,\n    )\n    def test_chat_non_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n        \"\"\"\n        Test cases for multi-turn tool calling.\n        Tool calls are asserted.\n        Tool responses are provided in the test case.\n        Final response is asserted.\n        \"\"\"\n    \n        test_name_base = get_base_test_name(request)\n        if should_skip_test(verification_config, provider, model, test_name_base):\n            pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n    \n        # Create a copy of the messages list to avoid modifying the original\n        messages = []\n        tools = case[\"input\"][\"tools\"]\n        # Use deepcopy to prevent modification across runs/parametrization\n        expected_results = copy.deepcopy(case[\"expected\"])\n        tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n        input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n    \n        # keep going until either\n        # 1. we have messages to test in multi-turn\n        # 2. no messages but last message is tool response\n        while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n            # do not take new messages if last message is tool response\n            if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n                new_messages = input_messages_turns.pop(0)\n                # Ensure new_messages is a list of message objects\n                if isinstance(new_messages, list):\n                    messages.extend(new_messages)\n                else:\n                    # If it's a single message object, add it directly\n                    messages.append(new_messages)\n    \n            # --- API Call ---\n            response = openai_client.chat.completions.create(\n                model=model,\n                messages=messages,\n                tools=tools,\n                stream=False,\n            )\n    \n            # --- Process Response ---\n            assistant_message = response.choices[0].message\n            messages.append(assistant_message.model_dump(exclude_unset=True))\n    \n            assert assistant_message.role == \"assistant\"\n    \n            # Get the expected result data\n            expected = expected_results.pop(0)\n            num_tool_calls = expected[\"num_tool_calls\"]\n    \n            # --- Assertions based on expected result ---\n            assert len(assistant_message.tool_calls or []) == num_tool_calls, (\n                f\"Expected {num_tool_calls} tool calls, but got {len(assistant_message.tool_calls or [])}\"\n            )\n    \n            if num_tool_calls > 0:\n                tool_call = assistant_message.tool_calls[0]\n                assert tool_call.function.name == expected[\"tool_name\"], (\n                    f\"Expected tool '{expected['tool_name']}', got '{tool_call.function.name}'\"\n                )\n                # Parse the JSON string arguments before comparing\n                actual_arguments = json.loads(tool_call.function.arguments)\n>               assert actual_arguments == expected[\"tool_arguments\"], (\n                    f\"Expected arguments '{expected['tool_arguments']}', got '{actual_arguments}'\"\n                )\nE               AssertionError: Expected arguments '{'name': 'Team Building', 'date': '2025-03-03', 'time': '10:00', 'location': 'Main Conference Room', 'participants': ['Alice', 'Bob', 'Charlie']}', got '{'date': '\"2025-03-03\"', 'location': '\"Main Conference Room\"', 'name': '\"Team Building\"', 'participants': ['Alice', 'Bob', 'Charlie'], 'time': '\"10:00\"'}'\nE               assert {'date': '\"20...harlie'], ...} == {'date': '202...harlie'], ...}\nE                 \nE                 Omitting 1 identical items, use -vv to show\nE                 Differing items:\nE                 {'date': '\"2025-03-03\"'} != {'date': '2025-03-03'}\nE                 {'name': '\"Team Building\"'} != {'name': 'Team Building'}\nE                 {'time': '\"10:00\"'} != {'time': '10:00'}\nE                 {'location': '\"Main Conference Room\"'} != {'location': 'Main Conference Room'}...\nE                 \nE                 ...Full output truncated (21 lines hidden), use '-vv' to show\n\ntests/verifications/openai_api/test_chat_completion.py:450: AssertionError"
 | |
|       },
 | |
|       "teardown": {
 | |
|         "duration": 0.0003328891471028328,
 | |
|         "outcome": "passed"
 | |
|       }
 | |
|     },
 | |
|     {
 | |
|       "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-compare_monthly_expense_tool]",
 | |
|       "lineno": 380,
 | |
|       "outcome": "passed",
 | |
|       "keywords": [
 | |
|         "test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-compare_monthly_expense_tool]",
 | |
|         "parametrize",
 | |
|         "pytestmark",
 | |
|         "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-compare_monthly_expense_tool",
 | |
|         "test_chat_completion.py",
 | |
|         "openai_api",
 | |
|         "verifications",
 | |
|         "tests",
 | |
|         "llama-stack",
 | |
|         ""
 | |
|       ],
 | |
|       "metadata": {
 | |
|         "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
 | |
|         "case_id": "compare_monthly_expense_tool"
 | |
|       },
 | |
|       "setup": {
 | |
|         "duration": 0.07363704219460487,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "call": {
 | |
|         "duration": 4.031332626007497,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "teardown": {
 | |
|         "duration": 0.0002817586064338684,
 | |
|         "outcome": "passed"
 | |
|       }
 | |
|     },
 | |
|     {
 | |
|       "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-text_then_weather_tool]",
 | |
|       "lineno": 471,
 | |
|       "outcome": "failed",
 | |
|       "keywords": [
 | |
|         "test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-text_then_weather_tool]",
 | |
|         "parametrize",
 | |
|         "pytestmark",
 | |
|         "meta-llama/Llama-3.3-70B-Instruct-Turbo-text_then_weather_tool",
 | |
|         "test_chat_completion.py",
 | |
|         "openai_api",
 | |
|         "verifications",
 | |
|         "tests",
 | |
|         "llama-stack",
 | |
|         ""
 | |
|       ],
 | |
|       "metadata": {
 | |
|         "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
 | |
|         "case_id": "text_then_weather_tool"
 | |
|       },
 | |
|       "setup": {
 | |
|         "duration": 0.07673048228025436,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "call": {
 | |
|         "duration": 0.3994998000562191,
 | |
|         "outcome": "failed",
 | |
|         "crash": {
 | |
|           "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
 | |
|           "lineno": 521,
 | |
|           "message": "AssertionError: Expected 0 tool calls, but got 1\nassert 1 == 0\n +  where 1 = len(([{'function': {'arguments': '{\"location\":\"San Francisco, CA\"}', 'name': 'get_weather'}, 'id': 'call_dqcu28a6iyxlobv36c23k0qp', 'type': 'function'}]))"
 | |
|         },
 | |
|         "traceback": [
 | |
|           {
 | |
|             "path": "tests/verifications/openai_api/test_chat_completion.py",
 | |
|             "lineno": 521,
 | |
|             "message": "AssertionError"
 | |
|           }
 | |
|         ],
 | |
|         "longrepr": "request = <FixtureRequest for <Function test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-text_then_weather_tool]>>\nopenai_client = <openai.OpenAI object at 0x7f4274179c30>\nmodel = 'meta-llama/Llama-3.3-70B-Instruct-Turbo', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'text_then_weather_tool', 'expected': [{'answer': ['sol'], 'num_tool_calls': 0}, {'num_tool_calls': 1, 'to...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n    @pytest.mark.parametrize(\n        \"case\",\n        chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n        ids=case_id_generator,\n    )\n    def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n        \"\"\" \"\"\"\n        test_name_base = get_base_test_name(request)\n        if should_skip_test(verification_config, provider, model, test_name_base):\n            pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n    \n        messages = []\n        tools = case[\"input\"][\"tools\"]\n        expected_results = copy.deepcopy(case[\"expected\"])\n        tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n        input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n    \n        while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n            if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n                new_messages = input_messages_turns.pop(0)\n                if isinstance(new_messages, list):\n                    messages.extend(new_messages)\n                else:\n                    messages.append(new_messages)\n    \n            # --- API Call (Streaming) ---\n            stream = openai_client.chat.completions.create(\n                model=model,\n                messages=messages,\n                tools=tools,\n                stream=True,\n            )\n    \n            # --- Process Stream ---\n            accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n    \n            # --- Construct Assistant Message for History ---\n            assistant_message_dict = {\"role\": \"assistant\"}\n            if accumulated_content:\n                assistant_message_dict[\"content\"] = accumulated_content\n            if accumulated_tool_calls:\n                assistant_message_dict[\"tool_calls\"] = accumulated_tool_calls\n    \n            messages.append(assistant_message_dict)\n    \n            # --- Assertions ---\n            expected = expected_results.pop(0)\n            num_tool_calls = expected[\"num_tool_calls\"]\n    \n>           assert len(accumulated_tool_calls or []) == num_tool_calls, (\n                f\"Expected {num_tool_calls} tool calls, but got {len(accumulated_tool_calls or [])}\"\n            )\nE           AssertionError: Expected 0 tool calls, but got 1\nE           assert 1 == 0\nE            +  where 1 = len(([{'function': {'arguments': '{\"location\":\"San Francisco, CA\"}', 'name': 'get_weather'}, 'id': 'call_dqcu28a6iyxlobv36c23k0qp', 'type': 'function'}]))\n\ntests/verifications/openai_api/test_chat_completion.py:521: AssertionError"
 | |
|       },
 | |
|       "teardown": {
 | |
|         "duration": 0.0003687366843223572,
 | |
|         "outcome": "passed"
 | |
|       }
 | |
|     },
 | |
|     {
 | |
|       "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-weather_tool_then_text]",
 | |
|       "lineno": 471,
 | |
|       "outcome": "failed",
 | |
|       "keywords": [
 | |
|         "test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-weather_tool_then_text]",
 | |
|         "parametrize",
 | |
|         "pytestmark",
 | |
|         "meta-llama/Llama-3.3-70B-Instruct-Turbo-weather_tool_then_text",
 | |
|         "test_chat_completion.py",
 | |
|         "openai_api",
 | |
|         "verifications",
 | |
|         "tests",
 | |
|         "llama-stack",
 | |
|         ""
 | |
|       ],
 | |
|       "metadata": {
 | |
|         "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
 | |
|         "case_id": "weather_tool_then_text"
 | |
|       },
 | |
|       "setup": {
 | |
|         "duration": 0.07477510999888182,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "call": {
 | |
|         "duration": 0.918418399989605,
 | |
|         "outcome": "failed",
 | |
|         "crash": {
 | |
|           "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
 | |
|           "lineno": 547,
 | |
|           "message": "AssertionError: Expected content, but none received.\nassert ('' is not None and '' != '')"
 | |
|         },
 | |
|         "traceback": [
 | |
|           {
 | |
|             "path": "tests/verifications/openai_api/test_chat_completion.py",
 | |
|             "lineno": 547,
 | |
|             "message": "AssertionError"
 | |
|           }
 | |
|         ],
 | |
|         "longrepr": "request = <FixtureRequest for <Function test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-weather_tool_then_text]>>\nopenai_client = <openai.OpenAI object at 0x7f427417a2c0>\nmodel = 'meta-llama/Llama-3.3-70B-Instruct-Turbo', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'weather_tool_then_text', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'location': 'San Francisco...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n    @pytest.mark.parametrize(\n        \"case\",\n        chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n        ids=case_id_generator,\n    )\n    def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n        \"\"\" \"\"\"\n        test_name_base = get_base_test_name(request)\n        if should_skip_test(verification_config, provider, model, test_name_base):\n            pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n    \n        messages = []\n        tools = case[\"input\"][\"tools\"]\n        expected_results = copy.deepcopy(case[\"expected\"])\n        tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n        input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n    \n        while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n            if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n                new_messages = input_messages_turns.pop(0)\n                if isinstance(new_messages, list):\n                    messages.extend(new_messages)\n                else:\n                    messages.append(new_messages)\n    \n            # --- API Call (Streaming) ---\n            stream = openai_client.chat.completions.create(\n                model=model,\n                messages=messages,\n                tools=tools,\n                stream=True,\n            )\n    \n            # --- Process Stream ---\n            accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n    \n            # --- Construct Assistant Message for History ---\n            assistant_message_dict = {\"role\": \"assistant\"}\n            if accumulated_content:\n                assistant_message_dict[\"content\"] = accumulated_content\n            if accumulated_tool_calls:\n                assistant_message_dict[\"tool_calls\"] = accumulated_tool_calls\n    \n            messages.append(assistant_message_dict)\n    \n            # --- Assertions ---\n            expected = expected_results.pop(0)\n            num_tool_calls = expected[\"num_tool_calls\"]\n    \n            assert len(accumulated_tool_calls or []) == num_tool_calls, (\n                f\"Expected {num_tool_calls} tool calls, but got {len(accumulated_tool_calls or [])}\"\n            )\n    \n            if num_tool_calls > 0:\n                # Use the first accumulated tool call for assertion\n                tool_call = accumulated_tool_calls[0]\n                assert tool_call[\"function\"][\"name\"] == expected[\"tool_name\"], (\n                    f\"Expected tool '{expected['tool_name']}', got '{tool_call['function']['name']}'\"\n                )\n                # Parse the accumulated arguments string for comparison\n                actual_arguments = json.loads(tool_call[\"function\"][\"arguments\"])\n                assert actual_arguments == expected[\"tool_arguments\"], (\n                    f\"Expected arguments '{expected['tool_arguments']}', got '{actual_arguments}'\"\n                )\n    \n                # Prepare and append the tool response for the next turn\n                tool_response = tool_responses.pop(0)\n                messages.append(\n                    {\n                        \"role\": \"tool\",\n                        \"tool_call_id\": tool_call[\"id\"],\n                        \"content\": tool_response[\"response\"],\n                    }\n                )\n            else:\n>               assert accumulated_content is not None and accumulated_content != \"\", \"Expected content, but none received.\"\nE               AssertionError: Expected content, but none received.\nE               assert ('' is not None and '' != '')\n\ntests/verifications/openai_api/test_chat_completion.py:547: AssertionError"
 | |
|       },
 | |
|       "teardown": {
 | |
|         "duration": 0.00036141276359558105,
 | |
|         "outcome": "passed"
 | |
|       }
 | |
|     },
 | |
|     {
 | |
|       "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-add_product_tool]",
 | |
|       "lineno": 471,
 | |
|       "outcome": "passed",
 | |
|       "keywords": [
 | |
|         "test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-add_product_tool]",
 | |
|         "parametrize",
 | |
|         "pytestmark",
 | |
|         "meta-llama/Llama-3.3-70B-Instruct-Turbo-add_product_tool",
 | |
|         "test_chat_completion.py",
 | |
|         "openai_api",
 | |
|         "verifications",
 | |
|         "tests",
 | |
|         "llama-stack",
 | |
|         ""
 | |
|       ],
 | |
|       "metadata": {
 | |
|         "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
 | |
|         "case_id": "add_product_tool"
 | |
|       },
 | |
|       "setup": {
 | |
|         "duration": 0.07217607088387012,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "call": {
 | |
|         "duration": 1.2676455974578857,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "teardown": {
 | |
|         "duration": 0.00024215038865804672,
 | |
|         "outcome": "passed"
 | |
|       }
 | |
|     },
 | |
|     {
 | |
|       "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-get_then_create_event_tool]",
 | |
|       "lineno": 471,
 | |
|       "outcome": "failed",
 | |
|       "keywords": [
 | |
|         "test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-get_then_create_event_tool]",
 | |
|         "parametrize",
 | |
|         "pytestmark",
 | |
|         "meta-llama/Llama-3.3-70B-Instruct-Turbo-get_then_create_event_tool",
 | |
|         "test_chat_completion.py",
 | |
|         "openai_api",
 | |
|         "verifications",
 | |
|         "tests",
 | |
|         "llama-stack",
 | |
|         ""
 | |
|       ],
 | |
|       "metadata": {
 | |
|         "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
 | |
|         "case_id": "get_then_create_event_tool"
 | |
|       },
 | |
|       "setup": {
 | |
|         "duration": 0.0713065592572093,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "call": {
 | |
|         "duration": 1.0453352769836783,
 | |
|         "outcome": "failed",
 | |
|         "crash": {
 | |
|           "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
 | |
|           "lineno": 547,
 | |
|           "message": "AssertionError: Expected content, but none received.\nassert ('' is not None and '' != '')"
 | |
|         },
 | |
|         "traceback": [
 | |
|           {
 | |
|             "path": "tests/verifications/openai_api/test_chat_completion.py",
 | |
|             "lineno": 547,
 | |
|             "message": "AssertionError"
 | |
|           }
 | |
|         ],
 | |
|         "longrepr": "request = <FixtureRequest for <Function test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-get_then_create_event_tool]>>\nopenai_client = <openai.OpenAI object at 0x7f427415e0b0>\nmodel = 'meta-llama/Llama-3.3-70B-Instruct-Turbo', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'get_then_create_event_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'date': '2025-03-03', ...ents found for 2025-03-03 at 10:00'}\"}, {'response': \"{'response': 'Successfully created new event with id: e_123'}\"}]}\n\n    @pytest.mark.parametrize(\n        \"case\",\n        chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n        ids=case_id_generator,\n    )\n    def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n        \"\"\" \"\"\"\n        test_name_base = get_base_test_name(request)\n        if should_skip_test(verification_config, provider, model, test_name_base):\n            pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n    \n        messages = []\n        tools = case[\"input\"][\"tools\"]\n        expected_results = copy.deepcopy(case[\"expected\"])\n        tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n        input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n    \n        while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n            if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n                new_messages = input_messages_turns.pop(0)\n                if isinstance(new_messages, list):\n                    messages.extend(new_messages)\n                else:\n                    messages.append(new_messages)\n    \n            # --- API Call (Streaming) ---\n            stream = openai_client.chat.completions.create(\n                model=model,\n                messages=messages,\n                tools=tools,\n                stream=True,\n            )\n    \n            # --- Process Stream ---\n            accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n    \n            # --- Construct Assistant Message for History ---\n            assistant_message_dict = {\"role\": \"assistant\"}\n            if accumulated_content:\n                assistant_message_dict[\"content\"] = accumulated_content\n            if accumulated_tool_calls:\n                assistant_message_dict[\"tool_calls\"] = accumulated_tool_calls\n    \n            messages.append(assistant_message_dict)\n    \n            # --- Assertions ---\n            expected = expected_results.pop(0)\n            num_tool_calls = expected[\"num_tool_calls\"]\n    \n            assert len(accumulated_tool_calls or []) == num_tool_calls, (\n                f\"Expected {num_tool_calls} tool calls, but got {len(accumulated_tool_calls or [])}\"\n            )\n    \n            if num_tool_calls > 0:\n                # Use the first accumulated tool call for assertion\n                tool_call = accumulated_tool_calls[0]\n                assert tool_call[\"function\"][\"name\"] == expected[\"tool_name\"], (\n                    f\"Expected tool '{expected['tool_name']}', got '{tool_call['function']['name']}'\"\n                )\n                # Parse the accumulated arguments string for comparison\n                actual_arguments = json.loads(tool_call[\"function\"][\"arguments\"])\n                assert actual_arguments == expected[\"tool_arguments\"], (\n                    f\"Expected arguments '{expected['tool_arguments']}', got '{actual_arguments}'\"\n                )\n    \n                # Prepare and append the tool response for the next turn\n                tool_response = tool_responses.pop(0)\n                messages.append(\n                    {\n                        \"role\": \"tool\",\n                        \"tool_call_id\": tool_call[\"id\"],\n                        \"content\": tool_response[\"response\"],\n                    }\n                )\n            else:\n>               assert accumulated_content is not None and accumulated_content != \"\", \"Expected content, but none received.\"\nE               AssertionError: Expected content, but none received.\nE               assert ('' is not None and '' != '')\n\ntests/verifications/openai_api/test_chat_completion.py:547: AssertionError"
 | |
|       },
 | |
|       "teardown": {
 | |
|         "duration": 0.00030668359249830246,
 | |
|         "outcome": "passed"
 | |
|       }
 | |
|     },
 | |
|     {
 | |
|       "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-compare_monthly_expense_tool]",
 | |
|       "lineno": 471,
 | |
|       "outcome": "failed",
 | |
|       "keywords": [
 | |
|         "test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-compare_monthly_expense_tool]",
 | |
|         "parametrize",
 | |
|         "pytestmark",
 | |
|         "meta-llama/Llama-3.3-70B-Instruct-Turbo-compare_monthly_expense_tool",
 | |
|         "test_chat_completion.py",
 | |
|         "openai_api",
 | |
|         "verifications",
 | |
|         "tests",
 | |
|         "llama-stack",
 | |
|         ""
 | |
|       ],
 | |
|       "metadata": {
 | |
|         "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
 | |
|         "case_id": "compare_monthly_expense_tool"
 | |
|       },
 | |
|       "setup": {
 | |
|         "duration": 0.07108221855014563,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "call": {
 | |
|         "duration": 1.034472893923521,
 | |
|         "outcome": "failed",
 | |
|         "crash": {
 | |
|           "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
 | |
|           "lineno": 547,
 | |
|           "message": "AssertionError: Expected content, but none received.\nassert ('' is not None and '' != '')"
 | |
|         },
 | |
|         "traceback": [
 | |
|           {
 | |
|             "path": "tests/verifications/openai_api/test_chat_completion.py",
 | |
|             "lineno": 547,
 | |
|             "message": "AssertionError"
 | |
|           }
 | |
|         ],
 | |
|         "longrepr": "request = <FixtureRequest for <Function test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-compare_monthly_expense_tool]>>\nopenai_client = <openai.OpenAI object at 0x7f42743b7a90>\nmodel = 'meta-llama/Llama-3.3-70B-Instruct-Turbo', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'compare_monthly_expense_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'month': 1, 'year': ... 'Total expenses for January 2025: $1000'}\"}, {'response': \"{'response': 'Total expenses for February 2024: $2000'}\"}]}\n\n    @pytest.mark.parametrize(\n        \"case\",\n        chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n        ids=case_id_generator,\n    )\n    def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n        \"\"\" \"\"\"\n        test_name_base = get_base_test_name(request)\n        if should_skip_test(verification_config, provider, model, test_name_base):\n            pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n    \n        messages = []\n        tools = case[\"input\"][\"tools\"]\n        expected_results = copy.deepcopy(case[\"expected\"])\n        tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n        input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n    \n        while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n            if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n                new_messages = input_messages_turns.pop(0)\n                if isinstance(new_messages, list):\n                    messages.extend(new_messages)\n                else:\n                    messages.append(new_messages)\n    \n            # --- API Call (Streaming) ---\n            stream = openai_client.chat.completions.create(\n                model=model,\n                messages=messages,\n                tools=tools,\n                stream=True,\n            )\n    \n            # --- Process Stream ---\n            accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n    \n            # --- Construct Assistant Message for History ---\n            assistant_message_dict = {\"role\": \"assistant\"}\n            if accumulated_content:\n                assistant_message_dict[\"content\"] = accumulated_content\n            if accumulated_tool_calls:\n                assistant_message_dict[\"tool_calls\"] = accumulated_tool_calls\n    \n            messages.append(assistant_message_dict)\n    \n            # --- Assertions ---\n            expected = expected_results.pop(0)\n            num_tool_calls = expected[\"num_tool_calls\"]\n    \n            assert len(accumulated_tool_calls or []) == num_tool_calls, (\n                f\"Expected {num_tool_calls} tool calls, but got {len(accumulated_tool_calls or [])}\"\n            )\n    \n            if num_tool_calls > 0:\n                # Use the first accumulated tool call for assertion\n                tool_call = accumulated_tool_calls[0]\n                assert tool_call[\"function\"][\"name\"] == expected[\"tool_name\"], (\n                    f\"Expected tool '{expected['tool_name']}', got '{tool_call['function']['name']}'\"\n                )\n                # Parse the accumulated arguments string for comparison\n                actual_arguments = json.loads(tool_call[\"function\"][\"arguments\"])\n                assert actual_arguments == expected[\"tool_arguments\"], (\n                    f\"Expected arguments '{expected['tool_arguments']}', got '{actual_arguments}'\"\n                )\n    \n                # Prepare and append the tool response for the next turn\n                tool_response = tool_responses.pop(0)\n                messages.append(\n                    {\n                        \"role\": \"tool\",\n                        \"tool_call_id\": tool_call[\"id\"],\n                        \"content\": tool_response[\"response\"],\n                    }\n                )\n            else:\n>               assert accumulated_content is not None and accumulated_content != \"\", \"Expected content, but none received.\"\nE               AssertionError: Expected content, but none received.\nE               assert ('' is not None and '' != '')\n\ntests/verifications/openai_api/test_chat_completion.py:547: AssertionError"
 | |
|       },
 | |
|       "teardown": {
 | |
|         "duration": 0.00035398639738559723,
 | |
|         "outcome": "passed"
 | |
|       }
 | |
|     },
 | |
|     {
 | |
|       "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-text_then_weather_tool]",
 | |
|       "lineno": 471,
 | |
|       "outcome": "failed",
 | |
|       "keywords": [
 | |
|         "test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-text_then_weather_tool]",
 | |
|         "parametrize",
 | |
|         "pytestmark",
 | |
|         "meta-llama/Llama-4-Scout-17B-16E-Instruct-text_then_weather_tool",
 | |
|         "test_chat_completion.py",
 | |
|         "openai_api",
 | |
|         "verifications",
 | |
|         "tests",
 | |
|         "llama-stack",
 | |
|         ""
 | |
|       ],
 | |
|       "metadata": {
 | |
|         "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
 | |
|         "case_id": "text_then_weather_tool"
 | |
|       },
 | |
|       "setup": {
 | |
|         "duration": 0.07186305243521929,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "call": {
 | |
|         "duration": 1.8766405330970883,
 | |
|         "outcome": "failed",
 | |
|         "crash": {
 | |
|           "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
 | |
|           "lineno": 688,
 | |
|           "message": "IndexError: list index out of range"
 | |
|         },
 | |
|         "traceback": [
 | |
|           {
 | |
|             "path": "tests/verifications/openai_api/test_chat_completion.py",
 | |
|             "lineno": 506,
 | |
|             "message": ""
 | |
|           },
 | |
|           {
 | |
|             "path": "tests/verifications/openai_api/test_chat_completion.py",
 | |
|             "lineno": 688,
 | |
|             "message": "IndexError"
 | |
|           }
 | |
|         ],
 | |
|         "longrepr": "request = <FixtureRequest for <Function test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-text_then_weather_tool]>>\nopenai_client = <openai.OpenAI object at 0x7f42743e54b0>\nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'text_then_weather_tool', 'expected': [{'answer': ['sol'], 'num_tool_calls': 0}, {'num_tool_calls': 1, 'to...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n    @pytest.mark.parametrize(\n        \"case\",\n        chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n        ids=case_id_generator,\n    )\n    def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n        \"\"\" \"\"\"\n        test_name_base = get_base_test_name(request)\n        if should_skip_test(verification_config, provider, model, test_name_base):\n            pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n    \n        messages = []\n        tools = case[\"input\"][\"tools\"]\n        expected_results = copy.deepcopy(case[\"expected\"])\n        tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n        input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n    \n        while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n            if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n                new_messages = input_messages_turns.pop(0)\n                if isinstance(new_messages, list):\n                    messages.extend(new_messages)\n                else:\n                    messages.append(new_messages)\n    \n            # --- API Call (Streaming) ---\n            stream = openai_client.chat.completions.create(\n                model=model,\n                messages=messages,\n                tools=tools,\n                stream=True,\n            )\n    \n            # --- Process Stream ---\n>           accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n\ntests/verifications/openai_api/test_chat_completion.py:506: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\nstream = <openai.Stream object at 0x7f42742f0820>\n\n    def _accumulate_streaming_tool_calls(stream):\n        \"\"\"Accumulates tool calls and content from a streaming ChatCompletion response.\"\"\"\n        tool_calls_buffer = {}\n        current_id = None\n        full_content = \"\"  # Initialize content accumulator\n        # Process streaming chunks\n        for chunk in stream:\n>           choice = chunk.choices[0]\nE           IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:688: IndexError"
 | |
|       },
 | |
|       "teardown": {
 | |
|         "duration": 0.0003088880330324173,
 | |
|         "outcome": "passed"
 | |
|       }
 | |
|     },
 | |
|     {
 | |
|       "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-weather_tool_then_text]",
 | |
|       "lineno": 471,
 | |
|       "outcome": "failed",
 | |
|       "keywords": [
 | |
|         "test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-weather_tool_then_text]",
 | |
|         "parametrize",
 | |
|         "pytestmark",
 | |
|         "meta-llama/Llama-4-Scout-17B-16E-Instruct-weather_tool_then_text",
 | |
|         "test_chat_completion.py",
 | |
|         "openai_api",
 | |
|         "verifications",
 | |
|         "tests",
 | |
|         "llama-stack",
 | |
|         ""
 | |
|       ],
 | |
|       "metadata": {
 | |
|         "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
 | |
|         "case_id": "weather_tool_then_text"
 | |
|       },
 | |
|       "setup": {
 | |
|         "duration": 0.0846314700320363,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "call": {
 | |
|         "duration": 0.40889575984328985,
 | |
|         "outcome": "failed",
 | |
|         "crash": {
 | |
|           "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
 | |
|           "lineno": 688,
 | |
|           "message": "IndexError: list index out of range"
 | |
|         },
 | |
|         "traceback": [
 | |
|           {
 | |
|             "path": "tests/verifications/openai_api/test_chat_completion.py",
 | |
|             "lineno": 506,
 | |
|             "message": ""
 | |
|           },
 | |
|           {
 | |
|             "path": "tests/verifications/openai_api/test_chat_completion.py",
 | |
|             "lineno": 688,
 | |
|             "message": "IndexError"
 | |
|           }
 | |
|         ],
 | |
|         "longrepr": "request = <FixtureRequest for <Function test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-weather_tool_then_text]>>\nopenai_client = <openai.OpenAI object at 0x7f42742f2bc0>\nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'weather_tool_then_text', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'location': 'San Francisco...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n    @pytest.mark.parametrize(\n        \"case\",\n        chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n        ids=case_id_generator,\n    )\n    def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n        \"\"\" \"\"\"\n        test_name_base = get_base_test_name(request)\n        if should_skip_test(verification_config, provider, model, test_name_base):\n            pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n    \n        messages = []\n        tools = case[\"input\"][\"tools\"]\n        expected_results = copy.deepcopy(case[\"expected\"])\n        tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n        input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n    \n        while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n            if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n                new_messages = input_messages_turns.pop(0)\n                if isinstance(new_messages, list):\n                    messages.extend(new_messages)\n                else:\n                    messages.append(new_messages)\n    \n            # --- API Call (Streaming) ---\n            stream = openai_client.chat.completions.create(\n                model=model,\n                messages=messages,\n                tools=tools,\n                stream=True,\n            )\n    \n            # --- Process Stream ---\n>           accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n\ntests/verifications/openai_api/test_chat_completion.py:506: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\nstream = <openai.Stream object at 0x7f42740fd270>\n\n    def _accumulate_streaming_tool_calls(stream):\n        \"\"\"Accumulates tool calls and content from a streaming ChatCompletion response.\"\"\"\n        tool_calls_buffer = {}\n        current_id = None\n        full_content = \"\"  # Initialize content accumulator\n        # Process streaming chunks\n        for chunk in stream:\n>           choice = chunk.choices[0]\nE           IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:688: IndexError"
 | |
|       },
 | |
|       "teardown": {
 | |
|         "duration": 0.0003652172163128853,
 | |
|         "outcome": "passed"
 | |
|       }
 | |
|     },
 | |
|     {
 | |
|       "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-add_product_tool]",
 | |
|       "lineno": 471,
 | |
|       "outcome": "failed",
 | |
|       "keywords": [
 | |
|         "test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-add_product_tool]",
 | |
|         "parametrize",
 | |
|         "pytestmark",
 | |
|         "meta-llama/Llama-4-Scout-17B-16E-Instruct-add_product_tool",
 | |
|         "test_chat_completion.py",
 | |
|         "openai_api",
 | |
|         "verifications",
 | |
|         "tests",
 | |
|         "llama-stack",
 | |
|         ""
 | |
|       ],
 | |
|       "metadata": {
 | |
|         "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
 | |
|         "case_id": "add_product_tool"
 | |
|       },
 | |
|       "setup": {
 | |
|         "duration": 0.07273881137371063,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "call": {
 | |
|         "duration": 2.251293654553592,
 | |
|         "outcome": "failed",
 | |
|         "crash": {
 | |
|           "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
 | |
|           "lineno": 688,
 | |
|           "message": "IndexError: list index out of range"
 | |
|         },
 | |
|         "traceback": [
 | |
|           {
 | |
|             "path": "tests/verifications/openai_api/test_chat_completion.py",
 | |
|             "lineno": 506,
 | |
|             "message": ""
 | |
|           },
 | |
|           {
 | |
|             "path": "tests/verifications/openai_api/test_chat_completion.py",
 | |
|             "lineno": 688,
 | |
|             "message": "IndexError"
 | |
|           }
 | |
|         ],
 | |
|         "longrepr": "request = <FixtureRequest for <Function test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-add_product_tool]>>\nopenai_client = <openai.OpenAI object at 0x7f427420eda0>\nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'add_product_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'inStock': True, 'name': 'Widget...}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': 'Successfully added product with id: 123'}\"}]}\n\n    @pytest.mark.parametrize(\n        \"case\",\n        chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n        ids=case_id_generator,\n    )\n    def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n        \"\"\" \"\"\"\n        test_name_base = get_base_test_name(request)\n        if should_skip_test(verification_config, provider, model, test_name_base):\n            pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n    \n        messages = []\n        tools = case[\"input\"][\"tools\"]\n        expected_results = copy.deepcopy(case[\"expected\"])\n        tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n        input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n    \n        while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n            if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n                new_messages = input_messages_turns.pop(0)\n                if isinstance(new_messages, list):\n                    messages.extend(new_messages)\n                else:\n                    messages.append(new_messages)\n    \n            # --- API Call (Streaming) ---\n            stream = openai_client.chat.completions.create(\n                model=model,\n                messages=messages,\n                tools=tools,\n                stream=True,\n            )\n    \n            # --- Process Stream ---\n>           accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n\ntests/verifications/openai_api/test_chat_completion.py:506: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\nstream = <openai.Stream object at 0x7f4273f940a0>\n\n    def _accumulate_streaming_tool_calls(stream):\n        \"\"\"Accumulates tool calls and content from a streaming ChatCompletion response.\"\"\"\n        tool_calls_buffer = {}\n        current_id = None\n        full_content = \"\"  # Initialize content accumulator\n        # Process streaming chunks\n        for chunk in stream:\n>           choice = chunk.choices[0]\nE           IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:688: IndexError"
 | |
|       },
 | |
|       "teardown": {
 | |
|         "duration": 0.00030664633959531784,
 | |
|         "outcome": "passed"
 | |
|       }
 | |
|     },
 | |
|     {
 | |
|       "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-get_then_create_event_tool]",
 | |
|       "lineno": 471,
 | |
|       "outcome": "failed",
 | |
|       "keywords": [
 | |
|         "test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-get_then_create_event_tool]",
 | |
|         "parametrize",
 | |
|         "pytestmark",
 | |
|         "meta-llama/Llama-4-Scout-17B-16E-Instruct-get_then_create_event_tool",
 | |
|         "test_chat_completion.py",
 | |
|         "openai_api",
 | |
|         "verifications",
 | |
|         "tests",
 | |
|         "llama-stack",
 | |
|         ""
 | |
|       ],
 | |
|       "metadata": {
 | |
|         "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
 | |
|         "case_id": "get_then_create_event_tool"
 | |
|       },
 | |
|       "setup": {
 | |
|         "duration": 0.071181770414114,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "call": {
 | |
|         "duration": 0.5708655547350645,
 | |
|         "outcome": "failed",
 | |
|         "crash": {
 | |
|           "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
 | |
|           "lineno": 688,
 | |
|           "message": "IndexError: list index out of range"
 | |
|         },
 | |
|         "traceback": [
 | |
|           {
 | |
|             "path": "tests/verifications/openai_api/test_chat_completion.py",
 | |
|             "lineno": 506,
 | |
|             "message": ""
 | |
|           },
 | |
|           {
 | |
|             "path": "tests/verifications/openai_api/test_chat_completion.py",
 | |
|             "lineno": 688,
 | |
|             "message": "IndexError"
 | |
|           }
 | |
|         ],
 | |
|         "longrepr": "request = <FixtureRequest for <Function test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-get_then_create_event_tool]>>\nopenai_client = <openai.OpenAI object at 0x7f42740fc910>\nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'get_then_create_event_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'date': '2025-03-03', ...ents found for 2025-03-03 at 10:00'}\"}, {'response': \"{'response': 'Successfully created new event with id: e_123'}\"}]}\n\n    @pytest.mark.parametrize(\n        \"case\",\n        chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n        ids=case_id_generator,\n    )\n    def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n        \"\"\" \"\"\"\n        test_name_base = get_base_test_name(request)\n        if should_skip_test(verification_config, provider, model, test_name_base):\n            pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n    \n        messages = []\n        tools = case[\"input\"][\"tools\"]\n        expected_results = copy.deepcopy(case[\"expected\"])\n        tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n        input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n    \n        while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n            if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n                new_messages = input_messages_turns.pop(0)\n                if isinstance(new_messages, list):\n                    messages.extend(new_messages)\n                else:\n                    messages.append(new_messages)\n    \n            # --- API Call (Streaming) ---\n            stream = openai_client.chat.completions.create(\n                model=model,\n                messages=messages,\n                tools=tools,\n                stream=True,\n            )\n    \n            # --- Process Stream ---\n>           accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n\ntests/verifications/openai_api/test_chat_completion.py:506: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\nstream = <openai.Stream object at 0x7f4273f82b90>\n\n    def _accumulate_streaming_tool_calls(stream):\n        \"\"\"Accumulates tool calls and content from a streaming ChatCompletion response.\"\"\"\n        tool_calls_buffer = {}\n        current_id = None\n        full_content = \"\"  # Initialize content accumulator\n        # Process streaming chunks\n        for chunk in stream:\n>           choice = chunk.choices[0]\nE           IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:688: IndexError"
 | |
|       },
 | |
|       "teardown": {
 | |
|         "duration": 0.00036500580608844757,
 | |
|         "outcome": "passed"
 | |
|       }
 | |
|     },
 | |
|     {
 | |
|       "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-compare_monthly_expense_tool]",
 | |
|       "lineno": 471,
 | |
|       "outcome": "failed",
 | |
|       "keywords": [
 | |
|         "test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-compare_monthly_expense_tool]",
 | |
|         "parametrize",
 | |
|         "pytestmark",
 | |
|         "meta-llama/Llama-4-Scout-17B-16E-Instruct-compare_monthly_expense_tool",
 | |
|         "test_chat_completion.py",
 | |
|         "openai_api",
 | |
|         "verifications",
 | |
|         "tests",
 | |
|         "llama-stack",
 | |
|         ""
 | |
|       ],
 | |
|       "metadata": {
 | |
|         "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
 | |
|         "case_id": "compare_monthly_expense_tool"
 | |
|       },
 | |
|       "setup": {
 | |
|         "duration": 0.06934114638715982,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "call": {
 | |
|         "duration": 0.5055103581398726,
 | |
|         "outcome": "failed",
 | |
|         "crash": {
 | |
|           "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
 | |
|           "lineno": 688,
 | |
|           "message": "IndexError: list index out of range"
 | |
|         },
 | |
|         "traceback": [
 | |
|           {
 | |
|             "path": "tests/verifications/openai_api/test_chat_completion.py",
 | |
|             "lineno": 506,
 | |
|             "message": ""
 | |
|           },
 | |
|           {
 | |
|             "path": "tests/verifications/openai_api/test_chat_completion.py",
 | |
|             "lineno": 688,
 | |
|             "message": "IndexError"
 | |
|           }
 | |
|         ],
 | |
|         "longrepr": "request = <FixtureRequest for <Function test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-compare_monthly_expense_tool]>>\nopenai_client = <openai.OpenAI object at 0x7f427410dea0>\nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'compare_monthly_expense_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'month': 1, 'year': ... 'Total expenses for January 2025: $1000'}\"}, {'response': \"{'response': 'Total expenses for February 2024: $2000'}\"}]}\n\n    @pytest.mark.parametrize(\n        \"case\",\n        chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n        ids=case_id_generator,\n    )\n    def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n        \"\"\" \"\"\"\n        test_name_base = get_base_test_name(request)\n        if should_skip_test(verification_config, provider, model, test_name_base):\n            pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n    \n        messages = []\n        tools = case[\"input\"][\"tools\"]\n        expected_results = copy.deepcopy(case[\"expected\"])\n        tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n        input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n    \n        while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n            if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n                new_messages = input_messages_turns.pop(0)\n                if isinstance(new_messages, list):\n                    messages.extend(new_messages)\n                else:\n                    messages.append(new_messages)\n    \n            # --- API Call (Streaming) ---\n            stream = openai_client.chat.completions.create(\n                model=model,\n                messages=messages,\n                tools=tools,\n                stream=True,\n            )\n    \n            # --- Process Stream ---\n>           accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n\ntests/verifications/openai_api/test_chat_completion.py:506: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\nstream = <openai.Stream object at 0x7f427430c580>\n\n    def _accumulate_streaming_tool_calls(stream):\n        \"\"\"Accumulates tool calls and content from a streaming ChatCompletion response.\"\"\"\n        tool_calls_buffer = {}\n        current_id = None\n        full_content = \"\"  # Initialize content accumulator\n        # Process streaming chunks\n        for chunk in stream:\n>           choice = chunk.choices[0]\nE           IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:688: IndexError"
 | |
|       },
 | |
|       "teardown": {
 | |
|         "duration": 0.00035354867577552795,
 | |
|         "outcome": "passed"
 | |
|       }
 | |
|     },
 | |
|     {
 | |
|       "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-text_then_weather_tool]",
 | |
|       "lineno": 471,
 | |
|       "outcome": "failed",
 | |
|       "keywords": [
 | |
|         "test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-text_then_weather_tool]",
 | |
|         "parametrize",
 | |
|         "pytestmark",
 | |
|         "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-text_then_weather_tool",
 | |
|         "test_chat_completion.py",
 | |
|         "openai_api",
 | |
|         "verifications",
 | |
|         "tests",
 | |
|         "llama-stack",
 | |
|         ""
 | |
|       ],
 | |
|       "metadata": {
 | |
|         "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
 | |
|         "case_id": "text_then_weather_tool"
 | |
|       },
 | |
|       "setup": {
 | |
|         "duration": 0.07129869516938925,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "call": {
 | |
|         "duration": 1.5799349313601851,
 | |
|         "outcome": "failed",
 | |
|         "crash": {
 | |
|           "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
 | |
|           "lineno": 688,
 | |
|           "message": "IndexError: list index out of range"
 | |
|         },
 | |
|         "traceback": [
 | |
|           {
 | |
|             "path": "tests/verifications/openai_api/test_chat_completion.py",
 | |
|             "lineno": 506,
 | |
|             "message": ""
 | |
|           },
 | |
|           {
 | |
|             "path": "tests/verifications/openai_api/test_chat_completion.py",
 | |
|             "lineno": 688,
 | |
|             "message": "IndexError"
 | |
|           }
 | |
|         ],
 | |
|         "longrepr": "request = <FixtureRequest for <Function test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-text_then_weather_tool]>>\nopenai_client = <openai.OpenAI object at 0x7f427410c580>\nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'text_then_weather_tool', 'expected': [{'answer': ['sol'], 'num_tool_calls': 0}, {'num_tool_calls': 1, 'to...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n    @pytest.mark.parametrize(\n        \"case\",\n        chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n        ids=case_id_generator,\n    )\n    def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n        \"\"\" \"\"\"\n        test_name_base = get_base_test_name(request)\n        if should_skip_test(verification_config, provider, model, test_name_base):\n            pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n    \n        messages = []\n        tools = case[\"input\"][\"tools\"]\n        expected_results = copy.deepcopy(case[\"expected\"])\n        tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n        input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n    \n        while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n            if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n                new_messages = input_messages_turns.pop(0)\n                if isinstance(new_messages, list):\n                    messages.extend(new_messages)\n                else:\n                    messages.append(new_messages)\n    \n            # --- API Call (Streaming) ---\n            stream = openai_client.chat.completions.create(\n                model=model,\n                messages=messages,\n                tools=tools,\n                stream=True,\n            )\n    \n            # --- Process Stream ---\n>           accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n\ntests/verifications/openai_api/test_chat_completion.py:506: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\nstream = <openai.Stream object at 0x7f427417b3a0>\n\n    def _accumulate_streaming_tool_calls(stream):\n        \"\"\"Accumulates tool calls and content from a streaming ChatCompletion response.\"\"\"\n        tool_calls_buffer = {}\n        current_id = None\n        full_content = \"\"  # Initialize content accumulator\n        # Process streaming chunks\n        for chunk in stream:\n>           choice = chunk.choices[0]\nE           IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:688: IndexError"
 | |
|       },
 | |
|       "teardown": {
 | |
|         "duration": 0.00033699069172143936,
 | |
|         "outcome": "passed"
 | |
|       }
 | |
|     },
 | |
|     {
 | |
|       "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-weather_tool_then_text]",
 | |
|       "lineno": 471,
 | |
|       "outcome": "failed",
 | |
|       "keywords": [
 | |
|         "test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-weather_tool_then_text]",
 | |
|         "parametrize",
 | |
|         "pytestmark",
 | |
|         "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-weather_tool_then_text",
 | |
|         "test_chat_completion.py",
 | |
|         "openai_api",
 | |
|         "verifications",
 | |
|         "tests",
 | |
|         "llama-stack",
 | |
|         ""
 | |
|       ],
 | |
|       "metadata": {
 | |
|         "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
 | |
|         "case_id": "weather_tool_then_text"
 | |
|       },
 | |
|       "setup": {
 | |
|         "duration": 0.07074506860226393,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "call": {
 | |
|         "duration": 0.5245106862857938,
 | |
|         "outcome": "failed",
 | |
|         "crash": {
 | |
|           "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
 | |
|           "lineno": 688,
 | |
|           "message": "IndexError: list index out of range"
 | |
|         },
 | |
|         "traceback": [
 | |
|           {
 | |
|             "path": "tests/verifications/openai_api/test_chat_completion.py",
 | |
|             "lineno": 506,
 | |
|             "message": ""
 | |
|           },
 | |
|           {
 | |
|             "path": "tests/verifications/openai_api/test_chat_completion.py",
 | |
|             "lineno": 688,
 | |
|             "message": "IndexError"
 | |
|           }
 | |
|         ],
 | |
|         "longrepr": "request = <FixtureRequest for <Function test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-weather_tool_then_text]>>\nopenai_client = <openai.OpenAI object at 0x7f427430e590>\nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'weather_tool_then_text', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'location': 'San Francisco...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n    @pytest.mark.parametrize(\n        \"case\",\n        chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n        ids=case_id_generator,\n    )\n    def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n        \"\"\" \"\"\"\n        test_name_base = get_base_test_name(request)\n        if should_skip_test(verification_config, provider, model, test_name_base):\n            pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n    \n        messages = []\n        tools = case[\"input\"][\"tools\"]\n        expected_results = copy.deepcopy(case[\"expected\"])\n        tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n        input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n    \n        while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n            if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n                new_messages = input_messages_turns.pop(0)\n                if isinstance(new_messages, list):\n                    messages.extend(new_messages)\n                else:\n                    messages.append(new_messages)\n    \n            # --- API Call (Streaming) ---\n            stream = openai_client.chat.completions.create(\n                model=model,\n                messages=messages,\n                tools=tools,\n                stream=True,\n            )\n    \n            # --- Process Stream ---\n>           accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n\ntests/verifications/openai_api/test_chat_completion.py:506: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\nstream = <openai.Stream object at 0x7f4274268a90>\n\n    def _accumulate_streaming_tool_calls(stream):\n        \"\"\"Accumulates tool calls and content from a streaming ChatCompletion response.\"\"\"\n        tool_calls_buffer = {}\n        current_id = None\n        full_content = \"\"  # Initialize content accumulator\n        # Process streaming chunks\n        for chunk in stream:\n>           choice = chunk.choices[0]\nE           IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:688: IndexError"
 | |
|       },
 | |
|       "teardown": {
 | |
|         "duration": 0.00042015407234430313,
 | |
|         "outcome": "passed"
 | |
|       }
 | |
|     },
 | |
|     {
 | |
|       "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-add_product_tool]",
 | |
|       "lineno": 471,
 | |
|       "outcome": "failed",
 | |
|       "keywords": [
 | |
|         "test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-add_product_tool]",
 | |
|         "parametrize",
 | |
|         "pytestmark",
 | |
|         "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-add_product_tool",
 | |
|         "test_chat_completion.py",
 | |
|         "openai_api",
 | |
|         "verifications",
 | |
|         "tests",
 | |
|         "llama-stack",
 | |
|         ""
 | |
|       ],
 | |
|       "metadata": {
 | |
|         "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
 | |
|         "case_id": "add_product_tool"
 | |
|       },
 | |
|       "setup": {
 | |
|         "duration": 0.07020766660571098,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "call": {
 | |
|         "duration": 0.6389470677822828,
 | |
|         "outcome": "failed",
 | |
|         "crash": {
 | |
|           "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
 | |
|           "lineno": 688,
 | |
|           "message": "IndexError: list index out of range"
 | |
|         },
 | |
|         "traceback": [
 | |
|           {
 | |
|             "path": "tests/verifications/openai_api/test_chat_completion.py",
 | |
|             "lineno": 506,
 | |
|             "message": ""
 | |
|           },
 | |
|           {
 | |
|             "path": "tests/verifications/openai_api/test_chat_completion.py",
 | |
|             "lineno": 688,
 | |
|             "message": "IndexError"
 | |
|           }
 | |
|         ],
 | |
|         "longrepr": "request = <FixtureRequest for <Function test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-add_product_tool]>>\nopenai_client = <openai.OpenAI object at 0x7f42741784f0>\nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'add_product_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'inStock': True, 'name': 'Widget...}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': 'Successfully added product with id: 123'}\"}]}\n\n    @pytest.mark.parametrize(\n        \"case\",\n        chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n        ids=case_id_generator,\n    )\n    def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n        \"\"\" \"\"\"\n        test_name_base = get_base_test_name(request)\n        if should_skip_test(verification_config, provider, model, test_name_base):\n            pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n    \n        messages = []\n        tools = case[\"input\"][\"tools\"]\n        expected_results = copy.deepcopy(case[\"expected\"])\n        tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n        input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n    \n        while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n            if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n                new_messages = input_messages_turns.pop(0)\n                if isinstance(new_messages, list):\n                    messages.extend(new_messages)\n                else:\n                    messages.append(new_messages)\n    \n            # --- API Call (Streaming) ---\n            stream = openai_client.chat.completions.create(\n                model=model,\n                messages=messages,\n                tools=tools,\n                stream=True,\n            )\n    \n            # --- Process Stream ---\n>           accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n\ntests/verifications/openai_api/test_chat_completion.py:506: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\nstream = <openai.Stream object at 0x7f4274254bb0>\n\n    def _accumulate_streaming_tool_calls(stream):\n        \"\"\"Accumulates tool calls and content from a streaming ChatCompletion response.\"\"\"\n        tool_calls_buffer = {}\n        current_id = None\n        full_content = \"\"  # Initialize content accumulator\n        # Process streaming chunks\n        for chunk in stream:\n>           choice = chunk.choices[0]\nE           IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:688: IndexError"
 | |
|       },
 | |
|       "teardown": {
 | |
|         "duration": 0.00035757478326559067,
 | |
|         "outcome": "passed"
 | |
|       }
 | |
|     },
 | |
|     {
 | |
|       "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-get_then_create_event_tool]",
 | |
|       "lineno": 471,
 | |
|       "outcome": "failed",
 | |
|       "keywords": [
 | |
|         "test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-get_then_create_event_tool]",
 | |
|         "parametrize",
 | |
|         "pytestmark",
 | |
|         "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-get_then_create_event_tool",
 | |
|         "test_chat_completion.py",
 | |
|         "openai_api",
 | |
|         "verifications",
 | |
|         "tests",
 | |
|         "llama-stack",
 | |
|         ""
 | |
|       ],
 | |
|       "metadata": {
 | |
|         "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
 | |
|         "case_id": "get_then_create_event_tool"
 | |
|       },
 | |
|       "setup": {
 | |
|         "duration": 0.07121358439326286,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "call": {
 | |
|         "duration": 0.5222592242062092,
 | |
|         "outcome": "failed",
 | |
|         "crash": {
 | |
|           "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
 | |
|           "lineno": 688,
 | |
|           "message": "IndexError: list index out of range"
 | |
|         },
 | |
|         "traceback": [
 | |
|           {
 | |
|             "path": "tests/verifications/openai_api/test_chat_completion.py",
 | |
|             "lineno": 506,
 | |
|             "message": ""
 | |
|           },
 | |
|           {
 | |
|             "path": "tests/verifications/openai_api/test_chat_completion.py",
 | |
|             "lineno": 688,
 | |
|             "message": "IndexError"
 | |
|           }
 | |
|         ],
 | |
|         "longrepr": "request = <FixtureRequest for <Function test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-get_then_create_event_tool]>>\nopenai_client = <openai.OpenAI object at 0x7f42741e8ca0>\nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'get_then_create_event_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'date': '2025-03-03', ...ents found for 2025-03-03 at 10:00'}\"}, {'response': \"{'response': 'Successfully created new event with id: e_123'}\"}]}\n\n    @pytest.mark.parametrize(\n        \"case\",\n        chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n        ids=case_id_generator,\n    )\n    def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n        \"\"\" \"\"\"\n        test_name_base = get_base_test_name(request)\n        if should_skip_test(verification_config, provider, model, test_name_base):\n            pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n    \n        messages = []\n        tools = case[\"input\"][\"tools\"]\n        expected_results = copy.deepcopy(case[\"expected\"])\n        tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n        input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n    \n        while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n            if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n                new_messages = input_messages_turns.pop(0)\n                if isinstance(new_messages, list):\n                    messages.extend(new_messages)\n                else:\n                    messages.append(new_messages)\n    \n            # --- API Call (Streaming) ---\n            stream = openai_client.chat.completions.create(\n                model=model,\n                messages=messages,\n                tools=tools,\n                stream=True,\n            )\n    \n            # --- Process Stream ---\n>           accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n\ntests/verifications/openai_api/test_chat_completion.py:506: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\nstream = <openai.Stream object at 0x7f427416c6a0>\n\n    def _accumulate_streaming_tool_calls(stream):\n        \"\"\"Accumulates tool calls and content from a streaming ChatCompletion response.\"\"\"\n        tool_calls_buffer = {}\n        current_id = None\n        full_content = \"\"  # Initialize content accumulator\n        # Process streaming chunks\n        for chunk in stream:\n>           choice = chunk.choices[0]\nE           IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:688: IndexError"
 | |
|       },
 | |
|       "teardown": {
 | |
|         "duration": 0.0003436664119362831,
 | |
|         "outcome": "passed"
 | |
|       }
 | |
|     },
 | |
|     {
 | |
|       "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-compare_monthly_expense_tool]",
 | |
|       "lineno": 471,
 | |
|       "outcome": "failed",
 | |
|       "keywords": [
 | |
|         "test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-compare_monthly_expense_tool]",
 | |
|         "parametrize",
 | |
|         "pytestmark",
 | |
|         "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-compare_monthly_expense_tool",
 | |
|         "test_chat_completion.py",
 | |
|         "openai_api",
 | |
|         "verifications",
 | |
|         "tests",
 | |
|         "llama-stack",
 | |
|         ""
 | |
|       ],
 | |
|       "metadata": {
 | |
|         "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
 | |
|         "case_id": "compare_monthly_expense_tool"
 | |
|       },
 | |
|       "setup": {
 | |
|         "duration": 0.07017400953918695,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "call": {
 | |
|         "duration": 1.7245550760999322,
 | |
|         "outcome": "failed",
 | |
|         "crash": {
 | |
|           "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
 | |
|           "lineno": 688,
 | |
|           "message": "IndexError: list index out of range"
 | |
|         },
 | |
|         "traceback": [
 | |
|           {
 | |
|             "path": "tests/verifications/openai_api/test_chat_completion.py",
 | |
|             "lineno": 506,
 | |
|             "message": ""
 | |
|           },
 | |
|           {
 | |
|             "path": "tests/verifications/openai_api/test_chat_completion.py",
 | |
|             "lineno": 688,
 | |
|             "message": "IndexError"
 | |
|           }
 | |
|         ],
 | |
|         "longrepr": "request = <FixtureRequest for <Function test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-compare_monthly_expense_tool]>>\nopenai_client = <openai.OpenAI object at 0x7f4274256b90>\nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'compare_monthly_expense_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'month': 1, 'year': ... 'Total expenses for January 2025: $1000'}\"}, {'response': \"{'response': 'Total expenses for February 2024: $2000'}\"}]}\n\n    @pytest.mark.parametrize(\n        \"case\",\n        chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n        ids=case_id_generator,\n    )\n    def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n        \"\"\" \"\"\"\n        test_name_base = get_base_test_name(request)\n        if should_skip_test(verification_config, provider, model, test_name_base):\n            pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n    \n        messages = []\n        tools = case[\"input\"][\"tools\"]\n        expected_results = copy.deepcopy(case[\"expected\"])\n        tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n        input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n    \n        while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n            if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n                new_messages = input_messages_turns.pop(0)\n                if isinstance(new_messages, list):\n                    messages.extend(new_messages)\n                else:\n                    messages.append(new_messages)\n    \n            # --- API Call (Streaming) ---\n            stream = openai_client.chat.completions.create(\n                model=model,\n                messages=messages,\n                tools=tools,\n                stream=True,\n            )\n    \n            # --- Process Stream ---\n>           accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n\ntests/verifications/openai_api/test_chat_completion.py:506: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\nstream = <openai.Stream object at 0x7f427415f0a0>\n\n    def _accumulate_streaming_tool_calls(stream):\n        \"\"\"Accumulates tool calls and content from a streaming ChatCompletion response.\"\"\"\n        tool_calls_buffer = {}\n        current_id = None\n        full_content = \"\"  # Initialize content accumulator\n        # Process streaming chunks\n        for chunk in stream:\n>           choice = chunk.choices[0]\nE           IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:688: IndexError"
 | |
|       },
 | |
|       "teardown": {
 | |
|         "duration": 0.0003162780776619911,
 | |
|         "outcome": "passed"
 | |
|       }
 | |
|     },
 | |
|     {
 | |
|       "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_multi_turn_multiple_images[meta-llama/Llama-3.3-70B-Instruct-Turbo-stream=False]",
 | |
|       "lineno": 554,
 | |
|       "outcome": "skipped",
 | |
|       "keywords": [
 | |
|         "test_chat_multi_turn_multiple_images[meta-llama/Llama-3.3-70B-Instruct-Turbo-stream=False]",
 | |
|         "parametrize",
 | |
|         "pytestmark",
 | |
|         "meta-llama/Llama-3.3-70B-Instruct-Turbo-stream=False",
 | |
|         "test_chat_completion.py",
 | |
|         "openai_api",
 | |
|         "verifications",
 | |
|         "tests",
 | |
|         "llama-stack",
 | |
|         ""
 | |
|       ],
 | |
|       "metadata": {
 | |
|         "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
 | |
|         "case_id": "stream=False"
 | |
|       },
 | |
|       "setup": {
 | |
|         "duration": 0.07253758516162634,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "call": {
 | |
|         "duration": 0.00021537486463785172,
 | |
|         "outcome": "skipped",
 | |
|         "longrepr": "('/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py', 561, 'Skipped: Skipping test_chat_multi_turn_multiple_images for model meta-llama/Llama-3.3-70B-Instruct-Turbo on provider together based on config.')"
 | |
|       },
 | |
|       "teardown": {
 | |
|         "duration": 0.0004162406548857689,
 | |
|         "outcome": "passed"
 | |
|       }
 | |
|     },
 | |
|     {
 | |
|       "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_multi_turn_multiple_images[meta-llama/Llama-3.3-70B-Instruct-Turbo-stream=True]",
 | |
|       "lineno": 554,
 | |
|       "outcome": "skipped",
 | |
|       "keywords": [
 | |
|         "test_chat_multi_turn_multiple_images[meta-llama/Llama-3.3-70B-Instruct-Turbo-stream=True]",
 | |
|         "parametrize",
 | |
|         "pytestmark",
 | |
|         "meta-llama/Llama-3.3-70B-Instruct-Turbo-stream=True",
 | |
|         "test_chat_completion.py",
 | |
|         "openai_api",
 | |
|         "verifications",
 | |
|         "tests",
 | |
|         "llama-stack",
 | |
|         ""
 | |
|       ],
 | |
|       "metadata": {
 | |
|         "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
 | |
|         "case_id": "stream=True"
 | |
|       },
 | |
|       "setup": {
 | |
|         "duration": 0.07268107868731022,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "call": {
 | |
|         "duration": 0.0002132616937160492,
 | |
|         "outcome": "skipped",
 | |
|         "longrepr": "('/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py', 561, 'Skipped: Skipping test_chat_multi_turn_multiple_images for model meta-llama/Llama-3.3-70B-Instruct-Turbo on provider together based on config.')"
 | |
|       },
 | |
|       "teardown": {
 | |
|         "duration": 0.00021094270050525665,
 | |
|         "outcome": "passed"
 | |
|       }
 | |
|     },
 | |
|     {
 | |
|       "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_multi_turn_multiple_images[meta-llama/Llama-4-Scout-17B-16E-Instruct-stream=False]",
 | |
|       "lineno": 554,
 | |
|       "outcome": "passed",
 | |
|       "keywords": [
 | |
|         "test_chat_multi_turn_multiple_images[meta-llama/Llama-4-Scout-17B-16E-Instruct-stream=False]",
 | |
|         "parametrize",
 | |
|         "pytestmark",
 | |
|         "meta-llama/Llama-4-Scout-17B-16E-Instruct-stream=False",
 | |
|         "test_chat_completion.py",
 | |
|         "openai_api",
 | |
|         "verifications",
 | |
|         "tests",
 | |
|         "llama-stack",
 | |
|         ""
 | |
|       ],
 | |
|       "metadata": {
 | |
|         "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
 | |
|         "case_id": "stream=False"
 | |
|       },
 | |
|       "setup": {
 | |
|         "duration": 0.07398672867566347,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "call": {
 | |
|         "duration": 4.383559702895582,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "teardown": {
 | |
|         "duration": 0.0002781357616186142,
 | |
|         "outcome": "passed"
 | |
|       }
 | |
|     },
 | |
|     {
 | |
|       "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_multi_turn_multiple_images[meta-llama/Llama-4-Scout-17B-16E-Instruct-stream=True]",
 | |
|       "lineno": 554,
 | |
|       "outcome": "failed",
 | |
|       "keywords": [
 | |
|         "test_chat_multi_turn_multiple_images[meta-llama/Llama-4-Scout-17B-16E-Instruct-stream=True]",
 | |
|         "parametrize",
 | |
|         "pytestmark",
 | |
|         "meta-llama/Llama-4-Scout-17B-16E-Instruct-stream=True",
 | |
|         "test_chat_completion.py",
 | |
|         "openai_api",
 | |
|         "verifications",
 | |
|         "tests",
 | |
|         "llama-stack",
 | |
|         ""
 | |
|       ],
 | |
|       "metadata": {
 | |
|         "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
 | |
|         "case_id": "stream=True"
 | |
|       },
 | |
|       "setup": {
 | |
|         "duration": 0.08006586041301489,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "call": {
 | |
|         "duration": 2.16784877050668,
 | |
|         "outcome": "failed",
 | |
|         "crash": {
 | |
|           "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
 | |
|           "lineno": 596,
 | |
|           "message": "IndexError: list index out of range"
 | |
|         },
 | |
|         "traceback": [
 | |
|           {
 | |
|             "path": "tests/verifications/openai_api/test_chat_completion.py",
 | |
|             "lineno": 596,
 | |
|             "message": "IndexError"
 | |
|           }
 | |
|         ],
 | |
|         "longrepr": "request = <FixtureRequest for <Function test_chat_multi_turn_multiple_images[meta-llama/Llama-4-Scout-17B-16E-Instruct-stream=True]>>\nopenai_client = <openai.OpenAI object at 0x7f427416c490>\nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\nmulti_image_data = ['data:image/jpeg;base64,/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAIBAQEBAQIBAQECAgICAgQDAgICAgUEBAMEBgUGBgYFBgYGBwkIBgcJBwYGC...6pH9jaTzNv7vfRRXzubfxj9f8Pv8AkTz/AMX/ALbEz5Ly38lfMk/5Z/u64PxhqEZh+z/6rzvn2UUV5EvgPuzy/wAc6p5dt5ccibJpNkkdFFFec27mZ//Z']\nstream = True\n\n    @pytest.mark.parametrize(\"stream\", [False, True], ids=[\"stream=False\", \"stream=True\"])\n    def test_chat_multi_turn_multiple_images(\n        request, openai_client, model, provider, verification_config, multi_image_data, stream\n    ):\n        test_name_base = get_base_test_name(request)\n        if should_skip_test(verification_config, provider, model, test_name_base):\n            pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n    \n        messages_turn1 = [\n            {\n                \"role\": \"user\",\n                \"content\": [\n                    {\n                        \"type\": \"image_url\",\n                        \"image_url\": {\n                            \"url\": multi_image_data[0],\n                        },\n                    },\n                    {\n                        \"type\": \"image_url\",\n                        \"image_url\": {\n                            \"url\": multi_image_data[1],\n                        },\n                    },\n                    {\n                        \"type\": \"text\",\n                        \"text\": \"What furniture is in the first image that is not in the second image?\",\n                    },\n                ],\n            },\n        ]\n    \n        # First API call\n        response1 = openai_client.chat.completions.create(\n            model=model,\n            messages=messages_turn1,\n            stream=stream,\n        )\n        if stream:\n            message_content1 = \"\"\n            for chunk in response1:\n>               message_content1 += chunk.choices[0].delta.content or \"\"\nE               IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:596: IndexError"
 | |
|       },
 | |
|       "teardown": {
 | |
|         "duration": 0.0003619194030761719,
 | |
|         "outcome": "passed"
 | |
|       }
 | |
|     },
 | |
|     {
 | |
|       "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_multi_turn_multiple_images[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-stream=False]",
 | |
|       "lineno": 554,
 | |
|       "outcome": "passed",
 | |
|       "keywords": [
 | |
|         "test_chat_multi_turn_multiple_images[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-stream=False]",
 | |
|         "parametrize",
 | |
|         "pytestmark",
 | |
|         "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-stream=False",
 | |
|         "test_chat_completion.py",
 | |
|         "openai_api",
 | |
|         "verifications",
 | |
|         "tests",
 | |
|         "llama-stack",
 | |
|         ""
 | |
|       ],
 | |
|       "metadata": {
 | |
|         "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
 | |
|         "case_id": "stream=False"
 | |
|       },
 | |
|       "setup": {
 | |
|         "duration": 0.0709412069991231,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "call": {
 | |
|         "duration": 6.110534753650427,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "teardown": {
 | |
|         "duration": 0.0002450142055749893,
 | |
|         "outcome": "passed"
 | |
|       }
 | |
|     },
 | |
|     {
 | |
|       "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_multi_turn_multiple_images[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-stream=True]",
 | |
|       "lineno": 554,
 | |
|       "outcome": "failed",
 | |
|       "keywords": [
 | |
|         "test_chat_multi_turn_multiple_images[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-stream=True]",
 | |
|         "parametrize",
 | |
|         "pytestmark",
 | |
|         "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-stream=True",
 | |
|         "test_chat_completion.py",
 | |
|         "openai_api",
 | |
|         "verifications",
 | |
|         "tests",
 | |
|         "llama-stack",
 | |
|         ""
 | |
|       ],
 | |
|       "metadata": {
 | |
|         "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
 | |
|         "case_id": "stream=True"
 | |
|       },
 | |
|       "setup": {
 | |
|         "duration": 0.0725309094414115,
 | |
|         "outcome": "passed"
 | |
|       },
 | |
|       "call": {
 | |
|         "duration": 2.291131243109703,
 | |
|         "outcome": "failed",
 | |
|         "crash": {
 | |
|           "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
 | |
|           "lineno": 596,
 | |
|           "message": "IndexError: list index out of range"
 | |
|         },
 | |
|         "traceback": [
 | |
|           {
 | |
|             "path": "tests/verifications/openai_api/test_chat_completion.py",
 | |
|             "lineno": 596,
 | |
|             "message": "IndexError"
 | |
|           }
 | |
|         ],
 | |
|         "longrepr": "request = <FixtureRequest for <Function test_chat_multi_turn_multiple_images[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-stream=True]>>\nopenai_client = <openai.OpenAI object at 0x7f42740eb0d0>\nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\nmulti_image_data = ['data:image/jpeg;base64,/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAIBAQEBAQIBAQECAgICAgQDAgICAgUEBAMEBgUGBgYFBgYGBwkIBgcJBwYGC...6pH9jaTzNv7vfRRXzubfxj9f8Pv8AkTz/AMX/ALbEz5Ly38lfMk/5Z/u64PxhqEZh+z/6rzvn2UUV5EvgPuzy/wAc6p5dt5ccibJpNkkdFFFec27mZ//Z']\nstream = True\n\n    @pytest.mark.parametrize(\"stream\", [False, True], ids=[\"stream=False\", \"stream=True\"])\n    def test_chat_multi_turn_multiple_images(\n        request, openai_client, model, provider, verification_config, multi_image_data, stream\n    ):\n        test_name_base = get_base_test_name(request)\n        if should_skip_test(verification_config, provider, model, test_name_base):\n            pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n    \n        messages_turn1 = [\n            {\n                \"role\": \"user\",\n                \"content\": [\n                    {\n                        \"type\": \"image_url\",\n                        \"image_url\": {\n                            \"url\": multi_image_data[0],\n                        },\n                    },\n                    {\n                        \"type\": \"image_url\",\n                        \"image_url\": {\n                            \"url\": multi_image_data[1],\n                        },\n                    },\n                    {\n                        \"type\": \"text\",\n                        \"text\": \"What furniture is in the first image that is not in the second image?\",\n                    },\n                ],\n            },\n        ]\n    \n        # First API call\n        response1 = openai_client.chat.completions.create(\n            model=model,\n            messages=messages_turn1,\n            stream=stream,\n        )\n        if stream:\n            message_content1 = \"\"\n            for chunk in response1:\n>               message_content1 += chunk.choices[0].delta.content or \"\"\nE               IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:596: IndexError"
 | |
|       },
 | |
|       "teardown": {
 | |
|         "duration": 0.0018906639888882637,
 | |
|         "outcome": "passed"
 | |
|       }
 | |
|     }
 | |
|   ],
 | |
|   "run_timestamp": 1744918065
 | |
| }
 |