mirror of
				https://github.com/meta-llama/llama-stack.git
				synced 2025-10-25 09:05:37 +00:00 
			
		
		
		
# What does this PR do?

## Test Plan

```bash
pytest tests/verifications/openai_api/test_chat_completion.py --provider openai -k 'test_chat_multiple_images'
```
		
			
				
	
	
	
	
		
			11 KiB
		
	
	
	
	
	
	
	
			
		
		
	
	
			11 KiB
		
	
	
	
	
	
	
	
# Test Results Report

Generated on: 2025-04-17 12:42:33
This report was generated by running `python tests/verifications/generate_report.py`.

## Legend

- ✅ - Test passed
- ❌ - Test failed
- ⚪ - Test not applicable or not run for this model

## Summary

| Provider | Pass Rate | Tests Passed | Total Tests | 
|---|---|---|---|
| Meta_reference | 100.0% | 28 | 28 | 
| Together | 50.0% | 40 | 80 | 
| Fireworks | 50.0% | 40 | 80 | 
| Openai | 100.0% | 56 | 56 | 

## Meta_reference

Tests run on: 2025-04-17 12:37:11
```bash
# Run all tests for this provider:
pytest tests/verifications/openai_api/test_chat_completion.py --provider=meta_reference -v
# Example: Run only the 'stream=False' case of test_chat_multi_turn_multiple_images:
pytest tests/verifications/openai_api/test_chat_completion.py --provider=meta_reference -k "test_chat_multi_turn_multiple_images and stream=False"
```
Model Key (Meta_reference)
| Display Name | Full Model ID | 
|---|---|
| Llama-4-Scout-Instruct | meta-llama/Llama-4-Scout-17B-16E-Instruct | 

| Test | Llama-4-Scout-Instruct | 
|---|---|
| test_chat_multi_turn_multiple_images (stream=False) | ✅ | 
| test_chat_multi_turn_multiple_images (stream=True) | ✅ | 
| test_chat_non_streaming_basic (earth) | ✅ | 
| test_chat_non_streaming_basic (saturn) | ✅ | 
| test_chat_non_streaming_image | ✅ | 
| test_chat_non_streaming_multi_turn_tool_calling (add_product_tool) | ✅ | 
| test_chat_non_streaming_multi_turn_tool_calling (compare_monthly_expense_tool) | ✅ | 
| test_chat_non_streaming_multi_turn_tool_calling (get_then_create_event_tool) | ✅ | 
| test_chat_non_streaming_multi_turn_tool_calling (text_then_weather_tool) | ✅ | 
| test_chat_non_streaming_multi_turn_tool_calling (weather_tool_then_text) | ✅ | 
| test_chat_non_streaming_structured_output (calendar) | ✅ | 
| test_chat_non_streaming_structured_output (math) | ✅ | 
| test_chat_non_streaming_tool_calling | ✅ | 
| test_chat_non_streaming_tool_choice_none | ✅ | 
| test_chat_non_streaming_tool_choice_required | ✅ | 
| test_chat_streaming_basic (earth) | ✅ | 
| test_chat_streaming_basic (saturn) | ✅ | 
| test_chat_streaming_image | ✅ | 
| test_chat_streaming_multi_turn_tool_calling (add_product_tool) | ✅ | 
| test_chat_streaming_multi_turn_tool_calling (compare_monthly_expense_tool) | ✅ | 
| test_chat_streaming_multi_turn_tool_calling (get_then_create_event_tool) | ✅ | 
| test_chat_streaming_multi_turn_tool_calling (text_then_weather_tool) | ✅ | 
| test_chat_streaming_multi_turn_tool_calling (weather_tool_then_text) | ✅ | 
| test_chat_streaming_structured_output (calendar) | ✅ | 
| test_chat_streaming_structured_output (math) | ✅ | 
| test_chat_streaming_tool_calling | ✅ | 
| test_chat_streaming_tool_choice_none | ✅ | 
| test_chat_streaming_tool_choice_required | ✅ | 

## Together

Tests run on: 2025-04-17 12:27:45
```bash
# Run all tests for this provider:
pytest tests/verifications/openai_api/test_chat_completion.py --provider=together -v
# Example: Run only the 'stream=False' case of test_chat_multi_turn_multiple_images:
pytest tests/verifications/openai_api/test_chat_completion.py --provider=together -k "test_chat_multi_turn_multiple_images and stream=False"
```
Model Key (Together)
| Display Name | Full Model ID | 
|---|---|
| Llama-3.3-70B-Instruct | meta-llama/Llama-3.3-70B-Instruct-Turbo | 
| Llama-4-Maverick-Instruct | meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8 | 
| Llama-4-Scout-Instruct | meta-llama/Llama-4-Scout-17B-16E-Instruct | 

| Test | Llama-3.3-70B-Instruct | Llama-4-Maverick-Instruct | Llama-4-Scout-Instruct | 
|---|---|---|---|
| test_chat_multi_turn_multiple_images (stream=False) | ⚪ | ✅ | ✅ | 
| test_chat_multi_turn_multiple_images (stream=True) | ⚪ | ❌ | ❌ | 
| test_chat_non_streaming_basic (earth) | ✅ | ✅ | ✅ | 
| test_chat_non_streaming_basic (saturn) | ✅ | ✅ | ✅ | 
| test_chat_non_streaming_image | ⚪ | ✅ | ✅ | 
| test_chat_non_streaming_multi_turn_tool_calling (add_product_tool) | ✅ | ✅ | ✅ | 
| test_chat_non_streaming_multi_turn_tool_calling (compare_monthly_expense_tool) | ✅ | ✅ | ✅ | 
| test_chat_non_streaming_multi_turn_tool_calling (get_then_create_event_tool) | ✅ | ❌ | ✅ | 
| test_chat_non_streaming_multi_turn_tool_calling (text_then_weather_tool) | ❌ | ❌ | ❌ | 
| test_chat_non_streaming_multi_turn_tool_calling (weather_tool_then_text) | ✅ | ✅ | ✅ | 
| test_chat_non_streaming_structured_output (calendar) | ✅ | ✅ | ✅ | 
| test_chat_non_streaming_structured_output (math) | ✅ | ✅ | ✅ | 
| test_chat_non_streaming_tool_calling | ✅ | ✅ | ✅ | 
| test_chat_non_streaming_tool_choice_none | ❌ | ❌ | ❌ | 
| test_chat_non_streaming_tool_choice_required | ✅ | ✅ | ✅ | 
| test_chat_streaming_basic (earth) | ✅ | ❌ | ❌ | 
| test_chat_streaming_basic (saturn) | ✅ | ❌ | ❌ | 
| test_chat_streaming_image | ⚪ | ❌ | ❌ | 
| test_chat_streaming_multi_turn_tool_calling (add_product_tool) | ✅ | ❌ | ❌ | 
| test_chat_streaming_multi_turn_tool_calling (compare_monthly_expense_tool) | ❌ | ❌ | ❌ | 
| test_chat_streaming_multi_turn_tool_calling (get_then_create_event_tool) | ❌ | ❌ | ❌ | 
| test_chat_streaming_multi_turn_tool_calling (text_then_weather_tool) | ❌ | ❌ | ❌ | 
| test_chat_streaming_multi_turn_tool_calling (weather_tool_then_text) | ❌ | ❌ | ❌ | 
| test_chat_streaming_structured_output (calendar) | ✅ | ❌ | ❌ | 
| test_chat_streaming_structured_output (math) | ✅ | ❌ | ❌ | 
| test_chat_streaming_tool_calling | ✅ | ❌ | ❌ | 
| test_chat_streaming_tool_choice_none | ❌ | ❌ | ❌ | 
| test_chat_streaming_tool_choice_required | ✅ | ❌ | ❌ | 

## Fireworks

Tests run on: 2025-04-17 12:29:53
```bash
# Run all tests for this provider:
pytest tests/verifications/openai_api/test_chat_completion.py --provider=fireworks -v
# Example: Run only the 'stream=False' case of test_chat_multi_turn_multiple_images:
pytest tests/verifications/openai_api/test_chat_completion.py --provider=fireworks -k "test_chat_multi_turn_multiple_images and stream=False"
```
Model Key (Fireworks)
| Display Name | Full Model ID | 
|---|---|
| Llama-3.3-70B-Instruct | accounts/fireworks/models/llama-v3p3-70b-instruct | 
| Llama-4-Maverick-Instruct | accounts/fireworks/models/llama4-maverick-instruct-basic | 
| Llama-4-Scout-Instruct | accounts/fireworks/models/llama4-scout-instruct-basic | 

| Test | Llama-3.3-70B-Instruct | Llama-4-Maverick-Instruct | Llama-4-Scout-Instruct | 
|---|---|---|---|
| test_chat_multi_turn_multiple_images (stream=False) | ⚪ | ✅ | ✅ | 
| test_chat_multi_turn_multiple_images (stream=True) | ⚪ | ✅ | ✅ | 
| test_chat_non_streaming_basic (earth) | ✅ | ✅ | ✅ | 
| test_chat_non_streaming_basic (saturn) | ✅ | ✅ | ✅ | 
| test_chat_non_streaming_image | ⚪ | ✅ | ✅ | 
| test_chat_non_streaming_multi_turn_tool_calling (add_product_tool) | ❌ | ❌ | ❌ | 
| test_chat_non_streaming_multi_turn_tool_calling (compare_monthly_expense_tool) | ❌ | ❌ | ❌ | 
| test_chat_non_streaming_multi_turn_tool_calling (get_then_create_event_tool) | ❌ | ❌ | ❌ | 
| test_chat_non_streaming_multi_turn_tool_calling (text_then_weather_tool) | ❌ | ❌ | ❌ | 
| test_chat_non_streaming_multi_turn_tool_calling (weather_tool_then_text) | ❌ | ❌ | ❌ | 
| test_chat_non_streaming_structured_output (calendar) | ✅ | ✅ | ✅ | 
| test_chat_non_streaming_structured_output (math) | ✅ | ✅ | ✅ | 
| test_chat_non_streaming_tool_calling | ❌ | ❌ | ❌ | 
| test_chat_non_streaming_tool_choice_none | ✅ | ✅ | ✅ | 
| test_chat_non_streaming_tool_choice_required | ✅ | ❌ | ❌ | 
| test_chat_streaming_basic (earth) | ✅ | ✅ | ✅ | 
| test_chat_streaming_basic (saturn) | ✅ | ✅ | ✅ | 
| test_chat_streaming_image | ⚪ | ✅ | ✅ | 
| test_chat_streaming_multi_turn_tool_calling (add_product_tool) | ❌ | ❌ | ❌ | 
| test_chat_streaming_multi_turn_tool_calling (compare_monthly_expense_tool) | ❌ | ❌ | ❌ | 
| test_chat_streaming_multi_turn_tool_calling (get_then_create_event_tool) | ❌ | ❌ | ❌ | 
| test_chat_streaming_multi_turn_tool_calling (text_then_weather_tool) | ❌ | ❌ | ❌ | 
| test_chat_streaming_multi_turn_tool_calling (weather_tool_then_text) | ❌ | ❌ | ❌ | 
| test_chat_streaming_structured_output (calendar) | ✅ | ✅ | ✅ | 
| test_chat_streaming_structured_output (math) | ✅ | ✅ | ✅ | 
| test_chat_streaming_tool_calling | ❌ | ❌ | ❌ | 
| test_chat_streaming_tool_choice_none | ✅ | ✅ | ✅ | 
| test_chat_streaming_tool_choice_required | ✅ | ❌ | ❌ | 

## Openai

Tests run on: 2025-04-17 12:34:08
```bash
# Run all tests for this provider:
pytest tests/verifications/openai_api/test_chat_completion.py --provider=openai -v
# Example: Run only the 'stream=False' case of test_chat_multi_turn_multiple_images:
pytest tests/verifications/openai_api/test_chat_completion.py --provider=openai -k "test_chat_multi_turn_multiple_images and stream=False"
```
Model Key (Openai)
| Display Name | Full Model ID | 
|---|---|
| gpt-4o | gpt-4o | 
| gpt-4o-mini | gpt-4o-mini | 

| Test | gpt-4o | gpt-4o-mini | 
|---|---|---|
| test_chat_multi_turn_multiple_images (stream=False) | ✅ | ✅ | 
| test_chat_multi_turn_multiple_images (stream=True) | ✅ | ✅ | 
| test_chat_non_streaming_basic (earth) | ✅ | ✅ | 
| test_chat_non_streaming_basic (saturn) | ✅ | ✅ | 
| test_chat_non_streaming_image | ✅ | ✅ | 
| test_chat_non_streaming_multi_turn_tool_calling (add_product_tool) | ✅ | ✅ | 
| test_chat_non_streaming_multi_turn_tool_calling (compare_monthly_expense_tool) | ✅ | ✅ | 
| test_chat_non_streaming_multi_turn_tool_calling (get_then_create_event_tool) | ✅ | ✅ | 
| test_chat_non_streaming_multi_turn_tool_calling (text_then_weather_tool) | ✅ | ✅ | 
| test_chat_non_streaming_multi_turn_tool_calling (weather_tool_then_text) | ✅ | ✅ | 
| test_chat_non_streaming_structured_output (calendar) | ✅ | ✅ | 
| test_chat_non_streaming_structured_output (math) | ✅ | ✅ | 
| test_chat_non_streaming_tool_calling | ✅ | ✅ | 
| test_chat_non_streaming_tool_choice_none | ✅ | ✅ | 
| test_chat_non_streaming_tool_choice_required | ✅ | ✅ | 
| test_chat_streaming_basic (earth) | ✅ | ✅ | 
| test_chat_streaming_basic (saturn) | ✅ | ✅ | 
| test_chat_streaming_image | ✅ | ✅ | 
| test_chat_streaming_multi_turn_tool_calling (add_product_tool) | ✅ | ✅ | 
| test_chat_streaming_multi_turn_tool_calling (compare_monthly_expense_tool) | ✅ | ✅ | 
| test_chat_streaming_multi_turn_tool_calling (get_then_create_event_tool) | ✅ | ✅ | 
| test_chat_streaming_multi_turn_tool_calling (text_then_weather_tool) | ✅ | ✅ | 
| test_chat_streaming_multi_turn_tool_calling (weather_tool_then_text) | ✅ | ✅ | 
| test_chat_streaming_structured_output (calendar) | ✅ | ✅ | 
| test_chat_streaming_structured_output (math) | ✅ | ✅ | 
| test_chat_streaming_tool_calling | ✅ | ✅ | 
| test_chat_streaming_tool_choice_none | ✅ | ✅ | 
| test_chat_streaming_tool_choice_required | ✅ | ✅ |