mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-07-24 05:14:30 +00:00
test: add multi_image test (#1972)
# What does this PR do? ## Test Plan pytest tests/verifications/openai_api/test_chat_completion.py --provider openai -k 'test_chat_multiple_images'
This commit is contained in:
parent
2976b5d992
commit
0ed41aafbf
16 changed files with 2416 additions and 1585 deletions
|
@ -1,6 +1,6 @@
|
|||
# Test Results Report
|
||||
|
||||
*Generated on: 2025-04-17 11:08:16*
|
||||
*Generated on: 2025-04-17 12:42:33*
|
||||
|
||||
*This report was generated by running `python tests/verifications/generate_report.py`*
|
||||
|
||||
|
@ -15,23 +15,23 @@
|
|||
|
||||
| Provider | Pass Rate | Tests Passed | Total Tests |
|
||||
| --- | --- | --- | --- |
|
||||
| Meta_reference | 100.0% | 26 | 26 |
|
||||
| Together | 51.3% | 39 | 76 |
|
||||
| Fireworks | 47.4% | 36 | 76 |
|
||||
| Openai | 100.0% | 52 | 52 |
|
||||
| Meta_reference | 100.0% | 28 | 28 |
|
||||
| Together | 50.0% | 40 | 80 |
|
||||
| Fireworks | 50.0% | 40 | 80 |
|
||||
| Openai | 100.0% | 56 | 56 |
|
||||
|
||||
|
||||
|
||||
## Meta_reference
|
||||
|
||||
*Tests run on: 2025-04-15 17:08:59*
|
||||
*Tests run on: 2025-04-17 12:37:11*
|
||||
|
||||
```bash
|
||||
# Run all tests for this provider:
|
||||
pytest tests/verifications/openai_api/test_chat_completion.py --provider=meta_reference -v
|
||||
|
||||
# Example: Run only the 'earth' case of test_chat_non_streaming_basic:
|
||||
pytest tests/verifications/openai_api/test_chat_completion.py --provider=meta_reference -k "test_chat_non_streaming_basic and earth"
|
||||
# Example: Run only the 'stream=False' case of test_chat_multi_turn_multiple_images:
|
||||
pytest tests/verifications/openai_api/test_chat_completion.py --provider=meta_reference -k "test_chat_multi_turn_multiple_images and stream=False"
|
||||
```
|
||||
|
||||
|
||||
|
@ -44,6 +44,8 @@ pytest tests/verifications/openai_api/test_chat_completion.py --provider=meta_re
|
|||
|
||||
| Test | Llama-4-Scout-Instruct |
|
||||
| --- | --- |
|
||||
| test_chat_multi_turn_multiple_images (stream=False) | ✅ |
|
||||
| test_chat_multi_turn_multiple_images (stream=True) | ✅ |
|
||||
| test_chat_non_streaming_basic (earth) | ✅ |
|
||||
| test_chat_non_streaming_basic (saturn) | ✅ |
|
||||
| test_chat_non_streaming_image | ✅ |
|
||||
|
@ -73,14 +75,14 @@ pytest tests/verifications/openai_api/test_chat_completion.py --provider=meta_re
|
|||
|
||||
## Together
|
||||
|
||||
*Tests run on: 2025-04-16 15:03:51*
|
||||
*Tests run on: 2025-04-17 12:27:45*
|
||||
|
||||
```bash
|
||||
# Run all tests for this provider:
|
||||
pytest tests/verifications/openai_api/test_chat_completion.py --provider=together -v
|
||||
|
||||
# Example: Run only the 'earth' case of test_chat_non_streaming_basic:
|
||||
pytest tests/verifications/openai_api/test_chat_completion.py --provider=together -k "test_chat_non_streaming_basic and earth"
|
||||
# Example: Run only the 'stream=False' case of test_chat_multi_turn_multiple_images:
|
||||
pytest tests/verifications/openai_api/test_chat_completion.py --provider=together -k "test_chat_multi_turn_multiple_images and stream=False"
|
||||
```
|
||||
|
||||
|
||||
|
@ -95,12 +97,14 @@ pytest tests/verifications/openai_api/test_chat_completion.py --provider=togethe
|
|||
|
||||
| Test | Llama-3.3-70B-Instruct | Llama-4-Maverick-Instruct | Llama-4-Scout-Instruct |
|
||||
| --- | --- | --- | --- |
|
||||
| test_chat_multi_turn_multiple_images (stream=False) | ⚪ | ✅ | ✅ |
|
||||
| test_chat_multi_turn_multiple_images (stream=True) | ⚪ | ❌ | ❌ |
|
||||
| test_chat_non_streaming_basic (earth) | ✅ | ✅ | ✅ |
|
||||
| test_chat_non_streaming_basic (saturn) | ✅ | ✅ | ✅ |
|
||||
| test_chat_non_streaming_image | ⚪ | ✅ | ✅ |
|
||||
| test_chat_non_streaming_multi_turn_tool_calling (add_product_tool) | ✅ | ✅ | ✅ |
|
||||
| test_chat_non_streaming_multi_turn_tool_calling (compare_monthly_expense_tool) | ✅ | ✅ | ✅ |
|
||||
| test_chat_non_streaming_multi_turn_tool_calling (get_then_create_event_tool) | ✅ | ✅ | ✅ |
|
||||
| test_chat_non_streaming_multi_turn_tool_calling (get_then_create_event_tool) | ✅ | ❌ | ✅ |
|
||||
| test_chat_non_streaming_multi_turn_tool_calling (text_then_weather_tool) | ❌ | ❌ | ❌ |
|
||||
| test_chat_non_streaming_multi_turn_tool_calling (weather_tool_then_text) | ✅ | ✅ | ✅ |
|
||||
| test_chat_non_streaming_structured_output (calendar) | ✅ | ✅ | ✅ |
|
||||
|
@ -124,14 +128,14 @@ pytest tests/verifications/openai_api/test_chat_completion.py --provider=togethe
|
|||
|
||||
## Fireworks
|
||||
|
||||
*Tests run on: 2025-04-16 15:05:54*
|
||||
*Tests run on: 2025-04-17 12:29:53*
|
||||
|
||||
```bash
|
||||
# Run all tests for this provider:
|
||||
pytest tests/verifications/openai_api/test_chat_completion.py --provider=fireworks -v
|
||||
|
||||
# Example: Run only the 'earth' case of test_chat_non_streaming_basic:
|
||||
pytest tests/verifications/openai_api/test_chat_completion.py --provider=fireworks -k "test_chat_non_streaming_basic and earth"
|
||||
# Example: Run only the 'stream=False' case of test_chat_multi_turn_multiple_images:
|
||||
pytest tests/verifications/openai_api/test_chat_completion.py --provider=fireworks -k "test_chat_multi_turn_multiple_images and stream=False"
|
||||
```
|
||||
|
||||
|
||||
|
@ -146,6 +150,8 @@ pytest tests/verifications/openai_api/test_chat_completion.py --provider=firewor
|
|||
|
||||
| Test | Llama-3.3-70B-Instruct | Llama-4-Maverick-Instruct | Llama-4-Scout-Instruct |
|
||||
| --- | --- | --- | --- |
|
||||
| test_chat_multi_turn_multiple_images (stream=False) | ⚪ | ✅ | ✅ |
|
||||
| test_chat_multi_turn_multiple_images (stream=True) | ⚪ | ✅ | ✅ |
|
||||
| test_chat_non_streaming_basic (earth) | ✅ | ✅ | ✅ |
|
||||
| test_chat_non_streaming_basic (saturn) | ✅ | ✅ | ✅ |
|
||||
| test_chat_non_streaming_image | ⚪ | ✅ | ✅ |
|
||||
|
@ -175,14 +181,14 @@ pytest tests/verifications/openai_api/test_chat_completion.py --provider=firewor
|
|||
|
||||
## Openai
|
||||
|
||||
*Tests run on: 2025-04-16 15:09:18*
|
||||
*Tests run on: 2025-04-17 12:34:08*
|
||||
|
||||
```bash
|
||||
# Run all tests for this provider:
|
||||
pytest tests/verifications/openai_api/test_chat_completion.py --provider=openai -v
|
||||
|
||||
# Example: Run only the 'earth' case of test_chat_non_streaming_basic:
|
||||
pytest tests/verifications/openai_api/test_chat_completion.py --provider=openai -k "test_chat_non_streaming_basic and earth"
|
||||
# Example: Run only the 'stream=False' case of test_chat_multi_turn_multiple_images:
|
||||
pytest tests/verifications/openai_api/test_chat_completion.py --provider=openai -k "test_chat_multi_turn_multiple_images and stream=False"
|
||||
```
|
||||
|
||||
|
||||
|
@ -196,6 +202,8 @@ pytest tests/verifications/openai_api/test_chat_completion.py --provider=openai
|
|||
|
||||
| Test | gpt-4o | gpt-4o-mini |
|
||||
| --- | --- | --- |
|
||||
| test_chat_multi_turn_multiple_images (stream=False) | ✅ | ✅ |
|
||||
| test_chat_multi_turn_multiple_images (stream=True) | ✅ | ✅ |
|
||||
| test_chat_non_streaming_basic (earth) | ✅ | ✅ |
|
||||
| test_chat_non_streaming_basic (saturn) | ✅ | ✅ |
|
||||
| test_chat_non_streaming_image | ✅ | ✅ |
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue