mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-06-27 18:50:41 +00:00
fix: OAI compat endpoint for meta reference inference provider (#1962)
Test plan:
python tests/verifications/generate_report.py --providers fireworks,together,llama_meta_ref,openai

Co-authored-by: Eric Huang <erichuang@fb.com>
This commit is contained in:
parent
8bd6665775
commit
2976b5d992
8 changed files with 1184 additions and 44 deletions
|
@ -1,6 +1,6 @@
|
|||
# Test Results Report
|
||||
|
||||
*Generated on: 2025-04-16 15:10:57*
|
||||
*Generated on: 2025-04-17 11:08:16*
|
||||
|
||||
*This report was generated by running `python tests/verifications/generate_report.py`*
|
||||
|
||||
|
@ -15,12 +15,62 @@
|
|||
|
||||
| Provider | Pass Rate | Tests Passed | Total Tests |
|
||||
| --- | --- | --- | --- |
|
||||
| Meta_reference | 100.0% | 26 | 26 |
|
||||
| Together | 51.3% | 39 | 76 |
|
||||
| Fireworks | 47.4% | 36 | 76 |
|
||||
| Openai | 100.0% | 52 | 52 |
|
||||
|
||||
|
||||
|
||||
## Meta_reference
|
||||
|
||||
*Tests run on: 2025-04-15 17:08:59*
|
||||
|
||||
```bash
|
||||
# Run all tests for this provider:
|
||||
pytest tests/verifications/openai_api/test_chat_completion.py --provider=meta_reference -v
|
||||
|
||||
# Example: Run only the 'earth' case of test_chat_non_streaming_basic:
|
||||
pytest tests/verifications/openai_api/test_chat_completion.py --provider=meta_reference -k "test_chat_non_streaming_basic and earth"
|
||||
```
|
||||
|
||||
|
||||
**Model Key (Meta_reference)**
|
||||
|
||||
| Display Name | Full Model ID |
|
||||
| --- | --- |
|
||||
| Llama-4-Scout-Instruct | `meta-llama/Llama-4-Scout-17B-16E-Instruct` |
|
||||
|
||||
|
||||
| Test | Llama-4-Scout-Instruct |
|
||||
| --- | --- |
|
||||
| test_chat_non_streaming_basic (earth) | ✅ |
|
||||
| test_chat_non_streaming_basic (saturn) | ✅ |
|
||||
| test_chat_non_streaming_image | ✅ |
|
||||
| test_chat_non_streaming_multi_turn_tool_calling (add_product_tool) | ✅ |
|
||||
| test_chat_non_streaming_multi_turn_tool_calling (compare_monthly_expense_tool) | ✅ |
|
||||
| test_chat_non_streaming_multi_turn_tool_calling (get_then_create_event_tool) | ✅ |
|
||||
| test_chat_non_streaming_multi_turn_tool_calling (text_then_weather_tool) | ✅ |
|
||||
| test_chat_non_streaming_multi_turn_tool_calling (weather_tool_then_text) | ✅ |
|
||||
| test_chat_non_streaming_structured_output (calendar) | ✅ |
|
||||
| test_chat_non_streaming_structured_output (math) | ✅ |
|
||||
| test_chat_non_streaming_tool_calling | ✅ |
|
||||
| test_chat_non_streaming_tool_choice_none | ✅ |
|
||||
| test_chat_non_streaming_tool_choice_required | ✅ |
|
||||
| test_chat_streaming_basic (earth) | ✅ |
|
||||
| test_chat_streaming_basic (saturn) | ✅ |
|
||||
| test_chat_streaming_image | ✅ |
|
||||
| test_chat_streaming_multi_turn_tool_calling (add_product_tool) | ✅ |
|
||||
| test_chat_streaming_multi_turn_tool_calling (compare_monthly_expense_tool) | ✅ |
|
||||
| test_chat_streaming_multi_turn_tool_calling (get_then_create_event_tool) | ✅ |
|
||||
| test_chat_streaming_multi_turn_tool_calling (text_then_weather_tool) | ✅ |
|
||||
| test_chat_streaming_multi_turn_tool_calling (weather_tool_then_text) | ✅ |
|
||||
| test_chat_streaming_structured_output (calendar) | ✅ |
|
||||
| test_chat_streaming_structured_output (math) | ✅ |
|
||||
| test_chat_streaming_tool_calling | ✅ |
|
||||
| test_chat_streaming_tool_choice_none | ✅ |
|
||||
| test_chat_streaming_tool_choice_required | ✅ |
|
||||
|
||||
## Together
|
||||
|
||||
*Tests run on: 2025-04-16 15:03:51*
|
||||
|
|
8
tests/verifications/conf/meta_reference.yaml
Normal file
8
tests/verifications/conf/meta_reference.yaml
Normal file
|
@ -0,0 +1,8 @@
|
|||
# LLAMA_STACK_PORT=5002 llama stack run meta-reference-gpu --env INFERENCE_MODEL=meta-llama/Llama-4-Scout-17B-16E-Instruct --env INFERENCE_CHECKPOINT_DIR=<path_to_ckpt>
|
||||
base_url: http://localhost:5002/v1/openai/v1
|
||||
api_key_var: foo
|
||||
models:
|
||||
- meta-llama/Llama-4-Scout-17B-16E-Instruct
|
||||
model_display_names:
|
||||
meta-llama/Llama-4-Scout-17B-16E-Instruct: Llama-4-Scout-Instruct
|
||||
test_exclusions: {}
|
|
@ -60,6 +60,7 @@ RESULTS_DIR.mkdir(exist_ok=True)
|
|||
MAX_RESULTS_PER_PROVIDER = 1
|
||||
|
||||
DEFAULT_PROVIDERS = [
|
||||
"meta_reference",
|
||||
"together",
|
||||
"fireworks",
|
||||
"openai",
|
||||
|
|
|
@ -12,7 +12,9 @@ from typing import Any
|
|||
import pytest
|
||||
from pydantic import BaseModel
|
||||
|
||||
from tests.verifications.openai_api.fixtures.fixtures import _load_all_verification_configs
|
||||
from tests.verifications.openai_api.fixtures.fixtures import (
|
||||
_load_all_verification_configs,
|
||||
)
|
||||
from tests.verifications.openai_api.fixtures.load import load_test_cases
|
||||
|
||||
chat_completion_test_cases = load_test_cases("chat_completion")
|
||||
|
@ -272,7 +274,6 @@ def test_chat_non_streaming_tool_choice_required(request, openai_client, model,
|
|||
tool_choice="required", # Force tool call
|
||||
stream=False,
|
||||
)
|
||||
print(response)
|
||||
|
||||
assert response.choices[0].message.role == "assistant"
|
||||
assert len(response.choices[0].message.tool_calls) > 0, "Expected tool call when tool_choice='required'"
|
||||
|
|
1023
tests/verifications/test_results/meta_reference.json
Normal file
1023
tests/verifications/test_results/meta_reference.json
Normal file
File diff suppressed because it is too large
Load diff
Loading…
Add table
Add a link
Reference in a new issue