fix: OAI compat endpoint for meta reference inference provider (#1962)

Test plan:
python tests/verifications/generate_report.py --providers fireworks,together,llama_meta_ref,openai

Co-authored-by: Eric Huang <erichuang@fb.com>
ehhuang 2025-04-17 11:16:04 -07:00 committed by GitHub
parent 8bd6665775
commit 2976b5d992
8 changed files with 1184 additions and 44 deletions

@@ -1,6 +1,6 @@
# Test Results Report
*Generated on: 2025-04-16 15:10:57*
*Generated on: 2025-04-17 11:08:16*
*This report was generated by running `python tests/verifications/generate_report.py`*
@@ -15,12 +15,62 @@
| Provider | Pass Rate | Tests Passed | Total Tests |
| --- | --- | --- | --- |
| Meta_reference | 100.0% | 26 | 26 |
| Together | 51.3% | 39 | 76 |
| Fireworks | 47.4% | 36 | 76 |
| Openai | 100.0% | 52 | 52 |
## Meta_reference
*Tests run on: 2025-04-15 17:08:59*
```bash
# Run all tests for this provider:
pytest tests/verifications/openai_api/test_chat_completion.py --provider=meta_reference -v
# Example: Run only the 'earth' case of test_chat_non_streaming_basic:
pytest tests/verifications/openai_api/test_chat_completion.py --provider=meta_reference -k "test_chat_non_streaming_basic and earth"
```
**Model Key (Meta_reference)**
| Display Name | Full Model ID |
| --- | --- |
| Llama-4-Scout-Instruct | `meta-llama/Llama-4-Scout-17B-16E-Instruct` |
| Test | Llama-4-Scout-Instruct |
| --- | --- |
| test_chat_non_streaming_basic (earth) | ✅ |
| test_chat_non_streaming_basic (saturn) | ✅ |
| test_chat_non_streaming_image | ✅ |
| test_chat_non_streaming_multi_turn_tool_calling (add_product_tool) | ✅ |
| test_chat_non_streaming_multi_turn_tool_calling (compare_monthly_expense_tool) | ✅ |
| test_chat_non_streaming_multi_turn_tool_calling (get_then_create_event_tool) | ✅ |
| test_chat_non_streaming_multi_turn_tool_calling (text_then_weather_tool) | ✅ |
| test_chat_non_streaming_multi_turn_tool_calling (weather_tool_then_text) | ✅ |
| test_chat_non_streaming_structured_output (calendar) | ✅ |
| test_chat_non_streaming_structured_output (math) | ✅ |
| test_chat_non_streaming_tool_calling | ✅ |
| test_chat_non_streaming_tool_choice_none | ✅ |
| test_chat_non_streaming_tool_choice_required | ✅ |
| test_chat_streaming_basic (earth) | ✅ |
| test_chat_streaming_basic (saturn) | ✅ |
| test_chat_streaming_image | ✅ |
| test_chat_streaming_multi_turn_tool_calling (add_product_tool) | ✅ |
| test_chat_streaming_multi_turn_tool_calling (compare_monthly_expense_tool) | ✅ |
| test_chat_streaming_multi_turn_tool_calling (get_then_create_event_tool) | ✅ |
| test_chat_streaming_multi_turn_tool_calling (text_then_weather_tool) | ✅ |
| test_chat_streaming_multi_turn_tool_calling (weather_tool_then_text) | ✅ |
| test_chat_streaming_structured_output (calendar) | ✅ |
| test_chat_streaming_structured_output (math) | ✅ |
| test_chat_streaming_tool_calling | ✅ |
| test_chat_streaming_tool_choice_none | ✅ |
| test_chat_streaming_tool_choice_required | ✅ |
## Together
*Tests run on: 2025-04-16 15:03:51*

@@ -0,0 +1,8 @@
# LLAMA_STACK_PORT=5002 llama stack run meta-reference-gpu --env INFERENCE_MODEL=meta-llama/Llama-4-Scout-17B-16E-Instruct --env INFERENCE_CHECKPOINT_DIR=<path_to_ckpt>
base_url: http://localhost:5002/v1/openai/v1
api_key_var: foo
models:
- meta-llama/Llama-4-Scout-17B-16E-Instruct
model_display_names:
meta-llama/Llama-4-Scout-17B-16E-Instruct: Llama-4-Scout-Instruct
test_exclusions: {}
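
Once the `meta-reference-gpu` stack from the comment above is serving on port 5002, the new OpenAI-compat endpoint can be exercised directly with the stock `openai` client. Below is a minimal sketch, assuming the local endpoint accepts a placeholder API key; the base URL and model ID come from the config above, and the prompt is illustrative:

```python
# Minimal sketch: point the standard OpenAI Python client at the
# llama-stack OpenAI-compat endpoint declared in the config above.
from openai import OpenAI

client = OpenAI(
    base_url="http://localhost:5002/v1/openai/v1",  # base_url from the config
    api_key="dummy",  # assumption: the local endpoint does not validate keys
)

response = client.chat.completions.create(
    model="meta-llama/Llama-4-Scout-17B-16E-Instruct",
    messages=[{"role": "user", "content": "Which planet do humans live on?"}],
    stream=False,
)
print(response.choices[0].message.content)
```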

@@ -60,6 +60,7 @@ RESULTS_DIR.mkdir(exist_ok=True)
MAX_RESULTS_PER_PROVIDER = 1
DEFAULT_PROVIDERS = [
"meta_reference",
"together",
"fireworks",
"openai",

@@ -12,7 +12,9 @@ from typing import Any
import pytest
from pydantic import BaseModel
from tests.verifications.openai_api.fixtures.fixtures import _load_all_verification_configs
from tests.verifications.openai_api.fixtures.fixtures import (
    _load_all_verification_configs,
)
from tests.verifications.openai_api.fixtures.load import load_test_cases
chat_completion_test_cases = load_test_cases("chat_completion")
@@ -272,7 +274,6 @@ def test_chat_non_streaming_tool_choice_required(request, openai_client, model,
tool_choice="required", # Force tool call
stream=False,
)
print(response)
assert response.choices[0].message.role == "assistant"
assert len(response.choices[0].message.tool_calls) > 0, "Expected tool call when tool_choice='required'"
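
The final assertion is the crux of the added coverage: with `tool_choice="required"`, the endpoint must return at least one tool call. Here is a standalone sketch of the same request against the meta-reference endpoint; the weather tool schema is illustrative rather than copied from the suite's test cases:

```python
# Standalone sketch of the tool_choice="required" case the test asserts on.
from openai import OpenAI

client = OpenAI(
    base_url="http://localhost:5002/v1/openai/v1",  # from the config above
    api_key="dummy",  # assumption: local endpoint ignores the key
)

tools = [
    {
        "type": "function",
        "function": {
            "name": "get_weather",  # illustrative tool, not from the test cases
            "description": "Get the current weather for a city",
            "parameters": {
                "type": "object",
                "properties": {"city": {"type": "string"}},
                "required": ["city"],
            },
        },
    }
]

response = client.chat.completions.create(
    model="meta-llama/Llama-4-Scout-17B-16E-Instruct",
    messages=[{"role": "user", "content": "What's the weather in Paris?"}],
    tools=tools,
    tool_choice="required",  # force the model to call a tool
    stream=False,
)

# Mirrors the assertions in test_chat_non_streaming_tool_choice_required.
assert response.choices[0].message.role == "assistant"
assert len(response.choices[0].message.tool_calls) > 0
```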

File diff suppressed because it is too large