test: verification on provider's OAI endpoints (#1893)

# What does this PR do?

Adds verification coverage for providers' OpenAI-compatible endpoints: the inference integration tests can now run against OpenAI-compat provider endpoints via the `verification` config. The test changes shown below recognize `llama4` model ids (e.g. Fireworks) alongside `llama-4`, skip completion tests for providers whose type contains `openai-compat`, throttle the multi-turn tool-calling loop to avoid rate limits, and tighten the tool test-case `param_type` values from `list` to `list[str]`.

## Test Plan
```sh
export MODEL=accounts/fireworks/models/llama4-scout-instruct-basic
LLAMA_STACK_CONFIG=verification pytest -s -v tests/integration/inference \
  --vision-model $MODEL --text-model $MODEL
```
ehhuang, 2025-04-07 23:06:28 -07:00 (committed by GitHub)
commit 7b4eb0967e · parent 530d4bdfe1
43 changed files with 1683 additions and 17 deletions


@@ -6,6 +6,7 @@
 import os
+from time import sleep
 import pytest
 from pydantic import BaseModel
@@ -23,11 +24,15 @@ def skip_if_model_doesnt_support_completion(client_with_models, model_id):
     provider_id = models[model_id].provider_id
     providers = {p.provider_id: p for p in client_with_models.providers.list()}
     provider = providers[provider_id]
-    if provider.provider_type in (
-        "remote::openai",
-        "remote::anthropic",
-        "remote::gemini",
-        "remote::groq",
+    if (
+        provider.provider_type
+        in (
+            "remote::openai",
+            "remote::anthropic",
+            "remote::gemini",
+            "remote::groq",
+        )
+        or "openai-compat" in provider.provider_type
     ):
         pytest.skip(f"Model {model_id} hosted by {provider.provider_type} doesn't support completion")
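
As a reading aid (not part of the diff), here is a minimal sketch of how the widened skip predicate behaves; `remote::fireworks-openai-compat` is an assumed example of an OpenAI-compat provider type, used only for illustration.

```python
# Minimal sketch of the skip predicate above. The provider-type strings are
# examples for illustration, not an exhaustive list from the codebase.
EXPLICITLY_SKIPPED = ("remote::openai", "remote::anthropic", "remote::gemini", "remote::groq")


def skips_completion(provider_type: str) -> bool:
    # Skip explicitly listed providers, plus anything served through an
    # OpenAI-compat adapter (provider type contains "openai-compat").
    return provider_type in EXPLICITLY_SKIPPED or "openai-compat" in provider_type


assert skips_completion("remote::openai")
assert skips_completion("remote::fireworks-openai-compat")  # assumed example name
assert not skips_completion("remote::fireworks")
```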
@@ -514,7 +519,7 @@ def test_text_chat_completion_tool_calling_tools_not_in_request(
 )
 def test_text_chat_completion_with_multi_turn_tool_calling(client_with_models, text_model_id, test_case):
     """This test tests the model's tool calling loop in various scenarios"""
-    if "llama-4" not in text_model_id.lower():
+    if "llama-4" not in text_model_id.lower() and "llama4" not in text_model_id.lower():
         pytest.xfail("Not tested for non-llama4 models yet")
     tc = TestCase(test_case)
@@ -545,7 +550,7 @@ def test_text_chat_completion_with_multi_turn_tool_calling(client_with_models, t
         )
         op_msg = response.completion_message
         messages.append(op_msg.model_dump())
-        # pprint(op_msg)
+        # print(op_msg)
         assert op_msg.role == "assistant"
         expected = tc["expected"].pop(0)
@@ -568,3 +573,6 @@ def test_text_chat_completion_with_multi_turn_tool_calling(client_with_models, t
             actual_answer = op_msg.content.lower()
             # pprint(actual_answer)
             assert expected["answer"] in actual_answer
+
+        # sleep to avoid rate limit
+        sleep(1)
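
The trailing `sleep(1)` simply throttles the multi-turn loop so hosted endpoints with per-minute rate limits do not reject later turns. A minimal sketch of the same pattern, with a hypothetical `call_model` standing in for the test's real chat-completion call:

```python
from time import sleep


def run_turns(turns, call_model, delay_s: float = 1.0):
    """Run each turn, pausing between calls to stay under provider rate limits.

    `call_model` is a hypothetical stand-in for the chat-completion call in the test.
    """
    results = []
    for turn in turns:
        results.append(call_model(turn))
        # sleep to avoid rate limit, mirroring the test change above
        sleep(delay_s)
    return results
```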


@@ -76,8 +76,9 @@ def multi_image_data():
 @pytest.mark.parametrize("stream", [True, False])
 def test_image_chat_completion_multiple_images(client_with_models, vision_model_id, multi_image_data, stream):
-    if "llama-4" not in vision_model_id.lower() and "gpt-4o" not in vision_model_id.lower():
-        pytest.skip("Skip for non-llama4, gpt4o models")
+    supported_models = ["llama-4", "gpt-4o", "llama4"]
+    if not any(model in vision_model_id.lower() for model in supported_models):
+        pytest.skip(f"Skip for non-supported model: {vision_model_id}")
     messages = [
         {

@@ -220,7 +220,7 @@
           "description": "Availability status of the product."
         },
         "tags": {
-          "param_type": "list",
+          "param_type": "list[str]",
           "description": "List of product tags"
         }
       }
@@ -294,7 +294,7 @@
           "description": "Location of the event"
         },
         "participants": {
-          "param_type": "list",
+          "param_type": "list[str]",
           "description": "List of participant names"
         }
       }
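
The `param_type` values above live in the tool-calling test-case JSON; a minimal Python sketch of what the touched tool-parameter entries look like with the tightened types (any structure outside the fragments shown is an assumption):

```python
# Hypothetical reconstruction of the tool-parameter entries touched above;
# only the "tags" / "participants" fragments are taken from the diff.
tool_parameters = {
    "tags": {
        "param_type": "list[str]",  # was "list"; now names the element type
        "description": "List of product tags",
    },
    "participants": {
        "param_type": "list[str]",  # was "list"
        "description": "List of participant names",
    },
}
```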