test: verification on provider's OAI endpoints (#1893)

# What does this PR do?

Runs the inference integration tests against the `verification` distribution, exercising providers' OpenAI-compatible endpoints (here, Fireworks' Llama 4 Scout).

## Test Plan
```bash
export MODEL=accounts/fireworks/models/llama4-scout-instruct-basic
LLAMA_STACK_CONFIG=verification pytest -s -v tests/integration/inference \
  --vision-model $MODEL --text-model $MODEL
```
Authored by ehhuang on 2025-04-07 23:06:28 -07:00, committed by GitHub
parent 530d4bdfe1
commit 7b4eb0967e
43 changed files with 1683 additions and 17 deletions


```diff
@@ -6,6 +6,7 @@
 import os
+from time import sleep
 import pytest
 from pydantic import BaseModel
```
```diff
@@ -23,11 +24,15 @@ def skip_if_model_doesnt_support_completion(client_with_models, model_id):
     provider_id = models[model_id].provider_id
     providers = {p.provider_id: p for p in client_with_models.providers.list()}
     provider = providers[provider_id]
-    if provider.provider_type in (
-        "remote::openai",
-        "remote::anthropic",
-        "remote::gemini",
-        "remote::groq",
+    if (
+        provider.provider_type
+        in (
+            "remote::openai",
+            "remote::anthropic",
+            "remote::gemini",
+            "remote::groq",
+        )
+        or "openai-compat" in provider.provider_type
     ):
         pytest.skip(f"Model {model_id} hosted by {provider.provider_type} doesn't support completion")
```
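
The substring check is the functional change here: any provider whose type merely embeds `openai-compat` is now skipped as well, since OpenAI-compatible chat endpoints don't serve the legacy completion API. A minimal sketch of the predicate (the provider types below are illustrative, not taken from this diff):

```python
def should_skip_completion_test(provider_type: str) -> bool:
    """Mirrors the condition above: skip known non-completion providers
    plus anything advertising an openai-compat shim."""
    blocked = (
        "remote::openai",
        "remote::anthropic",
        "remote::gemini",
        "remote::groq",
    )
    return provider_type in blocked or "openai-compat" in provider_type


# hypothetical provider types, for illustration only
assert should_skip_completion_test("remote::groq")
assert should_skip_completion_test("remote::together-openai-compat")
assert not should_skip_completion_test("remote::fireworks")
```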
```diff
@@ -514,7 +519,7 @@ def test_text_chat_completion_tool_calling_tools_not_in_request(
 )
 def test_text_chat_completion_with_multi_turn_tool_calling(client_with_models, text_model_id, test_case):
     """This test tests the model's tool calling loop in various scenarios"""
-    if "llama-4" not in text_model_id.lower():
+    if "llama-4" not in text_model_id.lower() and "llama4" not in text_model_id.lower():
         pytest.xfail("Not tested for non-llama4 models yet")
     tc = TestCase(test_case)
```
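
The looser match matters because model ids spell Llama 4 both ways: the Fireworks id in the test plan uses `llama4` with no hyphen, while Meta-style ids use `llama-4`. A quick check under that assumption (the Meta-style ids are illustrative):

```python
def is_llama4(model_id: str) -> bool:
    # Accept both spellings seen in provider model ids.
    mid = model_id.lower()
    return "llama-4" in mid or "llama4" in mid


assert is_llama4("accounts/fireworks/models/llama4-scout-instruct-basic")
assert is_llama4("meta-llama/Llama-4-Scout-17B-16E-Instruct")
assert not is_llama4("meta-llama/Llama-3.3-70B-Instruct")
```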
```diff
@@ -545,7 +550,7 @@ def test_text_chat_completion_with_multi_turn_tool_calling(client_with_models, t
         )
         op_msg = response.completion_message
         messages.append(op_msg.model_dump())
-        # pprint(op_msg)
+        # print(op_msg)
         assert op_msg.role == "assistant"
         expected = tc["expected"].pop(0)
```
```diff
@@ -568,3 +573,6 @@ def test_text_chat_completion_with_multi_turn_tool_calling(client_with_models, t
             actual_answer = op_msg.content.lower()
             # pprint(actual_answer)
             assert expected["answer"] in actual_answer
+
+        # sleep to avoid rate limit
+        sleep(1)
```
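
The flat one-second pause bounds the multi-turn loop to roughly one chat-completion request per second against the hosted endpoint. A sketch of the pacing pattern, with hypothetical turn objects standing in for the test's message loop:

```python
from time import sleep


def run_turns(turns, send):
    """Issue one request per turn, pausing between calls so a hosted
    provider's rate limiter isn't tripped (1s is an assumed budget)."""
    results = []
    for turn in turns:
        results.append(send(turn))
        sleep(1)  # ~1 request/second toward the provider
    return results
```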