test: verification on provider's OAI endpoints (#1893)

# What does this PR do?

Runs the inference integration tests against the `verification` distribution, exercising providers' OpenAI-compatible endpoints (here, Fireworks' Llama 4 Scout).

## Test Plan
```bash
export MODEL=accounts/fireworks/models/llama4-scout-instruct-basic
LLAMA_STACK_CONFIG=verification pytest -s -v tests/integration/inference \
  --vision-model $MODEL --text-model $MODEL
```
Authored by ehhuang on 2025-04-07 23:06:28 -07:00, committed by GitHub
parent 530d4bdfe1
commit 7b4eb0967e
43 changed files with 1683 additions and 17 deletions


```diff
@@ -6,6 +6,7 @@
 import os
+from time import sleep
 import pytest
 from pydantic import BaseModel
```
```diff
@@ -23,11 +24,15 @@ def skip_if_model_doesnt_support_completion(client_with_models, model_id):
     provider_id = models[model_id].provider_id
     providers = {p.provider_id: p for p in client_with_models.providers.list()}
     provider = providers[provider_id]
-    if provider.provider_type in (
-        "remote::openai",
-        "remote::anthropic",
-        "remote::gemini",
-        "remote::groq",
+    if (
+        provider.provider_type
+        in (
+            "remote::openai",
+            "remote::anthropic",
+            "remote::gemini",
+            "remote::groq",
+        )
+        or "openai-compat" in provider.provider_type
     ):
         pytest.skip(f"Model {model_id} hosted by {provider.provider_type} doesn't support completion")
```
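
The substring check is the functional change here: any provider whose type merely embeds `openai-compat` is now skipped as well, since OpenAI-compatible chat endpoints don't serve the legacy completion API. A minimal sketch of the predicate (the provider types below are illustrative, not taken from this diff):

```python
def should_skip_completion_test(provider_type: str) -> bool:
    """Mirrors the condition above: skip known non-completion providers
    plus anything advertising an openai-compat shim."""
    blocked = (
        "remote::openai",
        "remote::anthropic",
        "remote::gemini",
        "remote::groq",
    )
    return provider_type in blocked or "openai-compat" in provider_type


# hypothetical provider types, for illustration only
assert should_skip_completion_test("remote::groq")
assert should_skip_completion_test("remote::together-openai-compat")
assert not should_skip_completion_test("remote::fireworks")
```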
```diff
@@ -514,7 +519,7 @@ def test_text_chat_completion_tool_calling_tools_not_in_request(
 )
 def test_text_chat_completion_with_multi_turn_tool_calling(client_with_models, text_model_id, test_case):
     """This test tests the model's tool calling loop in various scenarios"""
-    if "llama-4" not in text_model_id.lower():
+    if "llama-4" not in text_model_id.lower() and "llama4" not in text_model_id.lower():
         pytest.xfail("Not tested for non-llama4 models yet")
     tc = TestCase(test_case)
```
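
The looser match matters because model ids spell Llama 4 both ways: the Fireworks id in the test plan uses `llama4` with no hyphen, while Meta-style ids use `llama-4`. A quick check under that assumption (the Meta-style ids are illustrative):

```python
def is_llama4(model_id: str) -> bool:
    # Accept both spellings seen in provider model ids.
    mid = model_id.lower()
    return "llama-4" in mid or "llama4" in mid


assert is_llama4("accounts/fireworks/models/llama4-scout-instruct-basic")
assert is_llama4("meta-llama/Llama-4-Scout-17B-16E-Instruct")
assert not is_llama4("meta-llama/Llama-3.3-70B-Instruct")
```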
```diff
@@ -545,7 +550,7 @@ def test_text_chat_completion_with_multi_turn_tool_calling(client_with_models, t
         )
         op_msg = response.completion_message
         messages.append(op_msg.model_dump())
-        # pprint(op_msg)
+        # print(op_msg)
         assert op_msg.role == "assistant"
         expected = tc["expected"].pop(0)
```
```diff
@@ -568,3 +573,6 @@ def test_text_chat_completion_with_multi_turn_tool_calling(client_with_models, t
             actual_answer = op_msg.content.lower()
             # pprint(actual_answer)
             assert expected["answer"] in actual_answer
+
+        # sleep to avoid rate limit
+        sleep(1)
```
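
The flat one-second pause bounds the multi-turn loop to roughly one chat-completion request per second against the hosted endpoint. A sketch of the pacing pattern, with hypothetical turn objects standing in for the test's message loop:

```python
from time import sleep


def run_turns(turns, send):
    """Issue one request per turn, pausing between calls so a hosted
    provider's rate limiter isn't tripped (1s is an assumed budget)."""
    results = []
    for turn in turns:
        results.append(send(turn))
        sleep(1)  # ~1 request/second toward the provider
    return results
```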