test: verification on provider's OAI endpoints (#1893)

# What does this PR do?

Adds verification coverage for providers' OpenAI-compatible endpoints: the inference integration tests can now run against OpenAI-compat provider endpoints via the `verification` config. The test changes shown below recognize `llama4` model ids (e.g. Fireworks) alongside `llama-4`, skip completion tests for providers whose type contains `openai-compat`, throttle the multi-turn tool-calling loop to avoid rate limits, and tighten the tool test-case `param_type` values from `list` to `list[str]`.

## Test Plan
```sh
export MODEL=accounts/fireworks/models/llama4-scout-instruct-basic
LLAMA_STACK_CONFIG=verification pytest -s -v tests/integration/inference \
  --vision-model $MODEL --text-model $MODEL
```
ehhuang, 2025-04-07 23:06:28 -07:00 (committed by GitHub)
commit 7b4eb0967e · parent 530d4bdfe1
43 changed files with 1683 additions and 17 deletions


@@ -6,6 +6,7 @@
 import os
+from time import sleep
 import pytest
 from pydantic import BaseModel
@@ -23,11 +24,15 @@ def skip_if_model_doesnt_support_completion(client_with_models, model_id):
     provider_id = models[model_id].provider_id
     providers = {p.provider_id: p for p in client_with_models.providers.list()}
     provider = providers[provider_id]
-    if provider.provider_type in (
-        "remote::openai",
-        "remote::anthropic",
-        "remote::gemini",
-        "remote::groq",
+    if (
+        provider.provider_type
+        in (
+            "remote::openai",
+            "remote::anthropic",
+            "remote::gemini",
+            "remote::groq",
+        )
+        or "openai-compat" in provider.provider_type
     ):
         pytest.skip(f"Model {model_id} hosted by {provider.provider_type} doesn't support completion")
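
As a reading aid (not part of the diff), here is a minimal sketch of how the widened skip predicate behaves; `remote::fireworks-openai-compat` is an assumed example of an OpenAI-compat provider type, used only for illustration.

```python
# Minimal sketch of the skip predicate above. The provider-type strings are
# examples for illustration, not an exhaustive list from the codebase.
EXPLICITLY_SKIPPED = ("remote::openai", "remote::anthropic", "remote::gemini", "remote::groq")


def skips_completion(provider_type: str) -> bool:
    # Skip explicitly listed providers, plus anything served through an
    # OpenAI-compat adapter (provider type contains "openai-compat").
    return provider_type in EXPLICITLY_SKIPPED or "openai-compat" in provider_type


assert skips_completion("remote::openai")
assert skips_completion("remote::fireworks-openai-compat")  # assumed example name
assert not skips_completion("remote::fireworks")
```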
@@ -514,7 +519,7 @@ def test_text_chat_completion_tool_calling_tools_not_in_request(
 )
 def test_text_chat_completion_with_multi_turn_tool_calling(client_with_models, text_model_id, test_case):
     """This test tests the model's tool calling loop in various scenarios"""
-    if "llama-4" not in text_model_id.lower():
+    if "llama-4" not in text_model_id.lower() and "llama4" not in text_model_id.lower():
         pytest.xfail("Not tested for non-llama4 models yet")
     tc = TestCase(test_case)
@@ -545,7 +550,7 @@ def test_text_chat_completion_with_multi_turn_tool_calling(client_with_models, t
         )
         op_msg = response.completion_message
         messages.append(op_msg.model_dump())
-        # pprint(op_msg)
+        # print(op_msg)
         assert op_msg.role == "assistant"
         expected = tc["expected"].pop(0)
@@ -568,3 +573,6 @@ def test_text_chat_completion_with_multi_turn_tool_calling(client_with_models, t
             actual_answer = op_msg.content.lower()
             # pprint(actual_answer)
             assert expected["answer"] in actual_answer
+
+        # sleep to avoid rate limit
+        sleep(1)
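
The trailing `sleep(1)` simply throttles the multi-turn loop so hosted endpoints with per-minute rate limits do not reject later turns. A minimal sketch of the same pattern, with a hypothetical `call_model` standing in for the test's real chat-completion call:

```python
from time import sleep


def run_turns(turns, call_model, delay_s: float = 1.0):
    """Run each turn, pausing between calls to stay under provider rate limits.

    `call_model` is a hypothetical stand-in for the chat-completion call in the test.
    """
    results = []
    for turn in turns:
        results.append(call_model(turn))
        # sleep to avoid rate limit, mirroring the test change above
        sleep(delay_s)
    return results
```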


@@ -76,8 +76,9 @@ def multi_image_data():
 @pytest.mark.parametrize("stream", [True, False])
 def test_image_chat_completion_multiple_images(client_with_models, vision_model_id, multi_image_data, stream):
-    if "llama-4" not in vision_model_id.lower() and "gpt-4o" not in vision_model_id.lower():
-        pytest.skip("Skip for non-llama4, gpt4o models")
+    supported_models = ["llama-4", "gpt-4o", "llama4"]
+    if not any(model in vision_model_id.lower() for model in supported_models):
+        pytest.skip(f"Skip for non-supported model: {vision_model_id}")
     messages = [
         {

@@ -220,7 +220,7 @@
           "description": "Availability status of the product."
         },
         "tags": {
-          "param_type": "list",
+          "param_type": "list[str]",
           "description": "List of product tags"
         }
       }
@@ -294,7 +294,7 @@
           "description": "Location of the event"
         },
         "participants": {
-          "param_type": "list",
+          "param_type": "list[str]",
           "description": "List of participant names"
         }
       }
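
The `param_type` values above live in the tool-calling test-case JSON; a minimal Python sketch of what the touched tool-parameter entries look like with the tightened types (any structure outside the fragments shown is an assumption):

```python
# Hypothetical reconstruction of the tool-parameter entries touched above;
# only the "tags" / "participants" fragments are taken from the diff.
tool_parameters = {
    "tags": {
        "param_type": "list[str]",  # was "list"; now names the element type
        "description": "List of product tags",
    },
    "participants": {
        "param_type": "list[str]",  # was "list"
        "description": "List of participant names",
    },
}
```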