Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-06-27 18:50:41 +00:00)
test: verification on provider's OAI endpoints (#1893)
# What does this PR do?

## Test Plan

export MODEL=accounts/fireworks/models/llama4-scout-instruct-basic
LLAMA_STACK_CONFIG=verification pytest -s -v tests/integration/inference --vision-model $MODEL --text-model $MODEL
Parent: 530d4bdfe1
Commit: 7b4eb0967e
43 changed files with 1683 additions and 17 deletions
@@ -6,6 +6,7 @@
 import os
+from time import sleep

 import pytest
 from pydantic import BaseModel
@@ -23,11 +24,15 @@ def skip_if_model_doesnt_support_completion(client_with_models, model_id):
     provider_id = models[model_id].provider_id
     providers = {p.provider_id: p for p in client_with_models.providers.list()}
     provider = providers[provider_id]
-    if provider.provider_type in (
-        "remote::openai",
-        "remote::anthropic",
-        "remote::gemini",
-        "remote::groq",
+    if (
+        provider.provider_type
+        in (
+            "remote::openai",
+            "remote::anthropic",
+            "remote::gemini",
+            "remote::groq",
+        )
+        or "openai-compat" in provider.provider_type
     ):
         pytest.skip(f"Model {model_id} hosted by {provider.provider_type} doesn't support completion")
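Put back together, the skip helper after this hunk reads roughly as follows. This is a sketch, not the verbatim file: the `models` mapping is built earlier in the helper, outside this hunk, so that line is an assumption.

```python
import pytest


def skip_if_model_doesnt_support_completion(client_with_models, model_id):
    # Assumed from context outside the hunk: map model identifiers to model objects.
    models = {m.identifier: m for m in client_with_models.models.list()}
    provider_id = models[model_id].provider_id
    providers = {p.provider_id: p for p in client_with_models.providers.list()}
    provider = providers[provider_id]
    if (
        provider.provider_type
        in (
            "remote::openai",
            "remote::anthropic",
            "remote::gemini",
            "remote::groq",
        )
        # Newly added branch: providers exposing an OpenAI-compat adapter are
        # also treated as not supporting the legacy completion API in these tests.
        or "openai-compat" in provider.provider_type
    ):
        pytest.skip(f"Model {model_id} hosted by {provider.provider_type} doesn't support completion")
```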
@@ -514,7 +519,7 @@ def test_text_chat_completion_tool_calling_tools_not_in_request(
 )
 def test_text_chat_completion_with_multi_turn_tool_calling(client_with_models, text_model_id, test_case):
     """This test tests the model's tool calling loop in various scenarios"""
-    if "llama-4" not in text_model_id.lower():
+    if "llama-4" not in text_model_id.lower() and "llama4" not in text_model_id.lower():
         pytest.xfail("Not tested for non-llama4 models yet")
 
     tc = TestCase(test_case)
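The extra hyphen-less check matters because provider-prefixed model ids, such as the Fireworks id from the Test Plan, spell it "llama4". A quick illustration; the first id comes from this PR's Test Plan, the second is a hyphenated id added only for contrast:

```python
# Both spellings should count as a Llama 4 model under the updated check.
for model_id in (
    "accounts/fireworks/models/llama4-scout-instruct-basic",  # from the Test Plan above
    "meta-llama/Llama-4-Scout-17B-16E-Instruct",              # hyphenated id, illustrative
):
    is_llama4 = "llama-4" in model_id.lower() or "llama4" in model_id.lower()
    print(model_id, is_llama4)  # prints True for both
```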
@@ -545,7 +550,7 @@ def test_text_chat_completion_with_multi_turn_tool_calling(client_with_models, t
         )
         op_msg = response.completion_message
         messages.append(op_msg.model_dump())
-        # pprint(op_msg)
+        # print(op_msg)
 
         assert op_msg.role == "assistant"
         expected = tc["expected"].pop(0)
@@ -568,3 +573,6 @@ def test_text_chat_completion_with_multi_turn_tool_calling(client_with_models, t
             actual_answer = op_msg.content.lower()
             # pprint(actual_answer)
             assert expected["answer"] in actual_answer
+
+        # sleep to avoid rate limit
+        sleep(1)
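The fixed `sleep(1)` is the simplest way to stay under provider rate limits when the tool-calling loop fires several requests back to back. If flakiness persists, retry-with-backoff is a common alternative; a minimal sketch, not part of this PR, with illustrative names:

```python
import time


def call_with_backoff(fn, *args, retries=3, base_delay=1.0, **kwargs):
    """Retry a provider call with exponential backoff (illustrative only)."""
    for attempt in range(retries):
        try:
            return fn(*args, **kwargs)
        except Exception:  # in real code, catch the provider's rate-limit error specifically
            if attempt == retries - 1:
                raise
            time.sleep(base_delay * (2 ** attempt))
```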
@@ -76,8 +76,9 @@ def multi_image_data():
 
 @pytest.mark.parametrize("stream", [True, False])
 def test_image_chat_completion_multiple_images(client_with_models, vision_model_id, multi_image_data, stream):
-    if "llama-4" not in vision_model_id.lower() and "gpt-4o" not in vision_model_id.lower():
-        pytest.skip("Skip for non-llama4, gpt4o models")
+    supported_models = ["llama-4", "gpt-4o", "llama4"]
+    if not any(model in vision_model_id.lower() for model in supported_models):
+        pytest.skip(f"Skip for non-supported model: {vision_model_id}")
 
     messages = [
         {
@@ -220,7 +220,7 @@
             "description": "Availability status of the product."
         },
         "tags": {
-            "param_type": "list",
+            "param_type": "list[str]",
             "description": "List of product tags"
         }
     }
@@ -294,7 +294,7 @@
             "description": "Location of the event"
         },
         "participants": {
-            "param_type": "list",
+            "param_type": "list[str]",
             "description": "List of participant names"
         }
     }
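For context, these `param_type` entries live inside tool-definition test cases; tightening `list` to `list[str]` spells out the element type the model is expected to emit. A sketch of the shape of one such parameter block; the enclosing structure is assumed, only the leaf keys and values come from this diff:

```python
# Illustrative shape only: the surrounding tool-definition keys are assumed.
tool_parameters = {
    "participants": {
        "param_type": "list[str]",  # was "list"; now names the element type
        "description": "List of participant names",
    },
}
```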