test: verification on provider's OAI endpoints (#1893)
# What does this PR do?

## Test Plan

export MODEL=accounts/fireworks/models/llama4-scout-instruct-basic;
LLAMA_STACK_CONFIG=verification pytest -s -v tests/integration/inference --vision-model $MODEL --text-model $MODEL
parent 530d4bdfe1
commit 7b4eb0967e
43 changed files with 1683 additions and 17 deletions
```diff
@@ -6,6 +6,7 @@
 import os
+from time import sleep

 import pytest
 from pydantic import BaseModel
```
```diff
@@ -23,11 +24,15 @@ def skip_if_model_doesnt_support_completion(client_with_models, model_id):
     provider_id = models[model_id].provider_id
     providers = {p.provider_id: p for p in client_with_models.providers.list()}
     provider = providers[provider_id]
-    if provider.provider_type in (
-        "remote::openai",
-        "remote::anthropic",
-        "remote::gemini",
-        "remote::groq",
+    if (
+        provider.provider_type
+        in (
+            "remote::openai",
+            "remote::anthropic",
+            "remote::gemini",
+            "remote::groq",
+        )
+        or "openai-compat" in provider.provider_type
     ):
         pytest.skip(f"Model {model_id} hosted by {provider.provider_type} doesn't support completion")
```
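For readers skimming the diff: the hunk above widens the skip condition so that any provider whose type string advertises OpenAI compatibility is also skipped. A minimal standalone sketch of that predicate, extracted as a pure function (the provider_type strings in the asserts are illustrative, not taken from this PR):

```python
# Sketch of the widened skip predicate from the hunk above, pulled out as a
# pure function so it can be exercised directly.
def should_skip_completion(provider_type: str) -> bool:
    # Skip providers that only expose chat-style OpenAI endpoints, plus any
    # provider whose type string marks it as OpenAI-compatible.
    return (
        provider_type
        in (
            "remote::openai",
            "remote::anthropic",
            "remote::gemini",
            "remote::groq",
        )
        or "openai-compat" in provider_type
    )


assert should_skip_completion("remote::openai")
assert should_skip_completion("remote::fireworks-openai-compat")  # illustrative type
assert not should_skip_completion("remote::fireworks")  # illustrative type
```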
```diff
@@ -514,7 +519,7 @@ def test_text_chat_completion_tool_calling_tools_not_in_request(
 )
 def test_text_chat_completion_with_multi_turn_tool_calling(client_with_models, text_model_id, test_case):
     """This test tests the model's tool calling loop in various scenarios"""
-    if "llama-4" not in text_model_id.lower():
+    if "llama-4" not in text_model_id.lower() and "llama4" not in text_model_id.lower():
         pytest.xfail("Not tested for non-llama4 models yet")

     tc = TestCase(test_case)
```
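The loosened guard accepts both hyphenated and unhyphenated Llama 4 spellings, which is what lets the accounts/fireworks/models/llama4-scout-instruct-basic model from the test plan exercise this test. A sketch of the check in isolation (the meta-llama ids in the asserts are illustrative):

```python
# Sketch of the model-id guard from the hunk above. Both "llama-4" and
# "llama4" spellings are accepted, case-insensitively.
def is_llama4(text_model_id: str) -> bool:
    lowered = text_model_id.lower()
    return "llama-4" in lowered or "llama4" in lowered


assert is_llama4("accounts/fireworks/models/llama4-scout-instruct-basic")
assert is_llama4("meta-llama/Llama-4-Scout-17B-16E-Instruct")  # illustrative id
assert not is_llama4("meta-llama/Llama-3.3-70B-Instruct")  # illustrative id
```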
```diff
@@ -545,7 +550,7 @@ def test_text_chat_completion_with_multi_turn_tool_calling(client_with_models, t
     )
     op_msg = response.completion_message
     messages.append(op_msg.model_dump())
-    # pprint(op_msg)
+    # print(op_msg)

     assert op_msg.role == "assistant"
     expected = tc["expected"].pop(0)
```
```diff
@@ -568,3 +573,6 @@ def test_text_chat_completion_with_multi_turn_tool_calling(client_with_models, t
     actual_answer = op_msg.content.lower()
     # pprint(actual_answer)
     assert expected["answer"] in actual_answer
+
+    # sleep to avoid rate limit
+    sleep(1)
```