forked from phoenix-oss/llama-stack-mirror
feat(verification): various improvements (#1921)
# What does this PR do?

- providers and their models now live in config.yaml
- better distinguish different cases within a test
- add a model key to surface the provider's model_id
- include an example command to rerun a single test case

## Test Plan

<img width="1173" alt="image" src="https://github.com/user-attachments/assets/b414baf0-c768-451f-8c3b-c2905cf36fac" />
This commit is contained in:
parent
09a83b1ec1
commit
14146e4b3f
22 changed files with 4449 additions and 8810 deletions
|
@ -4,6 +4,10 @@
|
|||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
import re
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
def pytest_addoption(parser):
|
||||
parser.addoption(
|
||||
|
@ -14,7 +18,7 @@ def pytest_addoption(parser):
|
|||
parser.addoption(
|
||||
"--api-key",
|
||||
action="store",
|
||||
help="API key",
|
||||
help="API key to use for the provider",
|
||||
)
|
||||
parser.addoption(
|
||||
"--provider",
|
||||
|
@ -24,5 +28,64 @@ def pytest_addoption(parser):
|
|||
|
||||
|
||||
pytest_plugins = [
|
||||
"tests.verifications.openai.fixtures.fixtures",
|
||||
"pytest_jsonreport",
|
||||
"tests.verifications.openai_api.fixtures.fixtures",
|
||||
"tests.verifications.openai_api.fixtures.load",
|
||||
]
|
||||
|
||||
|
||||
# Fallback patterns for pulling the parametrization parts out of a node id
# shaped like ``path/test_x.py::test_name[<model>-<case_id>]``.
#
# Model: everything between the first "[" and the next "-". This truncates
# model names that themselves contain hyphens, so it is only a last resort.
_MODEL_FROM_NODEID_RE = re.compile(r"\[(.*?)-")
# Case id: everything after the first "-" that follows the first "[".
# Anchoring on "[" keeps hyphens in directory or file names from being
# mistaken for the model/case separator.
_CASE_FROM_NODEID_RE = re.compile(r"\[.*?-(.*?)\]$")


@pytest.hookimpl(optionalhook=True)
def pytest_json_runtest_metadata(item, call):
    """Add model and case_id to pytest-json report metadata.

    Args:
        item: The test item being reported. Only ``item.nodeid`` and, when
            present, ``item.callspec.params`` are read.
        call: Unused; accepted because the hook signature provides it.

    Returns:
        A dict that may contain:

        * ``"model"`` -- the ``model`` parametrize value, or a best-effort
          guess parsed from the node id when that value is missing or falsy.
        * ``"case_id"`` -- the text following ``<model>-`` at the end of the
          node id, or the sentinel ``"parsing_failed"`` /
          ``"parsing_failed_no_model"`` when the test has a ``case``
          parameter but no id could be parsed.

        The dict is empty for tests with neither a recognizable model nor a
        ``case`` parameter.
    """
    metadata = {}
    nodeid = item.nodeid
    # Items that are not parametrized have no ``callspec`` attribute at all.
    params = item.callspec.params if hasattr(item, "callspec") else {}

    # 1. Determine the model: prefer the real parameter value, then fall back
    #    to parsing the node id (less reliable).
    model = params.get("model")
    if model:
        metadata["model"] = model
    else:
        match_model = _MODEL_FROM_NODEID_RE.search(nodeid)
        if match_model:
            model = match_model.group(1)
            metadata["model"] = model
        else:
            print(f"Warning: Could not determine model for test {nodeid}")
            model = None

    # 2. Determine the case id.
    if model:
        # Look for "<model>-<case_id>]" at the end of the node id. The model
        # is escaped because it may contain regex metacharacters, and passed
        # through str() so a non-string parameter cannot raise TypeError.
        match_case = re.search(re.escape(str(model)) + r"-(.*?)\]$", nodeid)
        if match_case:
            metadata["case_id"] = match_case.group(1)
            return metadata

        # The node id does not have the expected shape; try the generic
        # pattern as a last resort.
        match_case_fallback = _CASE_FROM_NODEID_RE.search(nodeid)
        if match_case_fallback:
            metadata["case_id"] = match_case_fallback.group(1)
            print(f"Warning: Used fallback regex to parse case_id from nodeid {nodeid}")
        else:
            print(f"Warning: Could not parse case_id from nodeid {nodeid} even with fallback.")
            if "case" in params:
                metadata["case_id"] = "parsing_failed"
    elif "case" in params:
        # No model to anchor on, but this is known to be a case-driven test,
        # so only the generic pattern can be tried.
        match_case_fallback = _CASE_FROM_NODEID_RE.search(nodeid)
        if match_case_fallback:
            metadata["case_id"] = match_case_fallback.group(1)
            print(f"Warning: Used fallback regex to parse case_id from nodeid {nodeid} (model unknown)")
        else:
            print(f"Warning: Could not parse case_id from nodeid {nodeid} (model unknown)")
            metadata["case_id"] = "parsing_failed_no_model"
    # Otherwise: not a test with a model or case parameter; nothing to add.

    return metadata
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue