ci test report

Sixian Yi 2025-01-09 00:35:18 -08:00
parent 6b466389b5
commit 934ecdc401
2 changed files with 165 additions and 0 deletions

conftest.py

@@ -22,6 +22,7 @@ from termcolor import colored
from .env import get_env_or_fail
from .test_config_helper import try_load_config_file_cached
+from .report import Report
class ProviderFixture(BaseModel):
@@ -141,6 +142,8 @@ def pytest_configure(config):
key, value = env_var.split("=", 1)
os.environ[key] = value
+    config.pluginmanager.register(Report(config))
def pytest_addoption(parser):
parser.addoption(
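
For context, a short sketch of how the newly registered plugin gets exercised: running the provider tests with pytest-html enabled makes pytest invoke the Report hooks defined in report.py below. The test path here is an assumption for illustration, not something this commit defines; the --html and --self-contained-html options come from pytest-html.

# Hypothetical invocation sketch (test path is assumed): run the provider
# inference tests with pytest-html enabled so pytest_html_results_summary fires.
import pytest

pytest.main(
    [
        "llama_stack/providers/tests/inference/",  # assumed test directory
        "--html=report.html",                      # pytest-html report output
        "--self-contained-html",                   # inline assets into one file
    ]
)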

report.py

@@ -0,0 +1,162 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from collections import defaultdict

import pytest
from llama_models.datatypes import CoreModelId
from pytest_html.basereport import _process_outcome

INFERENCE_APIS = ["chat_completion"]
FUNCTIONALITIES = ["streaming", "structured_output", "tool_calling"]
SUPPORTED_MODELS = {
    "ollama": [
        CoreModelId.llama3_1_8b_instruct.value,
        CoreModelId.llama3_1_70b_instruct.value,
        CoreModelId.llama3_1_405b_instruct.value,
        CoreModelId.llama3_2_1b_instruct.value,
        CoreModelId.llama3_2_3b_instruct.value,
        CoreModelId.llama3_2_11b_vision_instruct.value,
        CoreModelId.llama3_2_90b_vision_instruct.value,
        CoreModelId.llama3_3_70b_instruct.value,
        CoreModelId.llama_guard_3_8b.value,
        CoreModelId.llama_guard_3_1b.value,
    ],
"fireworks": [
CoreModelId.llama3_1_8b_instruct.value,
CoreModelId.llama3_1_70b_instruct.value,
CoreModelId.llama3_1_405b_instruct.value,
CoreModelId.llama3_2_1b_instruct.value,
CoreModelId.llama3_2_3b_instruct.value,
CoreModelId.llama3_2_11b_vision_instruct.value,
CoreModelId.llama3_2_90b_vision_instruct.value,
CoreModelId.llama3_3_70b_instruct.value,
CoreModelId.llama_guard_3_8b.value,
CoreModelId.llama_guard_3_11b_vision.value,
],
"together": [
CoreModelId.llama3_1_8b_instruct.value,
CoreModelId.llama3_1_70b_instruct.value,
CoreModelId.llama3_1_405b_instruct.value,
CoreModelId.llama3_2_3b_instruct.value,
CoreModelId.llama3_2_11b_vision_instruct.value,
CoreModelId.llama3_2_90b_vision_instruct.value,
CoreModelId.llama3_3_70b_instruct.value,
CoreModelId.llama_guard_3_8b.value,
CoreModelId.llama_guard_3_11b_vision.value,
],
}


class Report:
def __init__(self, _config):
self.report_data = defaultdict(dict)
self.test_data = dict()

    @pytest.hookimpl(tryfirst=True)
def pytest_runtest_logreport(self, report):
# This hook is called in several phases, including setup, call and teardown
# The test is considered failed / error if any of the outcomes is not "Passed"
outcome = _process_outcome(report)
if report.nodeid not in self.test_data:
self.test_data[report.nodeid] = outcome
elif self.test_data[report.nodeid] != outcome and outcome != "Passed":
self.test_data[report.nodeid] = outcome
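
    # For example, if the "call" phase is recorded as "Passed" but a later phase
    # (e.g. teardown) reports a non-"Passed" outcome, that outcome replaces it.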

    def pytest_html_results_summary(self, prefix, summary, postfix):
prefix.append("<h3> Inference Providers: </h3>")
for provider in self.report_data.keys():
prefix.extend(
[
f"<h4> { provider } </h4>",
"<ul>",
"<li><b> Supported models: </b></li>",
]
)
supported_models = (
["<ul>"]
+ [f"<li> {model} </li>" for model in SUPPORTED_MODELS[provider]]
+ ["</ul>"]
)
prefix.extend(supported_models)
api_section = ["<li><h4> APIs: </h4></li>", "<ul>"]
            for api in INFERENCE_APIS:
tests = self.report_data[provider].get(api, set())
api_section.append(f"<li> {api} </li>")
api_section.append("<ul>")
for test in tests:
result = self.test_data[test]
api_section.append(
f"<li> test: {test} {self._print_result_icon(result) } </li>"
)
api_section.append("</ul>")
api_section.append("</ul>")
prefix.extend(api_section)
prefix.append("<li><h4> Model capabilities: </h4> </li>")
prefix.append("<ul>")
for functionality in FUNCTIONALITIES:
tests = self.report_data[provider].get(functionality, set())
prefix.append(f"<li> <b>{functionality}</b> </li>")
prefix.append("<ul>")
for test in tests:
result = self.test_data[test]
prefix.append(
f"<li> tests: {test} { self._print_result_icon(result) } </li>"
)
prefix.append("</ul>")
prefix.append("</ul>")
prefix.append("</ul>")

    @pytest.hookimpl(tryfirst=True)
def pytest_runtest_makereport(self, item, call):
if call.when != "setup":
return
# generate the mapping from provider, api/functionality to test nodeid
        # Tests collected without a callspec (i.e. not parametrized) carry no provider info
        if not hasattr(item, "callspec"):
            return
        provider = item.callspec.params.get("inference_stack")
if provider is not None:
api, functionality = self._process_function_name(item.name.split("[")[0])
api_test_funcs = self.report_data[provider].get(api, set())
functionality_test_funcs = self.report_data[provider].get(
functionality, set()
)
api_test_funcs.add(item.nodeid)
functionality_test_funcs.add(item.nodeid)
self.report_data[provider][api] = api_test_funcs
self.report_data[provider][functionality] = functionality_test_funcs

    def _process_function_name(self, function_name):
api, functionality = None, None
        for val in INFERENCE_APIS:
if val in function_name:
api = val
for val in FUNCTIONALITIES:
if val in function_name:
functionality = val
return api, functionality

    def _print_result_icon(self, result):
        if result == "Passed":
            return "&#x2705;"
        else:
            # "Failed", "Error", or any other non-passing outcome
            return "&#x274C;"