mirror of https://github.com/meta-llama/llama-stack.git
synced 2025-08-06 10:42:39 +00:00

ci test report

This commit is contained in:
parent 6b466389b5
commit 934ecdc401

2 changed files with 165 additions and 0 deletions
llama_stack/providers/tests/conftest.py

@@ -22,6 +22,7 @@ from termcolor import colored
 
 from .env import get_env_or_fail
 from .test_config_helper import try_load_config_file_cached
+from .report import Report
 
 
 class ProviderFixture(BaseModel):
@@ -141,6 +142,8 @@ def pytest_configure(config):
         key, value = env_var.split("=", 1)
         os.environ[key] = value
 
+    config.pluginmanager.register(Report(config))
+
 
 def pytest_addoption(parser):
     parser.addoption(
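The two hunks above wire the reporter into the existing test setup: the plugin object is created once in pytest_configure and handed to pytest's plugin manager, after which pytest invokes any hook methods defined on it. A minimal, self-contained sketch of that registration pattern, assuming nothing beyond stock pytest (TinyPlugin is a hypothetical stand-in, not part of this commit):

# conftest.py sketch of the registration pattern used above.
# Any method on the registered object whose name matches a pytest hook
# (e.g. pytest_runtest_logreport) is discovered and called automatically.
class TinyPlugin:
    def pytest_runtest_logreport(self, report):
        # Called once per phase (setup / call / teardown) for every test.
        print(report.nodeid, report.when, report.outcome)


def pytest_configure(config):
    # Same pattern as the conftest.py change above: instantiate and register.
    config.pluginmanager.register(TinyPlugin())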
llama_stack/providers/tests/report.py (new file, 162 lines)

@@ -0,0 +1,162 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.


from collections import defaultdict

import pytest
from llama_models.datatypes import CoreModelId

from pytest_html.basereport import _process_outcome


INFERNECE_APIS = ["chat_completion"]
FUNCTIONALITIES = ["streaming", "structured_output", "tool_calling"]
SUPPORTED_MODELS = {
    "ollama": [
        CoreModelId.llama3_1_8b_instruct.value,
        CoreModelId.llama3_1_8b_instruct.value,
        CoreModelId.llama3_1_70b_instruct.value,
        CoreModelId.llama3_1_70b_instruct.value,
        CoreModelId.llama3_1_405b_instruct.value,
        CoreModelId.llama3_1_405b_instruct.value,
        CoreModelId.llama3_2_1b_instruct.value,
        CoreModelId.llama3_2_1b_instruct.value,
        CoreModelId.llama3_2_3b_instruct.value,
        CoreModelId.llama3_2_3b_instruct.value,
        CoreModelId.llama3_2_11b_vision_instruct.value,
        CoreModelId.llama3_2_11b_vision_instruct.value,
        CoreModelId.llama3_2_90b_vision_instruct.value,
        CoreModelId.llama3_2_90b_vision_instruct.value,
        CoreModelId.llama3_3_70b_instruct.value,
        CoreModelId.llama_guard_3_8b.value,
        CoreModelId.llama_guard_3_1b.value,
    ],
    "fireworks": [
        CoreModelId.llama3_1_8b_instruct.value,
        CoreModelId.llama3_1_70b_instruct.value,
        CoreModelId.llama3_1_405b_instruct.value,
        CoreModelId.llama3_2_1b_instruct.value,
        CoreModelId.llama3_2_3b_instruct.value,
        CoreModelId.llama3_2_11b_vision_instruct.value,
        CoreModelId.llama3_2_90b_vision_instruct.value,
        CoreModelId.llama3_3_70b_instruct.value,
        CoreModelId.llama_guard_3_8b.value,
        CoreModelId.llama_guard_3_11b_vision.value,
    ],
    "together": [
        CoreModelId.llama3_1_8b_instruct.value,
        CoreModelId.llama3_1_70b_instruct.value,
        CoreModelId.llama3_1_405b_instruct.value,
        CoreModelId.llama3_2_3b_instruct.value,
        CoreModelId.llama3_2_11b_vision_instruct.value,
        CoreModelId.llama3_2_90b_vision_instruct.value,
        CoreModelId.llama3_3_70b_instruct.value,
        CoreModelId.llama_guard_3_8b.value,
        CoreModelId.llama_guard_3_11b_vision.value,
    ],
}


class Report:
    def __init__(self, _config):
        self.report_data = defaultdict(dict)
        self.test_data = dict()

    @pytest.hookimpl(tryfirst=True)
    def pytest_runtest_logreport(self, report):
        # This hook is called in several phases, including setup, call and teardown
        # The test is considered failed / error if any of the outcomes is not "Passed"
        outcome = _process_outcome(report)
        if report.nodeid not in self.test_data:
            self.test_data[report.nodeid] = outcome
        elif self.test_data[report.nodeid] != outcome and outcome != "Passed":
            self.test_data[report.nodeid] = outcome

    def pytest_html_results_summary(self, prefix, summary, postfix):
        prefix.append("<h3> Inference Providers: </h3>")
        for provider in self.report_data.keys():
            prefix.extend(
                [
                    f"<h4> { provider } </h4>",
                    "<ul>",
                    "<li><b> Supported models: </b></li>",
                ]
            )
            supported_models = (
                ["<ul>"]
                + [f"<li> {model} </li>" for model in SUPPORTED_MODELS[provider]]
                + ["</ul>"]
            )

            prefix.extend(supported_models)

            api_section = ["<li><h4> APIs: </h4></li>", "<ul>"]

            for api in INFERNECE_APIS:
                tests = self.report_data[provider].get(api, set())
                api_section.append(f"<li> {api} </li>")
                api_section.append("<ul>")
                for test in tests:
                    result = self.test_data[test]
                    api_section.append(
                        f"<li> test: {test} {self._print_result_icon(result) } </li>"
                    )
                api_section.append("</ul>")
            api_section.append("</ul>")

            prefix.extend(api_section)

            prefix.append("<li><h4> Model capabilities: </h4> </li>")
            prefix.append("<ul>")
            for functionality in FUNCTIONALITIES:
                tests = self.report_data[provider].get(functionality, set())
                prefix.append(f"<li> <b>{functionality}</b> </li>")
                prefix.append("<ul>")
                for test in tests:
                    result = self.test_data[test]
                    prefix.append(
                        f"<li> tests: {test} { self._print_result_icon(result) } </li>"
                    )
                prefix.append("</ul>")
            prefix.append("</ul>")
            prefix.append("</ul>")

    @pytest.hookimpl(tryfirst=True)
    def pytest_runtest_makereport(self, item, call):
        if call.when != "setup":
            return
        # generate the mapping from provider, api/functionality to test nodeid
        provider = item.callspec.params.get("inference_stack")
        if provider is not None:
            api, functionality = self._process_function_name(item.name.split("[")[0])

            api_test_funcs = self.report_data[provider].get(api, set())
            functionality_test_funcs = self.report_data[provider].get(
                functionality, set()
            )
            api_test_funcs.add(item.nodeid)
            functionality_test_funcs.add(item.nodeid)
            self.report_data[provider][api] = api_test_funcs
            self.report_data[provider][functionality] = functionality_test_funcs

    def _process_function_name(self, function_name):
        api, functionality = None, None
        for val in INFERNECE_APIS:
            if val in function_name:
                api = val
        for val in FUNCTIONALITIES:
            if val in function_name:
                functionality = val
        return api, functionality

    def _print_result_icon(self, result):
        if result == "Passed":
            return "✅"
        else:
            # result == "Failed" or result == "Error":
            return "❌"
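Taken together, the hooks build two plain dictionaries: pytest_runtest_makereport keys each test node ID by provider (read from the inference_stack fixture parameter) and by the API or functionality parsed out of the test function name, while pytest_runtest_logreport keeps a single outcome per node ID, downgrading it if any phase does not pass. The pytest_html_results_summary hook then walks these dicts to emit the nested <ul> HTML in the pytest-html summary. Roughly, the plugin's state after a hypothetical ollama run might look like the sketch below; the node IDs are invented for illustration and are not real test IDs from this commit.

# Illustrative only: a hypothetical snapshot of Report.report_data / Report.test_data.
report_data = {
    "ollama": {
        # INFERNECE_APIS bucket
        "chat_completion": {
            "inference/test_text_inference.py::test_chat_completion_streaming[ollama]",
            "inference/test_text_inference.py::test_chat_completion_with_tool_calling[ollama]",
        },
        # FUNCTIONALITIES buckets
        "streaming": {
            "inference/test_text_inference.py::test_chat_completion_streaming[ollama]",
        },
        "tool_calling": {
            "inference/test_text_inference.py::test_chat_completion_with_tool_calling[ollama]",
        },
    },
}
test_data = {
    "inference/test_text_inference.py::test_chat_completion_streaming[ollama]": "Passed",
    "inference/test_text_inference.py::test_chat_completion_with_tool_calling[ollama]": "Failed",
}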