From 958225a44c1fa37cfbf833ecefb4a126c89ccdcf Mon Sep 17 00:00:00 2001
From: Sixian Yi
Date: Sun, 12 Jan 2025 22:00:04 -0800
Subject: [PATCH] update report format

---
 llama_stack/providers/tests/report.py | 158 ++++++++++++++++----------
 1 file changed, 95 insertions(+), 63 deletions(-)

diff --git a/llama_stack/providers/tests/report.py b/llama_stack/providers/tests/report.py
index 33d276b28..d0e55e127 100644
--- a/llama_stack/providers/tests/report.py
+++ b/llama_stack/providers/tests/report.py
@@ -10,6 +10,7 @@ from pathlib import Path
 import pytest
 
 from llama_models.datatypes import CoreModelId
+from llama_models.sku_list import all_registered_models
 
 from pytest_html.basereport import _process_outcome
 
@@ -17,56 +18,61 @@ from pytest_html.basereport import _process_outcome
 
 INFERNECE_APIS = ["chat_completion"]
 FUNCTIONALITIES = ["streaming", "structured_output", "tool_calling"]
 SUPPORTED_MODELS = {
-    "ollama": [
-        CoreModelId.llama3_1_8b_instruct.value,
-        CoreModelId.llama3_1_8b_instruct.value,
-        CoreModelId.llama3_1_70b_instruct.value,
-        CoreModelId.llama3_1_70b_instruct.value,
-        CoreModelId.llama3_1_405b_instruct.value,
-        CoreModelId.llama3_1_405b_instruct.value,
-        CoreModelId.llama3_2_1b_instruct.value,
-        CoreModelId.llama3_2_1b_instruct.value,
-        CoreModelId.llama3_2_3b_instruct.value,
-        CoreModelId.llama3_2_3b_instruct.value,
-        CoreModelId.llama3_2_11b_vision_instruct.value,
-        CoreModelId.llama3_2_11b_vision_instruct.value,
-        CoreModelId.llama3_2_90b_vision_instruct.value,
-        CoreModelId.llama3_2_90b_vision_instruct.value,
-        CoreModelId.llama3_3_70b_instruct.value,
-        CoreModelId.llama_guard_3_8b.value,
-        CoreModelId.llama_guard_3_1b.value,
-    ],
-    "fireworks": [
-        CoreModelId.llama3_1_8b_instruct.value,
-        CoreModelId.llama3_1_70b_instruct.value,
-        CoreModelId.llama3_1_405b_instruct.value,
-        CoreModelId.llama3_2_1b_instruct.value,
-        CoreModelId.llama3_2_3b_instruct.value,
-        CoreModelId.llama3_2_11b_vision_instruct.value,
-        CoreModelId.llama3_2_90b_vision_instruct.value,
-        CoreModelId.llama3_3_70b_instruct.value,
-        CoreModelId.llama_guard_3_8b.value,
-        CoreModelId.llama_guard_3_11b_vision.value,
-    ],
-    "together": [
-        CoreModelId.llama3_1_8b_instruct.value,
-        CoreModelId.llama3_1_70b_instruct.value,
-        CoreModelId.llama3_1_405b_instruct.value,
-        CoreModelId.llama3_2_3b_instruct.value,
-        CoreModelId.llama3_2_11b_vision_instruct.value,
-        CoreModelId.llama3_2_90b_vision_instruct.value,
-        CoreModelId.llama3_3_70b_instruct.value,
-        CoreModelId.llama_guard_3_8b.value,
-        CoreModelId.llama_guard_3_11b_vision.value,
-    ],
+    "ollama": set(
+        [
+            CoreModelId.llama3_1_8b_instruct.value,
+            CoreModelId.llama3_1_8b_instruct.value,
+            CoreModelId.llama3_1_70b_instruct.value,
+            CoreModelId.llama3_1_70b_instruct.value,
+            CoreModelId.llama3_1_405b_instruct.value,
+            CoreModelId.llama3_1_405b_instruct.value,
+            CoreModelId.llama3_2_1b_instruct.value,
+            CoreModelId.llama3_2_1b_instruct.value,
+            CoreModelId.llama3_2_3b_instruct.value,
+            CoreModelId.llama3_2_3b_instruct.value,
+            CoreModelId.llama3_2_11b_vision_instruct.value,
+            CoreModelId.llama3_2_11b_vision_instruct.value,
+            CoreModelId.llama3_2_90b_vision_instruct.value,
+            CoreModelId.llama3_2_90b_vision_instruct.value,
+            CoreModelId.llama3_3_70b_instruct.value,
+            CoreModelId.llama_guard_3_8b.value,
+            CoreModelId.llama_guard_3_1b.value,
+        ]
+    ),
+    "fireworks": set(
+        [
+            CoreModelId.llama3_1_8b_instruct.value,
+            CoreModelId.llama3_1_70b_instruct.value,
+            CoreModelId.llama3_1_405b_instruct.value,
+            CoreModelId.llama3_2_1b_instruct.value,
+            CoreModelId.llama3_2_3b_instruct.value,
+            CoreModelId.llama3_2_11b_vision_instruct.value,
+            CoreModelId.llama3_2_90b_vision_instruct.value,
+            CoreModelId.llama3_3_70b_instruct.value,
+            CoreModelId.llama_guard_3_8b.value,
+            CoreModelId.llama_guard_3_11b_vision.value,
+        ]
+    ),
+    "together": set(
+        [
+            CoreModelId.llama3_1_8b_instruct.value,
+            CoreModelId.llama3_1_70b_instruct.value,
+            CoreModelId.llama3_1_405b_instruct.value,
+            CoreModelId.llama3_2_3b_instruct.value,
+            CoreModelId.llama3_2_11b_vision_instruct.value,
+            CoreModelId.llama3_2_90b_vision_instruct.value,
+            CoreModelId.llama3_3_70b_instruct.value,
+            CoreModelId.llama_guard_3_8b.value,
+            CoreModelId.llama_guard_3_11b_vision.value,
+        ]
+    ),
 }
 
 
 class Report:
     def __init__(self, _config):
-        self.report_data = defaultdict(dict)
-        self.test_data = dict()
+        self.test_data = defaultdict(dict)
         self.inference_tests = defaultdict(dict)
 
     @pytest.hookimpl(tryfirst=True)
@@ -89,25 +95,51 @@ class Report:
         report = []
         report.append("# Llama Stack Integration Test Results Report")
         report.append("\n## Summary")
-        report.append("\n### Inference Providers:")
+        report.append("\n## Supported Models: ")
 
-        for provider, models in SUPPORTED_MODELS.items():
-            report.append(f"\n#### {provider}")
-            report.append("\n - **Supported models:**")
-            report.extend([f" - {model}" for model in models])
+        header = "| Model Descriptor |"
+        divider = "|:---|"
+        for k in SUPPORTED_MODELS.keys():
+            header += f"{k} |"
+            divider += ":---:|"
+
+        report.append(header)
+        report.append(divider)
+
+        rows = []
+        for model in all_registered_models():
+            row = f"| {model.core_model_id.value} |"
+            for k in SUPPORTED_MODELS.keys():
+                if model.core_model_id.value in SUPPORTED_MODELS[k]:
+                    row += " ✅ |"
+                else:
+                    row += " ❌ |"
+            rows.append(row)
+        report.extend(rows)
+
+        report.append("\n### Tests:")
+
+        for provider in SUPPORTED_MODELS.keys():
             if provider not in self.inference_tests:
                 continue
-            report.append("\n - **APIs:**")
+            test_table = [
+                "| Area | Model | API / Functionality | Test name | Test Result |",
+                "|:-----|:-----|:-----|:-----|:-----|",
+            ]
             for api in INFERNECE_APIS:
-                test_nodeids = self.inference_tests[provider][api]
-                report.append(f"\n - /{api}:")
-                report.extend(self._generate_test_result_short(test_nodeids))
+                tests = self.inference_tests[provider][api]
 
-            report.append("\n - **Functionality:**")
-            for functionality in FUNCTIONALITIES:
-                test_nodeids = self.inference_tests[provider][functionality]
-                report.append(f"\n - {functionality}:")
-                report.extend(self._generate_test_result_short(test_nodeids))
+            # report.append("\n - **APIs:**")
+            # for api in INFERNECE_APIS:
+            #     test_nodeids = self.inference_tests[provider][api]
+            #     report.append(f"\n - /{api}:")
+            #     report.extend(self._generate_test_result_short(test_nodeids))
+
+            # report.append("\n - **Functionality:**")
+            # for functionality in FUNCTIONALITIES:
+            #     test_nodeids = self.inference_tests[provider][functionality]
+            #     report.append(f"\n - {functionality}:")
+            #     report.extend(self._generate_test_result_short(test_nodeids))
 
         output_file = Path("pytest_report.md")
         output_file.write_text("\n".join(report))
@@ -120,13 +152,13 @@ class Report:
         if "inference" in item.nodeid:
             api, functionality = self._process_function_name(item.nodeid)
             api_tests = self.inference_tests[inference].get(api, set())
-            functionality_tests = self.inference_tests[inference].get(
-                functionality, set()
-            )
+            # functionality_tests = self.inference_tests[inference].get(
+            #     functionality, set()
+            # )
             api_tests.add(item.nodeid)
-            functionality_tests.add(item.nodeid)
+            # functionality_tests.add(item.nodeid)
             self.inference_tests[inference][api] = api_tests
-            self.inference_tests[inference][functionality] = functionality_tests
+            # self.inference_tests[inference][functionality] = functionality_tests
 
     def _process_function_name(self, function_name):
         api, functionality = None, None
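
For reviewers, here is a minimal standalone sketch of the support-matrix rendering this patch adds to the report-generation hook. The model descriptors and provider sets are hypothetical stand-ins so the snippet runs without `llama_models` installed; the real code walks `all_registered_models()` and checks each `model.core_model_id.value` against `SUPPORTED_MODELS`.

```python
# Hypothetical stand-in data; the real code derives these from
# llama_models (CoreModelId / all_registered_models()).
ALL_MODELS = [
    "Llama3.1-8B-Instruct",
    "Llama3.1-70B-Instruct",
    "Llama-Guard-3-8B",
]
SUPPORTED = {
    "ollama": {"Llama3.1-8B-Instruct", "Llama-Guard-3-8B"},
    "fireworks": {"Llama3.1-8B-Instruct", "Llama3.1-70B-Instruct"},
}


def build_support_matrix(models, supported):
    """Render a markdown table: one row per model, one ✅/❌ column per provider."""
    header = "| Model Descriptor |"
    divider = "|:---|"
    for provider in supported:  # iterating a dict yields its keys
        header += f"{provider} |"
        divider += ":---:|"
    rows = [header, divider]
    for model in models:
        row = f"| {model} |"
        for provider in supported:
            row += " ✅ |" if model in supported[provider] else " ❌ |"
        rows.append(row)
    return "\n".join(rows)


print(build_support_matrix(ALL_MODELS, SUPPORTED))
```

Switching `SUPPORTED_MODELS` from lists to sets makes the per-model membership check O(1), and emitting an explicit ❌ keeps every registered model visible in the matrix rather than listing only the supported ones per provider.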
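The patch only touches the plugin class itself. For context, this is one plausible way such a reporter gets wired into a test run; the `conftest.py` below is hypothetical and the llama_stack suite may register it differently.

```python
# conftest.py -- hypothetical wiring, not part of this patch.
from llama_stack.providers.tests.report import Report


def pytest_configure(config):
    # Registering the plugin lets pytest invoke its hookimpls
    # (e.g. the makereport and session-finish hooks), which is
    # how pytest_report.md gets written at the end of the session.
    config.pluginmanager.register(Report(config))
```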