From e9f49a1eddacb5a0e46b3126ea8826e63ea401f2 Mon Sep 17 00:00:00 2001 From: Sixian Yi Date: Tue, 21 Jan 2025 18:50:08 -0800 Subject: [PATCH] address comment --- llama_stack/templates/fireworks/report.md | 34 +++++-------- tests/client-sdk/metadata.py | 10 ++-- tests/client-sdk/report.py | 60 ++++++++++------------- 3 files changed, 43 insertions(+), 61 deletions(-) diff --git a/llama_stack/templates/fireworks/report.md b/llama_stack/templates/fireworks/report.md index 568535437..5ca65c62e 100644 --- a/llama_stack/templates/fireworks/report.md +++ b/llama_stack/templates/fireworks/report.md @@ -8,46 +8,38 @@ | Llama3.1-8B-Instruct | ✅ | | Llama3.1-70B-Instruct | ✅ | | Llama3.1-405B-Instruct | ✅ | -| Llama3.1-405B-Instruct | ✅ | -| Llama3.1-405B-Instruct | ✅ | | Llama3.2-1B-Instruct | ✅ | | Llama3.2-3B-Instruct | ✅ | -| Llama3.2-1B-Instruct | ✅ | -| Llama3.2-1B-Instruct | ✅ | -| Llama3.2-3B-Instruct | ✅ | -| Llama3.2-3B-Instruct | ✅ | | Llama3.2-11B-Vision-Instruct | ✅ | | Llama3.2-90B-Vision-Instruct | ✅ | | Llama3.3-70B-Instruct | ✅ | | Llama-Guard-3-11B-Vision | ✅ | | Llama-Guard-3-1B | ❌ | -| Llama-Guard-3-1B | ❌ | -| Llama-Guard-3-8B | ✅ | | Llama-Guard-3-8B | ✅ | | Llama-Guard-2-8B | ❌ | ## Inference: | Model | API | Capability | Test | Status | |:----- |:-----|:-----|:-----|:-----| -| Text | /chat_completion | streaming | test_text_chat_completion_streaming | Passed | +| Text | /chat_completion | streaming | test_text_chat_completion_streaming | ✅ | | Vision | /chat_completion | streaming | test_image_chat_completion_streaming | Passed | -| Text | /chat_completion | non_streaming | test_text_chat_completion_non_streaming | Passed | +| Text | /chat_completion | non_streaming | test_text_chat_completion_non_streaming | ✅ | | Vision | /chat_completion | non_streaming | test_image_chat_completion_non_streaming | Passed | -| Text | /chat_completion | tool_calling | test_text_chat_completion_with_tool_calling_and_streaming | Passed | -| Text | /chat_completion | tool_calling | test_text_chat_completion_with_tool_calling_and_non_streaming | Passed | -| Text | /completion | streaming | test_text_completion_streaming | Passed | -| Text | /completion | non_streaming | test_text_completion_non_streaming | Passed | -| Text | /completion | structured_output | test_text_completion_structured_output | Passed | +| Text | /chat_completion | tool_calling | test_text_chat_completion_with_tool_calling_and_streaming | ✅ | +| Text | /chat_completion | tool_calling | test_text_chat_completion_with_tool_calling_and_non_streaming | ✅ | +| Text | /completion | streaming | test_text_completion_streaming | ✅ | +| Text | /completion | non_streaming | test_text_completion_non_streaming | ✅ | +| Text | /completion | structured_output | test_text_completion_structured_output | ✅ | ## Memory: | API | Capability | Test | Status | |:-----|:-----|:-----|:-----| -| insert and query | inline | test_memory_bank_insert_inline_and_query | Error | -| insert and query | url | test_memory_bank_insert_from_url_and_query | Failed | +| /insert, /query | inline | test_memory_bank_insert_inline_and_query | ❌ | +| /insert, /query | url | test_memory_bank_insert_from_url_and_query | ❌ | -## Agent: +## Agents: | API | Capability | Test | Status | |:-----|:-----|:-----|:-----| -| create_agent_turn | rag | test_rag_agent | Failed | -| create_agent_turn | custom_tool | test_custom_tool | Passed | -| create_agent_turn | code_execution | test_code_execution | Failed | +| create_agent_turn | rag | test_rag_agent | ❌ | +| create_agent_turn | custom_tool | test_custom_tool | ✅ | +| create_agent_turn | code_execution | test_code_execution | ❌ | diff --git a/tests/client-sdk/metadata.py b/tests/client-sdk/metadata.py index 667150578..d8d6616c2 100644 --- a/tests/client-sdk/metadata.py +++ b/tests/client-sdk/metadata.py @@ -27,14 +27,14 @@ INFERENCE_API_CAPA_TEST_MAP = { }, } -MEMORY_API_CAPA_TEST_MAP = { +MEMORY_API_TEST_MAP = { "/insert, /query": { "inline": ["test_memory_bank_insert_inline_and_query"], "url": ["test_memory_bank_insert_from_url_and_query"], } } -AGENTS_API_CAPA_TEST_MAP = { +AGENTS_API_TEST_MAP = { "create_agent_turn": { "rag": ["test_rag_agent"], "custom_tool": ["test_custom_tool"], @@ -43,8 +43,8 @@ AGENTS_API_CAPA_TEST_MAP = { } -API_CAPA_MAPS = { +API_MAPS = { "inference": INFERENCE_API_CAPA_TEST_MAP, - "memory": MEMORY_API_CAPA_TEST_MAP, - "agents": AGENTS_API_CAPA_TEST_MAP, + "memory": MEMORY_API_TEST_MAP, + "agents": AGENTS_API_TEST_MAP, } diff --git a/tests/client-sdk/report.py b/tests/client-sdk/report.py index 7a675372b..a2ff07e4f 100644 --- a/tests/client-sdk/report.py +++ b/tests/client-sdk/report.py @@ -14,7 +14,7 @@ from llama_models.datatypes import CoreModelId from llama_models.sku_list import all_registered_models from llama_stack.distribution.library_client import LlamaStackAsLibraryClient -from metadata import API_CAPA_MAPS +from metadata import API_MAPS from pytest import CollectReport @@ -118,7 +118,7 @@ class Report: if ( "Instruct" not in model.core_model_id.value and "Guard" not in model.core_model_id.value - ): + ) or (model.variant): continue row = f"| {model.core_model_id.value} |" if model.core_model_id.value in SUPPORTED_MODELS[self.image_name]: @@ -133,7 +133,7 @@ class Report: "| Model | API | Capability | Test | Status |", "|:----- |:-----|:-----|:-----|:-----|", ] - for api, capa_map in API_CAPA_MAPS["inference"].items(): + for api, capa_map in API_MAPS["inference"].items(): for capa, tests in capa_map.items(): vision_tests = filter(lambda test_name: "image" in test_name, tests) text_tests = filter(lambda test_name: "text" in test_name, tests) @@ -145,7 +145,7 @@ class Report: # the result of the first one for now. Ideally we should mark the test as failed if # any of the parametrizations failed. test_table.append( - f"| Text | /{api} | {capa} | {test_name} | {self.test_data[test_nodeids[0]]} |" + f"| Text | /{api} | {capa} | {test_name} | {self._print_result_icon(self.test_data[test_nodeids[0]])} |" ) for test_name in vision_tests: @@ -157,34 +157,22 @@ class Report: report.extend(test_table) - report.append("\n## Memory: ") - test_table = [ - "| API | Capability | Test | Status |", - "|:-----|:-----|:-----|:-----|", - ] - for api, capa_map in API_CAPA_MAPS["memory"].items(): - for capa, tests in capa_map.items(): - for test_name in tests: - test_nodeids = self.test_name_to_nodeid[test_name] - assert len(test_nodeids) > 0 - test_table.append( - f"| {api} | {capa} | {test_name} | {self.test_data[test_nodeids[0]]} |" - ) - report.extend(test_table) - report.append("\n## Agent: ") - test_table = [ - "| API | Capability | Test | Status |", - "|:-----|:-----|:-----|:-----|", - ] - for api, capa_map in API_CAPA_MAPS["agents"].items(): - for capa, tests in capa_map.items(): - for test_name in tests: - test_nodeids = self.test_name_to_nodeid[test_name] - assert len(test_nodeids) > 0 - test_table.append( - f"| /{api} | {capa} | {test_name} | {self.test_data[test_nodeids[0]]} |" - ) - report.extend(test_table) + for api_group in ["memory", "agents"]: + api_capitalized = api_group.capitalize() + report.append(f"\n## {api_capitalized}: ") + test_table = [ + "| API | Capability | Test | Status |", + "|:-----|:-----|:-----|:-----|", + ] + for api, capa_map in API_MAPS[api_group].items(): + for capa, tests in capa_map.items(): + for test_name in tests: + test_nodeids = self.test_name_to_nodeid[test_name] + assert len(test_nodeids) > 0 + test_table.append( + f"| {api} | {capa} | {test_name} | {self._print_result_icon(self.test_data[test_nodeids[0]])} |" + ) + report.extend(test_table) output_file = self.output_path output_file.write_text("\n".join(report)) print(f"\nReport generated: {output_file.absolute()}") @@ -195,10 +183,12 @@ class Report: def _print_result_icon(self, result): if result == "Passed": - return "✅" + return "✅" + elif result == "Failed" or result == "Error": + return "❌" else: - # result == "Failed" or result == "Error": - return "❌" + # result == "Skipped": + return "⏭️" def _process_outcome(self, report: CollectReport): if self._is_error(report):