Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-08-06 02:32:40 +00:00)

Commit e9f49a1edd ("address comment"), parent 447d65dbc2.
3 changed files with 43 additions and 61 deletions.
Changed file 1 of 3 (the generated Markdown test report):

@@ -8,46 +8,38 @@
 | Llama3.1-8B-Instruct | ✅ |
 | Llama3.1-70B-Instruct | ✅ |
 | Llama3.1-405B-Instruct | ✅ |
-| Llama3.1-405B-Instruct | ✅ |
-| Llama3.1-405B-Instruct | ✅ |
 | Llama3.2-1B-Instruct | ✅ |
 | Llama3.2-3B-Instruct | ✅ |
-| Llama3.2-1B-Instruct | ✅ |
-| Llama3.2-1B-Instruct | ✅ |
-| Llama3.2-3B-Instruct | ✅ |
-| Llama3.2-3B-Instruct | ✅ |
 | Llama3.2-11B-Vision-Instruct | ✅ |
 | Llama3.2-90B-Vision-Instruct | ✅ |
 | Llama3.3-70B-Instruct | ✅ |
 | Llama-Guard-3-11B-Vision | ✅ |
 | Llama-Guard-3-1B | ❌ |
-| Llama-Guard-3-1B | ❌ |
 | Llama-Guard-3-8B | ✅ |
-| Llama-Guard-3-8B | ✅ |
 | Llama-Guard-2-8B | ❌ |
 
 ## Inference:
 | Model | API | Capability | Test | Status |
 |:----- |:-----|:-----|:-----|:-----|
-| Text | /chat_completion | streaming | test_text_chat_completion_streaming | Passed |
+| Text | /chat_completion | streaming | test_text_chat_completion_streaming | ✅ |
 | Vision | /chat_completion | streaming | test_image_chat_completion_streaming | Passed |
-| Text | /chat_completion | non_streaming | test_text_chat_completion_non_streaming | Passed |
+| Text | /chat_completion | non_streaming | test_text_chat_completion_non_streaming | ✅ |
 | Vision | /chat_completion | non_streaming | test_image_chat_completion_non_streaming | Passed |
-| Text | /chat_completion | tool_calling | test_text_chat_completion_with_tool_calling_and_streaming | Passed |
-| Text | /chat_completion | tool_calling | test_text_chat_completion_with_tool_calling_and_non_streaming | Passed |
-| Text | /completion | streaming | test_text_completion_streaming | Passed |
-| Text | /completion | non_streaming | test_text_completion_non_streaming | Passed |
-| Text | /completion | structured_output | test_text_completion_structured_output | Passed |
+| Text | /chat_completion | tool_calling | test_text_chat_completion_with_tool_calling_and_streaming | ✅ |
+| Text | /chat_completion | tool_calling | test_text_chat_completion_with_tool_calling_and_non_streaming | ✅ |
+| Text | /completion | streaming | test_text_completion_streaming | ✅ |
+| Text | /completion | non_streaming | test_text_completion_non_streaming | ✅ |
+| Text | /completion | structured_output | test_text_completion_structured_output | ✅ |
 
 ## Memory:
 | API | Capability | Test | Status |
 |:-----|:-----|:-----|:-----|
-| insert and query | inline | test_memory_bank_insert_inline_and_query | Error |
-| insert and query | url | test_memory_bank_insert_from_url_and_query | Failed |
+| /insert, /query | inline | test_memory_bank_insert_inline_and_query | ❌ |
+| /insert, /query | url | test_memory_bank_insert_from_url_and_query | ❌ |
 
-## Agent:
+## Agents:
 | API | Capability | Test | Status |
 |:-----|:-----|:-----|:-----|
-| create_agent_turn | rag | test_rag_agent | Failed |
-| create_agent_turn | custom_tool | test_custom_tool | Passed |
-| create_agent_turn | code_execution | test_code_execution | Failed |
+| create_agent_turn | rag | test_rag_agent | ❌ |
+| create_agent_turn | custom_tool | test_custom_tool | ✅ |
+| create_agent_turn | code_execution | test_code_execution | ❌ |
Changed file 2 of 3 (the metadata module defining the API-to-test maps):

@@ -27,14 +27,14 @@ INFERENCE_API_CAPA_TEST_MAP = {
     },
 }
 
-MEMORY_API_CAPA_TEST_MAP = {
+MEMORY_API_TEST_MAP = {
     "/insert, /query": {
         "inline": ["test_memory_bank_insert_inline_and_query"],
         "url": ["test_memory_bank_insert_from_url_and_query"],
     }
 }
 
-AGENTS_API_CAPA_TEST_MAP = {
+AGENTS_API_TEST_MAP = {
     "create_agent_turn": {
         "rag": ["test_rag_agent"],
         "custom_tool": ["test_custom_tool"],
@@ -43,8 +43,8 @@ AGENTS_API_CAPA_TEST_MAP = {
 }
 
 
-API_CAPA_MAPS = {
+API_MAPS = {
     "inference": INFERENCE_API_CAPA_TEST_MAP,
-    "memory": MEMORY_API_CAPA_TEST_MAP,
-    "agents": AGENTS_API_CAPA_TEST_MAP,
+    "memory": MEMORY_API_TEST_MAP,
+    "agents": AGENTS_API_TEST_MAP,
}
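For orientation, every map in the metadata module shares one nesting convention: API endpoint, then capability, then a list of pytest test names. Below is a minimal sketch of that shape after the rename, assuming the structure shown in the hunks above; the inference map is omitted and the `code_execution` entry is inferred from the Agents table in the report diff, so treat it as an approximation rather than a verbatim copy of the module.

```python
# Sketch of the renamed maps' common shape; not a verbatim copy of the metadata module.
MEMORY_API_TEST_MAP = {
    "/insert, /query": {
        "inline": ["test_memory_bank_insert_inline_and_query"],
        "url": ["test_memory_bank_insert_from_url_and_query"],
    }
}

AGENTS_API_TEST_MAP = {
    "create_agent_turn": {
        "rag": ["test_rag_agent"],
        "custom_tool": ["test_custom_tool"],
        "code_execution": ["test_code_execution"],  # inferred from the report table
    }
}

API_MAPS = {
    "memory": MEMORY_API_TEST_MAP,
    "agents": AGENTS_API_TEST_MAP,
}

# Any API group can be addressed the same way:
print(API_MAPS["agents"]["create_agent_turn"]["rag"])  # ['test_rag_agent']
```

Because the maps are structurally identical, the report generator below can fold the Memory and Agents sections into a single loop over `API_MAPS`.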
Changed file 3 of 3 (the report generator, class Report):

@@ -14,7 +14,7 @@ from llama_models.datatypes import CoreModelId
 from llama_models.sku_list import all_registered_models
 
 from llama_stack.distribution.library_client import LlamaStackAsLibraryClient
-from metadata import API_CAPA_MAPS
+from metadata import API_MAPS
 
 from pytest import CollectReport
 
@@ -118,7 +118,7 @@ class Report:
             if (
                 "Instruct" not in model.core_model_id.value
                 and "Guard" not in model.core_model_id.value
-            ):
+            ) or (model.variant):
                 continue
             row = f"| {model.core_model_id.value} |"
             if model.core_model_id.value in SUPPORTED_MODELS[self.image_name]:
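The added `or (model.variant)` clause is what removes the duplicate rows from the support matrix in the first file: model variants (for example quantized builds) share a `core_model_id` with their base model, so iterating over every registered model printed the same id several times. Here is a toy sketch of the effect, using `SimpleNamespace` stand-ins instead of the real `llama_models` `Model` objects; the variant labels are made up.

```python
from types import SimpleNamespace

def fake_model(core_id, variant=None):
    # Stand-in exposing only the two attributes the filter reads.
    return SimpleNamespace(core_model_id=SimpleNamespace(value=core_id), variant=variant)

models = [
    fake_model("Llama3.2-1B-Instruct"),
    fake_model("Llama3.2-1B-Instruct", variant="quantized-a"),  # hypothetical variant
    fake_model("Llama3.2-1B-Instruct", variant="quantized-b"),  # hypothetical variant
    fake_model("Llama3.2-3B-Instruct"),
]

rows = []
for model in models:
    # Mirrors the updated condition: skip ids that are neither Instruct nor Guard,
    # and skip anything that is a variant of a base model.
    if (
        "Instruct" not in model.core_model_id.value
        and "Guard" not in model.core_model_id.value
    ) or model.variant:
        continue
    rows.append(f"| {model.core_model_id.value} |")

print("\n".join(rows))
# | Llama3.2-1B-Instruct |
# | Llama3.2-3B-Instruct |
```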
@@ -133,7 +133,7 @@ class Report:
             "| Model | API | Capability | Test | Status |",
             "|:----- |:-----|:-----|:-----|:-----|",
         ]
-        for api, capa_map in API_CAPA_MAPS["inference"].items():
+        for api, capa_map in API_MAPS["inference"].items():
             for capa, tests in capa_map.items():
                 vision_tests = filter(lambda test_name: "image" in test_name, tests)
                 text_tests = filter(lambda test_name: "text" in test_name, tests)
@@ -145,7 +145,7 @@ class Report:
                     # the result of the first one for now. Ideally we should mark the test as failed if
                     # any of the parametrizations failed.
                     test_table.append(
-                        f"| Text | /{api} | {capa} | {test_name} | {self.test_data[test_nodeids[0]]} |"
+                        f"| Text | /{api} | {capa} | {test_name} | {self._print_result_icon(self.test_data[test_nodeids[0]])} |"
                     )
 
                 for test_name in vision_tests:
@@ -157,34 +157,22 @@ class Report:
 
         report.extend(test_table)
 
-        report.append("\n## Memory: ")
-        test_table = [
-            "| API | Capability | Test | Status |",
-            "|:-----|:-----|:-----|:-----|",
-        ]
-        for api, capa_map in API_CAPA_MAPS["memory"].items():
-            for capa, tests in capa_map.items():
-                for test_name in tests:
-                    test_nodeids = self.test_name_to_nodeid[test_name]
-                    assert len(test_nodeids) > 0
-                    test_table.append(
-                        f"| {api} | {capa} | {test_name} | {self.test_data[test_nodeids[0]]} |"
-                    )
-        report.extend(test_table)
-        report.append("\n## Agent: ")
-        test_table = [
-            "| API | Capability | Test | Status |",
-            "|:-----|:-----|:-----|:-----|",
-        ]
-        for api, capa_map in API_CAPA_MAPS["agents"].items():
-            for capa, tests in capa_map.items():
-                for test_name in tests:
-                    test_nodeids = self.test_name_to_nodeid[test_name]
-                    assert len(test_nodeids) > 0
-                    test_table.append(
-                        f"| /{api} | {capa} | {test_name} | {self.test_data[test_nodeids[0]]} |"
-                    )
-        report.extend(test_table)
+        for api_group in ["memory", "agents"]:
+            api_capitalized = api_group.capitalize()
+            report.append(f"\n## {api_capitalized}: ")
+            test_table = [
+                "| API | Capability | Test | Status |",
+                "|:-----|:-----|:-----|:-----|",
+            ]
+            for api, capa_map in API_MAPS[api_group].items():
+                for capa, tests in capa_map.items():
+                    for test_name in tests:
+                        test_nodeids = self.test_name_to_nodeid[test_name]
+                        assert len(test_nodeids) > 0
+                        test_table.append(
+                            f"| {api} | {capa} | {test_name} | {self._print_result_icon(self.test_data[test_nodeids[0]])} |"
+                        )
+            report.extend(test_table)
         output_file = self.output_path
         output_file.write_text("\n".join(report))
         print(f"\nReport generated: {output_file.absolute()}")
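To see the consolidation end to end, here is a self-contained approximation of the new loop outside the `Report` class. The node IDs and outcomes are fabricated for the example (in the real class, `test_data` and `test_name_to_nodeid` are collected from pytest hooks), and the `ICONS` lookup stands in for `_print_result_icon`.

```python
# Fabricated inputs standing in for state the Report class collects from pytest.
API_MAPS = {
    "memory": {"/insert, /query": {"inline": ["test_memory_bank_insert_inline_and_query"]}},
    "agents": {"create_agent_turn": {"custom_tool": ["test_custom_tool"]}},
}
test_name_to_nodeid = {  # hypothetical pytest node IDs
    "test_memory_bank_insert_inline_and_query": ["test_memory.py::test_memory_bank_insert_inline_and_query"],
    "test_custom_tool": ["test_agents.py::test_custom_tool"],
}
test_data = {  # hypothetical outcomes keyed by node ID
    "test_memory.py::test_memory_bank_insert_inline_and_query": "Failed",
    "test_agents.py::test_custom_tool": "Passed",
}
ICONS = {"Passed": "✅", "Failed": "❌", "Error": "❌", "Skipped": "⏭️"}

report = []
for api_group in ["memory", "agents"]:
    report.append(f"\n## {api_group.capitalize()}: ")
    test_table = [
        "| API | Capability | Test | Status |",
        "|:-----|:-----|:-----|:-----|",
    ]
    for api, capa_map in API_MAPS[api_group].items():
        for capa, tests in capa_map.items():
            for test_name in tests:
                nodeids = test_name_to_nodeid[test_name]
                assert len(nodeids) > 0
                # As in the diff, only the first parametrization's outcome is reported.
                test_table.append(
                    f"| {api} | {capa} | {test_name} | {ICONS[test_data[nodeids[0]]]} |"
                )
    report.extend(test_table)

print("\n".join(report))
```

Running this prints Memory and Agents sections shaped exactly like the tables in the report diff above, which is why the two per-section copies of the table-building code could be dropped.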
@@ -195,10 +183,12 @@ class Report:
 
     def _print_result_icon(self, result):
         if result == "Passed":
-            return "✅"
+            return "✅"
+        elif result == "Failed" or result == "Error":
+            return "❌"
         else:
-            # result == "Failed" or result == "Error":
-            return "❌"
+            # result == "Skipped":
+            return "⏭️"
 
     def _process_outcome(self, report: CollectReport):
         if self._is_error(report):
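The rewritten helper also stops lumping skipped tests in with failures; previously anything other than "Passed" fell through to ❌. A quick standalone check of the three branches (a copy of the corrected logic for illustration, not the class method itself):

```python
def print_result_icon(result: str) -> str:
    # Same branching as the corrected Report._print_result_icon.
    if result == "Passed":
        return "✅"
    elif result == "Failed" or result == "Error":
        return "❌"
    else:
        # result == "Skipped"
        return "⏭️"

for outcome in ("Passed", "Failed", "Error", "Skipped"):
    print(f"{outcome} -> {print_result_icon(outcome)}")
# Passed -> ✅, Failed -> ❌, Error -> ❌, Skipped -> ⏭️
```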