address comment

This commit is contained in:
Sixian Yi 2025-01-21 18:50:08 -08:00
parent 447d65dbc2
commit e9f49a1edd
3 changed files with 43 additions and 61 deletions

View file

@ -8,46 +8,38 @@
| Llama3.1-8B-Instruct | ✅ | | Llama3.1-8B-Instruct | ✅ |
| Llama3.1-70B-Instruct | ✅ | | Llama3.1-70B-Instruct | ✅ |
| Llama3.1-405B-Instruct | ✅ | | Llama3.1-405B-Instruct | ✅ |
| Llama3.1-405B-Instruct | ✅ |
| Llama3.1-405B-Instruct | ✅ |
| Llama3.2-1B-Instruct | ✅ | | Llama3.2-1B-Instruct | ✅ |
| Llama3.2-3B-Instruct | ✅ | | Llama3.2-3B-Instruct | ✅ |
| Llama3.2-1B-Instruct | ✅ |
| Llama3.2-1B-Instruct | ✅ |
| Llama3.2-3B-Instruct | ✅ |
| Llama3.2-3B-Instruct | ✅ |
| Llama3.2-11B-Vision-Instruct | ✅ | | Llama3.2-11B-Vision-Instruct | ✅ |
| Llama3.2-90B-Vision-Instruct | ✅ | | Llama3.2-90B-Vision-Instruct | ✅ |
| Llama3.3-70B-Instruct | ✅ | | Llama3.3-70B-Instruct | ✅ |
| Llama-Guard-3-11B-Vision | ✅ | | Llama-Guard-3-11B-Vision | ✅ |
| Llama-Guard-3-1B | ❌ | | Llama-Guard-3-1B | ❌ |
| Llama-Guard-3-1B | ❌ |
| Llama-Guard-3-8B | ✅ |
| Llama-Guard-3-8B | ✅ | | Llama-Guard-3-8B | ✅ |
| Llama-Guard-2-8B | ❌ | | Llama-Guard-2-8B | ❌ |
## Inference: ## Inference:
| Model | API | Capability | Test | Status | | Model | API | Capability | Test | Status |
|:----- |:-----|:-----|:-----|:-----| |:----- |:-----|:-----|:-----|:-----|
| Text | /chat_completion | streaming | test_text_chat_completion_streaming | Passed | | Text | /chat_completion | streaming | test_text_chat_completion_streaming | |
| Vision | /chat_completion | streaming | test_image_chat_completion_streaming | Passed | | Vision | /chat_completion | streaming | test_image_chat_completion_streaming | Passed |
| Text | /chat_completion | non_streaming | test_text_chat_completion_non_streaming | Passed | | Text | /chat_completion | non_streaming | test_text_chat_completion_non_streaming | |
| Vision | /chat_completion | non_streaming | test_image_chat_completion_non_streaming | Passed | | Vision | /chat_completion | non_streaming | test_image_chat_completion_non_streaming | Passed |
| Text | /chat_completion | tool_calling | test_text_chat_completion_with_tool_calling_and_streaming | Passed | | Text | /chat_completion | tool_calling | test_text_chat_completion_with_tool_calling_and_streaming | |
| Text | /chat_completion | tool_calling | test_text_chat_completion_with_tool_calling_and_non_streaming | Passed | | Text | /chat_completion | tool_calling | test_text_chat_completion_with_tool_calling_and_non_streaming | |
| Text | /completion | streaming | test_text_completion_streaming | Passed | | Text | /completion | streaming | test_text_completion_streaming | |
| Text | /completion | non_streaming | test_text_completion_non_streaming | Passed | | Text | /completion | non_streaming | test_text_completion_non_streaming | |
| Text | /completion | structured_output | test_text_completion_structured_output | Passed | | Text | /completion | structured_output | test_text_completion_structured_output | |
## Memory: ## Memory:
| API | Capability | Test | Status | | API | Capability | Test | Status |
|:-----|:-----|:-----|:-----| |:-----|:-----|:-----|:-----|
| insert and query | inline | test_memory_bank_insert_inline_and_query | Error | | /insert, /query | inline | test_memory_bank_insert_inline_and_query | ❌ |
| insert and query | url | test_memory_bank_insert_from_url_and_query | Failed | | /insert, /query | url | test_memory_bank_insert_from_url_and_query | ❌ |
## Agent: ## Agents:
| API | Capability | Test | Status | | API | Capability | Test | Status |
|:-----|:-----|:-----|:-----| |:-----|:-----|:-----|:-----|
| create_agent_turn | rag | test_rag_agent | Failed | | create_agent_turn | rag | test_rag_agent | |
| create_agent_turn | custom_tool | test_custom_tool | Passed | | create_agent_turn | custom_tool | test_custom_tool | |
| create_agent_turn | code_execution | test_code_execution | Failed | | create_agent_turn | code_execution | test_code_execution | |

View file

@ -27,14 +27,14 @@ INFERENCE_API_CAPA_TEST_MAP = {
}, },
} }
MEMORY_API_CAPA_TEST_MAP = { MEMORY_API_TEST_MAP = {
"/insert, /query": { "/insert, /query": {
"inline": ["test_memory_bank_insert_inline_and_query"], "inline": ["test_memory_bank_insert_inline_and_query"],
"url": ["test_memory_bank_insert_from_url_and_query"], "url": ["test_memory_bank_insert_from_url_and_query"],
} }
} }
AGENTS_API_CAPA_TEST_MAP = { AGENTS_API_TEST_MAP = {
"create_agent_turn": { "create_agent_turn": {
"rag": ["test_rag_agent"], "rag": ["test_rag_agent"],
"custom_tool": ["test_custom_tool"], "custom_tool": ["test_custom_tool"],
@ -43,8 +43,8 @@ AGENTS_API_CAPA_TEST_MAP = {
} }
API_CAPA_MAPS = { API_MAPS = {
"inference": INFERENCE_API_CAPA_TEST_MAP, "inference": INFERENCE_API_CAPA_TEST_MAP,
"memory": MEMORY_API_CAPA_TEST_MAP, "memory": MEMORY_API_TEST_MAP,
"agents": AGENTS_API_CAPA_TEST_MAP, "agents": AGENTS_API_TEST_MAP,
} }

View file

@ -14,7 +14,7 @@ from llama_models.datatypes import CoreModelId
from llama_models.sku_list import all_registered_models from llama_models.sku_list import all_registered_models
from llama_stack.distribution.library_client import LlamaStackAsLibraryClient from llama_stack.distribution.library_client import LlamaStackAsLibraryClient
from metadata import API_CAPA_MAPS from metadata import API_MAPS
from pytest import CollectReport from pytest import CollectReport
@ -118,7 +118,7 @@ class Report:
if ( if (
"Instruct" not in model.core_model_id.value "Instruct" not in model.core_model_id.value
and "Guard" not in model.core_model_id.value and "Guard" not in model.core_model_id.value
): ) or (model.variant):
continue continue
row = f"| {model.core_model_id.value} |" row = f"| {model.core_model_id.value} |"
if model.core_model_id.value in SUPPORTED_MODELS[self.image_name]: if model.core_model_id.value in SUPPORTED_MODELS[self.image_name]:
@ -133,7 +133,7 @@ class Report:
"| Model | API | Capability | Test | Status |", "| Model | API | Capability | Test | Status |",
"|:----- |:-----|:-----|:-----|:-----|", "|:----- |:-----|:-----|:-----|:-----|",
] ]
for api, capa_map in API_CAPA_MAPS["inference"].items(): for api, capa_map in API_MAPS["inference"].items():
for capa, tests in capa_map.items(): for capa, tests in capa_map.items():
vision_tests = filter(lambda test_name: "image" in test_name, tests) vision_tests = filter(lambda test_name: "image" in test_name, tests)
text_tests = filter(lambda test_name: "text" in test_name, tests) text_tests = filter(lambda test_name: "text" in test_name, tests)
@ -145,7 +145,7 @@ class Report:
# the result of the first one for now. Ideally we should mark the test as failed if # the result of the first one for now. Ideally we should mark the test as failed if
# any of the parametrizations failed. # any of the parametrizations failed.
test_table.append( test_table.append(
f"| Text | /{api} | {capa} | {test_name} | {self.test_data[test_nodeids[0]]} |" f"| Text | /{api} | {capa} | {test_name} | {self._print_result_icon(self.test_data[test_nodeids[0]])} |"
) )
for test_name in vision_tests: for test_name in vision_tests:
@ -157,34 +157,22 @@ class Report:
report.extend(test_table) report.extend(test_table)
report.append("\n## Memory: ") for api_group in ["memory", "agents"]:
test_table = [ api_capitalized = api_group.capitalize()
"| API | Capability | Test | Status |", report.append(f"\n## {api_capitalized}: ")
"|:-----|:-----|:-----|:-----|", test_table = [
] "| API | Capability | Test | Status |",
for api, capa_map in API_CAPA_MAPS["memory"].items(): "|:-----|:-----|:-----|:-----|",
for capa, tests in capa_map.items(): ]
for test_name in tests: for api, capa_map in API_MAPS[api_group].items():
test_nodeids = self.test_name_to_nodeid[test_name] for capa, tests in capa_map.items():
assert len(test_nodeids) > 0 for test_name in tests:
test_table.append( test_nodeids = self.test_name_to_nodeid[test_name]
f"| {api} | {capa} | {test_name} | {self.test_data[test_nodeids[0]]} |" assert len(test_nodeids) > 0
) test_table.append(
report.extend(test_table) f"| {api} | {capa} | {test_name} | {self._print_result_icon(self.test_data[test_nodeids[0]])} |"
report.append("\n## Agent: ") )
test_table = [ report.extend(test_table)
"| API | Capability | Test | Status |",
"|:-----|:-----|:-----|:-----|",
]
for api, capa_map in API_CAPA_MAPS["agents"].items():
for capa, tests in capa_map.items():
for test_name in tests:
test_nodeids = self.test_name_to_nodeid[test_name]
assert len(test_nodeids) > 0
test_table.append(
f"| /{api} | {capa} | {test_name} | {self.test_data[test_nodeids[0]]} |"
)
report.extend(test_table)
output_file = self.output_path output_file = self.output_path
output_file.write_text("\n".join(report)) output_file.write_text("\n".join(report))
print(f"\nReport generated: {output_file.absolute()}") print(f"\nReport generated: {output_file.absolute()}")
@ -195,10 +183,12 @@ class Report:
def _print_result_icon(self, result): def _print_result_icon(self, result):
if result == "Passed": if result == "Passed":
return "✅" return ""
elif result == "Failed" or result == "Error":
return ""
else: else:
# result == "Failed" or result == "Error": # result == "Skipped":
return "❌" return "⏭️"
def _process_outcome(self, report: CollectReport): def _process_outcome(self, report: CollectReport):
if self._is_error(report): if self._is_error(report):