address comment

This commit is contained in:
Sixian Yi 2025-01-21 18:50:08 -08:00
parent 447d65dbc2
commit e9f49a1edd
3 changed files with 43 additions and 61 deletions

View file

@ -8,46 +8,38 @@
| Llama3.1-8B-Instruct | ✅ |
| Llama3.1-70B-Instruct | ✅ |
| Llama3.1-405B-Instruct | ✅ |
| Llama3.1-405B-Instruct | ✅ |
| Llama3.1-405B-Instruct | ✅ |
| Llama3.2-1B-Instruct | ✅ |
| Llama3.2-3B-Instruct | ✅ |
| Llama3.2-1B-Instruct | ✅ |
| Llama3.2-1B-Instruct | ✅ |
| Llama3.2-3B-Instruct | ✅ |
| Llama3.2-3B-Instruct | ✅ |
| Llama3.2-11B-Vision-Instruct | ✅ |
| Llama3.2-90B-Vision-Instruct | ✅ |
| Llama3.3-70B-Instruct | ✅ |
| Llama-Guard-3-11B-Vision | ✅ |
| Llama-Guard-3-1B | ❌ |
| Llama-Guard-3-1B | ❌ |
| Llama-Guard-3-8B | ✅ |
| Llama-Guard-3-8B | ✅ |
| Llama-Guard-2-8B | ❌ |
## Inference:
| Model | API | Capability | Test | Status |
|:----- |:-----|:-----|:-----|:-----|
| Text | /chat_completion | streaming | test_text_chat_completion_streaming | Passed |
| Text | /chat_completion | streaming | test_text_chat_completion_streaming | |
| Vision | /chat_completion | streaming | test_image_chat_completion_streaming | Passed |
| Text | /chat_completion | non_streaming | test_text_chat_completion_non_streaming | Passed |
| Text | /chat_completion | non_streaming | test_text_chat_completion_non_streaming | |
| Vision | /chat_completion | non_streaming | test_image_chat_completion_non_streaming | Passed |
| Text | /chat_completion | tool_calling | test_text_chat_completion_with_tool_calling_and_streaming | Passed |
| Text | /chat_completion | tool_calling | test_text_chat_completion_with_tool_calling_and_non_streaming | Passed |
| Text | /completion | streaming | test_text_completion_streaming | Passed |
| Text | /completion | non_streaming | test_text_completion_non_streaming | Passed |
| Text | /completion | structured_output | test_text_completion_structured_output | Passed |
| Text | /chat_completion | tool_calling | test_text_chat_completion_with_tool_calling_and_streaming | |
| Text | /chat_completion | tool_calling | test_text_chat_completion_with_tool_calling_and_non_streaming | |
| Text | /completion | streaming | test_text_completion_streaming | |
| Text | /completion | non_streaming | test_text_completion_non_streaming | |
| Text | /completion | structured_output | test_text_completion_structured_output | |
## Memory:
| API | Capability | Test | Status |
|:-----|:-----|:-----|:-----|
| insert and query | inline | test_memory_bank_insert_inline_and_query | Error |
| insert and query | url | test_memory_bank_insert_from_url_and_query | Failed |
| /insert, /query | inline | test_memory_bank_insert_inline_and_query | ❌ |
| /insert, /query | url | test_memory_bank_insert_from_url_and_query | ❌ |
## Agent:
## Agents:
| API | Capability | Test | Status |
|:-----|:-----|:-----|:-----|
| create_agent_turn | rag | test_rag_agent | Failed |
| create_agent_turn | custom_tool | test_custom_tool | Passed |
| create_agent_turn | code_execution | test_code_execution | Failed |
| create_agent_turn | rag | test_rag_agent | |
| create_agent_turn | custom_tool | test_custom_tool | |
| create_agent_turn | code_execution | test_code_execution | |

View file

@ -27,14 +27,14 @@ INFERENCE_API_CAPA_TEST_MAP = {
},
}
MEMORY_API_CAPA_TEST_MAP = {
MEMORY_API_TEST_MAP = {
"/insert, /query": {
"inline": ["test_memory_bank_insert_inline_and_query"],
"url": ["test_memory_bank_insert_from_url_and_query"],
}
}
AGENTS_API_CAPA_TEST_MAP = {
AGENTS_API_TEST_MAP = {
"create_agent_turn": {
"rag": ["test_rag_agent"],
"custom_tool": ["test_custom_tool"],
@ -43,8 +43,8 @@ AGENTS_API_CAPA_TEST_MAP = {
}
API_CAPA_MAPS = {
API_MAPS = {
"inference": INFERENCE_API_CAPA_TEST_MAP,
"memory": MEMORY_API_CAPA_TEST_MAP,
"agents": AGENTS_API_CAPA_TEST_MAP,
"memory": MEMORY_API_TEST_MAP,
"agents": AGENTS_API_TEST_MAP,
}

View file

@ -14,7 +14,7 @@ from llama_models.datatypes import CoreModelId
from llama_models.sku_list import all_registered_models
from llama_stack.distribution.library_client import LlamaStackAsLibraryClient
from metadata import API_CAPA_MAPS
from metadata import API_MAPS
from pytest import CollectReport
@ -118,7 +118,7 @@ class Report:
if (
"Instruct" not in model.core_model_id.value
and "Guard" not in model.core_model_id.value
):
) or (model.variant):
continue
row = f"| {model.core_model_id.value} |"
if model.core_model_id.value in SUPPORTED_MODELS[self.image_name]:
@ -133,7 +133,7 @@ class Report:
"| Model | API | Capability | Test | Status |",
"|:----- |:-----|:-----|:-----|:-----|",
]
for api, capa_map in API_CAPA_MAPS["inference"].items():
for api, capa_map in API_MAPS["inference"].items():
for capa, tests in capa_map.items():
vision_tests = filter(lambda test_name: "image" in test_name, tests)
text_tests = filter(lambda test_name: "text" in test_name, tests)
@ -145,7 +145,7 @@ class Report:
# the result of the first one for now. Ideally we should mark the test as failed if
# any of the parametrizations failed.
test_table.append(
f"| Text | /{api} | {capa} | {test_name} | {self.test_data[test_nodeids[0]]} |"
f"| Text | /{api} | {capa} | {test_name} | {self._print_result_icon(self.test_data[test_nodeids[0]])} |"
)
for test_name in vision_tests:
@ -157,34 +157,22 @@ class Report:
report.extend(test_table)
report.append("\n## Memory: ")
test_table = [
"| API | Capability | Test | Status |",
"|:-----|:-----|:-----|:-----|",
]
for api, capa_map in API_CAPA_MAPS["memory"].items():
for capa, tests in capa_map.items():
for test_name in tests:
test_nodeids = self.test_name_to_nodeid[test_name]
assert len(test_nodeids) > 0
test_table.append(
f"| {api} | {capa} | {test_name} | {self.test_data[test_nodeids[0]]} |"
)
report.extend(test_table)
report.append("\n## Agent: ")
test_table = [
"| API | Capability | Test | Status |",
"|:-----|:-----|:-----|:-----|",
]
for api, capa_map in API_CAPA_MAPS["agents"].items():
for capa, tests in capa_map.items():
for test_name in tests:
test_nodeids = self.test_name_to_nodeid[test_name]
assert len(test_nodeids) > 0
test_table.append(
f"| /{api} | {capa} | {test_name} | {self.test_data[test_nodeids[0]]} |"
)
report.extend(test_table)
for api_group in ["memory", "agents"]:
api_capitalized = api_group.capitalize()
report.append(f"\n## {api_capitalized}: ")
test_table = [
"| API | Capability | Test | Status |",
"|:-----|:-----|:-----|:-----|",
]
for api, capa_map in API_MAPS[api_group].items():
for capa, tests in capa_map.items():
for test_name in tests:
test_nodeids = self.test_name_to_nodeid[test_name]
assert len(test_nodeids) > 0
test_table.append(
f"| {api} | {capa} | {test_name} | {self._print_result_icon(self.test_data[test_nodeids[0]])} |"
)
report.extend(test_table)
output_file = self.output_path
output_file.write_text("\n".join(report))
print(f"\nReport generated: {output_file.absolute()}")
@ -195,10 +183,12 @@ class Report:
def _print_result_icon(self, result):
if result == "Passed":
return "✅"
return ""
elif result == "Failed" or result == "Error":
return ""
else:
# result == "Failed" or result == "Error":
return "❌"
# result == "Skipped":
return "⏭️"
def _process_outcome(self, report: CollectReport):
if self._is_error(report):