Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-08-06 02:32:40 +00:00)

Commit e9f49a1edd ("address comment"), parent 447d65dbc2.
3 changed files with 43 additions and 61 deletions.
Changed file 1 of 3 (the generated Markdown test report):

@@ -8,46 +8,38 @@
 | Llama3.1-8B-Instruct | ✅ |
 | Llama3.1-70B-Instruct | ✅ |
 | Llama3.1-405B-Instruct | ✅ |
-| Llama3.1-405B-Instruct | ✅ |
-| Llama3.1-405B-Instruct | ✅ |
 | Llama3.2-1B-Instruct | ✅ |
 | Llama3.2-3B-Instruct | ✅ |
-| Llama3.2-1B-Instruct | ✅ |
-| Llama3.2-1B-Instruct | ✅ |
-| Llama3.2-3B-Instruct | ✅ |
-| Llama3.2-3B-Instruct | ✅ |
 | Llama3.2-11B-Vision-Instruct | ✅ |
 | Llama3.2-90B-Vision-Instruct | ✅ |
 | Llama3.3-70B-Instruct | ✅ |
 | Llama-Guard-3-11B-Vision | ✅ |
 | Llama-Guard-3-1B | ❌ |
-| Llama-Guard-3-1B | ❌ |
 | Llama-Guard-3-8B | ✅ |
-| Llama-Guard-3-8B | ✅ |
 | Llama-Guard-2-8B | ❌ |
 
 ## Inference:
 | Model | API | Capability | Test | Status |
 |:----- |:-----|:-----|:-----|:-----|
-| Text | /chat_completion | streaming | test_text_chat_completion_streaming | Passed |
+| Text | /chat_completion | streaming | test_text_chat_completion_streaming | ✅ |
 | Vision | /chat_completion | streaming | test_image_chat_completion_streaming | Passed |
-| Text | /chat_completion | non_streaming | test_text_chat_completion_non_streaming | Passed |
+| Text | /chat_completion | non_streaming | test_text_chat_completion_non_streaming | ✅ |
 | Vision | /chat_completion | non_streaming | test_image_chat_completion_non_streaming | Passed |
-| Text | /chat_completion | tool_calling | test_text_chat_completion_with_tool_calling_and_streaming | Passed |
-| Text | /chat_completion | tool_calling | test_text_chat_completion_with_tool_calling_and_non_streaming | Passed |
-| Text | /completion | streaming | test_text_completion_streaming | Passed |
-| Text | /completion | non_streaming | test_text_completion_non_streaming | Passed |
-| Text | /completion | structured_output | test_text_completion_structured_output | Passed |
+| Text | /chat_completion | tool_calling | test_text_chat_completion_with_tool_calling_and_streaming | ✅ |
+| Text | /chat_completion | tool_calling | test_text_chat_completion_with_tool_calling_and_non_streaming | ✅ |
+| Text | /completion | streaming | test_text_completion_streaming | ✅ |
+| Text | /completion | non_streaming | test_text_completion_non_streaming | ✅ |
+| Text | /completion | structured_output | test_text_completion_structured_output | ✅ |
 
 ## Memory:
 | API | Capability | Test | Status |
 |:-----|:-----|:-----|:-----|
-| insert and query | inline | test_memory_bank_insert_inline_and_query | Error |
-| insert and query | url | test_memory_bank_insert_from_url_and_query | Failed |
+| /insert, /query | inline | test_memory_bank_insert_inline_and_query | ❌ |
+| /insert, /query | url | test_memory_bank_insert_from_url_and_query | ❌ |
 
-## Agent:
+## Agents:
 | API | Capability | Test | Status |
 |:-----|:-----|:-----|:-----|
-| create_agent_turn | rag | test_rag_agent | Failed |
-| create_agent_turn | custom_tool | test_custom_tool | Passed |
-| create_agent_turn | code_execution | test_code_execution | Failed |
+| create_agent_turn | rag | test_rag_agent | ❌ |
+| create_agent_turn | custom_tool | test_custom_tool | ✅ |
+| create_agent_turn | code_execution | test_code_execution | ❌ |
Changed file 2 of 3 (the metadata module defining the API-to-test maps):

@@ -27,14 +27,14 @@ INFERENCE_API_CAPA_TEST_MAP = {
     },
 }
 
-MEMORY_API_CAPA_TEST_MAP = {
+MEMORY_API_TEST_MAP = {
     "/insert, /query": {
         "inline": ["test_memory_bank_insert_inline_and_query"],
         "url": ["test_memory_bank_insert_from_url_and_query"],
     }
 }
 
-AGENTS_API_CAPA_TEST_MAP = {
+AGENTS_API_TEST_MAP = {
     "create_agent_turn": {
         "rag": ["test_rag_agent"],
         "custom_tool": ["test_custom_tool"],
@@ -43,8 +43,8 @@ AGENTS_API_CAPA_TEST_MAP = {
 }
 
 
-API_CAPA_MAPS = {
+API_MAPS = {
     "inference": INFERENCE_API_CAPA_TEST_MAP,
-    "memory": MEMORY_API_CAPA_TEST_MAP,
-    "agents": AGENTS_API_CAPA_TEST_MAP,
+    "memory": MEMORY_API_TEST_MAP,
+    "agents": AGENTS_API_TEST_MAP,
}
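For orientation, every map in the metadata module shares one nesting convention: API endpoint, then capability, then a list of pytest test names. Below is a minimal sketch of that shape after the rename, assuming the structure shown in the hunks above; the inference map is omitted and the `code_execution` entry is inferred from the Agents table in the report diff, so treat it as an approximation rather than a verbatim copy of the module.

```python
# Sketch of the renamed maps' common shape; not a verbatim copy of the metadata module.
MEMORY_API_TEST_MAP = {
    "/insert, /query": {
        "inline": ["test_memory_bank_insert_inline_and_query"],
        "url": ["test_memory_bank_insert_from_url_and_query"],
    }
}

AGENTS_API_TEST_MAP = {
    "create_agent_turn": {
        "rag": ["test_rag_agent"],
        "custom_tool": ["test_custom_tool"],
        "code_execution": ["test_code_execution"],  # inferred from the report table
    }
}

API_MAPS = {
    "memory": MEMORY_API_TEST_MAP,
    "agents": AGENTS_API_TEST_MAP,
}

# Any API group can be addressed the same way:
print(API_MAPS["agents"]["create_agent_turn"]["rag"])  # ['test_rag_agent']
```

Because the maps are structurally identical, the report generator below can fold the Memory and Agents sections into a single loop over `API_MAPS`.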
Changed file 3 of 3 (the report generator, class Report):

@@ -14,7 +14,7 @@ from llama_models.datatypes import CoreModelId
 from llama_models.sku_list import all_registered_models
 
 from llama_stack.distribution.library_client import LlamaStackAsLibraryClient
-from metadata import API_CAPA_MAPS
+from metadata import API_MAPS
 
 from pytest import CollectReport
 
@@ -118,7 +118,7 @@ class Report:
             if (
                 "Instruct" not in model.core_model_id.value
                 and "Guard" not in model.core_model_id.value
-            ):
+            ) or (model.variant):
                 continue
             row = f"| {model.core_model_id.value} |"
             if model.core_model_id.value in SUPPORTED_MODELS[self.image_name]:
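The added `or (model.variant)` clause is what removes the duplicate rows from the support matrix in the first file: model variants (for example quantized builds) share a `core_model_id` with their base model, so iterating over every registered model printed the same id several times. Here is a toy sketch of the effect, using `SimpleNamespace` stand-ins instead of the real `llama_models` `Model` objects; the variant labels are made up.

```python
from types import SimpleNamespace

def fake_model(core_id, variant=None):
    # Stand-in exposing only the two attributes the filter reads.
    return SimpleNamespace(core_model_id=SimpleNamespace(value=core_id), variant=variant)

models = [
    fake_model("Llama3.2-1B-Instruct"),
    fake_model("Llama3.2-1B-Instruct", variant="quantized-a"),  # hypothetical variant
    fake_model("Llama3.2-1B-Instruct", variant="quantized-b"),  # hypothetical variant
    fake_model("Llama3.2-3B-Instruct"),
]

rows = []
for model in models:
    # Mirrors the updated condition: skip ids that are neither Instruct nor Guard,
    # and skip anything that is a variant of a base model.
    if (
        "Instruct" not in model.core_model_id.value
        and "Guard" not in model.core_model_id.value
    ) or model.variant:
        continue
    rows.append(f"| {model.core_model_id.value} |")

print("\n".join(rows))
# | Llama3.2-1B-Instruct |
# | Llama3.2-3B-Instruct |
```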
@@ -133,7 +133,7 @@ class Report:
             "| Model | API | Capability | Test | Status |",
             "|:----- |:-----|:-----|:-----|:-----|",
         ]
-        for api, capa_map in API_CAPA_MAPS["inference"].items():
+        for api, capa_map in API_MAPS["inference"].items():
             for capa, tests in capa_map.items():
                 vision_tests = filter(lambda test_name: "image" in test_name, tests)
                 text_tests = filter(lambda test_name: "text" in test_name, tests)
@@ -145,7 +145,7 @@ class Report:
                     # the result of the first one for now. Ideally we should mark the test as failed if
                     # any of the parametrizations failed.
                     test_table.append(
-                        f"| Text | /{api} | {capa} | {test_name} | {self.test_data[test_nodeids[0]]} |"
+                        f"| Text | /{api} | {capa} | {test_name} | {self._print_result_icon(self.test_data[test_nodeids[0]])} |"
                     )
 
                 for test_name in vision_tests:
@@ -157,34 +157,22 @@ class Report:
 
         report.extend(test_table)
 
-        report.append("\n## Memory: ")
-        test_table = [
-            "| API | Capability | Test | Status |",
-            "|:-----|:-----|:-----|:-----|",
-        ]
-        for api, capa_map in API_CAPA_MAPS["memory"].items():
-            for capa, tests in capa_map.items():
-                for test_name in tests:
-                    test_nodeids = self.test_name_to_nodeid[test_name]
-                    assert len(test_nodeids) > 0
-                    test_table.append(
-                        f"| {api} | {capa} | {test_name} | {self.test_data[test_nodeids[0]]} |"
-                    )
-        report.extend(test_table)
-        report.append("\n## Agent: ")
-        test_table = [
-            "| API | Capability | Test | Status |",
-            "|:-----|:-----|:-----|:-----|",
-        ]
-        for api, capa_map in API_CAPA_MAPS["agents"].items():
-            for capa, tests in capa_map.items():
-                for test_name in tests:
-                    test_nodeids = self.test_name_to_nodeid[test_name]
-                    assert len(test_nodeids) > 0
-                    test_table.append(
-                        f"| /{api} | {capa} | {test_name} | {self.test_data[test_nodeids[0]]} |"
-                    )
-        report.extend(test_table)
+        for api_group in ["memory", "agents"]:
+            api_capitalized = api_group.capitalize()
+            report.append(f"\n## {api_capitalized}: ")
+            test_table = [
+                "| API | Capability | Test | Status |",
+                "|:-----|:-----|:-----|:-----|",
+            ]
+            for api, capa_map in API_MAPS[api_group].items():
+                for capa, tests in capa_map.items():
+                    for test_name in tests:
+                        test_nodeids = self.test_name_to_nodeid[test_name]
+                        assert len(test_nodeids) > 0
+                        test_table.append(
+                            f"| {api} | {capa} | {test_name} | {self._print_result_icon(self.test_data[test_nodeids[0]])} |"
+                        )
+            report.extend(test_table)
         output_file = self.output_path
         output_file.write_text("\n".join(report))
         print(f"\nReport generated: {output_file.absolute()}")
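To see the consolidation end to end, here is a self-contained approximation of the new loop outside the `Report` class. The node IDs and outcomes are fabricated for the example (in the real class, `test_data` and `test_name_to_nodeid` are collected from pytest hooks), and the `ICONS` lookup stands in for `_print_result_icon`.

```python
# Fabricated inputs standing in for state the Report class collects from pytest.
API_MAPS = {
    "memory": {"/insert, /query": {"inline": ["test_memory_bank_insert_inline_and_query"]}},
    "agents": {"create_agent_turn": {"custom_tool": ["test_custom_tool"]}},
}
test_name_to_nodeid = {  # hypothetical pytest node IDs
    "test_memory_bank_insert_inline_and_query": ["test_memory.py::test_memory_bank_insert_inline_and_query"],
    "test_custom_tool": ["test_agents.py::test_custom_tool"],
}
test_data = {  # hypothetical outcomes keyed by node ID
    "test_memory.py::test_memory_bank_insert_inline_and_query": "Failed",
    "test_agents.py::test_custom_tool": "Passed",
}
ICONS = {"Passed": "✅", "Failed": "❌", "Error": "❌", "Skipped": "⏭️"}

report = []
for api_group in ["memory", "agents"]:
    report.append(f"\n## {api_group.capitalize()}: ")
    test_table = [
        "| API | Capability | Test | Status |",
        "|:-----|:-----|:-----|:-----|",
    ]
    for api, capa_map in API_MAPS[api_group].items():
        for capa, tests in capa_map.items():
            for test_name in tests:
                nodeids = test_name_to_nodeid[test_name]
                assert len(nodeids) > 0
                # As in the diff, only the first parametrization's outcome is reported.
                test_table.append(
                    f"| {api} | {capa} | {test_name} | {ICONS[test_data[nodeids[0]]]} |"
                )
    report.extend(test_table)

print("\n".join(report))
```

Running this prints Memory and Agents sections shaped exactly like the tables in the report diff above, which is why the two per-section copies of the table-building code could be dropped.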
@@ -195,10 +183,12 @@ class Report:
 
     def _print_result_icon(self, result):
         if result == "Passed":
-            return "✅"
+            return "✅"
+        elif result == "Failed" or result == "Error":
+            return "❌"
         else:
-            # result == "Failed" or result == "Error":
-            return "❌"
+            # result == "Skipped":
+            return "⏭️"
 
     def _process_outcome(self, report: CollectReport):
         if self._is_error(report):
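The rewritten helper also stops lumping skipped tests in with failures; previously anything other than "Passed" fell through to ❌. A quick standalone check of the three branches (a copy of the corrected logic for illustration, not the class method itself):

```python
def print_result_icon(result: str) -> str:
    # Same branching as the corrected Report._print_result_icon.
    if result == "Passed":
        return "✅"
    elif result == "Failed" or result == "Error":
        return "❌"
    else:
        # result == "Skipped"
        return "⏭️"

for outcome in ("Passed", "Failed", "Error", "Skipped"):
    print(f"{outcome} -> {print_result_icon(outcome)}")
# Passed -> ✅, Failed -> ❌, Error -> ❌, Skipped -> ⏭️
```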