mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-08-06 18:50:44 +00:00
address comment
This commit is contained in:
parent
447d65dbc2
commit
e9f49a1edd
3 changed files with 43 additions and 61 deletions
|
@ -8,46 +8,38 @@
|
||||||
| Llama3.1-8B-Instruct | ✅ |
|
| Llama3.1-8B-Instruct | ✅ |
|
||||||
| Llama3.1-70B-Instruct | ✅ |
|
| Llama3.1-70B-Instruct | ✅ |
|
||||||
| Llama3.1-405B-Instruct | ✅ |
|
| Llama3.1-405B-Instruct | ✅ |
|
||||||
| Llama3.1-405B-Instruct | ✅ |
|
|
||||||
| Llama3.1-405B-Instruct | ✅ |
|
|
||||||
| Llama3.2-1B-Instruct | ✅ |
|
| Llama3.2-1B-Instruct | ✅ |
|
||||||
| Llama3.2-3B-Instruct | ✅ |
|
| Llama3.2-3B-Instruct | ✅ |
|
||||||
| Llama3.2-1B-Instruct | ✅ |
|
|
||||||
| Llama3.2-1B-Instruct | ✅ |
|
|
||||||
| Llama3.2-3B-Instruct | ✅ |
|
|
||||||
| Llama3.2-3B-Instruct | ✅ |
|
|
||||||
| Llama3.2-11B-Vision-Instruct | ✅ |
|
| Llama3.2-11B-Vision-Instruct | ✅ |
|
||||||
| Llama3.2-90B-Vision-Instruct | ✅ |
|
| Llama3.2-90B-Vision-Instruct | ✅ |
|
||||||
| Llama3.3-70B-Instruct | ✅ |
|
| Llama3.3-70B-Instruct | ✅ |
|
||||||
| Llama-Guard-3-11B-Vision | ✅ |
|
| Llama-Guard-3-11B-Vision | ✅ |
|
||||||
| Llama-Guard-3-1B | ❌ |
|
| Llama-Guard-3-1B | ❌ |
|
||||||
| Llama-Guard-3-1B | ❌ |
|
|
||||||
| Llama-Guard-3-8B | ✅ |
|
|
||||||
| Llama-Guard-3-8B | ✅ |
|
| Llama-Guard-3-8B | ✅ |
|
||||||
| Llama-Guard-2-8B | ❌ |
|
| Llama-Guard-2-8B | ❌ |
|
||||||
|
|
||||||
## Inference:
|
## Inference:
|
||||||
| Model | API | Capability | Test | Status |
|
| Model | API | Capability | Test | Status |
|
||||||
|:----- |:-----|:-----|:-----|:-----|
|
|:----- |:-----|:-----|:-----|:-----|
|
||||||
| Text | /chat_completion | streaming | test_text_chat_completion_streaming | Passed |
|
| Text | /chat_completion | streaming | test_text_chat_completion_streaming | ✅ |
|
||||||
| Vision | /chat_completion | streaming | test_image_chat_completion_streaming | Passed |
|
| Vision | /chat_completion | streaming | test_image_chat_completion_streaming | Passed |
|
||||||
| Text | /chat_completion | non_streaming | test_text_chat_completion_non_streaming | Passed |
|
| Text | /chat_completion | non_streaming | test_text_chat_completion_non_streaming | ✅ |
|
||||||
| Vision | /chat_completion | non_streaming | test_image_chat_completion_non_streaming | Passed |
|
| Vision | /chat_completion | non_streaming | test_image_chat_completion_non_streaming | Passed |
|
||||||
| Text | /chat_completion | tool_calling | test_text_chat_completion_with_tool_calling_and_streaming | Passed |
|
| Text | /chat_completion | tool_calling | test_text_chat_completion_with_tool_calling_and_streaming | ✅ |
|
||||||
| Text | /chat_completion | tool_calling | test_text_chat_completion_with_tool_calling_and_non_streaming | Passed |
|
| Text | /chat_completion | tool_calling | test_text_chat_completion_with_tool_calling_and_non_streaming | ✅ |
|
||||||
| Text | /completion | streaming | test_text_completion_streaming | Passed |
|
| Text | /completion | streaming | test_text_completion_streaming | ✅ |
|
||||||
| Text | /completion | non_streaming | test_text_completion_non_streaming | Passed |
|
| Text | /completion | non_streaming | test_text_completion_non_streaming | ✅ |
|
||||||
| Text | /completion | structured_output | test_text_completion_structured_output | Passed |
|
| Text | /completion | structured_output | test_text_completion_structured_output | ✅ |
|
||||||
|
|
||||||
## Memory:
|
## Memory:
|
||||||
| API | Capability | Test | Status |
|
| API | Capability | Test | Status |
|
||||||
|:-----|:-----|:-----|:-----|
|
|:-----|:-----|:-----|:-----|
|
||||||
| insert and query | inline | test_memory_bank_insert_inline_and_query | Error |
|
| /insert, /query | inline | test_memory_bank_insert_inline_and_query | ❌ |
|
||||||
| insert and query | url | test_memory_bank_insert_from_url_and_query | Failed |
|
| /insert, /query | url | test_memory_bank_insert_from_url_and_query | ❌ |
|
||||||
|
|
||||||
## Agent:
|
## Agents:
|
||||||
| API | Capability | Test | Status |
|
| API | Capability | Test | Status |
|
||||||
|:-----|:-----|:-----|:-----|
|
|:-----|:-----|:-----|:-----|
|
||||||
| create_agent_turn | rag | test_rag_agent | Failed |
|
| create_agent_turn | rag | test_rag_agent | ❌ |
|
||||||
| create_agent_turn | custom_tool | test_custom_tool | Passed |
|
| create_agent_turn | custom_tool | test_custom_tool | ✅ |
|
||||||
| create_agent_turn | code_execution | test_code_execution | Failed |
|
| create_agent_turn | code_execution | test_code_execution | ❌ |
|
||||||
|
|
|
@ -27,14 +27,14 @@ INFERENCE_API_CAPA_TEST_MAP = {
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
MEMORY_API_CAPA_TEST_MAP = {
|
MEMORY_API_TEST_MAP = {
|
||||||
"/insert, /query": {
|
"/insert, /query": {
|
||||||
"inline": ["test_memory_bank_insert_inline_and_query"],
|
"inline": ["test_memory_bank_insert_inline_and_query"],
|
||||||
"url": ["test_memory_bank_insert_from_url_and_query"],
|
"url": ["test_memory_bank_insert_from_url_and_query"],
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
AGENTS_API_CAPA_TEST_MAP = {
|
AGENTS_API_TEST_MAP = {
|
||||||
"create_agent_turn": {
|
"create_agent_turn": {
|
||||||
"rag": ["test_rag_agent"],
|
"rag": ["test_rag_agent"],
|
||||||
"custom_tool": ["test_custom_tool"],
|
"custom_tool": ["test_custom_tool"],
|
||||||
|
@ -43,8 +43,8 @@ AGENTS_API_CAPA_TEST_MAP = {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
API_CAPA_MAPS = {
|
API_MAPS = {
|
||||||
"inference": INFERENCE_API_CAPA_TEST_MAP,
|
"inference": INFERENCE_API_CAPA_TEST_MAP,
|
||||||
"memory": MEMORY_API_CAPA_TEST_MAP,
|
"memory": MEMORY_API_TEST_MAP,
|
||||||
"agents": AGENTS_API_CAPA_TEST_MAP,
|
"agents": AGENTS_API_TEST_MAP,
|
||||||
}
|
}
|
||||||
|
|
|
@ -14,7 +14,7 @@ from llama_models.datatypes import CoreModelId
|
||||||
from llama_models.sku_list import all_registered_models
|
from llama_models.sku_list import all_registered_models
|
||||||
|
|
||||||
from llama_stack.distribution.library_client import LlamaStackAsLibraryClient
|
from llama_stack.distribution.library_client import LlamaStackAsLibraryClient
|
||||||
from metadata import API_CAPA_MAPS
|
from metadata import API_MAPS
|
||||||
|
|
||||||
from pytest import CollectReport
|
from pytest import CollectReport
|
||||||
|
|
||||||
|
@ -118,7 +118,7 @@ class Report:
|
||||||
if (
|
if (
|
||||||
"Instruct" not in model.core_model_id.value
|
"Instruct" not in model.core_model_id.value
|
||||||
and "Guard" not in model.core_model_id.value
|
and "Guard" not in model.core_model_id.value
|
||||||
):
|
) or (model.variant):
|
||||||
continue
|
continue
|
||||||
row = f"| {model.core_model_id.value} |"
|
row = f"| {model.core_model_id.value} |"
|
||||||
if model.core_model_id.value in SUPPORTED_MODELS[self.image_name]:
|
if model.core_model_id.value in SUPPORTED_MODELS[self.image_name]:
|
||||||
|
@ -133,7 +133,7 @@ class Report:
|
||||||
"| Model | API | Capability | Test | Status |",
|
"| Model | API | Capability | Test | Status |",
|
||||||
"|:----- |:-----|:-----|:-----|:-----|",
|
"|:----- |:-----|:-----|:-----|:-----|",
|
||||||
]
|
]
|
||||||
for api, capa_map in API_CAPA_MAPS["inference"].items():
|
for api, capa_map in API_MAPS["inference"].items():
|
||||||
for capa, tests in capa_map.items():
|
for capa, tests in capa_map.items():
|
||||||
vision_tests = filter(lambda test_name: "image" in test_name, tests)
|
vision_tests = filter(lambda test_name: "image" in test_name, tests)
|
||||||
text_tests = filter(lambda test_name: "text" in test_name, tests)
|
text_tests = filter(lambda test_name: "text" in test_name, tests)
|
||||||
|
@ -145,7 +145,7 @@ class Report:
|
||||||
# the result of the first one for now. Ideally we should mark the test as failed if
|
# the result of the first one for now. Ideally we should mark the test as failed if
|
||||||
# any of the parametrizations failed.
|
# any of the parametrizations failed.
|
||||||
test_table.append(
|
test_table.append(
|
||||||
f"| Text | /{api} | {capa} | {test_name} | {self.test_data[test_nodeids[0]]} |"
|
f"| Text | /{api} | {capa} | {test_name} | {self._print_result_icon(self.test_data[test_nodeids[0]])} |"
|
||||||
)
|
)
|
||||||
|
|
||||||
for test_name in vision_tests:
|
for test_name in vision_tests:
|
||||||
|
@ -157,34 +157,22 @@ class Report:
|
||||||
|
|
||||||
report.extend(test_table)
|
report.extend(test_table)
|
||||||
|
|
||||||
report.append("\n## Memory: ")
|
for api_group in ["memory", "agents"]:
|
||||||
test_table = [
|
api_capitalized = api_group.capitalize()
|
||||||
"| API | Capability | Test | Status |",
|
report.append(f"\n## {api_capitalized}: ")
|
||||||
"|:-----|:-----|:-----|:-----|",
|
test_table = [
|
||||||
]
|
"| API | Capability | Test | Status |",
|
||||||
for api, capa_map in API_CAPA_MAPS["memory"].items():
|
"|:-----|:-----|:-----|:-----|",
|
||||||
for capa, tests in capa_map.items():
|
]
|
||||||
for test_name in tests:
|
for api, capa_map in API_MAPS[api_group].items():
|
||||||
test_nodeids = self.test_name_to_nodeid[test_name]
|
for capa, tests in capa_map.items():
|
||||||
assert len(test_nodeids) > 0
|
for test_name in tests:
|
||||||
test_table.append(
|
test_nodeids = self.test_name_to_nodeid[test_name]
|
||||||
f"| {api} | {capa} | {test_name} | {self.test_data[test_nodeids[0]]} |"
|
assert len(test_nodeids) > 0
|
||||||
)
|
test_table.append(
|
||||||
report.extend(test_table)
|
f"| {api} | {capa} | {test_name} | {self._print_result_icon(self.test_data[test_nodeids[0]])} |"
|
||||||
report.append("\n## Agent: ")
|
)
|
||||||
test_table = [
|
report.extend(test_table)
|
||||||
"| API | Capability | Test | Status |",
|
|
||||||
"|:-----|:-----|:-----|:-----|",
|
|
||||||
]
|
|
||||||
for api, capa_map in API_CAPA_MAPS["agents"].items():
|
|
||||||
for capa, tests in capa_map.items():
|
|
||||||
for test_name in tests:
|
|
||||||
test_nodeids = self.test_name_to_nodeid[test_name]
|
|
||||||
assert len(test_nodeids) > 0
|
|
||||||
test_table.append(
|
|
||||||
f"| /{api} | {capa} | {test_name} | {self.test_data[test_nodeids[0]]} |"
|
|
||||||
)
|
|
||||||
report.extend(test_table)
|
|
||||||
output_file = self.output_path
|
output_file = self.output_path
|
||||||
output_file.write_text("\n".join(report))
|
output_file.write_text("\n".join(report))
|
||||||
print(f"\nReport generated: {output_file.absolute()}")
|
print(f"\nReport generated: {output_file.absolute()}")
|
||||||
|
@ -195,10 +183,12 @@ class Report:
|
||||||
|
|
||||||
def _print_result_icon(self, result):
|
def _print_result_icon(self, result):
|
||||||
if result == "Passed":
|
if result == "Passed":
|
||||||
return "✅"
|
return "✅"
|
||||||
|
elif result == "Failed" or result == "Error":
|
||||||
|
return "❌"
|
||||||
else:
|
else:
|
||||||
# result == "Failed" or result == "Error":
|
# result == "Skipped":
|
||||||
return "❌"
|
return "⏭️"
|
||||||
|
|
||||||
def _process_outcome(self, report: CollectReport):
|
def _process_outcome(self, report: CollectReport):
|
||||||
if self._is_error(report):
|
if self._is_error(report):
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue