Report generation minor fixes (#884)

# What does this PR do?

Fixed report generation:
1) Do not initialize a new client in report.py — instead, get it from the
pytest fixture.
2) Add a "Provider" column to the "safety" and "agents" sections.
3) Add logprobs functionality to the "inference" section.


## Test Plan

See the regenerated report.



## Before submitting

- [ ] This PR fixes a typo or improves the docs (you can dismiss the
other checks if that's the case).
- [ ] Ran pre-commit to handle lint / formatting issues.
- [ ] Read the [contributor
guideline](https://github.com/meta-llama/llama-stack/blob/main/CONTRIBUTING.md),
      Pull Request section?
- [ ] Updated relevant documentation.
- [ ] Wrote necessary unit or integration tests.
This commit is contained in:
Sixian Yi 2025-01-28 04:58:12 -08:00 committed by GitHub
parent 5b0d778871
commit ba453c3487
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 50 additions and 43 deletions

View file

@ -27,18 +27,20 @@
| Llama-3.1-8B-Instruct | /chat_completion | non_streaming | test_text_chat_completion_non_streaming | ✅ | | Llama-3.1-8B-Instruct | /chat_completion | non_streaming | test_text_chat_completion_non_streaming | ✅ |
| Llama-3.1-8B-Instruct | /chat_completion | tool_calling | test_text_chat_completion_with_tool_calling_and_streaming | ✅ | | Llama-3.1-8B-Instruct | /chat_completion | tool_calling | test_text_chat_completion_with_tool_calling_and_streaming | ✅ |
| Llama-3.1-8B-Instruct | /chat_completion | tool_calling | test_text_chat_completion_with_tool_calling_and_non_streaming | ✅ | | Llama-3.1-8B-Instruct | /chat_completion | tool_calling | test_text_chat_completion_with_tool_calling_and_non_streaming | ✅ |
| Llama-3.2-11B-Vision-Instruct | /chat_completion | log_probs | test_completion_log_probs_non_streaming | ✅ |
| Llama-3.2-11B-Vision-Instruct | /chat_completion | log_probs | test_completion_log_probs_streaming | ✅ |
| Llama-3.1-8B-Instruct | /completion | streaming | test_text_completion_streaming | ✅ | | Llama-3.1-8B-Instruct | /completion | streaming | test_text_completion_streaming | ✅ |
| Llama-3.1-8B-Instruct | /completion | non_streaming | test_text_completion_non_streaming | ✅ | | Llama-3.1-8B-Instruct | /completion | non_streaming | test_text_completion_non_streaming | ✅ |
| Llama-3.1-8B-Instruct | /completion | structured_output | test_text_completion_structured_output | ✅ | | Llama-3.1-8B-Instruct | /completion | structured_output | test_text_completion_structured_output | ✅ |
## Vector IO ## Vector IO
| API | Capability | Test | Status | | Provider | API | Capability | Test | Status |
|:-----|:-----|:-----|:-----| |:-----|:-----|:-----|:-----|:-----|
| /retrieve | | test_vector_db_retrieve | ✅ | | inline::faiss | /retrieve | | test_vector_db_retrieve | ✅ |
## Agents ## Agents
| API | Capability | Test | Status | | Provider | API | Capability | Test | Status |
|:-----|:-----|:-----|:-----| |:-----|:-----|:-----|:-----|:-----|
| /create_agent_turn | rag | test_rag_agent | ✅ | | inline::meta-reference | /create_agent_turn | rag | test_rag_agent | ✅ |
| /create_agent_turn | custom_tool | test_custom_tool | ✅ | | inline::meta-reference | /create_agent_turn | custom_tool | test_custom_tool | ✅ |
| /create_agent_turn | code_execution | test_code_interpreter_for_attachments | ✅ | | inline::meta-reference | /create_agent_turn | code_execution | test_code_interpreter_for_attachments | ✅ |

View file

@ -27,18 +27,20 @@
| Llama-3.1-8B-Instruct | /chat_completion | non_streaming | test_text_chat_completion_non_streaming | ✅ | | Llama-3.1-8B-Instruct | /chat_completion | non_streaming | test_text_chat_completion_non_streaming | ✅ |
| Llama-3.1-8B-Instruct | /chat_completion | tool_calling | test_text_chat_completion_with_tool_calling_and_streaming | ✅ | | Llama-3.1-8B-Instruct | /chat_completion | tool_calling | test_text_chat_completion_with_tool_calling_and_streaming | ✅ |
| Llama-3.1-8B-Instruct | /chat_completion | tool_calling | test_text_chat_completion_with_tool_calling_and_non_streaming | ✅ | | Llama-3.1-8B-Instruct | /chat_completion | tool_calling | test_text_chat_completion_with_tool_calling_and_non_streaming | ✅ |
| Llama-3.2-11B-Vision-Instruct | /chat_completion | log_probs | test_completion_log_probs_non_streaming | ✅ |
| Llama-3.2-11B-Vision-Instruct | /chat_completion | log_probs | test_completion_log_probs_streaming | ✅ |
| Llama-3.1-8B-Instruct | /completion | streaming | test_text_completion_streaming | ✅ | | Llama-3.1-8B-Instruct | /completion | streaming | test_text_completion_streaming | ✅ |
| Llama-3.1-8B-Instruct | /completion | non_streaming | test_text_completion_non_streaming | ✅ | | Llama-3.1-8B-Instruct | /completion | non_streaming | test_text_completion_non_streaming | ✅ |
| Llama-3.1-8B-Instruct | /completion | structured_output | test_text_completion_structured_output | ✅ | | Llama-3.1-8B-Instruct | /completion | structured_output | test_text_completion_structured_output | ✅ |
## Vector IO ## Vector IO
| API | Capability | Test | Status | | Provider | API | Capability | Test | Status |
|:-----|:-----|:-----|:-----| |:-----|:-----|:-----|:-----|:-----|
| /retrieve | | test_vector_db_retrieve | ✅ | | inline::faiss | /retrieve | | test_vector_db_retrieve | ✅ |
## Agents ## Agents
| API | Capability | Test | Status | | Provider | API | Capability | Test | Status |
|:-----|:-----|:-----|:-----| |:-----|:-----|:-----|:-----|:-----|
| /create_agent_turn | rag | test_rag_agent | ✅ | | inline::meta-reference | /create_agent_turn | rag | test_rag_agent | ✅ |
| /create_agent_turn | custom_tool | test_custom_tool | ✅ | | inline::meta-reference | /create_agent_turn | custom_tool | test_custom_tool | ✅ |
| /create_agent_turn | code_execution | test_code_interpreter_for_attachments | ✅ | | inline::meta-reference | /create_agent_turn | code_execution | test_code_interpreter_for_attachments | ✅ |

View file

@ -20,6 +20,10 @@ INFERENCE_API_CAPA_TEST_MAP = {
"test_text_chat_completion_with_tool_calling_and_streaming", "test_text_chat_completion_with_tool_calling_and_streaming",
"test_text_chat_completion_with_tool_calling_and_non_streaming", "test_text_chat_completion_with_tool_calling_and_non_streaming",
], ],
"log_probs": [
"test_completion_log_probs_non_streaming",
"test_completion_log_probs_streaming",
],
}, },
"completion": { "completion": {
"streaming": ["test_text_completion_streaming"], "streaming": ["test_text_completion_streaming"],

View file

@ -23,18 +23,16 @@ from llama_models.sku_list import (
safety_models, safety_models,
) )
from llama_stack.distribution.library_client import LlamaStackAsLibraryClient
from llama_stack.providers.datatypes import Api from llama_stack.providers.datatypes import Api
from llama_stack.providers.tests.env import get_env_or_fail from llama_stack.providers.tests.env import get_env_or_fail
from llama_stack_client import LlamaStackClient
from metadata import API_MAPS from metadata import API_MAPS
from pytest import CollectReport from pytest import CollectReport
from termcolor import cprint from termcolor import cprint
def featured_models_repo_names(): def featured_models():
models = [ models = [
*llama3_instruct_models(), *llama3_instruct_models(),
*llama3_1_instruct_models(), *llama3_1_instruct_models(),
@ -42,7 +40,7 @@ def featured_models_repo_names():
*llama3_3_instruct_models(), *llama3_3_instruct_models(),
*safety_models(), *safety_models(),
] ]
return [model.huggingface_repo for model in models if not model.variant] return {model.huggingface_repo: model for model in models if not model.variant}
SUPPORTED_MODELS = { SUPPORTED_MODELS = {
@ -99,25 +97,15 @@ class Report:
if not config_path.exists(): if not config_path.exists():
raise ValueError(f"Config file {config_path} does not exist") raise ValueError(f"Config file {config_path} does not exist")
self.output_path = Path(config_path.parent / "report.md") self.output_path = Path(config_path.parent / "report.md")
self.client = LlamaStackAsLibraryClient( self.distro_name = None
config_path_or_template_name,
provider_data=None,
skip_logger_removal=True,
)
self.client.initialize()
self.image_name = self.client.async_client.config.image_name
elif os.environ.get("LLAMA_STACK_BASE_URL"): elif os.environ.get("LLAMA_STACK_BASE_URL"):
url = get_env_or_fail("LLAMA_STACK_BASE_URL") url = get_env_or_fail("LLAMA_STACK_BASE_URL")
self.image_name = urlparse(url).netloc self.distro_name = urlparse(url).netloc
if report_path is None: if report_path is None:
raise ValueError( raise ValueError(
"Report path must be provided when LLAMA_STACK_BASE_URL is set" "Report path must be provided when LLAMA_STACK_BASE_URL is set"
) )
self.output_path = Path(report_path) self.output_path = Path(report_path)
self.client = LlamaStackClient(
base_url=url,
provider_data=None,
)
else: else:
raise ValueError("LLAMA_STACK_CONFIG or LLAMA_STACK_BASE_URL must be set") raise ValueError("LLAMA_STACK_CONFIG or LLAMA_STACK_BASE_URL must be set")
@ -127,6 +115,7 @@ class Report:
self.test_name_to_nodeid = defaultdict(list) self.test_name_to_nodeid = defaultdict(list)
self.vision_model_id = None self.vision_model_id = None
self.text_model_id = None self.text_model_id = None
self.client = None
@pytest.hookimpl(tryfirst=True) @pytest.hookimpl(tryfirst=True)
def pytest_runtest_logreport(self, report): def pytest_runtest_logreport(self, report):
@ -140,17 +129,17 @@ class Report:
def pytest_sessionfinish(self, session): def pytest_sessionfinish(self, session):
report = [] report = []
report.append(f"# Report for {self.image_name} distribution") report.append(f"# Report for {self.distro_name} distribution")
report.append("\n## Supported Models") report.append("\n## Supported Models")
header = f"| Model Descriptor | {self.image_name} |" header = f"| Model Descriptor | {self.distro_name} |"
dividor = "|:---|:---|" dividor = "|:---|:---|"
report.append(header) report.append(header)
report.append(dividor) report.append(dividor)
rows = [] rows = []
if self.image_name in SUPPORTED_MODELS: if self.distro_name in SUPPORTED_MODELS:
for model in all_registered_models(): for model in all_registered_models():
if ( if (
"Instruct" not in model.core_model_id.value "Instruct" not in model.core_model_id.value
@ -158,16 +147,16 @@ class Report:
) or (model.variant): ) or (model.variant):
continue continue
row = f"| {model.core_model_id.value} |" row = f"| {model.core_model_id.value} |"
if model.core_model_id.value in SUPPORTED_MODELS[self.image_name]: if model.core_model_id.value in SUPPORTED_MODELS[self.distro_name]:
row += " ✅ |" row += " ✅ |"
else: else:
row += " ❌ |" row += " ❌ |"
rows.append(row) rows.append(row)
else: else:
supported_models = {m.identifier for m in self.client.models.list()} supported_models = {m.identifier for m in self.client.models.list()}
for model in featured_models_repo_names(): for hf_name, model in featured_models().items():
row = f"| {model} |" row = f"| {model.core_model_id.value} |"
if model in supported_models: if hf_name in supported_models:
row += " ✅ |" row += " ✅ |"
else: else:
row += " ❌ |" row += " ❌ |"
@ -200,20 +189,23 @@ class Report:
report.extend(test_table) report.extend(test_table)
name_map = {Api.vector_io: "Vector IO", Api.agents: "Agents"} name_map = {Api.vector_io: "Vector IO", Api.agents: "Agents"}
providers = self.client.providers.list()
for api_group in [Api.vector_io, Api.agents]: for api_group in [Api.vector_io, Api.agents]:
api_capitalized = name_map[api_group] api_capitalized = name_map[api_group]
report.append(f"\n## {api_capitalized}") report.append(f"\n## {api_capitalized}")
test_table = [ test_table = [
"| API | Capability | Test | Status |", "| Provider | API | Capability | Test | Status |",
"|:-----|:-----|:-----|:-----|", "|:-----|:-----|:-----|:-----|:-----|",
] ]
provider = [p for p in providers if p.api == str(api_group.name)]
provider_str = provider[0].provider_type if provider else ""
for api, capa_map in API_MAPS[api_group].items(): for api, capa_map in API_MAPS[api_group].items():
for capa, tests in capa_map.items(): for capa, tests in capa_map.items():
for test_name in tests: for test_name in tests:
test_nodeids = self.test_name_to_nodeid[test_name] test_nodeids = self.test_name_to_nodeid[test_name]
assert len(test_nodeids) > 0 assert len(test_nodeids) > 0
test_table.append( test_table.append(
f"| /{api} | {capa} | {test_name} | {self._print_result_icon(self.test_data[test_nodeids[0]])} |" f"| {provider_str} | /{api} | {capa} | {test_name} | {self._print_result_icon(self.test_data[test_nodeids[0]])} |"
) )
report.extend(test_table) report.extend(test_table)
@ -224,6 +216,9 @@ class Report:
def pytest_runtest_makereport(self, item, call): def pytest_runtest_makereport(self, item, call):
func_name = getattr(item, "originalname", item.name) func_name = getattr(item, "originalname", item.name)
self.test_name_to_nodeid[func_name].append(item.nodeid)
# Get values from fixtures for report output
if "text_model_id" in item.funcargs: if "text_model_id" in item.funcargs:
text_model = item.funcargs["text_model_id"].split("/")[1] text_model = item.funcargs["text_model_id"].split("/")[1]
self.text_model_id = self.text_model_id or text_model self.text_model_id = self.text_model_id or text_model
@ -231,7 +226,11 @@ class Report:
vision_model = item.funcargs["vision_model_id"].split("/")[1] vision_model = item.funcargs["vision_model_id"].split("/")[1]
self.vision_model_id = self.vision_model_id or vision_model self.vision_model_id = self.vision_model_id or vision_model
self.test_name_to_nodeid[func_name].append(item.nodeid) if self.client is None and "llama_stack_client" in item.funcargs:
self.client = item.funcargs["llama_stack_client"]
self.distro_name = (
self.distro_name or self.client.async_client.config.image_name
)
def _print_result_icon(self, result): def _print_result_icon(self, result):
if result == "Passed": if result == "Passed":