llama-stack/tests/client-sdk/report.py
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
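
"""Pytest plugin for the client-sdk tests: collects per-test outcomes and writes a
per-distribution markdown report (supported models plus per-API test results)."""
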
import importlib.resources
import os
from collections import defaultdict
from pathlib import Path
from typing import Optional
from urllib.parse import urlparse

import pytest
from llama_models.datatypes import CoreModelId
from llama_models.sku_list import (
    all_registered_models,
    llama3_1_instruct_models,
    llama3_2_instruct_models,
    llama3_3_instruct_models,
    llama3_instruct_models,
    safety_models,
)
from llama_stack.distribution.library_client import LlamaStackAsLibraryClient
from llama_stack.providers.datatypes import Api
from llama_stack.providers.tests.env import get_env_or_fail
from llama_stack_client import LlamaStackClient
from metadata import API_MAPS
from pytest import CollectReport
from termcolor import cprint
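

# HuggingFace repo names of the featured (non-variant) instruct and safety models;
# used to build the "Supported Models" table for remote (URL-based) distributions.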
def featured_models_repo_names():
    models = [
        *llama3_instruct_models(),
        *llama3_1_instruct_models(),
        *llama3_2_instruct_models(),
        *llama3_3_instruct_models(),
        *safety_models(),
    ]
    return [model.huggingface_repo for model in models if not model.variant]
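

# Distribution (image) name -> core model ids the distribution is known to support.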
SUPPORTED_MODELS = {
    "ollama": set(
        [
            CoreModelId.llama3_1_8b_instruct.value,
            CoreModelId.llama3_1_70b_instruct.value,
            CoreModelId.llama3_1_405b_instruct.value,
            CoreModelId.llama3_2_1b_instruct.value,
            CoreModelId.llama3_2_3b_instruct.value,
            CoreModelId.llama3_2_11b_vision_instruct.value,
            CoreModelId.llama3_2_90b_vision_instruct.value,
            CoreModelId.llama3_3_70b_instruct.value,
            CoreModelId.llama_guard_3_8b.value,
            CoreModelId.llama_guard_3_1b.value,
        ]
    ),
    "tgi": set(
        [
            model.core_model_id.value
            for model in all_registered_models()
            if model.huggingface_repo
        ]
    ),
    "vllm": set(
        [
            model.core_model_id.value
            for model in all_registered_models()
            if model.huggingface_repo
        ]
    ),
}


class Report:
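    """Pytest plugin that records test outcomes and renders them as a markdown report."""
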
    def __init__(self, report_path: Optional[str] = None):
        if os.environ.get("LLAMA_STACK_CONFIG"):
            config_path_or_template_name = get_env_or_fail("LLAMA_STACK_CONFIG")
            if config_path_or_template_name.endswith(".yaml"):
                config_path = Path(config_path_or_template_name)
            else:
                config_path = Path(
                    importlib.resources.files("llama_stack")
                    / f"templates/{config_path_or_template_name}/run.yaml"
                )
            if not config_path.exists():
                raise ValueError(f"Config file {config_path} does not exist")
            self.output_path = Path(config_path.parent / "report.md")
            self.client = LlamaStackAsLibraryClient(
                config_path_or_template_name,
                provider_data=None,
                skip_logger_removal=True,
            )
            self.client.initialize()
            self.image_name = self.client.async_client.config.image_name
        elif os.environ.get("LLAMA_STACK_BASE_URL"):
            url = get_env_or_fail("LLAMA_STACK_BASE_URL")
            self.image_name = urlparse(url).netloc
            if report_path is None:
                raise ValueError(
                    "Report path must be provided when LLAMA_STACK_BASE_URL is set"
                )
            self.output_path = Path(report_path)
            self.client = LlamaStackClient(
                base_url=url,
                provider_data=None,
            )
        else:
            raise ValueError("LLAMA_STACK_CONFIG or LLAMA_STACK_BASE_URL must be set")
        self.report_data = defaultdict(dict)
        # test nodeid -> outcome ("Passed", "Failed", "Error", "Skipped", ...)
        self.test_data = dict()
        # test function name -> list of test nodeids (one per parametrization)
        self.test_name_to_nodeid = defaultdict(list)
        self.vision_model_id = None
        self.text_model_id = None

    @pytest.hookimpl(tryfirst=True)
    def pytest_runtest_logreport(self, report):
        # This hook is called for several phases, including setup, call and teardown.
        # The test is considered failed / errored if any of the phase outcomes is not "Passed".
        outcome = self._process_outcome(report)
        if report.nodeid not in self.test_data:
            self.test_data[report.nodeid] = outcome
        elif self.test_data[report.nodeid] != outcome and outcome != "Passed":
            self.test_data[report.nodeid] = outcome
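
    # Render the final markdown report (supported models plus per-API test tables)
    # once the whole test session has finished.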
    def pytest_sessionfinish(self, session):
        report = []
        report.append(f"# Report for {self.image_name} distribution")
        report.append("\n## Supported Models")
        header = f"| Model Descriptor | {self.image_name} |"
        divider = "|:---|:---|"
        report.append(header)
        report.append(divider)
        rows = []
        if self.image_name in SUPPORTED_MODELS:
            for model in all_registered_models():
                if (
                    "Instruct" not in model.core_model_id.value
                    and "Guard" not in model.core_model_id.value
                ) or (model.variant):
                    continue
                row = f"| {model.core_model_id.value} |"
                if model.core_model_id.value in SUPPORTED_MODELS[self.image_name]:
                    row += " ✅ |"
                else:
                    row += " ❌ |"
                rows.append(row)
        else:
            supported_models = {m.identifier for m in self.client.models.list()}
            for model in featured_models_repo_names():
                row = f"| {model} |"
                if model in supported_models:
                    row += " ✅ |"
                else:
                    row += " ❌ |"
                rows.append(row)
        report.extend(rows)
report.append("\n## Inference")
test_table = [
"| Model | API | Capability | Test | Status |",
"|:----- |:-----|:-----|:-----|:-----|",
]
for api, capa_map in API_MAPS[Api.inference].items():
for capa, tests in capa_map.items():
for test_name in tests:
model_id = (
self.text_model_id
if "text" in test_name
else self.vision_model_id
)
test_nodeids = self.test_name_to_nodeid[test_name]
assert len(test_nodeids) > 0
# There might be more than one parametrizations for the same test function. We take
# the result of the first one for now. Ideally we should mark the test as failed if
# any of the parametrizations failed.
test_table.append(
f"| {model_id} | /{api} | {capa} | {test_name} | {self._print_result_icon(self.test_data[test_nodeids[0]])} |"
)
report.extend(test_table)
        name_map = {Api.vector_io: "Vector IO", Api.agents: "Agents"}
        for api_group in [Api.vector_io, Api.agents]:
            api_capitalized = name_map[api_group]
            report.append(f"\n## {api_capitalized}")
            test_table = [
                "| API | Capability | Test | Status |",
                "|:-----|:-----|:-----|:-----|",
            ]
            for api, capa_map in API_MAPS[api_group].items():
                for capa, tests in capa_map.items():
                    for test_name in tests:
                        test_nodeids = self.test_name_to_nodeid[test_name]
                        assert len(test_nodeids) > 0
                        test_table.append(
                            f"| /{api} | {capa} | {test_name} | {self._print_result_icon(self.test_data[test_nodeids[0]])} |"
                        )
            report.extend(test_table)

        output_file = self.output_path
        text = "\n".join(report) + "\n"
        output_file.write_text(text)
        cprint(f"\nReport generated: {output_file.absolute()}", "green")
    def pytest_runtest_makereport(self, item, call):
        func_name = getattr(item, "originalname", item.name)
        if "text_model_id" in item.funcargs:
            text_model = item.funcargs["text_model_id"].split("/")[1]
            self.text_model_id = self.text_model_id or text_model
        elif "vision_model_id" in item.funcargs:
            vision_model = item.funcargs["vision_model_id"].split("/")[1]
            self.vision_model_id = self.vision_model_id or vision_model
        self.test_name_to_nodeid[func_name].append(item.nodeid)

    def _print_result_icon(self, result):
        if result == "Passed":
            return "✅"
        elif result == "Failed" or result == "Error":
            return "❌"
        else:
            # result == "Skipped"
            return "⏭️"
    def _process_outcome(self, report: CollectReport):
        if self._is_error(report):
            return "Error"
        if hasattr(report, "wasxfail"):
            if report.outcome in ["passed", "failed"]:
                return "XPassed"
            if report.outcome == "skipped":
                return "XFailed"
        return report.outcome.capitalize()

    def _is_error(self, report: CollectReport):
        return (
            report.when in ["setup", "teardown", "collect"]
            and report.outcome == "failed"
        )