From ba453c3487aa1473f3a45cbab41af6065844f715 Mon Sep 17 00:00:00 2001
From: Sixian Yi
Date: Tue, 28 Jan 2025 04:58:12 -0800
Subject: [PATCH] Report generation minor fixes (#884)

# What does this PR do?

This PR fixes report generation:
1) Do not initialize a new client in report.py; reuse the `llama_stack_client` pytest fixture instead (see the sketch after the checklist below).
2) Add a "Provider" column to the "Vector IO" and "Agents" sections.
3) Add log_probs rows to the "Inference" section.

## Test Plan

See the regenerated reports for the fireworks and together distributions included in this diff.

## Before submitting

- [ ] This PR fixes a typo or improves the docs (you can dismiss the other checks if that's the case).
- [ ] Ran pre-commit to handle lint / formatting issues.
- [ ] Read the [contributor guideline](https://github.com/meta-llama/llama-stack/blob/main/CONTRIBUTING.md), Pull Request section?
- [ ] Updated relevant documentation.
- [ ] Wrote necessary unit or integration tests.
---
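The core of change 1) is a lazy fixture grab inside the `pytest_runtest_makereport` hook: the plugin no longer builds its own `LlamaStackClient`, it borrows the one pytest already injected into the running test. A condensed sketch of the pattern — the stripped-down `Report` class here is illustrative; the full plugin lives in `tests/client-sdk/report.py` below:

```python
from collections import defaultdict

import pytest


class Report:
    def __init__(self):
        self.client = None  # filled in lazily by the first test that runs
        self.test_name_to_nodeid = defaultdict(list)

    @pytest.hookimpl(tryfirst=True)
    def pytest_runtest_makereport(self, item, call):
        func_name = getattr(item, "originalname", item.name)
        self.test_name_to_nodeid[func_name].append(item.nodeid)
        # item.funcargs maps fixture names to the values injected into this
        # test, so the report can reuse the test's own client instance
        if self.client is None and "llama_stack_client" in item.funcargs:
            self.client = item.funcargs["llama_stack_client"]
```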
 llama_stack/templates/fireworks/report.md | 18 ++++----
 llama_stack/templates/together/report.md  | 18 ++++----
 tests/client-sdk/metadata.py              |  4 ++
 tests/client-sdk/report.py                | 53 +++++++++++------------
 4 files changed, 50 insertions(+), 43 deletions(-)

diff --git a/llama_stack/templates/fireworks/report.md b/llama_stack/templates/fireworks/report.md
index 00e8f6a55..2c1ccc943 100644
--- a/llama_stack/templates/fireworks/report.md
+++ b/llama_stack/templates/fireworks/report.md
@@ -27,18 +27,20 @@
 | Llama-3.1-8B-Instruct | /chat_completion | non_streaming | test_text_chat_completion_non_streaming | ✅ |
 | Llama-3.1-8B-Instruct | /chat_completion | tool_calling | test_text_chat_completion_with_tool_calling_and_streaming | ✅ |
 | Llama-3.1-8B-Instruct | /chat_completion | tool_calling | test_text_chat_completion_with_tool_calling_and_non_streaming | ✅ |
+| Llama-3.2-11B-Vision-Instruct | /chat_completion | log_probs | test_completion_log_probs_non_streaming | ✅ |
+| Llama-3.2-11B-Vision-Instruct | /chat_completion | log_probs | test_completion_log_probs_streaming | ✅ |
 | Llama-3.1-8B-Instruct | /completion | streaming | test_text_completion_streaming | ✅ |
 | Llama-3.1-8B-Instruct | /completion | non_streaming | test_text_completion_non_streaming | ✅ |
 | Llama-3.1-8B-Instruct | /completion | structured_output | test_text_completion_structured_output | ✅ |

 ## Vector IO
-| API | Capability | Test | Status |
-|:-----|:-----|:-----|:-----|
-| /retrieve | | test_vector_db_retrieve | ✅ |
+| Provider | API | Capability | Test | Status |
+|:-----|:-----|:-----|:-----|:-----|
+| inline::faiss | /retrieve | | test_vector_db_retrieve | ✅ |

 ## Agents
-| API | Capability | Test | Status |
-|:-----|:-----|:-----|:-----|
-| /create_agent_turn | rag | test_rag_agent | ✅ |
-| /create_agent_turn | custom_tool | test_custom_tool | ✅ |
-| /create_agent_turn | code_execution | test_code_interpreter_for_attachments | ✅ |
+| Provider | API | Capability | Test | Status |
+|:-----|:-----|:-----|:-----|:-----|
+| inline::meta-reference | /create_agent_turn | rag | test_rag_agent | ✅ |
+| inline::meta-reference | /create_agent_turn | custom_tool | test_custom_tool | ✅ |
+| inline::meta-reference | /create_agent_turn | code_execution | test_code_interpreter_for_attachments | ✅ |
diff --git a/llama_stack/templates/together/report.md b/llama_stack/templates/together/report.md
index b5339c640..e125d5665 100644
--- a/llama_stack/templates/together/report.md
+++ b/llama_stack/templates/together/report.md
@@ -27,18 +27,20 @@
 | Llama-3.1-8B-Instruct | /chat_completion | non_streaming | test_text_chat_completion_non_streaming | ✅ |
 | Llama-3.1-8B-Instruct | /chat_completion | tool_calling | test_text_chat_completion_with_tool_calling_and_streaming | ✅ |
 | Llama-3.1-8B-Instruct | /chat_completion | tool_calling | test_text_chat_completion_with_tool_calling_and_non_streaming | ✅ |
+| Llama-3.2-11B-Vision-Instruct | /chat_completion | log_probs | test_completion_log_probs_non_streaming | ✅ |
+| Llama-3.2-11B-Vision-Instruct | /chat_completion | log_probs | test_completion_log_probs_streaming | ✅ |
 | Llama-3.1-8B-Instruct | /completion | streaming | test_text_completion_streaming | ✅ |
 | Llama-3.1-8B-Instruct | /completion | non_streaming | test_text_completion_non_streaming | ✅ |
 | Llama-3.1-8B-Instruct | /completion | structured_output | test_text_completion_structured_output | ✅ |

 ## Vector IO
-| API | Capability | Test | Status |
-|:-----|:-----|:-----|:-----|
-| /retrieve | | test_vector_db_retrieve | ✅ |
+| Provider | API | Capability | Test | Status |
+|:-----|:-----|:-----|:-----|:-----|
+| inline::faiss | /retrieve | | test_vector_db_retrieve | ✅ |

 ## Agents
-| API | Capability | Test | Status |
-|:-----|:-----|:-----|:-----|
-| /create_agent_turn | rag | test_rag_agent | ✅ |
-| /create_agent_turn | custom_tool | test_custom_tool | ✅ |
-| /create_agent_turn | code_execution | test_code_interpreter_for_attachments | ✅ |
+| Provider | API | Capability | Test | Status |
+|:-----|:-----|:-----|:-----|:-----|
+| inline::meta-reference | /create_agent_turn | rag | test_rag_agent | ✅ |
+| inline::meta-reference | /create_agent_turn | custom_tool | test_custom_tool | ✅ |
+| inline::meta-reference | /create_agent_turn | code_execution | test_code_interpreter_for_attachments | ✅ |
diff --git a/tests/client-sdk/metadata.py b/tests/client-sdk/metadata.py
index badd7edff..55663c046 100644
--- a/tests/client-sdk/metadata.py
+++ b/tests/client-sdk/metadata.py
@@ -20,6 +20,10 @@ INFERENCE_API_CAPA_TEST_MAP = {
             "test_text_chat_completion_with_tool_calling_and_streaming",
             "test_text_chat_completion_with_tool_calling_and_non_streaming",
         ],
+        "log_probs": [
+            "test_completion_log_probs_non_streaming",
+            "test_completion_log_probs_streaming",
+        ],
     },
     "completion": {
         "streaming": ["test_text_completion_streaming"],
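For change 3), the new `log_probs` entries above map report rows to the two logprobs tests. What those tests exercise, roughly — a hedged sketch assuming the client-sdk completion API shape; the base URL, model, and prompt here are illustrative, and the exact test bodies live in the client-sdk inference tests rather than in this patch:

```python
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:5001")  # illustrative URL

# Request per-token log-probabilities alongside a short completion; the
# report marks the log_probs capability ✅ when assertions like this pass.
response = client.inference.completion(
    model_id="meta-llama/Llama-3.1-8B-Instruct",  # any model the distro serves
    content="Complete the sentence: the capital of France is",
    stream=False,
    sampling_params={"max_tokens": 5},
    logprobs={"top_k": 1},
)
assert response.logprobs, "logprobs should be non-empty when requested"
```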
diff --git a/tests/client-sdk/report.py b/tests/client-sdk/report.py
index f8f224a37..f39ea02fa 100644
--- a/tests/client-sdk/report.py
+++ b/tests/client-sdk/report.py
@@ -23,18 +23,16 @@ from llama_models.sku_list import (
     safety_models,
 )

-from llama_stack.distribution.library_client import LlamaStackAsLibraryClient
 from llama_stack.providers.datatypes import Api
 from llama_stack.providers.tests.env import get_env_or_fail

-from llama_stack_client import LlamaStackClient
 from metadata import API_MAPS

 from pytest import CollectReport
 from termcolor import cprint


-def featured_models_repo_names():
+def featured_models():
     models = [
         *llama3_instruct_models(),
         *llama3_1_instruct_models(),
@@ -42,7 +40,7 @@ def featured_models_repo_names():
         *llama3_3_instruct_models(),
         *safety_models(),
     ]
-    return [model.huggingface_repo for model in models if not model.variant]
+    return {model.huggingface_repo: model for model in models if not model.variant}


 SUPPORTED_MODELS = {
@@ -99,25 +97,15 @@ class Report:
             if not config_path.exists():
                 raise ValueError(f"Config file {config_path} does not exist")
             self.output_path = Path(config_path.parent / "report.md")
-            self.client = LlamaStackAsLibraryClient(
-                config_path_or_template_name,
-                provider_data=None,
-                skip_logger_removal=True,
-            )
-            self.client.initialize()
-            self.image_name = self.client.async_client.config.image_name
+            self.distro_name = None
         elif os.environ.get("LLAMA_STACK_BASE_URL"):
             url = get_env_or_fail("LLAMA_STACK_BASE_URL")
-            self.image_name = urlparse(url).netloc
+            self.distro_name = urlparse(url).netloc
             if report_path is None:
                 raise ValueError(
                     "Report path must be provided when LLAMA_STACK_BASE_URL is set"
                 )
             self.output_path = Path(report_path)
-            self.client = LlamaStackClient(
-                base_url=url,
-                provider_data=None,
-            )
         else:
             raise ValueError("LLAMA_STACK_CONFIG or LLAMA_STACK_BASE_URL must be set")
@@ -127,6 +115,7 @@
         self.test_name_to_nodeid = defaultdict(list)
         self.vision_model_id = None
         self.text_model_id = None
+        self.client = None

     @pytest.hookimpl(tryfirst=True)
     def pytest_runtest_logreport(self, report):
@@ -140,17 +129,17 @@
     def pytest_sessionfinish(self, session):
         report = []
-        report.append(f"# Report for {self.image_name} distribution")
+        report.append(f"# Report for {self.distro_name} distribution")
         report.append("\n## Supported Models")
-        header = f"| Model Descriptor | {self.image_name} |"
+        header = f"| Model Descriptor | {self.distro_name} |"
         dividor = "|:---|:---|"
         report.append(header)
         report.append(dividor)

         rows = []
-        if self.image_name in SUPPORTED_MODELS:
+        if self.distro_name in SUPPORTED_MODELS:
             for model in all_registered_models():
                 if (
                     "Instruct" not in model.core_model_id.value
@@ -158,16 +147,16 @@
                 ) or (model.variant):
                     continue
                 row = f"| {model.core_model_id.value} |"
-                if model.core_model_id.value in SUPPORTED_MODELS[self.image_name]:
+                if model.core_model_id.value in SUPPORTED_MODELS[self.distro_name]:
                     row += " ✅ |"
                 else:
                     row += " ❌ |"
                 rows.append(row)
         else:
             supported_models = {m.identifier for m in self.client.models.list()}
-            for model in featured_models_repo_names():
-                row = f"| {model} |"
-                if model in supported_models:
+            for hf_name, model in featured_models().items():
+                row = f"| {model.core_model_id.value} |"
+                if hf_name in supported_models:
                     row += " ✅ |"
                 else:
                     row += " ❌ |"
@@ -200,20 +189,23 @@
         report.extend(test_table)

         name_map = {Api.vector_io: "Vector IO", Api.agents: "Agents"}
+        providers = self.client.providers.list()
         for api_group in [Api.vector_io, Api.agents]:
             api_capitalized = name_map[api_group]
             report.append(f"\n## {api_capitalized}")
             test_table = [
-                "| API | Capability | Test | Status |",
-                "|:-----|:-----|:-----|:-----|",
+                "| Provider | API | Capability | Test | Status |",
+                "|:-----|:-----|:-----|:-----|:-----|",
             ]
+            provider = [p for p in providers if p.api == str(api_group.name)]
+            provider_str = provider[0].provider_type if provider else ""
             for api, capa_map in API_MAPS[api_group].items():
                 for capa, tests in capa_map.items():
                     for test_name in tests:
                         test_nodeids = self.test_name_to_nodeid[test_name]
                         assert len(test_nodeids) > 0
                         test_table.append(
-                            f"| /{api} | {capa} | {test_name} | {self._print_result_icon(self.test_data[test_nodeids[0]])} |"
+                            f"| {provider_str} | /{api} | {capa} | {test_name} | {self._print_result_icon(self.test_data[test_nodeids[0]])} |"
                         )
             report.extend(test_table)
@@ -224,6 +216,9 @@
     def pytest_runtest_makereport(self, item, call):
         func_name = getattr(item, "originalname", item.name)
+        self.test_name_to_nodeid[func_name].append(item.nodeid)
+
+        # Get values from fixtures for report output
         if "text_model_id" in item.funcargs:
             text_model = item.funcargs["text_model_id"].split("/")[1]
             self.text_model_id = self.text_model_id or text_model
@@ -231,7 +226,11 @@
         if "vision_model_id" in item.funcargs:
             vision_model = item.funcargs["vision_model_id"].split("/")[1]
             self.vision_model_id = self.vision_model_id or vision_model

-        self.test_name_to_nodeid[func_name].append(item.nodeid)
+        if self.client is None and "llama_stack_client" in item.funcargs:
+            self.client = item.funcargs["llama_stack_client"]
+            self.distro_name = (
+                self.distro_name or self.client.async_client.config.image_name
+            )

     def _print_result_icon(self, result):
         if result == "Passed":
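And for change 2), the "Provider" column is filled by matching each API group against `client.providers.list()`. Here is the lookup from the `pytest_sessionfinish` hunk above, condensed into a standalone helper — `provider_for` is an illustrative name, not part of the patch, and `client` is the llama_stack_client instance grabbed from the test fixture:

```python
from llama_stack.providers.datatypes import Api


def provider_for(client, api_group: Api) -> str:
    """Return the provider_type serving an API group, or "" if none is registered."""
    providers = client.providers.list()
    matching = [p for p in providers if p.api == str(api_group.name)]
    return matching[0].provider_type if matching else ""


# On the distributions reported above this yields, e.g.:
#   provider_for(client, Api.vector_io)  ->  "inline::faiss"
#   provider_for(client, Api.agents)     ->  "inline::meta-reference"
```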