remove evals from top-level

2025-12-31 05:53:53 +00:00 · 2025-03-18 21:46:41 -07:00 · 2025-03-18 21:46:41 -07:00 · 8162f96d00
commit 8162f96d00
parent 37225347a3
80 changed files with 121 additions and 853 deletions
--- a/llama_stack/templates/open-benchmark/open_benchmark.py
+++ b/llama_stack/templates/open-benchmark/open_benchmark.py
@ -31,12 +31,14 @@ from llama_stack.providers.remote.vector_io.pgvector.config import (
 from llama_stack.providers.utils.inference.model_registry import ProviderModelEntry
 from llama_stack.templates.template import (
    DistributionTemplate,
-    RunConfigSettings,
    get_model_registry,
+    RunConfigSettings,
 )


-def get_inference_providers() -> Tuple[List[Provider], Dict[str, List[ProviderModelEntry]]]:
+def get_inference_providers() -> (
+    Tuple[List[Provider], Dict[str, List[ProviderModelEntry]]]
+):
    # in this template, we allow each API key to be optional
    providers = [
        (
@ -102,9 +104,7 @@ def get_distribution_template() -> DistributionTemplate:
        "safety": ["inline::llama-guard"],
        "agents": ["inline::meta-reference"],
        "telemetry": ["inline::meta-reference"],
-        "eval": ["inline::meta-reference"],
        "datasetio": ["remote::huggingface", "inline::localfs"],
-        "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
        "tool_runtime": [
            "remote::brave-search",
            "remote::tavily-search",
@ -119,7 +119,9 @@ def get_distribution_template() -> DistributionTemplate:
        Provider(
            provider_id="sqlite-vec",
            provider_type="inline::sqlite-vec",
-            config=SQLiteVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),
+            config=SQLiteVectorIOConfig.sample_run_config(
+                f"~/.llama/distributions/{name}"
+            ),
        ),
        Provider(
            provider_id="${env.ENABLE_CHROMADB+chromadb}",
@ -210,33 +212,36 @@ def get_distribution_template() -> DistributionTemplate:
        ),
    ]

-    default_benchmarks = [
-        BenchmarkInput(
-            benchmark_id="meta-reference-simpleqa",
-            dataset_id="simpleqa",
-            grader_ids=["llm-as-judge::405b-simpleqa"],
-        ),
-        BenchmarkInput(
-            benchmark_id="meta-reference-mmlu-cot",
-            dataset_id="mmlu_cot",
-            grader_ids=["basic::regex_parser_multiple_choice_answer"],
-        ),
-        BenchmarkInput(
-            benchmark_id="meta-reference-gpqa-cot",
-            dataset_id="gpqa_cot",
-            grader_ids=["basic::regex_parser_multiple_choice_answer"],
-        ),
-        BenchmarkInput(
-            benchmark_id="meta-reference-math-500",
-            dataset_id="math_500",
-            grader_ids=["basic::regex_parser_math_response"],
-        ),
-        BenchmarkInput(
-            benchmark_id="meta-reference-bfcl",
-            dataset_id="bfcl",
-            grader_ids=["basic::bfcl"],
-        ),
-    ]
+    # TODO(xiyan): restore the default benchmarks below as registerable resources
+    default_benchmarks = []
+    # default_benchmarks = [
+    #     BenchmarkInput(
+    #         benchmark_id="meta-reference-simpleqa",
+    #         dataset_id="simpleqa",
+    #         grader_ids=["llm-as-judge::405b-simpleqa"],
+    #     ),
+    #     BenchmarkInput(
+    #         benchmark_id="meta-reference-mmlu-cot",
+    #         dataset_id="mmlu_cot",
+    #         grader_ids=["basic::regex_parser_multiple_choice_answer"],
+    #     ),
+    #     BenchmarkInput(
+    #         benchmark_id="meta-reference-gpqa-cot",
+    #         dataset_id="gpqa_cot",
+    #         grader_ids=["basic::regex_parser_multiple_choice_answer"],
+    #     ),
+    #     BenchmarkInput(
+    #         benchmark_id="meta-reference-math-500",
+    #         dataset_id="math_500",
+    #         grader_ids=["basic::regex_parser_math_response"],
+    #     ),
+    #     BenchmarkInput(
+    #         benchmark_id="meta-reference-bfcl",
+    #         dataset_id="bfcl",
+    #         grader_ids=["basic::bfcl"],
+    #     ),
+    # ]
+
    return DistributionTemplate(
        name=name,
        distro_type="self_hosted",