feat(eval api): (2.1/n) fix resolver for benchmark routing table + fix precommit (#1691)

# What does this PR do?

- fixes the routing table so that `llama stack run` works
- fixes pre-commit
- one of many fixes to address the implementation

[//]: # (If resolving an issue, uncomment and update the line below)
[//]: # (Closes #[issue-number])

## Test Plan

```
llama stack run
```

[//]: # (## Documentation)
2025-03-18 21:09:49 -07:00 · 2025-03-18 21:09:49 -07:00 · 08c0c5505e
commit 08c0c5505e
parent bf135f38b1
4 changed files with 31 additions and 26 deletions
--- a/llama_stack/templates/open-benchmark/open_benchmark.py
+++ b/llama_stack/templates/open-benchmark/open_benchmark.py
@@ -214,27 +214,27 @@ def get_distribution_template() -> DistributionTemplate:
        BenchmarkInput(
            benchmark_id="meta-reference-simpleqa",
            dataset_id="simpleqa",
-            scoring_functions=["llm-as-judge::405b-simpleqa"],
+            grader_ids=["llm-as-judge::405b-simpleqa"],
        ),
        BenchmarkInput(
            benchmark_id="meta-reference-mmlu-cot",
            dataset_id="mmlu_cot",
-            scoring_functions=["basic::regex_parser_multiple_choice_answer"],
+            grader_ids=["basic::regex_parser_multiple_choice_answer"],
        ),
        BenchmarkInput(
            benchmark_id="meta-reference-gpqa-cot",
            dataset_id="gpqa_cot",
-            scoring_functions=["basic::regex_parser_multiple_choice_answer"],
+            grader_ids=["basic::regex_parser_multiple_choice_answer"],
        ),
        BenchmarkInput(
            benchmark_id="meta-reference-math-500",
            dataset_id="math_500",
-            scoring_functions=["basic::regex_parser_math_response"],
+            grader_ids=["basic::regex_parser_math_response"],
        ),
        BenchmarkInput(
            benchmark_id="meta-reference-bfcl",
            dataset_id="bfcl",
-            scoring_functions=["basic::bfcl"],
+            grader_ids=["basic::bfcl"],
        ),
    ]
    return DistributionTemplate(