regen pre-commit hooks

This commit is contained in:
Yang Yang 2025-03-19 15:04:35 -07:00
parent d4df61e47a
commit 2a5374dbe3
2 changed files with 40 additions and 0 deletions

View file

@ -270,6 +270,16 @@ def get_distribution_template() -> DistributionTemplate:
dataset_id="gpqa_cot",
scoring_functions=["basic::regex_parser_multiple_choice_answer"],
),
BenchmarkInput(
benchmark_id="meta-reference-gpqa-cot-diamond",
dataset_id="gpqa_cot_diamond",
scoring_functions=["basic::regex_parser_multiple_choice_answer"],
),
BenchmarkInput(
benchmark_id="meta-reference-gpqa",
dataset_id="gpqa",
scoring_functions=["basic::regex_parser_multiple_choice_answer"],
),
BenchmarkInput(
benchmark_id="meta-reference-math-500",
dataset_id="math_500",
@ -285,6 +295,11 @@ def get_distribution_template() -> DistributionTemplate:
dataset_id="docvqa",
scoring_functions=["basic::docvqa"],
),
BenchmarkInput(
benchmark_id="meta-reference-MMMU",
dataset_id="MMMU",
scoring_functions=["basic::regex_parser_multiple_choice_answer"],
),
BenchmarkInput(
benchmark_id="meta-reference-MMMU_Pro_standard",
dataset_id="MMMU_Pro_standard",
@ -295,6 +310,11 @@ def get_distribution_template() -> DistributionTemplate:
dataset_id="MMMU_Pro_vision",
scoring_functions=["basic::regex_parser_multiple_choice_answer"],
),
BenchmarkInput(
benchmark_id="meta-reference-ai2d",
dataset_id="ai2d",
scoring_functions=["basic::regex_parser_multiple_choice_answer"],
),
]
return DistributionTemplate(
name=name,

View file

@ -247,6 +247,16 @@ benchmarks:
- basic::regex_parser_multiple_choice_answer
metadata: {}
benchmark_id: meta-reference-gpqa-cot
- dataset_id: gpqa_cot_diamond
scoring_functions:
- basic::regex_parser_multiple_choice_answer
metadata: {}
benchmark_id: meta-reference-gpqa-cot-diamond
- dataset_id: gpqa
scoring_functions:
- basic::regex_parser_multiple_choice_answer
metadata: {}
benchmark_id: meta-reference-gpqa
- dataset_id: math_500
scoring_functions:
- basic::regex_parser_math_response
@ -262,6 +272,11 @@ benchmarks:
- basic::docvqa
metadata: {}
benchmark_id: meta-reference-docvqa
- dataset_id: MMMU
scoring_functions:
- basic::regex_parser_multiple_choice_answer
metadata: {}
benchmark_id: meta-reference-MMMU
- dataset_id: MMMU_Pro_standard
scoring_functions:
- basic::regex_parser_multiple_choice_answer
@ -272,6 +287,11 @@ benchmarks:
- basic::regex_parser_multiple_choice_answer
metadata: {}
benchmark_id: meta-reference-MMMU_Pro_vision
- dataset_id: ai2d
scoring_functions:
- basic::regex_parser_multiple_choice_answer
metadata: {}
benchmark_id: meta-reference-ai2d
tool_groups:
- toolgroup_id: builtin::websearch
provider_id: tavily-search