mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-08-07 11:08:20 +00:00
regen pre-commit hooks
This commit is contained in:
parent
d4df61e47a
commit
2a5374dbe3
2 changed files with 40 additions and 0 deletions
|
@@ -270,6 +270,16 @@ def get_distribution_template() -> DistributionTemplate:
|
||||||
dataset_id="gpqa_cot",
|
dataset_id="gpqa_cot",
|
||||||
scoring_functions=["basic::regex_parser_multiple_choice_answer"],
|
scoring_functions=["basic::regex_parser_multiple_choice_answer"],
|
||||||
),
|
),
|
||||||
|
BenchmarkInput(
|
||||||
|
benchmark_id="meta-reference-gpqa-cot-diamond",
|
||||||
|
dataset_id="gpqa_cot_diamond",
|
||||||
|
scoring_functions=["basic::regex_parser_multiple_choice_answer"],
|
||||||
|
),
|
||||||
|
BenchmarkInput(
|
||||||
|
benchmark_id="meta-reference-gpqa",
|
||||||
|
dataset_id="gpqa",
|
||||||
|
scoring_functions=["basic::regex_parser_multiple_choice_answer"],
|
||||||
|
),
|
||||||
BenchmarkInput(
|
BenchmarkInput(
|
||||||
benchmark_id="meta-reference-math-500",
|
benchmark_id="meta-reference-math-500",
|
||||||
dataset_id="math_500",
|
dataset_id="math_500",
|
||||||
|
@@ -285,6 +295,11 @@ def get_distribution_template() -> DistributionTemplate:
|
||||||
dataset_id="docvqa",
|
dataset_id="docvqa",
|
||||||
scoring_functions=["basic::docvqa"],
|
scoring_functions=["basic::docvqa"],
|
||||||
),
|
),
|
||||||
|
BenchmarkInput(
|
||||||
|
benchmark_id="meta-reference-MMMU",
|
||||||
|
dataset_id="MMMU",
|
||||||
|
scoring_functions=["basic::regex_parser_multiple_choice_answer"],
|
||||||
|
),
|
||||||
BenchmarkInput(
|
BenchmarkInput(
|
||||||
benchmark_id="meta-reference-MMMU_Pro_standard",
|
benchmark_id="meta-reference-MMMU_Pro_standard",
|
||||||
dataset_id="MMMU_Pro_standard",
|
dataset_id="MMMU_Pro_standard",
|
||||||
|
@@ -295,6 +310,11 @@ def get_distribution_template() -> DistributionTemplate:
|
||||||
dataset_id="MMMU_Pro_vision",
|
dataset_id="MMMU_Pro_vision",
|
||||||
scoring_functions=["basic::regex_parser_multiple_choice_answer"],
|
scoring_functions=["basic::regex_parser_multiple_choice_answer"],
|
||||||
),
|
),
|
||||||
|
BenchmarkInput(
|
||||||
|
benchmark_id="meta-reference-ai2d",
|
||||||
|
dataset_id="ai2d",
|
||||||
|
scoring_functions=["basic::regex_parser_multiple_choice_answer"],
|
||||||
|
),
|
||||||
]
|
]
|
||||||
return DistributionTemplate(
|
return DistributionTemplate(
|
||||||
name=name,
|
name=name,
|
||||||
|
|
|
@@ -247,6 +247,16 @@ benchmarks:
|
||||||
- basic::regex_parser_multiple_choice_answer
|
- basic::regex_parser_multiple_choice_answer
|
||||||
metadata: {}
|
metadata: {}
|
||||||
benchmark_id: meta-reference-gpqa-cot
|
benchmark_id: meta-reference-gpqa-cot
|
||||||
|
- dataset_id: gpqa_cot_diamond
|
||||||
|
scoring_functions:
|
||||||
|
- basic::regex_parser_multiple_choice_answer
|
||||||
|
metadata: {}
|
||||||
|
benchmark_id: meta-reference-gpqa-cot-diamond
|
||||||
|
- dataset_id: gpqa
|
||||||
|
scoring_functions:
|
||||||
|
- basic::regex_parser_multiple_choice_answer
|
||||||
|
metadata: {}
|
||||||
|
benchmark_id: meta-reference-gpqa
|
||||||
- dataset_id: math_500
|
- dataset_id: math_500
|
||||||
scoring_functions:
|
scoring_functions:
|
||||||
- basic::regex_parser_math_response
|
- basic::regex_parser_math_response
|
||||||
|
@@ -262,6 +272,11 @@ benchmarks:
|
||||||
- basic::docvqa
|
- basic::docvqa
|
||||||
metadata: {}
|
metadata: {}
|
||||||
benchmark_id: meta-reference-docvqa
|
benchmark_id: meta-reference-docvqa
|
||||||
|
- dataset_id: MMMU
|
||||||
|
scoring_functions:
|
||||||
|
- basic::regex_parser_multiple_choice_answer
|
||||||
|
metadata: {}
|
||||||
|
benchmark_id: meta-reference-MMMU
|
||||||
- dataset_id: MMMU_Pro_standard
|
- dataset_id: MMMU_Pro_standard
|
||||||
scoring_functions:
|
scoring_functions:
|
||||||
- basic::regex_parser_multiple_choice_answer
|
- basic::regex_parser_multiple_choice_answer
|
||||||
|
@@ -272,6 +287,11 @@ benchmarks:
|
||||||
- basic::regex_parser_multiple_choice_answer
|
- basic::regex_parser_multiple_choice_answer
|
||||||
metadata: {}
|
metadata: {}
|
||||||
benchmark_id: meta-reference-MMMU_Pro_vision
|
benchmark_id: meta-reference-MMMU_Pro_vision
|
||||||
|
- dataset_id: ai2d
|
||||||
|
scoring_functions:
|
||||||
|
- basic::regex_parser_multiple_choice_answer
|
||||||
|
metadata: {}
|
||||||
|
benchmark_id: meta-reference-ai2d
|
||||||
tool_groups:
|
tool_groups:
|
||||||
- toolgroup_id: builtin::websearch
|
- toolgroup_id: builtin::websearch
|
||||||
provider_id: tavily-search
|
provider_id: tavily-search
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue