pre-commit scripts

This commit is contained in:
Yang Yang 2025-03-16 23:55:49 -07:00
parent 9c26992390
commit d4df61e47a
2 changed files with 98 additions and 0 deletions

View file

@@ -189,6 +189,20 @@ def get_distribution_template() -> DistributionTemplate:
uri="huggingface://datasets/llamastack/gpqa_0shot_cot?split=test&name=gpqa_main",
),
),
DatasetInput(
dataset_id="gpqa_cot_diamond",
purpose=DatasetPurpose.eval_messages_answer,
source=URIDataSource(
uri="huggingface://datasets/llamastack/gpqa_0shot_cot?split=test&name=gpqa_diamond",
),
),
DatasetInput(
dataset_id="gpqa",
purpose=DatasetPurpose.eval_messages_answer,
source=URIDataSource(
uri="huggingface://datasets/llamastack/gpqa_0shot?split=test&name=gpqa_main",
),
),
DatasetInput(
dataset_id="math_500",
purpose=DatasetPurpose.eval_messages_answer,
@@ -210,6 +224,34 @@ def get_distribution_template() -> DistributionTemplate:
uri="huggingface://datasets/llamastack/docvqa?split=val",
),
),
DatasetInput(
dataset_id="MMMU",
purpose=DatasetPurpose.eval_messages_answer,
source=URIDataSource(
uri="huggingface://datasets/llamastack/mmmu_v3?split=validation",
),
),
DatasetInput(
dataset_id="MMMU_Pro_standard",
purpose=DatasetPurpose.eval_messages_answer,
source=URIDataSource(
uri="huggingface://datasets/llamastack/MMMU_Pro?name=standard%20(10%20options)&split=test",
),
),
DatasetInput(
dataset_id="MMMU_Pro_vision",
purpose=DatasetPurpose.eval_messages_answer,
source=URIDataSource(
uri="huggingface://datasets/llamastack/MMMU_Pro?name=vision&split=test",
),
),
DatasetInput(
dataset_id="ai2d",
purpose=DatasetPurpose.eval_messages_answer,
source=URIDataSource(
uri="huggingface://datasets/llamastack/ai2d?split=test",
),
),
]
default_benchmarks = [
@@ -243,6 +285,16 @@ def get_distribution_template() -> DistributionTemplate:
dataset_id="docvqa",
scoring_functions=["basic::docvqa"],
),
BenchmarkInput(
benchmark_id="meta-reference-MMMU_Pro_standard",
dataset_id="MMMU_Pro_standard",
scoring_functions=["basic::regex_parser_multiple_choice_answer"],
),
BenchmarkInput(
benchmark_id="meta-reference-MMMU_Pro_vision",
dataset_id="MMMU_Pro_vision",
scoring_functions=["basic::regex_parser_multiple_choice_answer"],
),
]
return DistributionTemplate(
name=name,

View file

@@ -176,6 +176,18 @@ datasets:
uri: huggingface://datasets/llamastack/gpqa_0shot_cot?split=test&name=gpqa_main
metadata: {}
dataset_id: gpqa_cot
- purpose: eval/messages-answer
source:
type: uri
uri: huggingface://datasets/llamastack/gpqa_0shot_cot?split=test&name=gpqa_diamond
metadata: {}
dataset_id: gpqa_cot_diamond
- purpose: eval/messages-answer
source:
type: uri
uri: huggingface://datasets/llamastack/gpqa_0shot?split=test&name=gpqa_main
metadata: {}
dataset_id: gpqa
- purpose: eval/messages-answer
source:
type: uri
@@ -194,6 +206,30 @@ datasets:
uri: huggingface://datasets/llamastack/docvqa?split=val
metadata: {}
dataset_id: docvqa
- purpose: eval/messages-answer
source:
type: uri
uri: huggingface://datasets/llamastack/mmmu_v3?split=validation
metadata: {}
dataset_id: MMMU
- purpose: eval/messages-answer
source:
type: uri
uri: huggingface://datasets/llamastack/MMMU_Pro?name=standard%20(10%20options)&split=test
metadata: {}
dataset_id: MMMU_Pro_standard
- purpose: eval/messages-answer
source:
type: uri
uri: huggingface://datasets/llamastack/MMMU_Pro?name=vision&split=test
metadata: {}
dataset_id: MMMU_Pro_vision
- purpose: eval/messages-answer
source:
type: uri
uri: huggingface://datasets/llamastack/ai2d?split=test
metadata: {}
dataset_id: ai2d
scoring_fns: []
benchmarks:
- dataset_id: simpleqa
@@ -226,6 +262,16 @@ benchmarks:
- basic::docvqa
metadata: {}
benchmark_id: meta-reference-docvqa
- dataset_id: MMMU_Pro_standard
scoring_functions:
- basic::regex_parser_multiple_choice_answer
metadata: {}
benchmark_id: meta-reference-MMMU_Pro_standard
- dataset_id: MMMU_Pro_vision
scoring_functions:
- basic::regex_parser_multiple_choice_answer
metadata: {}
benchmark_id: meta-reference-MMMU_Pro_vision
tool_groups:
- toolgroup_id: builtin::websearch
provider_id: tavily-search