mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-08-07 02:58:21 +00:00
pre-commit scripts
This commit is contained in:
parent
9c26992390
commit
d4df61e47a
2 changed files with 98 additions and 0 deletions
|
@ -189,6 +189,20 @@ def get_distribution_template() -> DistributionTemplate:
|
||||||
uri="huggingface://datasets/llamastack/gpqa_0shot_cot?split=test&name=gpqa_main",
|
uri="huggingface://datasets/llamastack/gpqa_0shot_cot?split=test&name=gpqa_main",
|
||||||
),
|
),
|
||||||
),
|
),
|
||||||
|
DatasetInput(
|
||||||
|
dataset_id="gpqa_cot_diamond",
|
||||||
|
purpose=DatasetPurpose.eval_messages_answer,
|
||||||
|
source=URIDataSource(
|
||||||
|
uri="huggingface://datasets/llamastack/gpqa_0shot_cot?split=test&name=gpqa_diamond",
|
||||||
|
),
|
||||||
|
),
|
||||||
|
DatasetInput(
|
||||||
|
dataset_id="gpqa",
|
||||||
|
purpose=DatasetPurpose.eval_messages_answer,
|
||||||
|
source=URIDataSource(
|
||||||
|
uri="huggingface://datasets/llamastack/gpqa_0shot?split=test&name=gpqa_main",
|
||||||
|
),
|
||||||
|
),
|
||||||
DatasetInput(
|
DatasetInput(
|
||||||
dataset_id="math_500",
|
dataset_id="math_500",
|
||||||
purpose=DatasetPurpose.eval_messages_answer,
|
purpose=DatasetPurpose.eval_messages_answer,
|
||||||
|
@ -210,6 +224,34 @@ def get_distribution_template() -> DistributionTemplate:
|
||||||
uri="huggingface://datasets/llamastack/docvqa?split=val",
|
uri="huggingface://datasets/llamastack/docvqa?split=val",
|
||||||
),
|
),
|
||||||
),
|
),
|
||||||
|
DatasetInput(
|
||||||
|
dataset_id="MMMU",
|
||||||
|
purpose=DatasetPurpose.eval_messages_answer,
|
||||||
|
source=URIDataSource(
|
||||||
|
uri="huggingface://datasets/llamastack/mmmu_v3?split=validation",
|
||||||
|
),
|
||||||
|
),
|
||||||
|
DatasetInput(
|
||||||
|
dataset_id="MMMU_Pro_standard",
|
||||||
|
purpose=DatasetPurpose.eval_messages_answer,
|
||||||
|
source=URIDataSource(
|
||||||
|
uri="huggingface://datasets/llamastack/MMMU_Pro?name=standard%20(10%20options)&split=test",
|
||||||
|
),
|
||||||
|
),
|
||||||
|
DatasetInput(
|
||||||
|
dataset_id="MMMU_Pro_vision",
|
||||||
|
purpose=DatasetPurpose.eval_messages_answer,
|
||||||
|
source=URIDataSource(
|
||||||
|
uri="huggingface://datasets/llamastack/MMMU_Pro?name=vision&split=test",
|
||||||
|
),
|
||||||
|
),
|
||||||
|
DatasetInput(
|
||||||
|
dataset_id="ai2d",
|
||||||
|
purpose=DatasetPurpose.eval_messages_answer,
|
||||||
|
source=URIDataSource(
|
||||||
|
uri="huggingface://datasets/llamastack/ai2d?split=test",
|
||||||
|
),
|
||||||
|
),
|
||||||
]
|
]
|
||||||
|
|
||||||
default_benchmarks = [
|
default_benchmarks = [
|
||||||
|
@ -243,6 +285,16 @@ def get_distribution_template() -> DistributionTemplate:
|
||||||
dataset_id="docvqa",
|
dataset_id="docvqa",
|
||||||
scoring_functions=["basic::docvqa"],
|
scoring_functions=["basic::docvqa"],
|
||||||
),
|
),
|
||||||
|
BenchmarkInput(
|
||||||
|
benchmark_id="meta-reference-MMMU_Pro_standard",
|
||||||
|
dataset_id="MMMU_Pro_standard",
|
||||||
|
scoring_functions=["basic::regex_parser_multiple_choice_answer"],
|
||||||
|
),
|
||||||
|
BenchmarkInput(
|
||||||
|
benchmark_id="meta-reference-MMMU_Pro_vision",
|
||||||
|
dataset_id="MMMU_Pro_vision",
|
||||||
|
scoring_functions=["basic::regex_parser_multiple_choice_answer"],
|
||||||
|
),
|
||||||
]
|
]
|
||||||
return DistributionTemplate(
|
return DistributionTemplate(
|
||||||
name=name,
|
name=name,
|
||||||
|
|
|
@ -176,6 +176,18 @@ datasets:
|
||||||
uri: huggingface://datasets/llamastack/gpqa_0shot_cot?split=test&name=gpqa_main
|
uri: huggingface://datasets/llamastack/gpqa_0shot_cot?split=test&name=gpqa_main
|
||||||
metadata: {}
|
metadata: {}
|
||||||
dataset_id: gpqa_cot
|
dataset_id: gpqa_cot
|
||||||
|
- purpose: eval/messages-answer
|
||||||
|
source:
|
||||||
|
type: uri
|
||||||
|
uri: huggingface://datasets/llamastack/gpqa_0shot_cot?split=test&name=gpqa_diamond
|
||||||
|
metadata: {}
|
||||||
|
dataset_id: gpqa_cot_diamond
|
||||||
|
- purpose: eval/messages-answer
|
||||||
|
source:
|
||||||
|
type: uri
|
||||||
|
uri: huggingface://datasets/llamastack/gpqa_0shot?split=test&name=gpqa_main
|
||||||
|
metadata: {}
|
||||||
|
dataset_id: gpqa
|
||||||
- purpose: eval/messages-answer
|
- purpose: eval/messages-answer
|
||||||
source:
|
source:
|
||||||
type: uri
|
type: uri
|
||||||
|
@ -194,6 +206,30 @@ datasets:
|
||||||
uri: huggingface://datasets/llamastack/docvqa?split=val
|
uri: huggingface://datasets/llamastack/docvqa?split=val
|
||||||
metadata: {}
|
metadata: {}
|
||||||
dataset_id: docvqa
|
dataset_id: docvqa
|
||||||
|
- purpose: eval/messages-answer
|
||||||
|
source:
|
||||||
|
type: uri
|
||||||
|
uri: huggingface://datasets/llamastack/mmmu_v3?split=validation
|
||||||
|
metadata: {}
|
||||||
|
dataset_id: MMMU
|
||||||
|
- purpose: eval/messages-answer
|
||||||
|
source:
|
||||||
|
type: uri
|
||||||
|
uri: huggingface://datasets/llamastack/MMMU_Pro?name=standard%20(10%20options)&split=test
|
||||||
|
metadata: {}
|
||||||
|
dataset_id: MMMU_Pro_standard
|
||||||
|
- purpose: eval/messages-answer
|
||||||
|
source:
|
||||||
|
type: uri
|
||||||
|
uri: huggingface://datasets/llamastack/MMMU_Pro?name=vision&split=test
|
||||||
|
metadata: {}
|
||||||
|
dataset_id: MMMU_Pro_vision
|
||||||
|
- purpose: eval/messages-answer
|
||||||
|
source:
|
||||||
|
type: uri
|
||||||
|
uri: huggingface://datasets/llamastack/ai2d?split=test
|
||||||
|
metadata: {}
|
||||||
|
dataset_id: ai2d
|
||||||
scoring_fns: []
|
scoring_fns: []
|
||||||
benchmarks:
|
benchmarks:
|
||||||
- dataset_id: simpleqa
|
- dataset_id: simpleqa
|
||||||
|
@ -226,6 +262,16 @@ benchmarks:
|
||||||
- basic::docvqa
|
- basic::docvqa
|
||||||
metadata: {}
|
metadata: {}
|
||||||
benchmark_id: meta-reference-docvqa
|
benchmark_id: meta-reference-docvqa
|
||||||
|
- dataset_id: MMMU_Pro_standard
|
||||||
|
scoring_functions:
|
||||||
|
- basic::regex_parser_multiple_choice_answer
|
||||||
|
metadata: {}
|
||||||
|
benchmark_id: meta-reference-MMMU_Pro_standard
|
||||||
|
- dataset_id: MMMU_Pro_vision
|
||||||
|
scoring_functions:
|
||||||
|
- basic::regex_parser_multiple_choice_answer
|
||||||
|
metadata: {}
|
||||||
|
benchmark_id: meta-reference-MMMU_Pro_vision
|
||||||
tool_groups:
|
tool_groups:
|
||||||
- toolgroup_id: builtin::websearch
|
- toolgroup_id: builtin::websearch
|
||||||
provider_id: tavily-search
|
provider_id: tavily-search
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue