Merge remote-tracking branch 'origin/main' into if_eval

This commit is contained in:
Botao Chen 2025-03-19 15:17:05 -07:00
commit 507e4e17e6
8 changed files with 289 additions and 13 deletions

View file

@@ -210,6 +210,13 @@ def get_distribution_template() -> DistributionTemplate:
uri="huggingface://datasets/llamastack/IfEval?split=train",
),
),
DatasetInput(
dataset_id="docvqa",
purpose=DatasetPurpose.eval_messages_answer,
source=URIDataSource(
uri="huggingface://datasets/llamastack/docvqa?split=val",
)
)
]
default_benchmarks = [
@@ -243,6 +250,11 @@ def get_distribution_template() -> DistributionTemplate:
dataset_id="ifeval",
scoring_functions=["basic::ifeval"],
),
BenchmarkInput(
benchmark_id="meta-reference-docvqa",
dataset_id="docvqa",
scoring_functions=["basic::docvqa"],
)
]
return DistributionTemplate(
name=name,

View file

@@ -188,12 +188,6 @@ datasets:
uri: huggingface://datasets/llamastack/bfcl_v3?split=train
metadata: {}
dataset_id: bfcl
- purpose: eval/messages-answer
source:
type: uri
uri: huggingface://datasets/llamastack/IfEval?split=train
metadata: {}
dataset_id: ifeval
scoring_fns: []
benchmarks:
- dataset_id: simpleqa
@@ -221,11 +215,6 @@ benchmarks:
- basic::bfcl
metadata: {}
benchmark_id: meta-reference-bfcl
- dataset_id: ifeval
scoring_functions:
- basic::ifeval
metadata: {}
benchmark_id: meta-reference-ifeval
tool_groups:
- toolgroup_id: builtin::websearch
provider_id: tavily-search