This commit is contained in:
Xi Yan 2025-03-23 15:48:14 -07:00
commit a54d757ade
197 changed files with 9392 additions and 3089 deletions

View file

@ -167,7 +167,6 @@ def get_distribution_template() -> DistributionTemplate:
default_datasets = [
DatasetInput(
dataset_id="simpleqa",
provider_id="huggingface",
purpose=DatasetPurpose.eval_messages_answer,
source=URIDataSource(
uri="huggingface://datasets/llamastack/simpleqa?split=train",
@ -175,7 +174,6 @@ def get_distribution_template() -> DistributionTemplate:
),
DatasetInput(
dataset_id="mmlu_cot",
provider_id="huggingface",
purpose=DatasetPurpose.eval_messages_answer,
source=URIDataSource(
uri="huggingface://datasets/llamastack/mmlu_cot?split=test&name=all",
@ -183,7 +181,6 @@ def get_distribution_template() -> DistributionTemplate:
),
DatasetInput(
dataset_id="gpqa_cot",
provider_id="huggingface",
purpose=DatasetPurpose.eval_messages_answer,
source=URIDataSource(
uri="huggingface://datasets/llamastack/gpqa_0shot_cot?split=test&name=gpqa_main",
@ -191,7 +188,6 @@ def get_distribution_template() -> DistributionTemplate:
),
DatasetInput(
dataset_id="math_500",
provider_id="huggingface",
purpose=DatasetPurpose.eval_messages_answer,
source=URIDataSource(
uri="huggingface://datasets/llamastack/math_500?split=test",
@ -199,12 +195,25 @@ def get_distribution_template() -> DistributionTemplate:
),
DatasetInput(
dataset_id="bfcl",
provider_id="huggingface",
purpose=DatasetPurpose.eval_messages_answer,
source=URIDataSource(
uri="huggingface://datasets/llamastack/bfcl_v3?split=train",
),
),
DatasetInput(
dataset_id="ifeval",
purpose=DatasetPurpose.eval_messages_answer,
source=URIDataSource(
uri="huggingface://datasets/llamastack/IfEval?split=train",
),
),
DatasetInput(
dataset_id="docvqa",
purpose=DatasetPurpose.eval_messages_answer,
source=URIDataSource(
uri="huggingface://datasets/llamastack/docvqa?split=val",
),
),
]
# TODO(xiyan): fix this back as registerable resources
@ -234,6 +243,16 @@ def get_distribution_template() -> DistributionTemplate:
# dataset_id="bfcl",
# grader_ids=["basic::bfcl"],
# ),
# BenchmarkInput(
# benchmark_id="meta-reference-ifeval",
# dataset_id="ifeval",
# grader_ids=["basic::ifeval"],
# ),
# BenchmarkInput(
# benchmark_id="meta-reference-docvqa",
# dataset_id="docvqa",
# grader_ids=["basic::docvqa"],
# ),
# ]
return DistributionTemplate(
@ -258,7 +277,7 @@ def get_distribution_template() -> DistributionTemplate:
},
run_config_env_vars={
"LLAMA_STACK_PORT": (
"5001",
"8321",
"Port for the Llama Stack distribution server",
),
"TOGETHER_API_KEY": (

View file

@ -66,7 +66,6 @@ providers:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
service_name: ${env.OTEL_SERVICE_NAME:llama-stack}
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/open-benchmark/trace_store.db}
datasetio:
@ -143,28 +142,24 @@ datasets:
uri: huggingface://datasets/llamastack/simpleqa?split=train
metadata: {}
dataset_id: simpleqa
provider_id: huggingface
- purpose: eval/messages-answer
source:
type: uri
uri: huggingface://datasets/llamastack/mmlu_cot?split=test&name=all
metadata: {}
dataset_id: mmlu_cot
provider_id: huggingface
- purpose: eval/messages-answer
source:
type: uri
uri: huggingface://datasets/llamastack/gpqa_0shot_cot?split=test&name=gpqa_main
metadata: {}
dataset_id: gpqa_cot
provider_id: huggingface
- purpose: eval/messages-answer
source:
type: uri
uri: huggingface://datasets/llamastack/math_500?split=test
metadata: {}
dataset_id: math_500
provider_id: huggingface
- purpose: eval/messages-answer
source:
type: uri