merge

2025-03-23 15:48:14 -07:00 · 2025-03-23 15:48:14 -07:00 · a54d757ade
commit a54d757ade
parent c1d18283d2 b1513e66d5
197 changed files with 9392 additions and 3089 deletions
--- a/llama_stack/templates/open-benchmark/open_benchmark.py
+++ b/llama_stack/templates/open-benchmark/open_benchmark.py
@@ -167,7 +167,6 @@ def get_distribution_template() -> DistributionTemplate:
    default_datasets = [
        DatasetInput(
            dataset_id="simpleqa",
-            provider_id="huggingface",
            purpose=DatasetPurpose.eval_messages_answer,
            source=URIDataSource(
                uri="huggingface://datasets/llamastack/simpleqa?split=train",
@@ -175,7 +174,6 @@ def get_distribution_template() -> DistributionTemplate:
        ),
        DatasetInput(
            dataset_id="mmlu_cot",
-            provider_id="huggingface",
            purpose=DatasetPurpose.eval_messages_answer,
            source=URIDataSource(
                uri="huggingface://datasets/llamastack/mmlu_cot?split=test&name=all",
@@ -183,7 +181,6 @@ def get_distribution_template() -> DistributionTemplate:
        ),
        DatasetInput(
            dataset_id="gpqa_cot",
-            provider_id="huggingface",
            purpose=DatasetPurpose.eval_messages_answer,
            source=URIDataSource(
                uri="huggingface://datasets/llamastack/gpqa_0shot_cot?split=test&name=gpqa_main",
@@ -191,7 +188,6 @@ def get_distribution_template() -> DistributionTemplate:
        ),
        DatasetInput(
            dataset_id="math_500",
-            provider_id="huggingface",
            purpose=DatasetPurpose.eval_messages_answer,
            source=URIDataSource(
                uri="huggingface://datasets/llamastack/math_500?split=test",
@@ -199,12 +195,25 @@ def get_distribution_template() -> DistributionTemplate:
        ),
        DatasetInput(
            dataset_id="bfcl",
-            provider_id="huggingface",
            purpose=DatasetPurpose.eval_messages_answer,
            source=URIDataSource(
                uri="huggingface://datasets/llamastack/bfcl_v3?split=train",
            ),
        ),
+        DatasetInput(
+            dataset_id="ifeval",
+            purpose=DatasetPurpose.eval_messages_answer,
+            source=URIDataSource(
+                uri="huggingface://datasets/llamastack/IfEval?split=train",
+            ),
+        ),
+        DatasetInput(
+            dataset_id="docvqa",
+            purpose=DatasetPurpose.eval_messages_answer,
+            source=URIDataSource(
+                uri="huggingface://datasets/llamastack/docvqa?split=val",
+            ),
+        ),
    ]

    # TODO(xiyan): fix this back as registerable resources
@@ -234,6 +243,16 @@ def get_distribution_template() -> DistributionTemplate:
    #         dataset_id="bfcl",
    #         grader_ids=["basic::bfcl"],
    #     ),
+    #     BenchmarkInput(
+    #         benchmark_id="meta-reference-ifeval",
+    #         dataset_id="ifeval",
+    #         grader_ids=["basic::ifeval"],
+    #     ),
+    #     BenchmarkInput(
+    #         benchmark_id="meta-reference-docvqa",
+    #         dataset_id="docvqa",
+    #         grader_ids=["basic::docvqa"],
+    #     ),
    # ]

    return DistributionTemplate(
@@ -258,7 +277,7 @@ def get_distribution_template() -> DistributionTemplate:
        },
        run_config_env_vars={
            "LLAMA_STACK_PORT": (
-                "5001",
+                "8321",
                "Port for the Llama Stack distribution server",
            ),
            "TOGETHER_API_KEY": (
--- a/llama_stack/templates/open-benchmark/run.yaml
+++ b/llama_stack/templates/open-benchmark/run.yaml
@@ -66,7 +66,6 @@ providers:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
-      service_name: ${env.OTEL_SERVICE_NAME:llama-stack}
      sinks: ${env.TELEMETRY_SINKS:console,sqlite}
      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/open-benchmark/trace_store.db}
  datasetio:
@@ -143,28 +142,24 @@ datasets:
    uri: huggingface://datasets/llamastack/simpleqa?split=train
  metadata: {}
  dataset_id: simpleqa
-  provider_id: huggingface
 - purpose: eval/messages-answer
  source:
    type: uri
    uri: huggingface://datasets/llamastack/mmlu_cot?split=test&name=all
  metadata: {}
  dataset_id: mmlu_cot
-  provider_id: huggingface
 - purpose: eval/messages-answer
  source:
    type: uri
    uri: huggingface://datasets/llamastack/gpqa_0shot_cot?split=test&name=gpqa_main
  metadata: {}
  dataset_id: gpqa_cot
-  provider_id: huggingface
 - purpose: eval/messages-answer
  source:
    type: uri
    uri: huggingface://datasets/llamastack/math_500?split=test
  metadata: {}
  dataset_id: math_500
-  provider_id: huggingface
 - purpose: eval/messages-answer
  source:
    type: uri