feat(api): (1/n) datasets api clean up (#1573)

## PR Stack

- https://github.com/meta-llama/llama-stack/pull/1573
- https://github.com/meta-llama/llama-stack/pull/1625
- https://github.com/meta-llama/llama-stack/pull/1656
- https://github.com/meta-llama/llama-stack/pull/1657
- https://github.com/meta-llama/llama-stack/pull/1658
- https://github.com/meta-llama/llama-stack/pull/1659
- https://github.com/meta-llama/llama-stack/pull/1660

**Client SDK**

- https://github.com/meta-llama/llama-stack-client-python/pull/203

**CI**

- 1391130488

<img width="1042" alt="image" src="https://github.com/user-attachments/assets/69636067-376d-436b-9204-896e2dd490ca" />

-- `test_rag_agent_with_attachments` is flaky and not related to this PR

## Doc

<img width="789" alt="image" src="https://github.com/user-attachments/assets/b88390f3-73d6-4483-b09a-a192064e32d9" />

## Client Usage

```python
client.datasets.register(
    source={
        "type": "uri",
        "uri": "lsfs://mydata.jsonl",
    },
    schema="jsonl_messages",
    # optional
    dataset_id="my_first_train_data"
)

# quick prototype debugging
client.datasets.register(
    data_reference={
        "type": "rows",
        "rows": [
            {"messages": [...]},
        ],
    },
    schema="jsonl_messages",
)
```

## Test Plan

- CI: 1387805545

```
LLAMA_STACK_CONFIG=fireworks pytest -v tests/integration/datasets/test_datasets.py
```

```
LLAMA_STACK_CONFIG=fireworks pytest -v tests/integration/scoring/test_scoring.py
```

```
pytest -v -s --nbval-lax ./docs/notebooks/Llama_Stack_Benchmark_Evals.ipynb
```
2025-12-04 18:13:44 +00:00 · 2025-03-17 16:55:45 -07:00 · 2025-03-17 16:55:45 -07:00 · 5287b437ae
commit 5287b437ae
parent 3b35a39b8b
29 changed files with 2593 additions and 2296 deletions
--- a/llama_stack/templates/open-benchmark/open_benchmark.py
+++ b/llama_stack/templates/open-benchmark/open_benchmark.py
@@ -6,7 +6,7 @@

 from typing import Dict, List, Tuple

-from llama_stack.apis.common.content_types import URL
+from llama_stack.apis.datasets import DatasetPurpose, URIDataSource
 from llama_stack.apis.models.models import ModelType
 from llama_stack.distribution.datatypes import (
    BenchmarkInput,
@@ -171,76 +171,42 @@ def get_distribution_template() -> DistributionTemplate:
        DatasetInput(
            dataset_id="simpleqa",
            provider_id="huggingface",
-            url=URL(uri="https://huggingface.co/datasets/llamastack/simpleqa"),
-            metadata={
-                "path": "llamastack/simpleqa",
-                "split": "train",
-            },
-            dataset_schema={
-                "input_query": {"type": "string"},
-                "expected_answer": {"type": "string"},
-                "chat_completion_input": {"type": "string"},
-            },
+            purpose=DatasetPurpose.eval_messages_answer,
+            source=URIDataSource(
+                uri="huggingface://datasets/llamastack/simpleqa?split=train",
+            ),
        ),
        DatasetInput(
            dataset_id="mmlu_cot",
            provider_id="huggingface",
-            url=URL(uri="https://huggingface.co/datasets/llamastack/mmlu_cot"),
-            metadata={
-                "path": "llamastack/mmlu_cot",
-                "name": "all",
-                "split": "test",
-            },
-            dataset_schema={
-                "input_query": {"type": "string"},
-                "expected_answer": {"type": "string"},
-                "chat_completion_input": {"type": "string"},
-            },
+            purpose=DatasetPurpose.eval_messages_answer,
+            source=URIDataSource(
+                uri="huggingface://datasets/llamastack/mmlu_cot?split=test&name=all",
+            ),
        ),
        DatasetInput(
            dataset_id="gpqa_cot",
            provider_id="huggingface",
-            url=URL(uri="https://huggingface.co/datasets/llamastack/gpqa_0shot_cot"),
-            metadata={
-                "path": "llamastack/gpqa_0shot_cot",
-                "name": "gpqa_main",
-                "split": "train",
-            },
-            dataset_schema={
-                "input_query": {"type": "string"},
-                "expected_answer": {"type": "string"},
-                "chat_completion_input": {"type": "string"},
-            },
+            purpose=DatasetPurpose.eval_messages_answer,
+            source=URIDataSource(
+                uri="huggingface://datasets/llamastack/gpqa_0shot_cot?split=test&name=gpqa_main",
+            ),
        ),
        DatasetInput(
            dataset_id="math_500",
            provider_id="huggingface",
-            url=URL(uri="https://huggingface.co/datasets/llamastack/math_500"),
-            metadata={
-                "path": "llamastack/math_500",
-                "split": "test",
-            },
-            dataset_schema={
-                "input_query": {"type": "string"},
-                "expected_answer": {"type": "string"},
-                "chat_completion_input": {"type": "string"},
-            },
+            purpose=DatasetPurpose.eval_messages_answer,
+            source=URIDataSource(
+                uri="huggingface://datasets/llamastack/math_500?split=test",
+            ),
        ),
        DatasetInput(
            dataset_id="bfcl",
            provider_id="huggingface",
-            url=URL(uri="https://huggingface.co/datasets/llamastack/bfcl_v3"),
-            metadata={
-                "path": "llamastack/bfcl_v3",
-                "split": "train",
-            },
-            dataset_schema={
-                "function": {"type": "string"},
-                "language": {"type": "string"},
-                "ground_truth": {"type": "string"},
-                "id": {"type": "string"},
-                "chat_completion_input": {"type": "string"},
-            },
+            purpose=DatasetPurpose.eval_messages_answer,
+            source=URIDataSource(
+                uri="huggingface://datasets/llamastack/bfcl_v3?split=train",
+            ),
        ),
    ]