wip add datatypes

This commit is contained in:
Xi Yan 2024-10-10 19:56:19 -07:00
parent 99ed1425fc
commit 9816c9aae6
5 changed files with 175 additions and 57 deletions

View file

@ -5,19 +5,25 @@
# the root directory of this source tree.
# TODO: make these imports config-based
# from .dataset import CustomDataset, HFDataset
# from .dataset_registry import DatasetRegistry
from llama_stack.apis.dataset import * # noqa: F403
from .dataset import CustomDataset, HuggingfaceDataset
from .dataset_registry import DatasetRegistry
# DATASETS_REGISTRY = {
# "mmlu-simple-eval-en": CustomDataset(
# name="mmlu_eval",
# url="https://openaipublic.blob.core.windows.net/simple-evals/mmlu.csv",
# ),
# "hellaswag": HFDataset(
# name="hellaswag",
# url="hf://hellaswag?split=validation&trust_remote_code=True",
# ),
# }
# Dataset wrappers that this module registers with DatasetRegistry at import
# time. Each entry pairs a dataset class with its definition object.
DATASETS_REGISTRY = [
    # MMLU (English) from the simple-evals CSV, addressed by URL.
    CustomDataset(
        config=CustomDatasetDef(
            identifier="mmlu-simple-eval-en",
            url="https://openaipublic.blob.core.windows.net/simple-evals/mmlu.csv",
        ),
    ),
    # HellaSwag, addressed by dataset name.
    # NOTE(review): kwargs are presumably forwarded to the Hugging Face
    # loader (validation split, remote code trusted) -- confirm in
    # HuggingfaceDataset.
    HuggingfaceDataset(
        config=HuggingfaceDatasetDef(
            identifier="hellaswag",
            dataset_name="hellaswag",
            kwargs={"split": "validation", "trust_remote_code": True},
        ),
    ),
]
# for k, v in DATASETS_REGISTRY.items():
# DatasetRegistry.register(k, v)
# Register each predefined dataset under its own dataset_id. This runs as a
# module-level side effect on import.
for entry in DATASETS_REGISTRY:
    DatasetRegistry.register(entry.dataset_id, entry)