add dataset datatypes

This commit is contained in:
Xi Yan 2024-10-10 17:19:18 -07:00
parent c8de439d9f
commit 99ed1425fc
5 changed files with 155 additions and 67 deletions

View file

@@ -5,19 +5,19 @@
# the root directory of this source tree.
# TODO: make these import config based
from .dataset import CustomDataset, HFDataset
from .dataset_registry import DatasetRegistry
# from .dataset import CustomDataset, HFDataset
# from .dataset_registry import DatasetRegistry
DATASETS_REGISTRY = {
"mmlu-simple-eval-en": CustomDataset(
name="mmlu_eval",
url="https://openaipublic.blob.core.windows.net/simple-evals/mmlu.csv",
),
"hellaswag": HFDataset(
name="hellaswag",
url="hf://hellaswag?split=validation&trust_remote_code=True",
),
}
# DATASETS_REGISTRY = {
# "mmlu-simple-eval-en": CustomDataset(
# name="mmlu_eval",
# url="https://openaipublic.blob.core.windows.net/simple-evals/mmlu.csv",
# ),
# "hellaswag": HFDataset(
# name="hellaswag",
# url="hf://hellaswag?split=validation&trust_remote_code=True",
# ),
# }
for k, v in DATASETS_REGISTRY.items():
DatasetRegistry.register(k, v)
# for k, v in DATASETS_REGISTRY.items():
# DatasetRegistry.register(k, v)