diff --git a/llama_stack/providers/adapters/datasetio/huggingface/benchmarks/__init__.py b/llama_stack/providers/adapters/datasetio/huggingface/benchmarks/__init__.py index c608f6fff..30ab690a4 100644 --- a/llama_stack/providers/adapters/datasetio/huggingface/benchmarks/__init__.py +++ b/llama_stack/providers/adapters/datasetio/huggingface/benchmarks/__init__.py @@ -3,4 +3,4 @@ # # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from .llamastack_mmlu import llamastack_mmlu # noqa: F401 +from .mmlu import mmlu # noqa: F401 diff --git a/llama_stack/providers/adapters/datasetio/huggingface/benchmarks/llamastack_mmlu.py b/llama_stack/providers/adapters/datasetio/huggingface/benchmarks/mmlu.py similarity index 67% rename from llama_stack/providers/adapters/datasetio/huggingface/benchmarks/llamastack_mmlu.py rename to llama_stack/providers/adapters/datasetio/huggingface/benchmarks/mmlu.py index 36464c63f..dbd14df31 100644 --- a/llama_stack/providers/adapters/datasetio/huggingface/benchmarks/llamastack_mmlu.py +++ b/llama_stack/providers/adapters/datasetio/huggingface/benchmarks/mmlu.py @@ -8,19 +8,17 @@ from llama_models.llama3.api.datatypes import URL from llama_stack.apis.common.type_system import ChatCompletionInputType, StringType from llama_stack.apis.datasetio import DatasetDef -llamastack_mmlu = DatasetDef( - identifier="llamastack_mmlu", - url=URL( - uri="https://huggingface.co/datasets/llamastack/Llama-3.2-1B-Instruct-evals" - ), +mmlu = DatasetDef( + identifier="mmlu", + url=URL(uri="https://huggingface.co/datasets/llamastack/evals"), dataset_schema={ "input_query": StringType(), "expected_answer": StringType(), "chat_completion_input": ChatCompletionInputType(), }, metadata={ - "path": "llamastack/Llama-3.2-1B-Instruct-evals", - "name": "Llama-3.2-1B-Instruct-evals__mmlu__details", + "path": "llamastack/evals", + "name": "evals__mmlu__details", "split": "train", }, ) diff --git a/llama_stack/providers/adapters/datasetio/huggingface/huggingface.py b/llama_stack/providers/adapters/datasetio/huggingface/huggingface.py index fadd54209..02a3be8fb 100644 --- a/llama_stack/providers/adapters/datasetio/huggingface/huggingface.py +++ b/llama_stack/providers/adapters/datasetio/huggingface/huggingface.py @@ -12,7 +12,7 @@ import datasets as hf_datasets from llama_stack.providers.datatypes import DatasetsProtocolPrivate from llama_stack.providers.utils.datasetio.url_utils import get_dataframe_from_url -from .benchmarks import llamastack_mmlu +from .benchmarks import mmlu from .config import HuggingfaceDatasetIOConfig @@ -38,7 +38,7 @@ class HuggingfaceDatasetIOImpl(DatasetIO, DatasetsProtocolPrivate): async def initialize(self) -> None: # pre-registered benchmark datasets - pre_registered_datasets = [llamastack_mmlu] + pre_registered_datasets = [mmlu] self.dataset_infos = {x.identifier: x for x in pre_registered_datasets} async def shutdown(self) -> None: ... diff --git a/llama_stack/providers/inline/meta_reference/eval/eval_task_defs/meta_reference_mmlu.py b/llama_stack/providers/inline/meta_reference/eval/eval_task_defs/meta_reference_mmlu.py index 938055316..f8fc284a9 100644 --- a/llama_stack/providers/inline/meta_reference/eval/eval_task_defs/meta_reference_mmlu.py +++ b/llama_stack/providers/inline/meta_reference/eval/eval_task_defs/meta_reference_mmlu.py @@ -8,6 +8,6 @@ from llama_stack.apis.eval import EvalTaskDef meta_reference_mmlu = EvalTaskDef( identifier="meta-reference-mmlu", - dataset_id="llamastack_mmlu", + dataset_id="mmlu", scoring_functions=["meta-reference::regex_parser_multiple_choice_answer"], )