diff --git a/llama_stack/providers/adapters/datasetio/huggingface/benchmarks/__init__.py b/llama_stack/providers/adapters/datasetio/huggingface/benchmarks/__init__.py
deleted file mode 100644
index d7b596a39..000000000
--- a/llama_stack/providers/adapters/datasetio/huggingface/benchmarks/__init__.py
+++ /dev/null
@@ -1,12 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-# # Copyright (c) Meta Platforms, Inc. and affiliates.
-# # All rights reserved.
-# #
-# # This source code is licensed under the terms described in the LICENSE file in
-# # the root directory of this source tree.
-# from .mmlu import mmlu  # noqa: F401
diff --git a/llama_stack/providers/adapters/datasetio/huggingface/benchmarks/mmlu.py b/llama_stack/providers/adapters/datasetio/huggingface/benchmarks/mmlu.py
deleted file mode 100644
index 671b4de1c..000000000
--- a/llama_stack/providers/adapters/datasetio/huggingface/benchmarks/mmlu.py
+++ /dev/null
@@ -1,30 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-# # Copyright (c) Meta Platforms, Inc. and affiliates.
-# # All rights reserved.
-# #
-# # This source code is licensed under the terms described in the LICENSE file in
-# # the root directory of this source tree.
-
-# from llama_models.llama3.api.datatypes import URL
-# from llama_stack.apis.common.type_system import ChatCompletionInputType, StringType
-# from llama_stack.apis.datasetio import DatasetDef
-
-# mmlu = DatasetDef(
-#     identifier="mmlu",
-#     url=URL(uri="https://huggingface.co/datasets/llamastack/evals"),
-#     dataset_schema={
-#         "input_query": StringType(),
-#         "expected_answer": StringType(),
-#         "chat_completion_input": ChatCompletionInputType(),
-#     },
-#     metadata={
-#         "path": "llamastack/evals",
-#         "name": "evals__mmlu__details",
-#         "split": "train",
-#     },
-# )
diff --git a/llama_stack/providers/inline/meta_reference/eval/eval.py b/llama_stack/providers/inline/meta_reference/eval/eval.py
index c1602cedb..d13e10d7c 100644
--- a/llama_stack/providers/inline/meta_reference/eval/eval.py
+++ b/llama_stack/providers/inline/meta_reference/eval/eval.py
@@ -19,7 +19,6 @@ from llama_stack.apis.scoring import Scoring
 from llama_stack.providers.datatypes import EvalTasksProtocolPrivate
 
 from .config import MetaReferenceEvalConfig
-from .eval_task_defs.meta_reference_mmlu import meta_reference_mmlu
 
 
 class ColumnName(Enum):
@@ -51,9 +50,10 @@ class MetaReferenceEvalImpl(Eval, EvalTasksProtocolPrivate):
         self.eval_tasks = {}
 
     async def initialize(self) -> None:
-        # pre-register eval tasks
-        benchmark_tasks = [meta_reference_mmlu]
-        self.eval_tasks = {x.identifier: x for x in benchmark_tasks}
+        pass
+        # Nothing is pre-registered here any more: this used to load the built-in
+        # `meta_reference_mmlu` task into `self.eval_tasks` keyed by identifier, but
+        # that task definition has been removed, so the dict assigned above stays empty.
 
     async def shutdown(self) -> None: ...
 
diff --git a/llama_stack/providers/inline/meta_reference/eval/eval_task_defs/__init__.py b/llama_stack/providers/inline/meta_reference/eval/eval_task_defs/__init__.py
deleted file mode 100644
index 756f351d8..000000000
--- a/llama_stack/providers/inline/meta_reference/eval/eval_task_defs/__init__.py
+++ /dev/null
@@ -1,5 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
diff --git a/llama_stack/providers/inline/meta_reference/eval/eval_task_defs/meta_reference_mmlu.py b/llama_stack/providers/inline/meta_reference/eval/eval_task_defs/meta_reference_mmlu.py
deleted file mode 100644
index f8fc284a9..000000000
--- a/llama_stack/providers/inline/meta_reference/eval/eval_task_defs/meta_reference_mmlu.py
+++ /dev/null
@@ -1,13 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-from llama_stack.apis.eval import EvalTaskDef
-
-meta_reference_mmlu = EvalTaskDef(
-    identifier="meta-reference-mmlu",
-    dataset_id="mmlu",
-    scoring_functions=["meta-reference::regex_parser_multiple_choice_answer"],
-)