diff --git a/llama_stack/providers/adapters/datasetio/huggingface/benchmarks/__init__.py b/llama_stack/providers/adapters/datasetio/huggingface/benchmarks/__init__.py deleted file mode 100644 index d7b596a39..000000000 --- a/llama_stack/providers/adapters/datasetio/huggingface/benchmarks/__init__.py +++ /dev/null @@ -1,12 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -# # Copyright (c) Meta Platforms, Inc. and affiliates. -# # All rights reserved. -# # -# # This source code is licensed under the terms described in the LICENSE file in -# # the root directory of this source tree. -# from .mmlu import mmlu # noqa: F401 diff --git a/llama_stack/providers/adapters/datasetio/huggingface/benchmarks/mmlu.py b/llama_stack/providers/adapters/datasetio/huggingface/benchmarks/mmlu.py deleted file mode 100644 index 671b4de1c..000000000 --- a/llama_stack/providers/adapters/datasetio/huggingface/benchmarks/mmlu.py +++ /dev/null @@ -1,30 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -# # Copyright (c) Meta Platforms, Inc. and affiliates. -# # All rights reserved. -# # -# # This source code is licensed under the terms described in the LICENSE file in -# # the root directory of this source tree. - -# from llama_models.llama3.api.datatypes import URL -# from llama_stack.apis.common.type_system import ChatCompletionInputType, StringType -# from llama_stack.apis.datasetio import DatasetDef - -# mmlu = DatasetDef( -# identifier="mmlu", -# url=URL(uri="https://huggingface.co/datasets/llamastack/evals"), -# dataset_schema={ -# "input_query": StringType(), -# "expected_answer": StringType(), -# "chat_completion_input": ChatCompletionInputType(), -# }, -# metadata={ -# "path": "llamastack/evals", -# "name": "evals__mmlu__details", -# "split": "train", -# }, -# ) diff --git a/llama_stack/providers/inline/meta_reference/eval/eval.py b/llama_stack/providers/inline/meta_reference/eval/eval.py index c1602cedb..d13e10d7c 100644 --- a/llama_stack/providers/inline/meta_reference/eval/eval.py +++ b/llama_stack/providers/inline/meta_reference/eval/eval.py @@ -19,7 +19,6 @@ from llama_stack.apis.scoring import Scoring from llama_stack.providers.datatypes import EvalTasksProtocolPrivate from .config import MetaReferenceEvalConfig -from .eval_task_defs.meta_reference_mmlu import meta_reference_mmlu class ColumnName(Enum): @@ -51,9 +50,10 @@ class MetaReferenceEvalImpl(Eval, EvalTasksProtocolPrivate): self.eval_tasks = {} async def initialize(self) -> None: - # pre-register eval tasks - benchmark_tasks = [meta_reference_mmlu] - self.eval_tasks = {x.identifier: x for x in benchmark_tasks} + pass + # # pre-register eval tasks + # benchmark_tasks = [meta_reference_mmlu] + # self.eval_tasks = {x.identifier: x for x in benchmark_tasks} async def shutdown(self) -> None: ... diff --git a/llama_stack/providers/inline/meta_reference/eval/eval_task_defs/__init__.py b/llama_stack/providers/inline/meta_reference/eval/eval_task_defs/__init__.py deleted file mode 100644 index 756f351d8..000000000 --- a/llama_stack/providers/inline/meta_reference/eval/eval_task_defs/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. diff --git a/llama_stack/providers/inline/meta_reference/eval/eval_task_defs/meta_reference_mmlu.py b/llama_stack/providers/inline/meta_reference/eval/eval_task_defs/meta_reference_mmlu.py deleted file mode 100644 index f8fc284a9..000000000 --- a/llama_stack/providers/inline/meta_reference/eval/eval_task_defs/meta_reference_mmlu.py +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from llama_stack.apis.eval import EvalTaskDef - -meta_reference_mmlu = EvalTaskDef( - identifier="meta-reference-mmlu", - dataset_id="mmlu", - scoring_functions=["meta-reference::regex_parser_multiple_choice_answer"], -)