From 989f070bc07584eb2646603cf400496ee9224089 Mon Sep 17 00:00:00 2001
From: Xi Yan
Date: Thu, 7 Nov 2024 21:35:02 -0800
Subject: [PATCH] move benchmark task def to file

---
 .../providers/inline/meta_reference/eval/eval.py    | 11 ++---------
 .../meta_reference/eval/eval_task_defs/__init__.py  |  5 +++++
 .../eval/eval_task_defs/meta_reference_mmlu.py      | 13 +++++++++++++
 3 files changed, 20 insertions(+), 9 deletions(-)
 create mode 100644 llama_stack/providers/inline/meta_reference/eval/eval_task_defs/__init__.py
 create mode 100644 llama_stack/providers/inline/meta_reference/eval/eval_task_defs/meta_reference_mmlu.py

diff --git a/llama_stack/providers/inline/meta_reference/eval/eval.py b/llama_stack/providers/inline/meta_reference/eval/eval.py
index 05f34f2fa..c1602cedb 100644
--- a/llama_stack/providers/inline/meta_reference/eval/eval.py
+++ b/llama_stack/providers/inline/meta_reference/eval/eval.py
@@ -19,6 +19,7 @@ from llama_stack.apis.scoring import Scoring
 from llama_stack.providers.datatypes import EvalTasksProtocolPrivate
 
 from .config import MetaReferenceEvalConfig
+from .eval_task_defs.meta_reference_mmlu import meta_reference_mmlu
 
 
 class ColumnName(Enum):
@@ -51,15 +52,7 @@ class MetaReferenceEvalImpl(Eval, EvalTasksProtocolPrivate):
 
     async def initialize(self) -> None:
         # pre-register eval tasks
-        benchmark_tasks = [
-            EvalTaskDef(
-                identifier="meta-reference-mmlu",
-                dataset_id="llamastack_mmlu_loose",
-                scoring_functions=[
-                    "meta-reference::regex_parser_multiple_choice_answer"
-                ],
-            )
-        ]
+        benchmark_tasks = [meta_reference_mmlu]
         self.eval_tasks = {x.identifier: x for x in benchmark_tasks}
 
     async def shutdown(self) -> None: ...
diff --git a/llama_stack/providers/inline/meta_reference/eval/eval_task_defs/__init__.py b/llama_stack/providers/inline/meta_reference/eval/eval_task_defs/__init__.py
new file mode 100644
index 000000000..756f351d8
--- /dev/null
+++ b/llama_stack/providers/inline/meta_reference/eval/eval_task_defs/__init__.py
@@ -0,0 +1,5 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
diff --git a/llama_stack/providers/inline/meta_reference/eval/eval_task_defs/meta_reference_mmlu.py b/llama_stack/providers/inline/meta_reference/eval/eval_task_defs/meta_reference_mmlu.py
new file mode 100644
index 000000000..c14ce439c
--- /dev/null
+++ b/llama_stack/providers/inline/meta_reference/eval/eval_task_defs/meta_reference_mmlu.py
@@ -0,0 +1,13 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from llama_stack.apis.eval import EvalTaskDef
+
+meta_reference_mmlu = EvalTaskDef(
+    identifier="meta-reference-mmlu",
+    dataset_id="llamastack_mmlu_loose",
+    scoring_functions=["meta-reference::regex_parser_multiple_choice_answer"],
+)