tasks registry

2025-12-09 11:20:58 +00:00 · 2024-10-07 15:57:39 -07:00 · 2024-10-07 15:57:39 -07:00 · 4764762dd4
commit 4764762dd4
parent 041634192a
9 changed files with 74 additions and 35 deletions
--- a/llama_stack/providers/impls/meta_reference/evals/evals.py
+++ b/llama_stack/providers/impls/meta_reference/evals/evals.py
@@ -8,12 +8,15 @@ from llama_stack.apis.inference import *  # noqa: F403
 from llama_stack.apis.evals import *  # noqa: F403
 from termcolor import cprint

+from llama_stack.distribution.registry.tasks.task_registry import TaskRegistry
+
 from llama_stack.providers.impls.meta_reference.evals.datas.dataset_registry import (
    get_dataset,
 )
-from llama_stack.providers.impls.meta_reference.evals.tasks.task_registry import (
-    get_task,
-)
+
+# from llama_stack.providers.impls.meta_reference.evals.tasks.task_registry import (
+#     get_task,
+# )

 from .config import MetaReferenceEvalsImplConfig

@@ -36,7 +39,8 @@ class MetaReferenceEvalsImpl(Evals):
    ) -> EvaluateResponse:
        cprint(f"model={model}, dataset={dataset}, task={task}", "red")
        dataset = get_dataset(dataset)
-        task_impl = get_task(task, dataset)
+        task_impl = TaskRegistry.get_task(task)(dataset)
+
        x1 = task_impl.preprocess()

        # TODO: replace w/ batch inference & async return eval job
--- a/llama_stack/providers/impls/meta_reference/evals/tasks/mmlu_task.py
+++ b/llama_stack/providers/impls/meta_reference/evals/tasks/mmlu_task.py
@@ -5,8 +5,8 @@
 # the root directory of this source tree.
 import re

-from .task import BaseTask
 from llama_stack.apis.evals import *  # noqa: F403
+from llama_stack.distribution.registry.tasks.task import BaseTask

 QUERY_TEMPLATE_MULTICHOICE = """
 Answer the following multiple choice question and make the answer very simple. The last line of your response should be of the following format: 'Answer: $LETTER' (without quotes) where LETTER is one of ABCD.
--- a/llama_stack/providers/impls/meta_reference/evals/tasks/task.py
+++ b/llama_stack/providers/impls/meta_reference/evals/tasks/task.py
@@ -1,48 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-from abc import ABC, abstractmethod
-
-
-class BaseTask(ABC):
-    """
-    Base class for all evaluation tasks. Each task needs to implement the following methods:
-    - F1: preprocess_sample(self)
-    - F2: postprocess_sample(self)
-    - F3: score_sample(self)
-    """
-
-    def __init__(self, dataset, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-        self._name = self.__class__.__name__
-        self.dataset = dataset
-
-    @abstractmethod
-    def preprocess_sample(self, sample):
-        raise NotImplementedError()
-
-    @abstractmethod
-    def postprocess_sample(self, sample):
-        raise NotImplementedError()
-
-    @abstractmethod
-    def score_sample(self, sample, ground_truth):
-        raise NotImplementedError()
-
-    @abstractmethod
-    def aggregate_results(self, eval_results):
-        raise NotImplementedError()
-
-    def preprocess(self):
-        return [self.preprocess_sample(sample) for sample in self.dataset]
-
-    def postprocess(self, generation):
-        return [self.postprocess_sample(sample) for sample in generation]
-
-    def score(self, postprocessed):
-        return [
-            self.score_sample(sample, ground_truth)
-            for sample, ground_truth in zip(postprocessed, self.dataset)
-        ]
--- a/llama_stack/providers/impls/meta_reference/evals/tasks/task_registry.py
+++ b/llama_stack/providers/impls/meta_reference/evals/tasks/task_registry.py
@@ -1,16 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-from .mmlu_task import MMLUTask
-
-# TODO: make this into a config based registry
-TASKS_REGISTRY = {
-    "mmlu": MMLUTask,
-}
-
-
-def get_task(task_id: str, dataset):
-    task_impl = TASKS_REGISTRY[task_id]
-    return task_impl(dataset)