generator + scorer API for MMLU

2025-12-08 19:10:56 +00:00 · 2024-10-13 23:27:02 -07:00 · 2024-10-13 23:27:02 -07:00 · a25aff290e
commit a25aff290e
parent fb565dfb06
14 changed files with 618 additions and 131 deletions
--- a/llama_stack/distribution/registry/datasets/dataset.py
+++ b/llama_stack/distribution/registry/datasets/dataset.py
@ -25,23 +25,27 @@ class CustomDataset(BaseDataset[DictSample]):
            self.load()
        return (DictSample(data=x) for x in self.dataset)

-    def __str__(self):
+    def __str__(self) -> str:
+        """Return a label of the form ``CustomDataset(<config>)`` built from ``self.config``."""
        return f"CustomDataset({self.config})"

-    def __len__(self):
+    def __len__(self) -> int:
+        """Return ``len(self.dataset)``, calling ``self.load()`` first when ``self.dataset`` is falsy."""
        if not self.dataset:
            self.load()
        return len(self.dataset)

-    def load(self):
+    def load(self, n_samples: Optional[int] = None) -> None:
+        """Read the file at ``self.config.url`` into ``self.dataset``.
+
+        Does nothing when ``self.dataset`` is already truthy. The reader is
+        picked from the url suffix: ``.csv`` uses ``pandas.read_csv`` and
+        ``.xlsx`` uses ``pandas.read_excel``.
+
+        Args:
+            n_samples: when not ``None``, keep only a random sample of this
+                many rows (``DataFrame.sample``) before building the dataset.
+
+        Raises:
+            ValueError: if the url ends in neither ``.csv`` nor ``.xlsx``.
+        """
        if self.dataset:
            return
+
        # TODO: better support w/ data url
        if self.config.url.endswith(".csv"):
            df = pandas.read_csv(self.config.url)
        elif self.config.url.endswith(".xlsx"):
            df = pandas.read_excel(self.config.url)
+        else:
+            # Without this branch ``df`` would be unbound below and the
+            # failure would surface as an opaque UnboundLocalError.
+            raise ValueError(
+                f"Unsupported dataset url {self.config.url!r}: "
+                "expected a .csv or .xlsx file"
+            )

+        if n_samples is not None:
+            df = df.sample(n=n_samples)
+
        self.dataset = Dataset.from_pandas(df)


--- a/llama_stack/distribution/registry/scorers/init.py
+++ b/llama_stack/distribution/registry/scorers/init.py
@ -0,0 +1,6 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+# TODO: make these imports config-based
--- a/llama_stack/distribution/registry/scorers/scorer_registry.py
+++ b/llama_stack/distribution/registry/scorers/scorer_registry.py
@ -0,0 +1,32 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+from typing import AbstractSet, Dict
+
+from llama_stack.apis.evals import BaseScorer
+
+
+class ScorerRegistry:
+    """Name-keyed registry of scorers shared by the whole process.
+
+    All state lives in the class-level ``_REGISTRY`` dict, so every method is
+    a static method and no instance is needed.
+    """
+
+    _REGISTRY: Dict[str, BaseScorer] = {}
+
+    @staticmethod
+    def names() -> AbstractSet[str]:
+        """Return the key view of the names currently registered."""
+        return ScorerRegistry._REGISTRY.keys()
+
+    @staticmethod
+    def register(name: str, scorer: BaseScorer) -> None:
+        """Store ``scorer`` under ``name``.
+
+        Raises:
+            ValueError: if ``name`` is already registered.
+        """
+        if name in ScorerRegistry._REGISTRY:
+            raise ValueError(f"Scorer {name} already exists.")
+        ScorerRegistry._REGISTRY[name] = scorer
+
+    @staticmethod
+    def get_scorer(name: str) -> BaseScorer:
+        """Return the scorer registered under ``name``.
+
+        Raises:
+            ValueError: if nothing is registered under ``name``.
+        """
+        if name not in ScorerRegistry._REGISTRY:
+            raise ValueError(f"Scorer {name} not found.")
+        return ScorerRegistry._REGISTRY[name]
+
+    @staticmethod
+    def reset() -> None:
+        """Drop every registration by rebinding the registry to a new dict."""
+        ScorerRegistry._REGISTRY = {}
--- a/llama_stack/distribution/registry/tasks/init.py
+++ b/llama_stack/distribution/registry/tasks/init.py
@ -3,11 +3,3 @@
 #
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
-# TODO: make these import config based
-from llama_stack.providers.impls.meta_reference.evals.tasks.mmlu_task import MMLUTask
-from .task_registry import TaskRegistry
-
-TaskRegistry.register(
-    "mmlu",
-    MMLUTask,
-)