move folder

2025-12-09 11:20:58 +00:00 · 2024-10-05 11:57:21 -07:00 · 2024-10-05 11:57:21 -07:00 · 041634192a
commit 041634192a
parent 6234dd97d5
9 changed files with 18 additions and 6 deletions
--- a/llama_stack/providers/impls/meta_reference/evals/tasks/mmlu_task.py
+++ b/llama_stack/providers/impls/meta_reference/evals/tasks/mmlu_task.py
@ -146,5 +146,5 @@ class MMLUTask(BaseTask):

    def aggregate_results(self, eval_results):
        return EvaluateResponse(
-            metrics={"score": sum(eval_results) / len(eval_results)}
+            metrics={"score": str(sum(eval_results) / len(eval_results))}
        )
--- a/llama_stack/providers/impls/third_party/evals/init.py
+++ b/llama_stack/providers/impls/third_party/evals/init.py
@ -0,0 +1,5 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
--- a/llama_stack/providers/impls/third_party/evals/eleuther/init.py
+++ b/llama_stack/providers/impls/third_party/evals/eleuther/init.py
@ -0,0 +1,19 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from .config import EleutherEvalsImplConfig  # noqa
+from llama_stack.apis.inference import *  # noqa: F403
+from llama_stack.distribution.datatypes import Api, ProviderSpec
+
+
+async def get_provider_impl(
+    config: EleutherEvalsImplConfig, deps: Dict[Api, ProviderSpec]
+):
+    from .eleuther import EleutherEvalsAdapter
+
+    impl = EleutherEvalsAdapter(config, deps[Api.inference])
+    await impl.initialize()
+    return impl
--- a/llama_stack/providers/impls/third_party/evals/eleuther/config.py
+++ b/llama_stack/providers/impls/third_party/evals/eleuther/config.py
@ -0,0 +1,10 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from pydantic import BaseModel
+
+
+class EleutherEvalsImplConfig(BaseModel): ...
--- a/llama_stack/providers/impls/third_party/evals/eleuther/eleuther.py
+++ b/llama_stack/providers/impls/third_party/evals/eleuther/eleuther.py
@ -0,0 +1,120 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from llama_stack.apis.inference import *  # noqa: F403
+from llama_stack.apis.evals import *  # noqa: F403
+import random
+
+import lm_eval
+from lm_eval.api.model import LM
+from lm_eval.evaluator import evaluate, get_task_list
+from lm_eval.tasks import get_task_dict, TaskManager
+
+from .config import EleutherEvalsImplConfig  # noqa
+
+
+class EleutherEvalsWrapper(LM):
+    def __init__(
+        self,
+        inference_api: Inference,
+        **kwargs,
+    ):
+        super().__init__(**kwargs)
+        self.inference_api = inference_api
+        self.tokenizer = None
+        self.tokenized_requests = False
+        self.kwargs = kwargs
+
+    @property
+    def eot_token_id(self):
+        raise NotImplementedError("Not implemented")
+
+    @property
+    def max_length(self) -> int:
+        return NotImplementedError("Not implemented")
+
+    @property
+    def max_gen_toks(self) -> int:
+        return NotImplementedError("Not implemented")
+
+    @property
+    def batch_size(self):
+        # Isn't used because we override _loglikelihood_tokens
+        raise NotImplementedError("No support for logits.")
+
+    @property
+    def device(self):
+        # Isn't used because we override _loglikelihood_tokens
+        raise NotImplementedError("No support for logits.")
+
+    @property
+    def world_size(self):
+        return 1
+
+    def tok_encode(self, string: str) -> List[int]:
+        return NotImplementedError("Not implemented")
+
+    def tok_decode(self, tokens: List[int]) -> str:
+        return NotImplementedError("Not implemented")
+
+    def _loglikelihood_tokens(self, requests, disable_tqdm: bool = False):
+        raise NotImplementedError("No support for logits.")
+
+    def _model_call(self, inps):
+        # Isn't used because we override _loglikelihood_tokens
+        raise NotImplementedError()
+
+    def _model_generate(self, context, max_length, eos_token_id):
+        # Isn't used because we override generate_until
+        raise NotImplementedError()
+
+    def loglikelihood(self, requests, disable_tqdm: bool = False):
+        # TODO: implement inference completion with loglikelihood
+        res = []
+        for req in requests:
+            res.append((-random.random(), False))
+
+        return res
+
+    def loglikelihood_rolling(self, requests, disable_tqdm: bool = False):
+        raise NotImplementedError("No support for logits.")
+
+    def generate_until(self, requests, disable_tqdm: bool = False) -> List[str]:
+        return NotImplementedError("Not implemented")
+
+
+class EleutherEvalsAdapter(Evals):
+    def __init__(self, config: EleutherEvalsImplConfig, inference_api: Inference):
+        self.inference_api = inference_api
+        self.eluther_wrapper = EleutherEvalsWrapper(inference_api)
+
+    async def initialize(self) -> None:
+        pass
+
+    async def shutdown(self) -> None:
+        pass
+
+    async def run_evals(
+        self,
+        model: str,
+        dataset: str,
+        task: str,
+    ) -> EvaluateResponse:
+        task_manager = TaskManager()
+        task_dict = get_task_dict(task, task_manager)
+        task_types = set([t.task.OUTPUT_TYPE for t in get_task_list(task_dict)])
+
+        output = evaluate(
+            self.eluther_wrapper,
+            task_dict,
+            limit=2,
+        )
+        formatted_output = lm_eval.utils.make_table(output)
+        return EvaluateResponse(
+            metrics={
+                "metrics_table": formatted_output,
+            },
+        )