register to client

This commit is contained in:
Xi Yan 2024-11-11 11:03:01 -05:00
parent 75ccc05296
commit 8bebe3fd1f
4 changed files with 67 additions and 24 deletions

View file

@@ -3,4 +3,10 @@
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from .mmlu import mmlu # noqa: F401
# # Copyright (c) Meta Platforms, Inc. and affiliates.
# # All rights reserved.
# #
# # This source code is licensed under the terms described in the LICENSE file in
# # the root directory of this source tree.
# from .mmlu import mmlu # noqa: F401

View file

@@ -4,21 +4,27 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from llama_models.llama3.api.datatypes import URL
from llama_stack.apis.common.type_system import ChatCompletionInputType, StringType
from llama_stack.apis.datasetio import DatasetDef
# # Copyright (c) Meta Platforms, Inc. and affiliates.
# # All rights reserved.
# #
# # This source code is licensed under the terms described in the LICENSE file in
# # the root directory of this source tree.
# Pre-registered MMLU benchmark dataset, backed by the llamastack/evals
# dataset on the Hugging Face Hub (train split of evals__mmlu__details).
# NOTE(review): schema keys presumably match what the eval runner expects —
# confirm against the consumer of this definition.
_MMLU_SCHEMA = {
    "input_query": StringType(),
    "expected_answer": StringType(),
    "chat_completion_input": ChatCompletionInputType(),
}

_MMLU_METADATA = {
    "path": "llamastack/evals",
    "name": "evals__mmlu__details",
    "split": "train",
}

mmlu = DatasetDef(
    identifier="mmlu",
    url=URL(uri="https://huggingface.co/datasets/llamastack/evals"),
    dataset_schema=_MMLU_SCHEMA,
    metadata=_MMLU_METADATA,
)
# from llama_models.llama3.api.datatypes import URL
# from llama_stack.apis.common.type_system import ChatCompletionInputType, StringType
# from llama_stack.apis.datasetio import DatasetDef
# mmlu = DatasetDef(
# identifier="mmlu",
# url=URL(uri="https://huggingface.co/datasets/llamastack/evals"),
# dataset_schema={
# "input_query": StringType(),
# "expected_answer": StringType(),
# "chat_completion_input": ChatCompletionInputType(),
# },
# metadata={
# "path": "llamastack/evals",
# "name": "evals__mmlu__details",
# "split": "train",
# },
# )

View file

@@ -12,8 +12,6 @@ import datasets as hf_datasets
from llama_stack.providers.datatypes import DatasetsProtocolPrivate
from llama_stack.providers.utils.datasetio.url_utils import get_dataframe_from_url
from .benchmarks import mmlu
from .config import HuggingfaceDatasetIOConfig
@@ -37,9 +35,10 @@ class HuggingfaceDatasetIOImpl(DatasetIO, DatasetsProtocolPrivate):
self.dataset_infos = {}
async def initialize(self) -> None:
pass
# pre-registered benchmark datasets
pre_registered_datasets = [mmlu]
self.dataset_infos = {x.identifier: x for x in pre_registered_datasets}
# pre_registered_datasets = [mmlu]
# self.dataset_infos = {x.identifier: x for x in pre_registered_datasets}
async def shutdown(self) -> None: ...

View file

@@ -7,7 +7,11 @@
import pytest
from llama_models.llama3.api import SamplingParams
from llama_models.llama3.api import SamplingParams, URL
from llama_stack.apis.common.type_system import ChatCompletionInputType, StringType
from llama_stack.apis.datasetio.datasetio import DatasetDefWithProvider
from llama_stack.apis.eval.eval import (
AppEvalTaskConfig,
@@ -153,8 +157,36 @@ class Testeval:
assert len(response) > 0
if response[0].provider_id != "huggingface":
pytest.skip(
"Only huggingface provider supports pre-registered benchmarks datasets"
"Only huggingface provider supports pre-registered remote datasets"
)
# register dataset
mmlu = DatasetDefWithProvider(
identifier="mmlu",
url=URL(uri="https://huggingface.co/datasets/llamastack/evals"),
dataset_schema={
"input_query": StringType(),
"expected_answer": StringType(),
"chat_completion_input": ChatCompletionInputType(),
},
metadata={
"path": "llamastack/evals",
"name": "evals__mmlu__details",
"split": "train",
},
provider_id="",
)
await datasets_impl.register_dataset(mmlu)
# register eval task
meta_reference_mmlu = EvalTaskDefWithProvider(
identifier="meta-reference-mmlu",
dataset_id="mmlu",
scoring_functions=["meta-reference::regex_parser_multiple_choice_answer"],
provider_id="",
)
await eval_tasks_impl.register_eval_task(meta_reference_mmlu)
# list benchmarks
response = await eval_tasks_impl.list_eval_tasks()