diff --git a/llama_stack/providers/inline/eval/meta_reference/config.py b/llama_stack/providers/inline/eval/meta_reference/config.py index 1892da2a2..8538d32ad 100644 --- a/llama_stack/providers/inline/eval/meta_reference/config.py +++ b/llama_stack/providers/inline/eval/meta_reference/config.py @@ -3,7 +3,15 @@ # # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from llama_stack.apis.eval import * # noqa: F401, F403 +from llama_stack.distribution.utils.config_dirs import RUNTIME_BASE_DIR +from llama_stack.providers.utils.kvstore.config import ( + KVStoreConfig, + SqliteKVStoreConfig, +) +from pydantic import BaseModel -class MetaReferenceEvalConfig(BaseModel): ... +class MetaReferenceEvalConfig(BaseModel): + kvstore: KVStoreConfig = SqliteKVStoreConfig( + db_path=(RUNTIME_BASE_DIR / "meta_reference_eval.db").as_posix() + ) # Uses SQLite config specific to Meta Reference Eval storage diff --git a/llama_stack/providers/inline/eval/meta_reference/eval.py b/llama_stack/providers/inline/eval/meta_reference/eval.py index 35df90788..aa22ad31b 100644 --- a/llama_stack/providers/inline/eval/meta_reference/eval.py +++ b/llama_stack/providers/inline/eval/meta_reference/eval.py @@ -15,10 +15,13 @@ from llama_stack.apis.eval_tasks import EvalTask from llama_stack.apis.inference import Inference from llama_stack.apis.scoring import Scoring from llama_stack.providers.datatypes import EvalTasksProtocolPrivate +from llama_stack.providers.utils.kvstore import kvstore_impl from tqdm import tqdm from .config import MetaReferenceEvalConfig +EVAL_TASKS_PREFIX = "eval_tasks:" + class ColumnName(Enum): input_query = "input_query" @@ -49,11 +52,25 @@ class MetaReferenceEvalImpl(Eval, EvalTasksProtocolPrivate): self.eval_tasks = {} async def initialize(self) -> None: - pass + self.kvstore = await kvstore_impl(self.config.kvstore) + # Load existing eval_tasks from kvstore + start_key = EVAL_TASKS_PREFIX + end_key = f"{EVAL_TASKS_PREFIX}\xff" + stored_eval_tasks = await self.kvstore.range(start_key, end_key) + + for eval_task in stored_eval_tasks: + eval_task = EvalTask.model_validate_json(eval_task) + self.eval_tasks[eval_task.identifier] = eval_task async def shutdown(self) -> None: ... async def register_eval_task(self, task_def: EvalTask) -> None: + # Store in kvstore + key = f"{EVAL_TASKS_PREFIX}{task_def.identifier}" + await self.kvstore.set( + key=key, + value=task_def.json(), + ) self.eval_tasks[task_def.identifier] = task_def async def validate_eval_input_dataset_schema(self, dataset_id: str) -> None: