local persistence for eval tasks (#453)

# What does this PR do?

- add local persistence for eval tasks
- follow https://github.com/meta-llama/llama-stack/pull/375

## Test Plan

1. fresh llama stack run
2. kill server
3. restart server: llama stack run

<img width="690" alt="image"
src="https://github.com/user-attachments/assets/3d76e477-b91a-43a6-86ea-8e3ef2d04ed3">

Using run.yaml
```yaml
eval_tasks:
  - eval_task_id: meta-reference-mmlu
    provider_id: meta-reference-0
    dataset_id: mmlu
    scoring_functions:
      - basic::regex_parser_multiple_choice_answer
```

## Before submitting

- [ ] This PR fixes a typo or improves the docs (you can dismiss the
other checks if that's the case).
- [ ] Ran pre-commit to handle lint / formatting issues.
- [ ] Read the [contributor
guideline](https://github.com/meta-llama/llama-stack/blob/main/CONTRIBUTING.md),
      Pull Request section?
- [ ] Updated relevant documentation.
- [ ] Wrote necessary unit or integration tests.
This commit is contained in:
Xi Yan 2024-11-14 10:36:23 -05:00 committed by GitHub
parent 46f0b6606a
commit 58381dbe78
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 28 additions and 3 deletions

View file

@ -15,10 +15,13 @@ from llama_stack.apis.eval_tasks import EvalTask
from llama_stack.apis.inference import Inference
from llama_stack.apis.scoring import Scoring
from llama_stack.providers.datatypes import EvalTasksProtocolPrivate
from llama_stack.providers.utils.kvstore import kvstore_impl
from tqdm import tqdm
from .config import MetaReferenceEvalConfig
EVAL_TASKS_PREFIX = "eval_tasks:"
class ColumnName(Enum):
input_query = "input_query"
@ -49,11 +52,25 @@ class MetaReferenceEvalImpl(Eval, EvalTasksProtocolPrivate):
self.eval_tasks = {}
async def initialize(self) -> None:
pass
self.kvstore = await kvstore_impl(self.config.kvstore)
# Load existing eval_tasks from kvstore
start_key = EVAL_TASKS_PREFIX
end_key = f"{EVAL_TASKS_PREFIX}\xff"
stored_eval_tasks = await self.kvstore.range(start_key, end_key)
for eval_task in stored_eval_tasks:
eval_task = EvalTask.model_validate_json(eval_task)
self.eval_tasks[eval_task.identifier] = eval_task
async def shutdown(self) -> None: ...
async def register_eval_task(self, task_def: EvalTask) -> None:
# Store in kvstore
key = f"{EVAL_TASKS_PREFIX}{task_def.identifier}"
await self.kvstore.set(
key=key,
value=task_def.json(),
)
self.eval_tasks[task_def.identifier] = task_def
async def validate_eval_input_dataset_schema(self, dataset_id: str) -> None: