# What does this PR do?

To make the migration easier, delete the existing `eval/scoring/scoring_function` APIs. This leaves a number of broken implementations behind; the cleanup sequence is:

1. Migrate the benchmark graders.
2. Clean up the existing scoring functions.

This PR also adds a skeleton evaluation implementation to make the tests pass.

## Test Plan

Tested in the follow-up PRs.

[//]: # (## Documentation)
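The skeleton in question is the meta-reference evaluation provider reproduced below: the lifecycle hooks and benchmark registration are no-ops, and every `Evaluation` entry point raises `NotImplementedError` until the graders are migrated.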
```python
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from llama_stack.apis.agents import Agents
from llama_stack.apis.datasetio import DatasetIO
from llama_stack.apis.datasets import Datasets
from llama_stack.apis.inference import Inference
from llama_stack.providers.datatypes import BenchmarksProtocolPrivate

from .....apis.benchmarks import Benchmark
from .....apis.evaluation.evaluation import (
    Evaluation,
    EvaluationCandidate,
    EvaluationJob,
    EvaluationResponse,
    EvaluationTask,
)
from .config import MetaReferenceEvaluationConfig

EVAL_TASKS_PREFIX = "benchmarks:"


class MetaReferenceEvaluationImpl(
    Evaluation,
    BenchmarksProtocolPrivate,
):
    # Skeleton implementation: holds on to its API dependencies but does not
    # implement any evaluation logic yet.
    def __init__(
        self,
        config: MetaReferenceEvaluationConfig,
        datasetio_api: DatasetIO,
        datasets_api: Datasets,
        inference_api: Inference,
        agents_api: Agents,
    ) -> None:
        self.config = config
        self.datasetio_api = datasetio_api
        self.datasets_api = datasets_api
        self.inference_api = inference_api
        self.agents_api = agents_api

    # Lifecycle hooks and benchmark registration are no-ops for now.
    async def initialize(self) -> None:
        pass

    async def shutdown(self) -> None:
        pass

    async def register_benchmark(self, benchmark: Benchmark) -> None:
        pass

    # The actual evaluation entry points are stubbed out until the benchmark
    # graders are migrated.
    async def run(
        self,
        task: EvaluationTask,
        candidate: EvaluationCandidate,
    ) -> EvaluationJob:
        raise NotImplementedError("Run is not implemented yet")

    async def run_sync(
        self,
        task: EvaluationTask,
        candidate: EvaluationCandidate,
    ) -> EvaluationResponse:
        raise NotImplementedError("Run sync is not implemented yet")

    async def grade(self, task: EvaluationTask) -> EvaluationJob:
        raise NotImplementedError("Grade is not implemented yet")

    async def grade_sync(self, task: EvaluationTask) -> EvaluationResponse:
        raise NotImplementedError("Grade sync is not implemented yet")
```
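For orientation, here is a minimal sketch of how an inline llama-stack provider like this one is typically wired up. The `get_provider_impl` entry point, the `deps` mapping keyed by the `Api` enum, and the `.evaluation` module name are assumptions based on the convention other inline providers follow, not part of this diff:

```python
# Hypothetical wiring sketch -- mirrors the inline-provider convention used
# elsewhere in llama-stack; the entry-point name, deps key type, and the
# `.evaluation` module name are assumptions, not part of this PR.
from llama_stack.providers.datatypes import Api

from .config import MetaReferenceEvaluationConfig
from .evaluation import MetaReferenceEvaluationImpl


async def get_provider_impl(config: MetaReferenceEvaluationConfig, deps):
    # deps maps each declared dependency to its resolved implementation.
    impl = MetaReferenceEvaluationImpl(
        config,
        datasetio_api=deps[Api.datasetio],
        datasets_api=deps[Api.datasets],
        inference_api=deps[Api.inference],
        agents_api=deps[Api.agents],
    )
    await impl.initialize()
    return impl
```

Until the graders are migrated, any call that reaches `run`, `run_sync`, `grade`, or `grade_sync` on the returned impl raises `NotImplementedError`; only registration and the lifecycle hooks succeed.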