cleanup original BaseTask

Xi Yan 2024-10-13 23:30:21 -07:00
parent a25aff290e
commit 8890de7322
2 changed files with 0 additions and 93 deletions


@@ -235,47 +235,6 @@ class BaseTask(ABC):
        raise NotImplementedError()
# class BaseTask(ABC, Generic[TDatasetSample, TProcessedSample]):
#     """
#     A task represents a single evaluation benchmark, including its dataset, preprocessing, postprocessing and scoring methods.
#     Base class for all evaluation tasks. Each task needs to implement the following methods:
#         - F1: preprocess_sample(self)
#         - F2: postprocess_sample(self)
#         - F3: score_sample(self)
#     """

#     def __init__(self, *args, **kwargs) -> None:
#         super().__init__(*args, **kwargs)
#         self._name = self.__class__.__name__

#     @abstractmethod
#     def preprocess_sample(self, sample: TDatasetSample) -> TProcessedSample:
#         raise NotImplementedError()

#     @abstractmethod
#     def postprocess_sample(self, sample: TProcessedSample) -> TProcessedSample:
#         raise NotImplementedError()

#     @abstractmethod
#     def score_sample(self, sample: TProcessedSample) -> SingleEvalResult:
#         raise NotImplementedError()

#     @abstractmethod
#     def aggregate_results(self, eval_results: List[SingleEvalResult]) -> EvalResult:
#         raise NotImplementedError()

#     def preprocess(
#         self, dataset: BaseDataset[TProcessedSample]
#     ) -> List[TProcessedSample]:
#         return [self.preprocess_sample(sample) for sample in dataset]

#     def postprocess(self, generation: List[TProcessedSample]) -> List[TProcessedSample]:
#         return [self.postprocess_sample(sample) for sample in generation]

#     def score(self, postprocessed: List[TProcessedSample]) -> List[SingleEvalResult]:
#         return [self.score_sample(sample) for sample in postprocessed]
class Evals(Protocol):
    @webmethod(route="/evals/run_eval_task")
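For context on what was cleaned up: the removed BaseTask contract asked each benchmark to implement per-sample preprocess_sample, postprocess_sample, score_sample and aggregate_results hooks, with batched preprocess/postprocess/score helpers provided by the base class. The sketch below shows roughly how a concrete task could have filled in that interface; ExampleQATask, QASample, the exact-match scoring and the plain-dict result are hypothetical stand-ins (the real hooks returned SingleEvalResult / EvalResult objects), not code from this repository.

# Illustrative only: a concrete task written against the removed BaseTask-style
# interface. ExampleQATask, QASample and the field names are hypothetical and
# do not exist in the repository; they only show how the four hooks compose.
from dataclasses import dataclass, field
from typing import Dict, List


@dataclass
class QASample:  # hypothetical stand-in for TDatasetSample / TProcessedSample
    question: str
    expected: str
    preprocessed: Dict = field(default_factory=dict)
    prediction: str = ""
    score: float = 0.0


class ExampleQATask:
    """Sketch of a task implementing the removed preprocess/postprocess/score hooks."""

    def preprocess_sample(self, sample: QASample) -> QASample:
        # F1: turn the raw dataset row into chat messages for inference
        sample.preprocessed = {
            "messages": [{"role": "user", "content": sample.question}]
        }
        return sample

    def postprocess_sample(self, sample: QASample) -> QASample:
        # F2: normalize the model output before scoring
        sample.prediction = sample.prediction.strip().lower()
        return sample

    def score_sample(self, sample: QASample) -> QASample:
        # F3: exact-match scoring for this sketch (the real hook returned SingleEvalResult)
        sample.score = float(sample.prediction == sample.expected.lower())
        return sample

    def aggregate_results(self, scored: List[QASample]) -> Dict[str, float]:
        # Collapse per-sample scores into a single accuracy metric
        return {"accuracy": sum(s.score for s in scored) / max(len(scored), 1)}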


@@ -136,55 +136,3 @@ class MetaReferenceEvalsImpl(Evals):
        return EvaluateResponse(
            eval_result={},
        )
#     async def run_evals(
#         self,
#         model: str,
#         task: str,
#         dataset: Optional[str] = None,
#         eval_task_config: Optional[EvaluateTaskConfig] = None,
#     ) -> EvaluateResponse:
#         cprint(
#             f"model={model}, dataset={dataset}, task={task}, eval_task_config={eval_task_config}",
#             "red",
#         )

#         if not dataset:
#             raise ValueError("dataset must be specified for meta-reference evals")

#         dataset = DatasetRegistry.get_dataset(dataset)
#         dataset.load()

#         task_impl = TaskRegistry.get_task(task)()
#         preprocessed = task_impl.preprocess(dataset)

#         # TODO: replace w/ batch inference & async return eval job
#         generation_outputs = []
#         if eval_task_config is None:
#             eval_task_config = EvaluateTaskConfig(n_samples=len(preprocessed))
#         if eval_task_config.n_samples is None or eval_task_config.n_samples > len(
#             preprocessed
#         ):
#             eval_task_config.n_samples = len(preprocessed)

#         print(
#             f"Eval generation start, generate on {eval_task_config.n_samples} samples"
#         )

#         for sample in preprocessed[: eval_task_config.n_samples]:
#             print("generation: ", sample)
#             response = await self.inference_api.chat_completion(
#                 model=model,
#                 messages=sample.preprocessed["messages"],
#                 stream=False,
#             )
#             sample.prediction = PredictionSample(
#                 completion_message=response.completion_message.content
#             )
#             generation_outputs.append(sample)

#         postprocessed = task_impl.postprocess(generation_outputs)
#         eval_results = task_impl.score(postprocessed)
#         aggr_result = task_impl.aggregate_results(eval_results)

#         return EvaluateResponse(
#             eval_result=aggr_result,
#         )
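The removed run_evals tied those hooks into one flow: resolve the dataset and task from their registries, preprocess, generate a chat completion per sample, postprocess, score, and aggregate. Below is a rough sketch of that flow with inference stubbed out, reusing the hypothetical ExampleQATask / QASample from the earlier sketch; fake_chat_completion and run_eval_sketch are illustrative names, and the real code went through self.inference_api.chat_completion and returned an EvaluateResponse.

# Illustrative only: the end-to-end flow the removed run_evals implemented,
# with inference stubbed out. Reuses the hypothetical ExampleQATask / QASample
# from the sketch above; the real code called self.inference_api.chat_completion.
import asyncio
from typing import List, Optional


async def fake_chat_completion(messages: List[dict]) -> str:
    # stand-in for the provider's chat_completion call
    return "4"


async def run_eval_sketch(task, samples, n_samples: Optional[int] = None) -> dict:
    preprocessed = [task.preprocess_sample(s) for s in samples]

    # cap the number of samples, defaulting to the whole dataset
    if n_samples is None or n_samples > len(preprocessed):
        n_samples = len(preprocessed)

    # TODO in the removed code: replace per-sample calls with batch inference
    generated = []
    for sample in preprocessed[:n_samples]:
        sample.prediction = await fake_chat_completion(sample.preprocessed["messages"])
        generated.append(sample)

    postprocessed = [task.postprocess_sample(s) for s in generated]
    scored = [task.score_sample(s) for s in postprocessed]
    return task.aggregate_results(scored)


if __name__ == "__main__":
    samples = [QASample(question="2 + 2 = ?", expected="4")]
    print(asyncio.run(run_eval_sketch(ExampleQATask(), samples)))
    # prints {'accuracy': 1.0} with the stubbed completion above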