mirror of https://github.com/meta-llama/llama-stack.git
cleanup original BaseTask

parent a25aff290e
commit 8890de7322

2 changed files with 0 additions and 93 deletions
@@ -136,55 +136,3 @@ class MetaReferenceEvalsImpl(Evals):
         return EvaluateResponse(
             eval_result={},
         )
-
-    # async def run_evals(
-    #     self,
-    #     model: str,
-    #     task: str,
-    #     dataset: Optional[str] = None,
-    #     eval_task_config: Optional[EvaluateTaskConfig] = None,
-    # ) -> EvaluateResponse:
-    #     cprint(
-    #         f"model={model}, dataset={dataset}, task={task}, eval_task_config={eval_task_config}",
-    #         "red",
-    #     )
-    #     if not dataset:
-    #         raise ValueError("dataset must be specified for mete-reference evals")
-
-    #     dataset = DatasetRegistry.get_dataset(dataset)
-    #     dataset.load()
-
-    #     task_impl = TaskRegistry.get_task(task)()
-    #     preprocessed = task_impl.preprocess(dataset)
-
-    #     # TODO: replace w/ batch inference & async return eval job
-    #     generation_outputs = []
-    #     if eval_task_config is None:
-    #         eval_task_config = EvaluateTaskConfig(n_samples=len(preprocessed))
-    #     if eval_task_config.n_samples is None or eval_task_config.n_samples > len(
-    #         preprocessed
-    #     ):
-    #         eval_task_config.n_samples = len(preprocessed)
-
-    #     print(
-    #         f"Eval generation start, generate on {eval_task_config.n_samples} samples"
-    #     )
-
-    #     for sample in preprocessed[: eval_task_config.n_samples]:
-    #         print("generation: ", sample)
-    #         response = await self.inference_api.chat_completion(
-    #             model=model,
-    #             messages=sample.preprocessed["messages"],
-    #             stream=False,
-    #         )
-    #         sample.prediction = PredictionSample(
-    #             completion_message=response.completion_message.content
-    #         )
-    #         generation_outputs.append(sample)
-
-    #     postprocessed = task_impl.postprocess(generation_outputs)
-    #     eval_results = task_impl.score(postprocessed)
-    #     aggr_result = task_impl.aggregate_results(eval_results)
-    #     return EvaluateResponse(
-    #         eval_result=aggr_result,
-    #     )
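
For reference, the commented-out run_evals body deleted here outlined a simple synchronous eval flow: preprocess the dataset, generate one completion per sample through the inference API, then postprocess, score, and aggregate the results. The sketch below illustrates that flow in isolation; every name in it (Sample, EvalTask, run_evals, the generate callback) is an illustrative stand-in, not llama-stack's actual interfaces.

# Minimal, self-contained sketch of the deleted flow. All types and the
# `generate` callback are hypothetical stand-ins, not the llama-stack API.
from dataclasses import dataclass
from typing import Any, Awaitable, Callable

@dataclass
class Sample:
    preprocessed: dict[str, Any]   # e.g. {"messages": [...]}
    prediction: str | None = None  # filled in after generation

@dataclass
class EvalTask:
    preprocess: Callable[[list[dict[str, Any]]], list[Sample]]
    postprocess: Callable[[list[Sample]], list[Sample]]
    score: Callable[[list[Sample]], list[float]]
    aggregate: Callable[[list[float]], dict[str, float]]

async def run_evals(
    task: EvalTask,
    dataset: list[dict[str, Any]],
    generate: Callable[[dict[str, Any]], Awaitable[str]],
    n_samples: int | None = None,
) -> dict[str, float]:
    samples = task.preprocess(dataset)
    if n_samples is None or n_samples > len(samples):
        n_samples = len(samples)
    # Sequential, one-sample-at-a-time generation; the deleted comment carried
    # a TODO to replace this with batch inference and an async eval job.
    for sample in samples[:n_samples]:
        sample.prediction = await generate(sample.preprocessed)
    scored = task.score(task.postprocess(samples[:n_samples]))
    return task.aggregate(scored)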