From c8f6849291eb7db098930fc81e462e8956688a44 Mon Sep 17 00:00:00 2001 From: Xi Yan Date: Mon, 14 Oct 2024 20:42:22 -0700 Subject: [PATCH] full accuracy --- llama_stack/apis/evals/client.py | 6 +++--- llama_stack/apis/evals/evals.py | 12 ------------ 2 files changed, 3 insertions(+), 15 deletions(-) diff --git a/llama_stack/apis/evals/client.py b/llama_stack/apis/evals/client.py index e7c5a475d..1db7afac1 100644 --- a/llama_stack/apis/evals/client.py +++ b/llama_stack/apis/evals/client.py @@ -99,7 +99,6 @@ async def run_main(host: str, port: int): # cprint(f"Response: {response}", "green") # Scoring Task - # 1. register huggingface dataset response = await dataset_client.create_dataset( dataset_def=HuggingfaceDatasetDef( @@ -119,7 +118,7 @@ async def run_main(host: str, port: int): response = await client.run_scorer( dataset_config=EvaluateDatasetConfig( dataset_identifier="Llama-3.1-8B-Instruct-evals__mmlu_pro__details", - row_limit=10, + # row_limit=10, ), eval_scoring_config=EvaluateScoringConfig( scorer_config_list=[ @@ -128,7 +127,8 @@ async def run_main(host: str, port: int): ), ) - cprint(response, "green") + for k, v in response.eval_result.metrics.items(): + cprint(f"{k}: {v}", "green") # Eleuther Eval Task # response = await client.run_evals( diff --git a/llama_stack/apis/evals/evals.py b/llama_stack/apis/evals/evals.py index 6a3ed8ce2..a02394ee4 100644 --- a/llama_stack/apis/evals/evals.py +++ b/llama_stack/apis/evals/evals.py @@ -49,13 +49,6 @@ class EvaluationJobStatusResponse(BaseModel): job_uuid: str -@json_schema_type -class EvaluationJobArtifactsResponse(BaseModel): - """Artifacts of a evaluation job.""" - - job_uuid: str - - @json_schema_type class EvaluationJobCreateResponse(BaseModel): """Response to create a evaluation job.""" @@ -267,8 +260,3 @@ class Evals(Protocol): # @webmethod(route="/evals/job/cancel") # def cancel_evaluation_job(self, job_uuid: str) -> None: ... - - # @webmethod(route="/evals/job/artifacts") - # def get_evaluation_job_artifacts( - # self, job_uuid: str - # ) -> EvaluationJobArtifactsResponse: ...