full accuracy

This commit is contained in:
Xi Yan 2024-10-14 20:42:22 -07:00
parent fcb8dea1ef
commit c8f6849291
2 changed files with 3 additions and 15 deletions

View file

@ -99,7 +99,6 @@ async def run_main(host: str, port: int):
# cprint(f"Response: {response}", "green")
# Scoring Task
# 1. register huggingface dataset
response = await dataset_client.create_dataset(
dataset_def=HuggingfaceDatasetDef(
@ -119,7 +118,7 @@ async def run_main(host: str, port: int):
response = await client.run_scorer(
dataset_config=EvaluateDatasetConfig(
dataset_identifier="Llama-3.1-8B-Instruct-evals__mmlu_pro__details",
row_limit=10,
# row_limit=10,
),
eval_scoring_config=EvaluateScoringConfig(
scorer_config_list=[
@ -128,7 +127,8 @@ async def run_main(host: str, port: int):
),
)
cprint(response, "green")
for k, v in response.eval_result.metrics.items():
cprint(f"{k}: {v}", "green")
# Eleuther Eval Task
# response = await client.run_evals(

View file

@ -49,13 +49,6 @@ class EvaluationJobStatusResponse(BaseModel):
job_uuid: str
@json_schema_type
class EvaluationJobArtifactsResponse(BaseModel):
"""Artifacts of an evaluation job."""
job_uuid: str
@json_schema_type
class EvaluationJobCreateResponse(BaseModel):
"""Response to create an evaluation job."""
@ -267,8 +260,3 @@ class Evals(Protocol):
# @webmethod(route="/evals/job/cancel")
# def cancel_evaluation_job(self, job_uuid: str) -> None: ...
# @webmethod(route="/evals/job/artifacts")
# def get_evaluation_job_artifacts(
# self, job_uuid: str
# ) -> EvaluationJobArtifactsResponse: ...