mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-07-29 07:14:20 +00:00
full accuracy
This commit is contained in:
parent
fcb8dea1ef
commit
c8f6849291
2 changed files with 3 additions and 15 deletions
|
@ -99,7 +99,6 @@ async def run_main(host: str, port: int):
|
|||
# cprint(f"Response: {response}", "green")
|
||||
|
||||
# Scoring Task
|
||||
|
||||
# 1. register huggingface dataset
|
||||
response = await dataset_client.create_dataset(
|
||||
dataset_def=HuggingfaceDatasetDef(
|
||||
|
@ -119,7 +118,7 @@ async def run_main(host: str, port: int):
|
|||
response = await client.run_scorer(
|
||||
dataset_config=EvaluateDatasetConfig(
|
||||
dataset_identifier="Llama-3.1-8B-Instruct-evals__mmlu_pro__details",
|
||||
row_limit=10,
|
||||
# row_limit=10,
|
||||
),
|
||||
eval_scoring_config=EvaluateScoringConfig(
|
||||
scorer_config_list=[
|
||||
|
@ -128,7 +127,8 @@ async def run_main(host: str, port: int):
|
|||
),
|
||||
)
|
||||
|
||||
cprint(response, "green")
|
||||
for k, v in response.eval_result.metrics.items():
|
||||
cprint(f"{k}: {v}", "green")
|
||||
|
||||
# Eleuther Eval Task
|
||||
# response = await client.run_evals(
|
||||
|
|
|
@ -49,13 +49,6 @@ class EvaluationJobStatusResponse(BaseModel):
|
|||
job_uuid: str
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class EvaluationJobArtifactsResponse(BaseModel):
|
||||
"""Artifacts of a evaluation job."""
|
||||
|
||||
job_uuid: str
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class EvaluationJobCreateResponse(BaseModel):
|
||||
"""Response to create a evaluation job."""
|
||||
|
@ -267,8 +260,3 @@ class Evals(Protocol):
|
|||
|
||||
# @webmethod(route="/evals/job/cancel")
|
||||
# def cancel_evaluation_job(self, job_uuid: str) -> None: ...
|
||||
|
||||
# @webmethod(route="/evals/job/artifacts")
|
||||
# def get_evaluation_job_artifacts(
|
||||
# self, job_uuid: str
|
||||
# ) -> EvaluationJobArtifactsResponse: ...
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue