mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-07-29 07:14:20 +00:00
full accuracy
This commit is contained in:
parent
fcb8dea1ef
commit
c8f6849291
2 changed files with 3 additions and 15 deletions
|
@ -99,7 +99,6 @@ async def run_main(host: str, port: int):
|
||||||
# cprint(f"Response: {response}", "green")
|
# cprint(f"Response: {response}", "green")
|
||||||
|
|
||||||
# Scoring Task
|
# Scoring Task
|
||||||
|
|
||||||
# 1. register huggingface dataset
|
# 1. register huggingface dataset
|
||||||
response = await dataset_client.create_dataset(
|
response = await dataset_client.create_dataset(
|
||||||
dataset_def=HuggingfaceDatasetDef(
|
dataset_def=HuggingfaceDatasetDef(
|
||||||
|
@ -119,7 +118,7 @@ async def run_main(host: str, port: int):
|
||||||
response = await client.run_scorer(
|
response = await client.run_scorer(
|
||||||
dataset_config=EvaluateDatasetConfig(
|
dataset_config=EvaluateDatasetConfig(
|
||||||
dataset_identifier="Llama-3.1-8B-Instruct-evals__mmlu_pro__details",
|
dataset_identifier="Llama-3.1-8B-Instruct-evals__mmlu_pro__details",
|
||||||
row_limit=10,
|
# row_limit=10,
|
||||||
),
|
),
|
||||||
eval_scoring_config=EvaluateScoringConfig(
|
eval_scoring_config=EvaluateScoringConfig(
|
||||||
scorer_config_list=[
|
scorer_config_list=[
|
||||||
|
@ -128,7 +127,8 @@ async def run_main(host: str, port: int):
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
cprint(response, "green")
|
for k, v in response.eval_result.metrics.items():
|
||||||
|
cprint(f"{k}: {v}", "green")
|
||||||
|
|
||||||
# Eleuther Eval Task
|
# Eleuther Eval Task
|
||||||
# response = await client.run_evals(
|
# response = await client.run_evals(
|
||||||
|
|
|
@ -49,13 +49,6 @@ class EvaluationJobStatusResponse(BaseModel):
|
||||||
job_uuid: str
|
job_uuid: str
|
||||||
|
|
||||||
|
|
||||||
@json_schema_type
|
|
||||||
class EvaluationJobArtifactsResponse(BaseModel):
|
|
||||||
"""Artifacts of a evaluation job."""
|
|
||||||
|
|
||||||
job_uuid: str
|
|
||||||
|
|
||||||
|
|
||||||
@json_schema_type
|
@json_schema_type
|
||||||
class EvaluationJobCreateResponse(BaseModel):
|
class EvaluationJobCreateResponse(BaseModel):
|
||||||
"""Response to create a evaluation job."""
|
"""Response to create a evaluation job."""
|
||||||
|
@ -267,8 +260,3 @@ class Evals(Protocol):
|
||||||
|
|
||||||
# @webmethod(route="/evals/job/cancel")
|
# @webmethod(route="/evals/job/cancel")
|
||||||
# def cancel_evaluation_job(self, job_uuid: str) -> None: ...
|
# def cancel_evaluation_job(self, job_uuid: str) -> None: ...
|
||||||
|
|
||||||
# @webmethod(route="/evals/job/artifacts")
|
|
||||||
# def get_evaluation_job_artifacts(
|
|
||||||
# self, job_uuid: str
|
|
||||||
# ) -> EvaluationJobArtifactsResponse: ...
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue