api refactor

This commit is contained in:
Xi Yan 2024-11-07 13:54:26 -08:00
parent 97dcd5704c
commit 51c20f9c29
8 changed files with 64 additions and 59 deletions

View file

@ -38,14 +38,16 @@ EvalCandidate = Annotated[
@json_schema_type
class BenchmarkEvalTaskConfig(BaseModel):
type: Literal["benchmark"] = "benchmark"
eval_candidate: EvalCandidate
@json_schema_type
class AppEvalTaskConfig(BaseModel):
type: Literal["app"] = "app"
eval_candidate: EvalCandidate
scoring_params: Dict[str, ScoringFnParams] = Field(
description="Map between scoring function id and parameters",
description="Map between scoring function id and parameters for each scoring function you want to run",
default_factory=dict,
)
# we could optionally add any specific dataset config here
@ -64,18 +66,18 @@ class EvaluateResponse(BaseModel):
class Eval(Protocol):
@webmethod(route="/eval/run_benchmark_eval", method="POST")
async def run_benchmark_eval(
@webmethod(route="/eval/run_benchmark", method="POST")
async def run_benchmark(
self,
benchmark_id: str,
eval_task_config: BenchmarkEvalTaskConfig,
benchmark_config: BenchmarkEvalTaskConfig,
) -> Job: ...
@webmethod(route="/eval/run_eval", method="POST")
async def run_eval(
self,
eval_task_def: EvalTaskDef,
eval_task_config: EvalTaskConfig,
task: EvalTaskDef,
task_config: AppEvalTaskConfig,
) -> Job: ...
@webmethod(route="/eval/evaluate_rows", method="POST")

View file

@ -48,8 +48,7 @@ class Scoring(Protocol):
async def score_batch(
self,
dataset_id: str,
scoring_functions: List[str],
scoring_params: Optional[Dict[str, ScoringFnParams]] = None,
scoring_functions: Optional[Dict[str, ScoringFnParams]] = None,
save_results_dataset: bool = False,
) -> ScoreBatchResponse: ...
@ -57,6 +56,5 @@ class Scoring(Protocol):
async def score(
self,
input_rows: List[Dict[str, Any]],
scoring_functions: List[str],
scoring_params: Optional[Dict[str, ScoringFnParams]] = None,
scoring_functions: Optional[Dict[str, ScoringFnParams]] = None,
) -> ScoreResponse: ...

View file

@ -76,7 +76,7 @@ class ScoringFnDef(BaseModel):
description="The return type of the deterministic function",
)
params: Optional[ScoringFnParams] = Field( # type: ignore
description="The parameters for the scoring function for benchmark eval, we could override this for app eval",
description="The parameters for the scoring function for benchmark eval, these can be overridden for app eval",
default=None,
)
# We can optionally add information here to support packaging of code, etc.