Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-10-16 06:53:47 +00:00)
commit 51c20f9c29 (parent 97dcd5704c)

    api refactor

8 changed files with 64 additions and 59 deletions
@@ -38,14 +38,16 @@ EvalCandidate = Annotated[

 @json_schema_type
 class BenchmarkEvalTaskConfig(BaseModel):
     type: Literal["benchmark"] = "benchmark"
     eval_candidate: EvalCandidate


 @json_schema_type
 class AppEvalTaskConfig(BaseModel):
     type: Literal["app"] = "app"
     eval_candidate: EvalCandidate
     scoring_params: Dict[str, ScoringFnParams] = Field(
-        description="Map between scoring function id and parameters",
+        description="Map between scoring function id and parameters for each scoring function you want to run",
         default_factory=dict,
     )
     # we could optionally add any specific dataset config here
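For context, here is a minimal, self-contained sketch of how a caller might construct the AppEvalTaskConfig after this change, with scoring_params keyed by scoring function id. The EvalCandidate and ScoringFnParams stubs and the scoring function id are placeholders for illustration only; the real models are defined elsewhere in the API.

```python
from typing import Dict, Literal

from pydantic import BaseModel, Field


class EvalCandidate(BaseModel):
    # Hypothetical stub; the real EvalCandidate is the Annotated union referenced above.
    model: str


class ScoringFnParams(BaseModel):
    # Hypothetical stub; the real params model lives in the scoring_functions API.
    judge_model: str = "example-judge"


class AppEvalTaskConfig(BaseModel):
    type: Literal["app"] = "app"
    eval_candidate: EvalCandidate
    scoring_params: Dict[str, ScoringFnParams] = Field(
        description="Map between scoring function id and parameters for each scoring function you want to run",
        default_factory=dict,
    )


# Only scoring functions listed here get explicit parameters; others fall back to their defaults.
config = AppEvalTaskConfig(
    eval_candidate=EvalCandidate(model="example-model"),
    scoring_params={"example::llm_judge": ScoringFnParams(judge_model="example-judge")},
)
```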
@@ -64,18 +66,18 @@ class EvaluateResponse(BaseModel):


 class Eval(Protocol):
-    @webmethod(route="/eval/run_benchmark_eval", method="POST")
-    async def run_benchmark_eval(
+    @webmethod(route="/eval/run_benchmark", method="POST")
+    async def run_benchmark(
         self,
         benchmark_id: str,
-        eval_task_config: BenchmarkEvalTaskConfig,
+        benchmark_config: BenchmarkEvalTaskConfig,
     ) -> Job: ...

     @webmethod(route="/eval/run_eval", method="POST")
     async def run_eval(
         self,
-        eval_task_def: EvalTaskDef,
-        eval_task_config: EvalTaskConfig,
+        task: EvalTaskDef,
+        task_config: AppEvalTaskConfig,
     ) -> Job: ...

     @webmethod(route="/eval/evaluate_rows", method="POST")
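To make the renames concrete, a rough caller-side sketch against the refactored Eval protocol is shown below. The eval_api object, benchmark id, and config/task values are assumed to come from elsewhere; none of them are part of this commit.

```python
# Sketch only: eval_api is any object implementing the Eval protocol above,
# and the config objects are assumed to be constructed elsewhere.
async def launch_evals(eval_api, task, benchmark_config, app_config):
    # /eval/run_benchmark (previously /eval/run_benchmark_eval); the config
    # argument is now named benchmark_config instead of eval_task_config.
    benchmark_job = await eval_api.run_benchmark(
        benchmark_id="example-benchmark",  # placeholder benchmark id
        benchmark_config=benchmark_config,
    )

    # /eval/run_eval: eval_task_def -> task, eval_task_config -> task_config,
    # and the config type is narrowed to AppEvalTaskConfig.
    app_job = await eval_api.run_eval(task=task, task_config=app_config)
    return benchmark_job, app_job
```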
@@ -48,8 +48,7 @@ class Scoring(Protocol):
     async def score_batch(
         self,
         dataset_id: str,
-        scoring_functions: List[str],
-        scoring_params: Optional[Dict[str, ScoringFnParams]] = None,
+        scoring_functions: Optional[Dict[str, ScoringFnParams]] = None,
         save_results_dataset: bool = False,
     ) -> ScoreBatchResponse: ...

@@ -57,6 +56,5 @@ class Scoring(Protocol):
     async def score(
         self,
         input_rows: List[Dict[str, Any]],
-        scoring_functions: List[str],
-        scoring_params: Optional[Dict[str, ScoringFnParams]] = None,
+        scoring_functions: Optional[Dict[str, ScoringFnParams]] = None,
     ) -> ScoreResponse: ...
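The same collapse applies to both score_batch and score: the scoring_functions list plus the separate scoring_params dict become a single dict mapping scoring function id to its ScoringFnParams. A hedged caller-side sketch, with the scoring_api object, ids, rows, and params all placeholders:

```python
# Sketch only: scoring_api is any object implementing the Scoring protocol above;
# equality_params / judge_params are ScoringFnParams instances built elsewhere.
async def score_examples(scoring_api, equality_params, judge_params):
    rows = [{"input_query": "2 + 2?", "generated_answer": "4", "expected_answer": "4"}]

    # Before: scoring_functions=["fn_a", "fn_b"], scoring_params={"fn_b": params}
    # After:  one dict keyed by scoring function id, valued by that function's params.
    response = await scoring_api.score(
        input_rows=rows,
        scoring_functions={
            "example::equality": equality_params,
            "example::llm_judge": judge_params,
        },
    )
    return response
```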
|
@ -76,7 +76,7 @@ class ScoringFnDef(BaseModel):
|
|||
description="The return type of the deterministic function",
|
||||
)
|
||||
params: Optional[ScoringFnParams] = Field( # type: ignore
|
||||
description="The parameters for the scoring function for benchmark eval, we could override this for app eval",
|
||||
description="The parameters for the scoring function for benchmark eval, these can be overridden for app eval",
|
||||
default=None,
|
||||
)
|
||||
# We can optionally add information here to support packaging of code, etc.
|
||||
|
|
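Per the updated description, the params on a ScoringFnDef act as the scoring function's benchmark-eval defaults, and an app eval can override them per run via AppEvalTaskConfig.scoring_params. A toy illustration of that precedence, with every name and field made up:

```python
# Toy illustration only: defaults declared on the scoring function definition,
# overridden per run by the app eval config; keys and fields are hypothetical.
scoring_fn_defaults = {"example::llm_judge": {"judge_model": "judge-v1"}}
app_eval_overrides = {"example::llm_judge": {"judge_model": "judge-v2"}}

effective_params = {**scoring_fn_defaults, **app_eval_overrides}
print(effective_params)  # the app eval's value wins: judge-v2
```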