forked from phoenix-oss/llama-stack-mirror
[Evals API][3/n] scoring_functions / scoring meta-reference implementations (#296)
* wip * dataset validation * test_scoring * cleanup * clean up test * comments * error checking * dataset client * test client: * datasetio client * clean up * basic scoring function works * scorer wip * equality scorer * score batch impl * score batch * update scoring test * refactor * validate scorer input * address comments * add all rows scores to ScoringResult * bugfix * scoring function def rename
This commit is contained in:
parent
e70420a06e
commit
cb84034567
28 changed files with 904 additions and 51 deletions
|
@ -13,18 +13,27 @@ from llama_models.llama3.api.datatypes import * # noqa: F403
|
|||
from llama_stack.apis.scoring_functions import * # noqa: F403
|
||||
|
||||
|
||||
ScoringResult = Dict[str, Any]
|
||||
# mapping of metric to value
|
||||
ScoringResultRow = Dict[str, Any]
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class ScoringResult(BaseModel):
|
||||
score_rows: List[ScoringResultRow]
|
||||
# aggregated metrics to value
|
||||
aggregated_results: Dict[str, Any]
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class ScoreBatchResponse(BaseModel):
|
||||
dataset_id: str
|
||||
dataset_id: Optional[str] = None
|
||||
results: Dict[str, ScoringResult]
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class ScoreResponse(BaseModel):
|
||||
# each key in the dict is a scoring function name
|
||||
results: List[Dict[str, ScoringResult]]
|
||||
results: Dict[str, ScoringResult]
|
||||
|
||||
|
||||
class ScoringFunctionStore(Protocol):
|
||||
|
@ -37,7 +46,10 @@ class Scoring(Protocol):
|
|||
|
||||
@webmethod(route="/scoring/score_batch")
|
||||
async def score_batch(
|
||||
self, dataset_id: str, scoring_functions: List[str]
|
||||
self,
|
||||
dataset_id: str,
|
||||
scoring_functions: List[str],
|
||||
save_results_dataset: bool = False,
|
||||
) -> ScoreBatchResponse: ...
|
||||
|
||||
@webmethod(route="/scoring/score")
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue