[Evals API][3/n] scoring_functions / scoring meta-reference implementations (#296)

* wip * dataset validation * test_scoring * cleanup * clean up test * comments * error checking * dataset client * test client: * datasetio client * clean up * basic scoring function works * scorer wip * equality scorer * score batch impl * score batch * update scoring test * refactor * validate scorer input * address comments * add all rows scores to ScoringResult * bugfix * scoring function def rename
2025-10-04 12:07:34 +00:00 · 2024-10-24 14:52:30 -07:00 · 2024-10-24 14:52:30 -07:00 · cb84034567
commit cb84034567
parent e70420a06e
28 changed files with 904 additions and 51 deletions
--- a/llama_stack/distribution/resolver.py
+++ b/llama_stack/distribution/resolver.py
@@ -20,6 +20,8 @@ from llama_stack.apis.memory import Memory
 from llama_stack.apis.memory_banks import MemoryBanks
 from llama_stack.apis.models import Models
 from llama_stack.apis.safety import Safety
+from llama_stack.apis.scoring import Scoring
+from llama_stack.apis.scoring_functions import ScoringFunctions
 from llama_stack.apis.shields import Shields
 from llama_stack.apis.telemetry import Telemetry
 from llama_stack.distribution.distribution import (
@@ -42,6 +44,8 @@ def api_protocol_map() -> Dict[Api, Any]:
        Api.telemetry: Telemetry,
        Api.datasets: Datasets,
        Api.datasetio: DatasetIO,
+        Api.scoring_functions: ScoringFunctions,
+        Api.scoring: Scoring,
    }