migrate scoring fns to resource (#422)

* fix after rebase

* remove print

---------

Co-authored-by: Dinesh Yeduguru <dineshyv@fb.com>
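For context, this change migrates scoring function definitions from the plain ScoringFnDef schema to a ScoringFn resource that carries provider routing fields. A minimal sketch of the shape the diffs below assume; the field names come straight from the hunks, while the base class and exact types are assumptions rather than the actual llama_stack definition:

# Hypothetical sketch of the ScoringFn resource the diffs below rely on.
# Field names (identifier, provider_id, provider_resource_id, params,
# return_type) appear in the hunks; the pydantic base and exact types
# are assumptions.
from typing import Any, Optional

from pydantic import BaseModel

class Resource(BaseModel):
    identifier: str            # globally unique name, e.g. "braintrust::factuality"
    provider_id: str           # provider that serves the resource
    provider_resource_id: str  # the provider's own id for it

class ScoringFn(Resource):
    description: Optional[str] = None
    params: Optional[Any] = None  # scorer-specific parameters; None when unused
    return_type: Any              # e.g. NumberType()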


@@ -48,7 +48,7 @@ class BraintrustScoringImpl(Scoring, ScoringFunctionsProtocolPrivate):
     async def shutdown(self) -> None: ...
 
-    async def list_scoring_functions(self) -> List[ScoringFnDef]:
+    async def list_scoring_functions(self) -> List[ScoringFn]:
         scoring_fn_defs_list = [x for x in self.supported_fn_defs_registry.values()]
         for f in scoring_fn_defs_list:
             assert f.identifier.startswith(
@@ -57,7 +57,7 @@ class BraintrustScoringImpl(Scoring, ScoringFunctionsProtocolPrivate):
         return scoring_fn_defs_list
 
-    async def register_scoring_function(self, function_def: ScoringFnDef) -> None:
+    async def register_scoring_function(self, scoring_fn: ScoringFn) -> None:
         raise NotImplementedError(
             "Registering scoring function not allowed for braintrust provider"
         )
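The two signatures above define the provider's read-only contract: listing returns ScoringFn resources, and registration is refused. A short, hypothetical caller-side sketch (how impl is constructed is elided; everything except the two method names is illustrative):

# Hypothetical usage sketch; only the two method signatures above come
# from the diff, the rest is illustrative.
async def inspect_provider(impl) -> None:
    # Listing now yields ScoringFn resources with provider routing info.
    for fn in await impl.list_scoring_functions():
        print(f"{fn.identifier} -> {fn.provider_id}/{fn.provider_resource_id}")

    # Braintrust ships a fixed set of scorers, so dynamic registration
    # is rejected.
    try:
        await impl.register_scoring_function(scoring_fn=None)  # placeholder argument
    except NotImplementedError as err:
        print(err)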


@@ -5,12 +5,14 @@
 # the root directory of this source tree.
 
 from llama_stack.apis.common.type_system import NumberType
-from llama_stack.apis.scoring_functions import ScoringFnDef
+from llama_stack.apis.scoring_functions import ScoringFn
 
-answer_correctness_fn_def = ScoringFnDef(
+answer_correctness_fn_def = ScoringFn(
     identifier="braintrust::answer-correctness",
     description="Test whether an output is factual, compared to an original (`expected`) value. One of the Braintrust LLM-based scorers: https://github.com/braintrustdata/autoevals/blob/main/py/autoevals/llm.py",
-    parameters=[],
+    params=None,
+    provider_id="braintrust",
+    provider_resource_id="answer-correctness",
     return_type=NumberType(),
 )
 
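A definition like the one above typically lands in the supported_fn_defs_registry that list_scoring_functions iterates over; a sketch of that wiring (the registry construction itself is an assumption):

# Hypothetical wiring: collect fn defs into the registry consumed by
# list_scoring_functions(); the dict shape is an assumption.
SUPPORTED_FN_DEFS = [answer_correctness_fn_def]  # factuality_fn_def joins it below

supported_fn_defs_registry = {fn.identifier: fn for fn in SUPPORTED_FN_DEFS}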


@@ -5,12 +5,14 @@
 # the root directory of this source tree.
 
 from llama_stack.apis.common.type_system import NumberType
-from llama_stack.apis.scoring_functions import ScoringFnDef
+from llama_stack.apis.scoring_functions import ScoringFn
 
-factuality_fn_def = ScoringFnDef(
+factuality_fn_def = ScoringFn(
     identifier="braintrust::factuality",
     description="Test whether an output is factual, compared to an original (`expected`) value. One of the Braintrust LLM-based scorers: https://github.com/braintrustdata/autoevals/blob/main/py/autoevals/llm.py",
-    parameters=[],
+    params=None,
+    provider_id="braintrust",
+    provider_resource_id="factuality",
     return_type=NumberType(),
 )
 
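The first hunk's truncated assertion checks each identifier's prefix; a small sketch of that check, with the exact prefix string assumed from the identifiers above:

# The diff truncates assert f.identifier.startswith(...); "braintrust"
# is an assumed prefix consistent with "braintrust::answer-correctness"
# and "braintrust::factuality".
for fn in (answer_correctness_fn_def, factuality_fn_def):
    assert fn.identifier.startswith("braintrust"), fn.identifier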