diff --git a/llama_stack/providers/impls/braintrust/scoring/scoring_fn/braintrust_scoring_fn.py b/llama_stack/providers/impls/braintrust/scoring/scoring_fn/braintrust_scoring_fn.py
index 4663886a5..fbf9e0bf8 100644
--- a/llama_stack/providers/impls/braintrust/scoring/scoring_fn/braintrust_scoring_fn.py
+++ b/llama_stack/providers/impls/braintrust/scoring/scoring_fn/braintrust_scoring_fn.py
@@ -20,6 +20,12 @@ from llama_stack.apis.scoring_functions import *  # noqa: F403
 from llama_stack.apis.common.type_system import *  # noqa: F403
 from autoevals.llm import Factuality
 from autoevals.ragas import AnswerCorrectness
+from llama_stack.providers.impls.braintrust.scoring.scoring_fn.fn_defs.answer_correctness import (
+    answer_correctness_fn_def,
+)
+from llama_stack.providers.impls.braintrust.scoring.scoring_fn.fn_defs.factuality import (
+    factuality_fn_def,
+)
 
 BRAINTRUST_FN_DEFS_PATH = Path(__file__).parent / "fn_defs"
 
@@ -36,9 +42,10 @@ class BraintrustScoringFn(BaseScoringFn):
             "braintrust::factuality": Factuality(),
             "braintrust::answer-correctness": AnswerCorrectness(),
         }
-        self.defs_paths = [
-            str(x) for x in sorted(BRAINTRUST_FN_DEFS_PATH.glob("*.json"))
-        ]
+        self.supported_fn_defs_registry = {
+            factuality_fn_def.identifier: factuality_fn_def,
+            answer_correctness_fn_def.identifier: answer_correctness_fn_def,
+        }
 
     async def score_row(
         self,
diff --git a/llama_stack/providers/impls/braintrust/scoring/scoring_fn/fn_defs/__init__.py b/llama_stack/providers/impls/braintrust/scoring/scoring_fn/fn_defs/__init__.py
new file mode 100644
index 000000000..756f351d8
--- /dev/null
+++ b/llama_stack/providers/impls/braintrust/scoring/scoring_fn/fn_defs/__init__.py
@@ -0,0 +1,5 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
diff --git a/llama_stack/providers/impls/braintrust/scoring/scoring_fn/fn_defs/answer-correctness.json b/llama_stack/providers/impls/braintrust/scoring/scoring_fn/fn_defs/answer-correctness.json
deleted file mode 100644
index 3fc2957a3..000000000
--- a/llama_stack/providers/impls/braintrust/scoring/scoring_fn/fn_defs/answer-correctness.json
+++ /dev/null
@@ -1,10 +0,0 @@
-{
-    "identifier": "braintrust::answer-correctness",
-    "description": "Test whether an output is factual, compared to an original (`expected`) value. One of Braintrust LLM basd scorer https://github.com/braintrustdata/autoevals/blob/main/py/autoevals/llm.py",
-    "metadata": {},
-    "parameters": [],
-    "return_type": {
-        "type": "number"
-    },
-    "context": null
-}
diff --git a/llama_stack/providers/impls/braintrust/scoring/scoring_fn/fn_defs/answer_correctness.py b/llama_stack/providers/impls/braintrust/scoring/scoring_fn/fn_defs/answer_correctness.py
new file mode 100644
index 000000000..ca6a46d0e
--- /dev/null
+++ b/llama_stack/providers/impls/braintrust/scoring/scoring_fn/fn_defs/answer_correctness.py
@@ -0,0 +1,16 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from llama_stack.apis.common.type_system import NumberType
+from llama_stack.apis.scoring_functions import ScoringFnDef
+
+
+answer_correctness_fn_def = ScoringFnDef(
+    identifier="braintrust::answer-correctness",
+    description="Scores how correct an output is, compared to an original (`expected`) value. One of the Braintrust LLM-based scorers: https://github.com/braintrustdata/autoevals/blob/main/py/autoevals/llm.py",
+    parameters=[],
+    return_type=NumberType(),
+)
diff --git a/llama_stack/providers/impls/braintrust/scoring/scoring_fn/fn_defs/factuality.json b/llama_stack/providers/impls/braintrust/scoring/scoring_fn/fn_defs/factuality.json
deleted file mode 100644
index 210901d6f..000000000
--- a/llama_stack/providers/impls/braintrust/scoring/scoring_fn/fn_defs/factuality.json
+++ /dev/null
@@ -1,10 +0,0 @@
-{
-    "identifier": "braintrust::factuality",
-    "description": "Test whether an output is factual, compared to an original (`expected`) value. One of Braintrust LLM basd scorer https://github.com/braintrustdata/autoevals/blob/main/py/autoevals/llm.py",
-    "metadata": {},
-    "parameters": [],
-    "return_type": {
-        "type": "number"
-    },
-    "context": null
-}
diff --git a/llama_stack/providers/impls/braintrust/scoring/scoring_fn/fn_defs/factuality.py b/llama_stack/providers/impls/braintrust/scoring/scoring_fn/fn_defs/factuality.py
new file mode 100644
index 000000000..cbf9cd01c
--- /dev/null
+++ b/llama_stack/providers/impls/braintrust/scoring/scoring_fn/fn_defs/factuality.py
@@ -0,0 +1,16 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from llama_stack.apis.common.type_system import NumberType
+from llama_stack.apis.scoring_functions import ScoringFnDef
+
+
+factuality_fn_def = ScoringFnDef(
+    identifier="braintrust::factuality",
+    description="Test whether an output is factual, compared to an original (`expected`) value. One of the Braintrust LLM-based scorers: https://github.com/braintrustdata/autoevals/blob/main/py/autoevals/llm.py",
+    parameters=[],
+    return_type=NumberType(),
+)
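Reviewer note: the minimal sketch below (not part of this diff) illustrates how the new supported_fn_defs_registry can replace the old JSON-glob loading on the consumer side. The method names get_supported_scoring_fn_defs and register_scoring_fn_def are assumptions for illustration only; the diff itself introduces just the registry attribute.

# Hypothetical sketch -- not part of this diff. Method names are assumed;
# only the supported_fn_defs_registry attribute appears in the patch above.
from typing import Dict, List

from llama_stack.apis.scoring_functions import ScoringFnDef


class BaseScoringFn:
    def __init__(self) -> None:
        # Subclasses (e.g. BraintrustScoringFn above) populate this registry
        # with ScoringFnDef objects keyed by identifier.
        self.supported_fn_defs_registry: Dict[str, ScoringFnDef] = {}

    def get_supported_scoring_fn_defs(self) -> List[ScoringFnDef]:
        # A dict lookup replaces globbing and parsing fn_defs/*.json from disk.
        return list(self.supported_fn_defs_registry.values())

    def register_scoring_fn_def(self, fn_def: ScoringFnDef) -> None:
        # Reject duplicate identifiers so registrations stay unambiguous.
        if fn_def.identifier in self.supported_fn_defs_registry:
            raise ValueError(f"Scoring fn {fn_def.identifier} already registered")
        self.supported_fn_defs_registry[fn_def.identifier] = fn_def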