mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-08-03 17:29:01 +00:00
more scoring functions for RAG
This commit is contained in:
parent
9aa4a405ca
commit
40b8ec3185
5 changed files with 132 additions and 0 deletions
|
@ -19,6 +19,10 @@ from autoevals.ragas import (
|
||||||
AnswerCorrectness,
|
AnswerCorrectness,
|
||||||
AnswerRelevancy,
|
AnswerRelevancy,
|
||||||
AnswerSimilarity,
|
AnswerSimilarity,
|
||||||
|
ContextEntityRecall,
|
||||||
|
ContextPrecision,
|
||||||
|
ContextRecall,
|
||||||
|
ContextRelevancy,
|
||||||
Faithfulness,
|
Faithfulness,
|
||||||
)
|
)
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
|
@ -34,6 +38,10 @@ from .config import BraintrustScoringConfig
|
||||||
from .scoring_fn.fn_defs.answer_correctness import answer_correctness_fn_def
|
from .scoring_fn.fn_defs.answer_correctness import answer_correctness_fn_def
|
||||||
from .scoring_fn.fn_defs.answer_relevancy import answer_relevancy_fn_def
|
from .scoring_fn.fn_defs.answer_relevancy import answer_relevancy_fn_def
|
||||||
from .scoring_fn.fn_defs.answer_similarity import answer_similarity_fn_def
|
from .scoring_fn.fn_defs.answer_similarity import answer_similarity_fn_def
|
||||||
|
from .scoring_fn.fn_defs.context_entity_recall import context_entity_recall_fn_def
|
||||||
|
from .scoring_fn.fn_defs.context_precision import context_precision_fn_def
|
||||||
|
from .scoring_fn.fn_defs.context_recall import context_recall_fn_def
|
||||||
|
from .scoring_fn.fn_defs.context_relevancy import context_relevancy_fn_def
|
||||||
from .scoring_fn.fn_defs.factuality import factuality_fn_def
|
from .scoring_fn.fn_defs.factuality import factuality_fn_def
|
||||||
from .scoring_fn.fn_defs.faithfulness import faithfulness_fn_def
|
from .scoring_fn.fn_defs.faithfulness import faithfulness_fn_def
|
||||||
|
|
||||||
|
@ -70,6 +78,26 @@ SUPPORTED_BRAINTRUST_SCORING_FN_ENTRY = [
|
||||||
evaluator=Faithfulness(),
|
evaluator=Faithfulness(),
|
||||||
fn_def=faithfulness_fn_def,
|
fn_def=faithfulness_fn_def,
|
||||||
),
|
),
|
||||||
|
BraintrustScoringFnEntry(
|
||||||
|
identifier="braintrust::context-entity-recall",
|
||||||
|
evaluator=ContextEntityRecall(),
|
||||||
|
fn_def=context_entity_recall_fn_def,
|
||||||
|
),
|
||||||
|
BraintrustScoringFnEntry(
|
||||||
|
identifier="braintrust::context-precision",
|
||||||
|
evaluator=ContextPrecision(),
|
||||||
|
fn_def=context_precision_fn_def,
|
||||||
|
),
|
||||||
|
BraintrustScoringFnEntry(
|
||||||
|
identifier="braintrust::context-recall",
|
||||||
|
evaluator=ContextRecall(),
|
||||||
|
fn_def=context_recall_fn_def,
|
||||||
|
),
|
||||||
|
BraintrustScoringFnEntry(
|
||||||
|
identifier="braintrust::context-relevancy",
|
||||||
|
evaluator=ContextRelevancy(),
|
||||||
|
fn_def=context_relevancy_fn_def,
|
||||||
|
),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,26 @@
|
||||||
|
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from llama_stack.apis.common.type_system import NumberType
from llama_stack.apis.scoring_functions import (
    AggregationFunctionType,
    BasicScoringFnParams,
    ScoringFn,
)

# Definition of the "context entity recall" RAG scoring function, backed by the
# Braintrust autoevals `ContextEntityRecall` evaluator (see the matching
# `BraintrustScoringFnEntry` registered with identifier
# "braintrust::context-entity-recall" in the braintrust provider).
context_entity_recall_fn_def = ScoringFn(
    # Must match the identifier used when this fn_def is registered alongside
    # its evaluator in the provider's supported-entry list.
    identifier="braintrust::context-entity-recall",
    description=(
        "Evaluates how well the context captures the named entities present in the "
        "reference answer. See: github.com/braintrustdata/autoevals"
    ),
    provider_id="braintrust",
    provider_resource_id="context-entity-recall",
    # The evaluator yields a numeric score.
    return_type=NumberType(),
    # Per-row scores are combined by averaging across the dataset.
    params=BasicScoringFnParams(
        aggregation_functions=[AggregationFunctionType.average]
    ),
)
|
|
@ -0,0 +1,26 @@
|
||||||
|
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from llama_stack.apis.common.type_system import NumberType
from llama_stack.apis.scoring_functions import (
    AggregationFunctionType,
    BasicScoringFnParams,
    ScoringFn,
)

# Definition of the "context precision" RAG scoring function, backed by the
# Braintrust autoevals `ContextPrecision` evaluator (see the matching
# `BraintrustScoringFnEntry` registered with identifier
# "braintrust::context-precision" in the braintrust provider).
context_precision_fn_def = ScoringFn(
    # Must match the identifier used when this fn_def is registered alongside
    # its evaluator in the provider's supported-entry list.
    identifier="braintrust::context-precision",
    description=(
        "Measures how much of the provided context is actually relevant to answering the "
        "question. See: github.com/braintrustdata/autoevals"
    ),
    provider_id="braintrust",
    provider_resource_id="context-precision",
    # The evaluator yields a numeric score.
    return_type=NumberType(),
    # Per-row scores are combined by averaging across the dataset.
    params=BasicScoringFnParams(
        aggregation_functions=[AggregationFunctionType.average]
    ),
)
|
|
@ -0,0 +1,26 @@
|
||||||
|
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from llama_stack.apis.common.type_system import NumberType
from llama_stack.apis.scoring_functions import (
    AggregationFunctionType,
    BasicScoringFnParams,
    ScoringFn,
)

# Definition of the "context recall" RAG scoring function, backed by the
# Braintrust autoevals `ContextRecall` evaluator (see the matching
# `BraintrustScoringFnEntry` registered with identifier
# "braintrust::context-recall" in the braintrust provider).
context_recall_fn_def = ScoringFn(
    # Must match the identifier used when this fn_def is registered alongside
    # its evaluator in the provider's supported-entry list.
    identifier="braintrust::context-recall",
    description=(
        "Evaluates how well the context covers the information needed to answer the "
        "question. See: github.com/braintrustdata/autoevals"
    ),
    provider_id="braintrust",
    provider_resource_id="context-recall",
    # The evaluator yields a numeric score.
    return_type=NumberType(),
    # Per-row scores are combined by averaging across the dataset.
    params=BasicScoringFnParams(
        aggregation_functions=[AggregationFunctionType.average]
    ),
)
|
|
@ -0,0 +1,26 @@
|
||||||
|
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from llama_stack.apis.common.type_system import NumberType
from llama_stack.apis.scoring_functions import (
    AggregationFunctionType,
    BasicScoringFnParams,
    ScoringFn,
)

# Definition of the "context relevancy" RAG scoring function, backed by the
# Braintrust autoevals `ContextRelevancy` evaluator (see the matching
# `BraintrustScoringFnEntry` registered with identifier
# "braintrust::context-relevancy" in the braintrust provider).
context_relevancy_fn_def = ScoringFn(
    # Must match the identifier used when this fn_def is registered alongside
    # its evaluator in the provider's supported-entry list.
    identifier="braintrust::context-relevancy",
    description=(
        "Assesses how relevant the provided context is to the given question. "
        "See: github.com/braintrustdata/autoevals"
    ),
    provider_id="braintrust",
    provider_resource_id="context-relevancy",
    # The evaluator yields a numeric score.
    return_type=NumberType(),
    # Per-row scores are combined by averaging across the dataset.
    params=BasicScoringFnParams(
        aggregation_functions=[AggregationFunctionType.average]
    ),
)
|
Loading…
Add table
Add a link
Reference in a new issue