mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-08-03 17:29:01 +00:00
more scoring functions for RAG
This commit is contained in:
parent
9aa4a405ca
commit
40b8ec3185
5 changed files with 132 additions and 0 deletions
|
@ -19,6 +19,10 @@ from autoevals.ragas import (
|
||||||
AnswerCorrectness,
|
AnswerCorrectness,
|
||||||
AnswerRelevancy,
|
AnswerRelevancy,
|
||||||
AnswerSimilarity,
|
AnswerSimilarity,
|
||||||
|
ContextEntityRecall,
|
||||||
|
ContextPrecision,
|
||||||
|
ContextRecall,
|
||||||
|
ContextRelevancy,
|
||||||
Faithfulness,
|
Faithfulness,
|
||||||
)
|
)
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
|
@ -34,6 +38,10 @@ from .config import BraintrustScoringConfig
|
||||||
from .scoring_fn.fn_defs.answer_correctness import answer_correctness_fn_def
|
from .scoring_fn.fn_defs.answer_correctness import answer_correctness_fn_def
|
||||||
from .scoring_fn.fn_defs.answer_relevancy import answer_relevancy_fn_def
|
from .scoring_fn.fn_defs.answer_relevancy import answer_relevancy_fn_def
|
||||||
from .scoring_fn.fn_defs.answer_similarity import answer_similarity_fn_def
|
from .scoring_fn.fn_defs.answer_similarity import answer_similarity_fn_def
|
||||||
|
from .scoring_fn.fn_defs.context_entity_recall import context_entity_recall_fn_def
|
||||||
|
from .scoring_fn.fn_defs.context_precision import context_precision_fn_def
|
||||||
|
from .scoring_fn.fn_defs.context_recall import context_recall_fn_def
|
||||||
|
from .scoring_fn.fn_defs.context_relevancy import context_relevancy_fn_def
|
||||||
from .scoring_fn.fn_defs.factuality import factuality_fn_def
|
from .scoring_fn.fn_defs.factuality import factuality_fn_def
|
||||||
from .scoring_fn.fn_defs.faithfulness import faithfulness_fn_def
|
from .scoring_fn.fn_defs.faithfulness import faithfulness_fn_def
|
||||||
|
|
||||||
|
@ -70,6 +78,26 @@ SUPPORTED_BRAINTRUST_SCORING_FN_ENTRY = [
|
||||||
evaluator=Faithfulness(),
|
evaluator=Faithfulness(),
|
||||||
fn_def=faithfulness_fn_def,
|
fn_def=faithfulness_fn_def,
|
||||||
),
|
),
|
||||||
|
BraintrustScoringFnEntry(
|
||||||
|
identifier="braintrust::context-entity-recall",
|
||||||
|
evaluator=ContextEntityRecall(),
|
||||||
|
fn_def=context_entity_recall_fn_def,
|
||||||
|
),
|
||||||
|
BraintrustScoringFnEntry(
|
||||||
|
identifier="braintrust::context-precision",
|
||||||
|
evaluator=ContextPrecision(),
|
||||||
|
fn_def=context_precision_fn_def,
|
||||||
|
),
|
||||||
|
BraintrustScoringFnEntry(
|
||||||
|
identifier="braintrust::context-recall",
|
||||||
|
evaluator=ContextRecall(),
|
||||||
|
fn_def=context_recall_fn_def,
|
||||||
|
),
|
||||||
|
BraintrustScoringFnEntry(
|
||||||
|
identifier="braintrust::context-relevancy",
|
||||||
|
evaluator=ContextRelevancy(),
|
||||||
|
fn_def=context_relevancy_fn_def,
|
||||||
|
),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,26 @@
|
||||||
|
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from llama_stack.apis.common.type_system import NumberType
from llama_stack.apis.scoring_functions import (
    AggregationFunctionType,
    BasicScoringFnParams,
    ScoringFn,
)

# Definition of the "context entity recall" RAG scoring function, backed by the
# Braintrust autoevals `ContextEntityRecall` evaluator (see the matching
# `BraintrustScoringFnEntry` registered with identifier
# "braintrust::context-entity-recall" in the braintrust provider).
context_entity_recall_fn_def = ScoringFn(
    # Must match the identifier used when this fn_def is registered alongside
    # its evaluator in the provider's supported-entry list.
    identifier="braintrust::context-entity-recall",
    description=(
        "Evaluates how well the context captures the named entities present in the "
        "reference answer. See: github.com/braintrustdata/autoevals"
    ),
    provider_id="braintrust",
    provider_resource_id="context-entity-recall",
    # The evaluator yields a numeric score.
    return_type=NumberType(),
    # Per-row scores are combined by averaging across the dataset.
    params=BasicScoringFnParams(
        aggregation_functions=[AggregationFunctionType.average]
    ),
)
|
|
@ -0,0 +1,26 @@
|
||||||
|
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from llama_stack.apis.common.type_system import NumberType
from llama_stack.apis.scoring_functions import (
    AggregationFunctionType,
    BasicScoringFnParams,
    ScoringFn,
)

# Definition of the "context precision" RAG scoring function, backed by the
# Braintrust autoevals `ContextPrecision` evaluator (see the matching
# `BraintrustScoringFnEntry` registered with identifier
# "braintrust::context-precision" in the braintrust provider).
context_precision_fn_def = ScoringFn(
    # Must match the identifier used when this fn_def is registered alongside
    # its evaluator in the provider's supported-entry list.
    identifier="braintrust::context-precision",
    description=(
        "Measures how much of the provided context is actually relevant to answering the "
        "question. See: github.com/braintrustdata/autoevals"
    ),
    provider_id="braintrust",
    provider_resource_id="context-precision",
    # The evaluator yields a numeric score.
    return_type=NumberType(),
    # Per-row scores are combined by averaging across the dataset.
    params=BasicScoringFnParams(
        aggregation_functions=[AggregationFunctionType.average]
    ),
)
|
|
@ -0,0 +1,26 @@
|
||||||
|
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from llama_stack.apis.common.type_system import NumberType
from llama_stack.apis.scoring_functions import (
    AggregationFunctionType,
    BasicScoringFnParams,
    ScoringFn,
)

# Definition of the "context recall" RAG scoring function, backed by the
# Braintrust autoevals `ContextRecall` evaluator (see the matching
# `BraintrustScoringFnEntry` registered with identifier
# "braintrust::context-recall" in the braintrust provider).
context_recall_fn_def = ScoringFn(
    # Must match the identifier used when this fn_def is registered alongside
    # its evaluator in the provider's supported-entry list.
    identifier="braintrust::context-recall",
    description=(
        "Evaluates how well the context covers the information needed to answer the "
        "question. See: github.com/braintrustdata/autoevals"
    ),
    provider_id="braintrust",
    provider_resource_id="context-recall",
    # The evaluator yields a numeric score.
    return_type=NumberType(),
    # Per-row scores are combined by averaging across the dataset.
    params=BasicScoringFnParams(
        aggregation_functions=[AggregationFunctionType.average]
    ),
)
|
|
@ -0,0 +1,26 @@
|
||||||
|
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from llama_stack.apis.common.type_system import NumberType
from llama_stack.apis.scoring_functions import (
    AggregationFunctionType,
    BasicScoringFnParams,
    ScoringFn,
)

# Definition of the "context relevancy" RAG scoring function, backed by the
# Braintrust autoevals `ContextRelevancy` evaluator (see the matching
# `BraintrustScoringFnEntry` registered with identifier
# "braintrust::context-relevancy" in the braintrust provider).
context_relevancy_fn_def = ScoringFn(
    # Must match the identifier used when this fn_def is registered alongside
    # its evaluator in the provider's supported-entry list.
    identifier="braintrust::context-relevancy",
    description=(
        "Assesses how relevant the provided context is to the given question. "
        "See: github.com/braintrustdata/autoevals"
    ),
    provider_id="braintrust",
    provider_resource_id="context-relevancy",
    # The evaluator yields a numeric score.
    return_type=NumberType(),
    # Per-row scores are combined by averaging across the dataset.
    params=BasicScoringFnParams(
        aggregation_functions=[AggregationFunctionType.average]
    ),
)
|
Loading…
Add table
Add a link
Reference in a new issue