From f6340a47d13a531b6f0c1b4d45ed5cf7a9f949a0 Mon Sep 17 00:00:00 2001 From: Xi Yan Date: Thu, 24 Oct 2024 16:13:49 -0700 Subject: [PATCH] inclusion->subsetof --- .../scorer/{inclusion_scorer.py => subset_of_scorer.py} | 4 ++-- .../providers/impls/meta_reference/scoring/scoring.py | 6 +++--- llama_stack/providers/tests/eval/test_eval.py | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) rename llama_stack/providers/impls/meta_reference/scoring/scorer/{inclusion_scorer.py => subset_of_scorer.py} (95%) diff --git a/llama_stack/providers/impls/meta_reference/scoring/scorer/inclusion_scorer.py b/llama_stack/providers/impls/meta_reference/scoring/scorer/subset_of_scorer.py similarity index 95% rename from llama_stack/providers/impls/meta_reference/scoring/scorer/inclusion_scorer.py rename to llama_stack/providers/impls/meta_reference/scoring/scorer/subset_of_scorer.py index 651fbf65e..e72b5ed0f 100644 --- a/llama_stack/providers/impls/meta_reference/scoring/scorer/inclusion_scorer.py +++ b/llama_stack/providers/impls/meta_reference/scoring/scorer/subset_of_scorer.py @@ -15,13 +15,13 @@ from llama_stack.providers.impls.meta_reference.scoring.scorer.common import ( ) -class InclusionScorer(BaseScorer): +class SubsetOfScorer(BaseScorer): """ A scorer that assigns a score of 1.0 if the expected string is included in the generated string, and 0.0 otherwise. """ scoring_function_def = ScoringFunctionDef( - identifier="inclusion", + identifier="subset_of", description="Returns 1.0 if the expected is included in generated, 0.0 otherwise.", parameters=[], return_type=NumberType(), diff --git a/llama_stack/providers/impls/meta_reference/scoring/scoring.py b/llama_stack/providers/impls/meta_reference/scoring/scoring.py index eb9cc892f..05ace33b4 100644 --- a/llama_stack/providers/impls/meta_reference/scoring/scoring.py +++ b/llama_stack/providers/impls/meta_reference/scoring/scoring.py @@ -17,15 +17,15 @@ from llama_stack.providers.impls.meta_reference.scoring.scorer.equality_scorer i EqualityScorer, ) -from llama_stack.providers.impls.meta_reference.scoring.scorer.inclusion_scorer import ( - InclusionScorer, +from llama_stack.providers.impls.meta_reference.scoring.scorer.subset_of_scorer import ( + SubsetOfScorer, ) from .config import MetaReferenceScoringConfig SUPPORTED_SCORERS = [ EqualityScorer, - InclusionScorer, + SubsetOfScorer, ] SCORER_REGISTRY = {x.scoring_function_def.identifier: x for x in SUPPORTED_SCORERS} diff --git a/llama_stack/providers/tests/eval/test_eval.py b/llama_stack/providers/tests/eval/test_eval.py index e4f47f8c3..4632cdd96 100644 --- a/llama_stack/providers/tests/eval/test_eval.py +++ b/llama_stack/providers/tests/eval/test_eval.py @@ -65,7 +65,7 @@ async def test_eval(eval_settings): model="Llama3.1-8B-Instruct", sampling_params=SamplingParams(), ), - scoring_functions=["inclusion"], + scoring_functions=["subset_of"], ) assert response.job_id == "0" job_status = await eval_impl.job_status(response.job_id) @@ -76,4 +76,4 @@ async def test_eval(eval_settings): assert eval_response is not None assert len(eval_response.generations) == 5 - assert "inclusion" in eval_response.scores + assert "subset_of" in eval_response.scores