diff --git a/llama_stack/apis/scoring/scoring.py b/llama_stack/apis/scoring/scoring.py
index ec50ecab1..de3881e89 100644
--- a/llama_stack/apis/scoring/scoring.py
+++ b/llama_stack/apis/scoring/scoring.py
@@ -14,6 +14,7 @@ from llama_stack.apis.scoring_functions import *  # noqa: F403
 
 
 ScoringResult = Dict[str, Any]
+SingleScoringResult = Dict[str, Any]
 
 
 @json_schema_type
diff --git a/llama_stack/providers/impls/meta_reference/scoring/scorer/__init__.py b/llama_stack/providers/impls/meta_reference/scoring/scorer/__init__.py
new file mode 100644
index 000000000..756f351d8
--- /dev/null
+++ b/llama_stack/providers/impls/meta_reference/scoring/scorer/__init__.py
@@ -0,0 +1,5 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
diff --git a/llama_stack/providers/impls/meta_reference/scoring/scorer/base_scorer.py b/llama_stack/providers/impls/meta_reference/scoring/scorer/base_scorer.py
new file mode 100644
index 000000000..5f35f2ddd
--- /dev/null
+++ b/llama_stack/providers/impls/meta_reference/scoring/scorer/base_scorer.py
@@ -0,0 +1,39 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+from abc import ABC, abstractmethod
+from typing import Any, Dict, List
+from llama_stack.apis.scoring_functions import *  # noqa: F401, F403
+from llama_stack.apis.scoring import *  # noqa: F401, F403
+
+
+class BaseScorer(ABC):
+    """
+    Base interface class for all meta-reference scorers.
+
+    Each scorer needs to implement the following methods:
+    - score_row(self, input_row)
+    - aggregate(self, scoring_results)
+    """
+
+    def __init__(self, *args, **kwargs) -> None:
+        super().__init__(*args, **kwargs)
+
+    def __str__(self) -> str:
+        return self.__class__.__name__
+
+    @abstractmethod
+    def score_row(self, input_row: Dict[str, Any]) -> ScoringResult:
+        """Score a single input row."""
+        raise NotImplementedError()
+
+    @abstractmethod
+    def aggregate(self, scoring_results: List[ScoringResult]) -> ScoringResult:
+        """Combine per-row results into one summary result."""
+        raise NotImplementedError()
+
+    def score(self, input_rows: List[Dict[str, Any]]) -> List[ScoringResult]:
+        """Score every row with score_row, preserving input order."""
+        return [self.score_row(input_row) for input_row in input_rows]
diff --git a/llama_stack/providers/impls/meta_reference/scoring/scorer/equality_scorer.py b/llama_stack/providers/impls/meta_reference/scoring/scorer/equality_scorer.py
new file mode 100644
index 000000000..82ece9ebf
--- /dev/null
+++ b/llama_stack/providers/impls/meta_reference/scoring/scorer/equality_scorer.py
@@ -0,0 +1,48 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from typing import Any, Dict, List
+
+from llama_stack.apis.scoring import ScoringResult
+from llama_stack.providers.impls.meta_reference.scoring.scorer.base_scorer import (
+    BaseScorer,
+)
+
+
+class EqualityScorer(BaseScorer):
+    """
+    A scorer that assigns a score of 1.0 if the input string matches the target string, and 0.0 otherwise.
+    """
+
+    def __init__(self, target: str, input_key: str = "generated_answer") -> None:
+        """
+        Initialize the EqualityScorer with a target string.
+
+        Args:
+            target (str): The target string to match against.
+            input_key (str): Key of the row field compared with the target.
+                NOTE(review): the row schema is not fixed yet; confirm the default.
+        """
+        super().__init__()
+        self.target = target
+        self.input_key = input_key
+
+    def score_row(self, input_row: Dict[str, Any]) -> ScoringResult:
+        """Score 1.0 when the row's input equals the target, else 0.0."""
+        matched = input_row.get(self.input_key) == self.target
+        return {"score": 1.0 if matched else 0.0}
+
+    def aggregate(self, scoring_results: List[ScoringResult]) -> ScoringResult:
+        """Summarize row scores as accuracy, num_correct and num_total."""
+        num_total = len(scoring_results)
+        num_correct = sum(result["score"] for result in scoring_results)
+        # Avoid dividing by zero on an empty batch.
+        accuracy = num_correct / num_total if num_total else 0.0
+        return {
+            "accuracy": accuracy,
+            "num_correct": num_correct,
+            "num_total": num_total,
+        }
diff --git a/llama_stack/providers/impls/meta_reference/scoring/scoring.py b/llama_stack/providers/impls/meta_reference/scoring/scoring.py
index 71283d97c..895d74c53 100644
--- a/llama_stack/providers/impls/meta_reference/scoring/scoring.py
+++ b/llama_stack/providers/impls/meta_reference/scoring/scoring.py
@@ -41,7 +41,9 @@ class MetaReferenceScoringImpl(Scoring, ScoringFunctionsProtocolPrivate):
         ]
 
     async def register_scoring_function(self, function_def: ScoringFunctionDef) -> None:
-        pass
+        raise NotImplementedError(
+            "Dynamically registering scoring functions is not supported"
+        )
 
     async def score_batch(
         self, dataset_id: str, scoring_functions: List[str]
@@ -51,4 +53,7 @@ class MetaReferenceScoringImpl(Scoring, ScoringFunctionsProtocolPrivate):
     async def score(
         self, input_rows: List[Dict[str, Any]], scoring_functions: List[str]
     ) -> ScoreResponse:
-        print("!!!!score")
+        print(
+            f"scoring input_rows {input_rows} on scoring_functions {scoring_functions}"
+        )
+        return ScoreResponse()