diff --git a/llama_stack/providers/inline/scoring/meta_reference/__init__.py b/llama_stack/providers/inline/scoring/basic/__init__.py
similarity index 100%
rename from llama_stack/providers/inline/scoring/meta_reference/__init__.py
rename to llama_stack/providers/inline/scoring/basic/__init__.py
diff --git a/llama_stack/providers/inline/scoring/meta_reference/config.py b/llama_stack/providers/inline/scoring/basic/config.py
similarity index 100%
rename from llama_stack/providers/inline/scoring/meta_reference/config.py
rename to llama_stack/providers/inline/scoring/basic/config.py
diff --git a/llama_stack/providers/inline/scoring/meta_reference/scoring.py b/llama_stack/providers/inline/scoring/basic/scoring.py
similarity index 95%
rename from llama_stack/providers/inline/scoring/meta_reference/scoring.py
rename to llama_stack/providers/inline/scoring/basic/scoring.py
index b78379062..ff1ee6c43 100644
--- a/llama_stack/providers/inline/scoring/meta_reference/scoring.py
+++ b/llama_stack/providers/inline/scoring/basic/scoring.py
@@ -70,18 +70,18 @@ class MetaReferenceScoringImpl(Scoring, ScoringFunctionsProtocolPrivate):
         raise NotImplementedError("Register scoring function not implemented yet")
 
     async def validate_scoring_input_dataset_schema(self, dataset_id: str) -> None:
-        dataset_def = await self.datasets_api.get_dataset(dataset_identifier=dataset_id)
-        if not dataset_def.dataset_schema or len(dataset_def.dataset_schema) == 0:
+        dataset_def = await self.datasets_api.get_dataset(dataset_id=dataset_id)
+        if not dataset_def.schema or len(dataset_def.schema) == 0:
             raise ValueError(
                 f"Dataset {dataset_id} does not have a schema defined. Please define a schema for the dataset."
             )
 
         for required_column in ["generated_answer", "expected_answer", "input_query"]:
-            if required_column not in dataset_def.dataset_schema:
+            if required_column not in dataset_def.schema:
                 raise ValueError(
                     f"Dataset {dataset_id} does not have a '{required_column}' column."
                 )
-            if dataset_def.dataset_schema[required_column].type != "string":
+            if dataset_def.schema[required_column].type != "string":
                 raise ValueError(
                     f"Dataset {dataset_id} does not have a '{required_column}' column of type 'string'."
                 )
diff --git a/llama_stack/providers/inline/scoring/meta_reference/scoring_fn/__init__.py b/llama_stack/providers/inline/scoring/basic/scoring_fn/__init__.py
similarity index 100%
rename from llama_stack/providers/inline/scoring/meta_reference/scoring_fn/__init__.py
rename to llama_stack/providers/inline/scoring/basic/scoring_fn/__init__.py
diff --git a/llama_stack/providers/inline/scoring/meta_reference/scoring_fn/base_scoring_fn.py b/llama_stack/providers/inline/scoring/basic/scoring_fn/base_scoring_fn.py
similarity index 100%
rename from llama_stack/providers/inline/scoring/meta_reference/scoring_fn/base_scoring_fn.py
rename to llama_stack/providers/inline/scoring/basic/scoring_fn/base_scoring_fn.py
diff --git a/llama_stack/providers/inline/scoring/meta_reference/scoring_fn/equality_scoring_fn.py b/llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py
similarity index 100%
rename from llama_stack/providers/inline/scoring/meta_reference/scoring_fn/equality_scoring_fn.py
rename to llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py
diff --git a/llama_stack/providers/inline/scoring/meta_reference/scoring_fn/fn_defs/__init__.py b/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/__init__.py
similarity index 100%
rename from llama_stack/providers/inline/scoring/meta_reference/scoring_fn/fn_defs/__init__.py
rename to llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/__init__.py
diff --git a/llama_stack/providers/inline/scoring/meta_reference/scoring_fn/fn_defs/equality.py b/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py
similarity index 100%
rename from llama_stack/providers/inline/scoring/meta_reference/scoring_fn/fn_defs/equality.py
rename to llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py
diff --git a/llama_stack/providers/inline/scoring/meta_reference/scoring_fn/fn_defs/llm_as_judge_base.py b/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/llm_as_judge_base.py
similarity index 100%
rename from llama_stack/providers/inline/scoring/meta_reference/scoring_fn/fn_defs/llm_as_judge_base.py
rename to llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/llm_as_judge_base.py
diff --git a/llama_stack/providers/inline/scoring/meta_reference/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py b/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py
similarity index 100%
rename from llama_stack/providers/inline/scoring/meta_reference/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py
rename to llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py
diff --git a/llama_stack/providers/inline/scoring/meta_reference/scoring_fn/fn_defs/subset_of.py b/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py
similarity index 100%
rename from llama_stack/providers/inline/scoring/meta_reference/scoring_fn/fn_defs/subset_of.py
rename to llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py
diff --git a/llama_stack/providers/inline/scoring/meta_reference/scoring_fn/llm_as_judge_scoring_fn.py b/llama_stack/providers/inline/scoring/basic/scoring_fn/llm_as_judge_scoring_fn.py
similarity index 100%
rename from llama_stack/providers/inline/scoring/meta_reference/scoring_fn/llm_as_judge_scoring_fn.py
rename to llama_stack/providers/inline/scoring/basic/scoring_fn/llm_as_judge_scoring_fn.py
diff --git a/llama_stack/providers/inline/scoring/meta_reference/scoring_fn/regex_parser_scoring_fn.py b/llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py
similarity index 100%
rename from llama_stack/providers/inline/scoring/meta_reference/scoring_fn/regex_parser_scoring_fn.py
rename to llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py
diff --git a/llama_stack/providers/inline/scoring/meta_reference/scoring_fn/subset_of_scoring_fn.py b/llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py
similarity index 100%
rename from llama_stack/providers/inline/scoring/meta_reference/scoring_fn/subset_of_scoring_fn.py
rename to llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py
diff --git a/llama_stack/providers/inline/scoring/braintrust/braintrust.py b/llama_stack/providers/inline/scoring/braintrust/braintrust.py
index 9105a4978..0a21bc99c 100644
--- a/llama_stack/providers/inline/scoring/braintrust/braintrust.py
+++ b/llama_stack/providers/inline/scoring/braintrust/braintrust.py
@@ -63,18 +63,19 @@ class BraintrustScoringImpl(Scoring, ScoringFunctionsProtocolPrivate):
         )
 
     async def validate_scoring_input_dataset_schema(self, dataset_id: str) -> None:
-        dataset_def = await self.datasets_api.get_dataset(dataset_identifier=dataset_id)
-        if not dataset_def.dataset_schema or len(dataset_def.dataset_schema) == 0:
+        dataset_def = await self.datasets_api.get_dataset(dataset_id=dataset_id)
+        print(dataset_def)
+        if not dataset_def.schema or len(dataset_def.schema) == 0:
             raise ValueError(
                 f"Dataset {dataset_id} does not have a schema defined. Please define a schema for the dataset."
             )
 
         for required_column in ["generated_answer", "expected_answer", "input_query"]:
-            if required_column not in dataset_def.dataset_schema:
+            if required_column not in dataset_def.schema:
                 raise ValueError(
                     f"Dataset {dataset_id} does not have a '{required_column}' column."
                 )
-            if dataset_def.dataset_schema[required_column].type != "string":
+            if dataset_def.schema[required_column].type != "string":
                 raise ValueError(
                     f"Dataset {dataset_id} does not have a '{required_column}' column of type 'string'."
                 )
diff --git a/llama_stack/providers/registry/scoring.py b/llama_stack/providers/registry/scoring.py
index 70f43ad73..1787cac77 100644
--- a/llama_stack/providers/registry/scoring.py
+++ b/llama_stack/providers/registry/scoring.py
@@ -15,8 +15,8 @@ def available_providers() -> List[ProviderSpec]:
             api=Api.scoring,
             provider_type="meta-reference",
             pip_packages=[],
-            module="llama_stack.providers.inline.scoring.meta_reference",
-            config_class="llama_stack.providers.inline.scoring.meta_reference.MetaReferenceScoringConfig",
+            module="llama_stack.providers.inline.scoring.basic",
+            config_class="llama_stack.providers.inline.scoring.basic.MetaReferenceScoringConfig",
             api_dependencies=[
                 Api.datasetio,
                 Api.datasets,
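For context on the registry hunk above (not part of the diff): the `module` and `config_class` strings in a provider spec are dotted import paths resolved when the stack assembles providers, which is why the rename must touch `llama_stack/providers/registry/scoring.py` as well. A minimal sketch of that resolution, assuming plain `importlib` lookups; the helper name `load_config_class` is hypothetical, not llama-stack API:

    import importlib

    # Registry strings from the updated provider spec.
    module = "llama_stack.providers.inline.scoring.basic"
    config_class = "llama_stack.providers.inline.scoring.basic.MetaReferenceScoringConfig"

    def load_config_class(path: str):
        # Resolve a dotted "package.module.ClassName" string to the class object.
        module_path, _, class_name = path.rpartition(".")
        return getattr(importlib.import_module(module_path), class_name)

    provider_module = importlib.import_module(module)  # imports the renamed package
    cfg_cls = load_config_class(config_class)          # -> MetaReferenceScoringConfig

A stale `meta_reference` path left in either string would surface as a `ModuleNotFoundError` the moment the scoring provider is instantiated.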
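The two `validate_scoring_input_dataset_schema` hunks encode the same contract in both providers: a scorable dataset must define `input_query`, `expected_answer`, and `generated_answer` columns, each typed `"string"`, and the columns now live on `dataset_def.schema` rather than `dataset_def.dataset_schema`. A minimal sketch of a schema that would pass, using a hypothetical dataclass as a stand-in for the real column-type objects (only the `.type` attribute is assumed from the diff):

    from dataclasses import dataclass

    @dataclass
    class ColumnType:
        type: str

    # A schema satisfying the validation logic in both providers.
    schema = {
        "input_query": ColumnType(type="string"),
        "expected_answer": ColumnType(type="string"),
        "generated_answer": ColumnType(type="string"),
    }

    for required_column in ["generated_answer", "expected_answer", "input_query"]:
        assert required_column in schema, f"missing '{required_column}' column"
        assert schema[required_column].type == "string"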