This commit is contained in:
Xi Yan 2024-12-19 16:23:38 -08:00
parent c4af8f8aba
commit b94ab8d013

View file

@ -23,7 +23,24 @@ class ColumnName(Enum):
context = "context"
def get_expected_schema_for_scoring():
class DataSchemaValidatorMixin:
def validate_dataset_schema_for_scoring(self, dataset_schema: Dict[str, Any]):
    """Check a dataset schema against the expected scoring schemas.

    Fetches the expected schemas from ``self.get_expected_schema_for_scoring()``
    and hands them, together with ``dataset_schema``, to
    ``self.validate_dataset_schema``.

    Args:
        dataset_schema: Schema of the dataset to check.

    Returns:
        None. Any exception raised by ``self.validate_dataset_schema``
        propagates to the caller unchanged.
    """
    expected_schemas = self.get_expected_schema_for_scoring()
    self.validate_dataset_schema(dataset_schema, expected_schemas)
def validate_dataset_schema_for_eval(self, dataset_schema: Dict[str, Any]):
    """Check a dataset schema against the expected eval schemas.

    Fetches the expected schemas from ``self.get_expected_schema_for_eval()``
    and hands them, together with ``dataset_schema``, to
    ``self.validate_dataset_schema``.

    Args:
        dataset_schema: Schema of the dataset to check.

    Returns:
        None. Any exception raised by ``self.validate_dataset_schema``
        propagates to the caller unchanged.
    """
    expected_schemas = self.get_expected_schema_for_eval()
    self.validate_dataset_schema(dataset_schema, expected_schemas)
def validate_row_schema_for_scoring(self, row_schema: Dict[str, Any]):
    """Check a single row against the expected scoring schemas.

    Fetches the expected schemas from ``self.get_expected_schema_for_scoring()``
    and hands them, together with ``row_schema``, to
    ``self.validate_row_schema``.

    Args:
        row_schema: The row to check.

    Returns:
        None. Any exception raised by ``self.validate_row_schema``
        propagates to the caller unchanged.
    """
    expected_schemas = self.get_expected_schema_for_scoring()
    self.validate_row_schema(row_schema, expected_schemas)
def validate_row_schema_for_eval(self, row_schema: Dict[str, Any]):
    """Check a single row against the expected eval schemas.

    Fetches the expected schemas from ``self.get_expected_schema_for_eval()``
    and hands them, together with ``row_schema``, to
    ``self.validate_row_schema``.

    Args:
        row_schema: The row to check.

    Returns:
        None. Any exception raised by ``self.validate_row_schema``
        propagates to the caller unchanged.
    """
    expected_schemas = self.get_expected_schema_for_eval()
    self.validate_row_schema(row_schema, expected_schemas)
def get_expected_schema_for_scoring(self):
return [
{
ColumnName.input_query.value: StringType(),
@ -38,8 +55,7 @@ def get_expected_schema_for_scoring():
},
]
def get_expected_schema_for_eval():
def get_expected_schema_for_eval(self):
return [
{
ColumnName.input_query.value: StringType(),
@ -53,18 +69,20 @@ def get_expected_schema_for_eval():
},
]
def validate_dataset_schema(
    self,
    dataset_schema: Dict[str, Any],
    expected_schemas: List[Dict[str, Any]],
):
    """Ensure ``dataset_schema`` is one of the accepted schemas.

    The check is list membership, so ``dataset_schema`` must compare equal
    (``==``) to an entry of ``expected_schemas`` as a whole.

    Args:
        dataset_schema: Schema of the dataset to check.
        expected_schemas: The schemas that are considered valid.

    Raises:
        ValueError: If ``dataset_schema`` equals none of ``expected_schemas``
            (which includes the case of an empty ``expected_schemas``).
    """
    if dataset_schema in expected_schemas:
        return
    raise ValueError(
        f"Dataset does not have a correct input schema in {expected_schemas}"
    )
def validate_row_schema(
input_row: Dict[str, Any], expected_schemas: List[Dict[str, Any]]
self,
input_row: Dict[str, Any],
expected_schemas: List[Dict[str, Any]],
):
for schema in expected_schemas:
if all(key in input_row for key in schema):
@ -73,17 +91,3 @@ def validate_row_schema(
raise ValueError(
f"Input row {input_row} does not match any of the expected schemas in {expected_schemas}"
)
# Mixin exposing four convenience validators. Each one obtains a list of
# expected schemas and passes it, together with the caller-supplied schema,
# to validate_dataset_schema or validate_row_schema. In this version the
# helpers are invoked as bare names rather than through `self`.
# NOTE(review): this view also contains another class with the same name and
# declares the helpers with a leading `self` parameter -- confirm which
# variant is live before relying on this one.
class DataSchemaValidatorMixin:
# Dataset-level check against the schemas from get_expected_schema_for_scoring().
def validate_dataset_schema_for_scoring(self, dataset_schema: Dict[str, Any]):
validate_dataset_schema(dataset_schema, get_expected_schema_for_scoring())
# Dataset-level check against the schemas from get_expected_schema_for_eval().
def validate_dataset_schema_for_eval(self, dataset_schema: Dict[str, Any]):
validate_dataset_schema(dataset_schema, get_expected_schema_for_eval())
# Row-level check against the schemas from get_expected_schema_for_scoring().
def validate_row_schema_for_scoring(self, row_schema: Dict[str, Any]):
validate_row_schema(row_schema, get_expected_schema_for_scoring())
# Row-level check against the schemas from get_expected_schema_for_eval().
def validate_row_schema_for_eval(self, row_schema: Dict[str, Any]):
validate_row_schema(row_schema, get_expected_schema_for_eval())