dataset validation

This commit is contained in:
Xi Yan 2024-10-23 12:08:39 -07:00
parent aefa84e70a
commit 7c280e18fb
4 changed files with 63 additions and 69 deletions

View file

@@ -62,10 +62,15 @@ class PandasDataframeDataset(BaseDataset):
if self.df is None:
self.load()
print(self.dataset_def.dataset_schema)
# get columns names
# columns = self.df[self.dataset_def.dataset_schema.keys()]
print(self.df.columns)
assert self.df is not None, "Dataset loading failed. Please check logs."
self.df = self.df[self.dataset_def.dataset_schema.keys()]
# check all columns in dataset schema are present
assert len(self.df.columns) == len(self.dataset_def.dataset_schema)
# check all types match
print(self.df.dtypes)
def load(self) -> None:
if self.df is not None: