forked from phoenix-oss/llama-stack-mirror
		
	change schema -> dataset_schema (#442)
# What does this PR do?

- `schema` should not be used as a field name, because it produces pydantic warnings
- change `schema` to `dataset_schema`

<img width="855" alt="image" src="https://github.com/user-attachments/assets/47cb6bb9-4be0-46a5-8701-24d24e2eaabd">

## Test Plan

```
pytest -v -s -m meta_reference_eval_together_inference_huggingface_datasetio eval/test_eval.py
```

## Before submitting

- [ ] This PR fixes a typo or improves the docs (you can dismiss the other checks if that's the case).
- [ ] Ran pre-commit to handle lint / formatting issues.
- [ ] Read the [contributor guideline](https://github.com/meta-llama/llama-stack/blob/main/CONTRIBUTING.md), Pull Request section?
- [ ] Updated relevant documentation.
- [ ] Wrote necessary unit or integration tests.
This commit is contained in:
		
							parent
							
								
									c29fa56dde
								
							
						
					
					
						commit
						d5b1202c83
					
				
					 7 changed files with 15 additions and 15 deletions
				
			
		|  | @ -60,9 +60,9 @@ class PandasDataframeDataset(BaseDataset): | |||
| 
 | ||||
|     def _validate_dataset_schema(self, df) -> pandas.DataFrame: | ||||
|         # note that we will drop any columns in dataset that are not in the schema | ||||
|         df = df[self.dataset_def.schema.keys()] | ||||
|         df = df[self.dataset_def.dataset_schema.keys()] | ||||
|         # check all columns in dataset schema are present | ||||
|         assert len(df.columns) == len(self.dataset_def.schema) | ||||
|         assert len(df.columns) == len(self.dataset_def.dataset_schema) | ||||
|         # TODO: type checking against column types in dataset schema | ||||
|         return df | ||||
| 
 | ||||
|  |  | |||
|  | @ -58,7 +58,7 @@ class MetaReferenceEvalImpl(Eval, EvalTasksProtocolPrivate): | |||
| 
 | ||||
|     async def validate_eval_input_dataset_schema(self, dataset_id: str) -> None: | ||||
|         dataset_def = await self.datasets_api.get_dataset(dataset_id=dataset_id) | ||||
|         if not dataset_def.schema or len(dataset_def.schema) == 0: | ||||
|         if not dataset_def.dataset_schema or len(dataset_def.dataset_schema) == 0: | ||||
|             raise ValueError(f"Dataset {dataset_id} does not have a schema defined.") | ||||
| 
 | ||||
|         expected_schemas = [ | ||||
|  | @ -74,7 +74,7 @@ class MetaReferenceEvalImpl(Eval, EvalTasksProtocolPrivate): | |||
|             }, | ||||
|         ] | ||||
| 
 | ||||
|         if dataset_def.schema not in expected_schemas: | ||||
|         if dataset_def.dataset_schema not in expected_schemas: | ||||
|             raise ValueError( | ||||
|                 f"Dataset {dataset_id} does not have a correct input schema in {expected_schemas}" | ||||
|             ) | ||||
|  |  | |||
|  | @ -60,17 +60,17 @@ class BasicScoringImpl(Scoring, ScoringFunctionsProtocolPrivate): | |||
| 
 | ||||
|     async def validate_scoring_input_dataset_schema(self, dataset_id: str) -> None: | ||||
|         dataset_def = await self.datasets_api.get_dataset(dataset_id=dataset_id) | ||||
|         if not dataset_def.schema or len(dataset_def.schema) == 0: | ||||
|         if not dataset_def.dataset_schema or len(dataset_def.dataset_schema) == 0: | ||||
|             raise ValueError( | ||||
|                 f"Dataset {dataset_id} does not have a schema defined. Please define a schema for the dataset." | ||||
|             ) | ||||
| 
 | ||||
|         for required_column in ["generated_answer", "expected_answer", "input_query"]: | ||||
|             if required_column not in dataset_def.schema: | ||||
|             if required_column not in dataset_def.dataset_schema: | ||||
|                 raise ValueError( | ||||
|                     f"Dataset {dataset_id} does not have a '{required_column}' column." | ||||
|                 ) | ||||
|             if dataset_def.schema[required_column].type != "string": | ||||
|             if dataset_def.dataset_schema[required_column].type != "string": | ||||
|                 raise ValueError( | ||||
|                     f"Dataset {dataset_id} does not have a '{required_column}' column of type 'string'." | ||||
|                 ) | ||||
|  |  | |||
|  | @ -64,17 +64,17 @@ class BraintrustScoringImpl(Scoring, ScoringFunctionsProtocolPrivate): | |||
| 
 | ||||
|     async def validate_scoring_input_dataset_schema(self, dataset_id: str) -> None: | ||||
|         dataset_def = await self.datasets_api.get_dataset(dataset_id=dataset_id) | ||||
|         if not dataset_def.schema or len(dataset_def.schema) == 0: | ||||
|         if not dataset_def.dataset_schema or len(dataset_def.dataset_schema) == 0: | ||||
|             raise ValueError( | ||||
|                 f"Dataset {dataset_id} does not have a schema defined. Please define a schema for the dataset." | ||||
|             ) | ||||
| 
 | ||||
|         for required_column in ["generated_answer", "expected_answer", "input_query"]: | ||||
|             if required_column not in dataset_def.schema: | ||||
|             if required_column not in dataset_def.dataset_schema: | ||||
|                 raise ValueError( | ||||
|                     f"Dataset {dataset_id} does not have a '{required_column}' column." | ||||
|                 ) | ||||
|             if dataset_def.schema[required_column].type != "string": | ||||
|             if dataset_def.dataset_schema[required_column].type != "string": | ||||
|                 raise ValueError( | ||||
|                     f"Dataset {dataset_id} does not have a '{required_column}' column of type 'string'." | ||||
|                 ) | ||||
|  |  | |||
|  | @ -67,17 +67,17 @@ class LlmAsJudgeScoringImpl(Scoring, ScoringFunctionsProtocolPrivate): | |||
| 
 | ||||
|     async def validate_scoring_input_dataset_schema(self, dataset_id: str) -> None: | ||||
|         dataset_def = await self.datasets_api.get_dataset(dataset_id=dataset_id) | ||||
|         if not dataset_def.schema or len(dataset_def.schema) == 0: | ||||
|         if not dataset_def.dataset_schema or len(dataset_def.dataset_schema) == 0: | ||||
|             raise ValueError( | ||||
|                 f"Dataset {dataset_id} does not have a schema defined. Please define a schema for the dataset." | ||||
|             ) | ||||
| 
 | ||||
|         for required_column in ["generated_answer", "expected_answer", "input_query"]: | ||||
|             if required_column not in dataset_def.schema: | ||||
|             if required_column not in dataset_def.dataset_schema: | ||||
|                 raise ValueError( | ||||
|                     f"Dataset {dataset_id} does not have a '{required_column}' column." | ||||
|                 ) | ||||
|             if dataset_def.schema[required_column].type != "string": | ||||
|             if dataset_def.dataset_schema[required_column].type != "string": | ||||
|                 raise ValueError( | ||||
|                     f"Dataset {dataset_id} does not have a '{required_column}' column of type 'string'." | ||||
|                 ) | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue