change schema -> dataset_schema (#442)

# What does this PR do? - `schema` should not a field w/ pydantic warnings - change `schema` to `dataset_schema` <img width="855" alt="image" src="https://github.com/user-attachments/assets/47cb6bb9-4be0-46a5-8701-24d24e2eaabd"> ## Test Plan ``` pytest -v -s -m meta_reference_eval_together_inference_huggingface_datasetio eval/test_eval.py ``` ## Before submitting - [ ] This PR fixes a typo or improves the docs (you can dismiss the other checks if that's the case). - [ ] Ran pre-commit to handle lint / formatting issues. - [ ] Read the [contributor guideline](https://github.com/meta-llama/llama-stack/blob/main/CONTRIBUTING.md), Pull Request section? - [ ] Updated relevant documentation. - [ ] Wrote necessary unit or integration tests.
2025-06-28 02:53:30 +00:00 · 2024-11-13 10:58:12 -05:00 · 2024-11-13 10:58:12 -05:00 · d5b1202c83
commit d5b1202c83
parent c29fa56dde
7 changed files with 15 additions and 15 deletions
--- a/llama_stack/apis/datasets/datasets.py
+++ b/llama_stack/apis/datasets/datasets.py
@ -17,7 +17,7 @@ from llama_stack.apis.resource import Resource, ResourceType


 class CommonDatasetFields(BaseModel):
-    schema: Dict[str, ParamType]
+    dataset_schema: Dict[str, ParamType]
    url: URL
    metadata: Dict[str, Any] = Field(
        default_factory=dict,
--- a/llama_stack/distribution/routers/routing_tables.py
+++ b/llama_stack/distribution/routers/routing_tables.py
@ -332,7 +332,7 @@ class DatasetsRoutingTable(CommonRoutingTableImpl, Datasets):
            identifier=dataset_id,
            provider_resource_id=provider_dataset_id,
            provider_id=provider_id,
-            schema=schema,
+            dataset_schema=schema,
            url=url,
            metadata=metadata,
        )
--- a/llama_stack/providers/inline/datasetio/localfs/datasetio.py
+++ b/llama_stack/providers/inline/datasetio/localfs/datasetio.py
@ -60,9 +60,9 @@ class PandasDataframeDataset(BaseDataset):

    def _validate_dataset_schema(self, df) -> pandas.DataFrame:
        # note that we will drop any columns in dataset that are not in the schema
-        df = df[self.dataset_def.schema.keys()]
+        df = df[self.dataset_def.dataset_schema.keys()]
        # check all columns in dataset schema are present
-        assert len(df.columns) == len(self.dataset_def.schema)
+        assert len(df.columns) == len(self.dataset_def.dataset_schema)
        # TODO: type checking against column types in dataset schema
        return df

--- a/llama_stack/providers/inline/eval/meta_reference/eval.py
+++ b/llama_stack/providers/inline/eval/meta_reference/eval.py
@ -58,7 +58,7 @@ class MetaReferenceEvalImpl(Eval, EvalTasksProtocolPrivate):

    async def validate_eval_input_dataset_schema(self, dataset_id: str) -> None:
        dataset_def = await self.datasets_api.get_dataset(dataset_id=dataset_id)
-        if not dataset_def.schema or len(dataset_def.schema) == 0:
+        if not dataset_def.dataset_schema or len(dataset_def.dataset_schema) == 0:
            raise ValueError(f"Dataset {dataset_id} does not have a schema defined.")

        expected_schemas = [
@ -74,7 +74,7 @@ class MetaReferenceEvalImpl(Eval, EvalTasksProtocolPrivate):
            },
        ]

-        if dataset_def.schema not in expected_schemas:
+        if dataset_def.dataset_schema not in expected_schemas:
            raise ValueError(
                f"Dataset {dataset_id} does not have a correct input schema in {expected_schemas}"
            )
--- a/llama_stack/providers/inline/scoring/basic/scoring.py
+++ b/llama_stack/providers/inline/scoring/basic/scoring.py
@ -60,17 +60,17 @@ class BasicScoringImpl(Scoring, ScoringFunctionsProtocolPrivate):

    async def validate_scoring_input_dataset_schema(self, dataset_id: str) -> None:
        dataset_def = await self.datasets_api.get_dataset(dataset_id=dataset_id)
-        if not dataset_def.schema or len(dataset_def.schema) == 0:
+        if not dataset_def.dataset_schema or len(dataset_def.dataset_schema) == 0:
            raise ValueError(
                f"Dataset {dataset_id} does not have a schema defined. Please define a schema for the dataset."
            )

        for required_column in ["generated_answer", "expected_answer", "input_query"]:
-            if required_column not in dataset_def.schema:
+            if required_column not in dataset_def.dataset_schema:
                raise ValueError(
                    f"Dataset {dataset_id} does not have a '{required_column}' column."
                )
-            if dataset_def.schema[required_column].type != "string":
+            if dataset_def.dataset_schema[required_column].type != "string":
                raise ValueError(
                    f"Dataset {dataset_id} does not have a '{required_column}' column of type 'string'."
                )
--- a/llama_stack/providers/inline/scoring/braintrust/braintrust.py
+++ b/llama_stack/providers/inline/scoring/braintrust/braintrust.py
@ -64,17 +64,17 @@ class BraintrustScoringImpl(Scoring, ScoringFunctionsProtocolPrivate):

    async def validate_scoring_input_dataset_schema(self, dataset_id: str) -> None:
        dataset_def = await self.datasets_api.get_dataset(dataset_id=dataset_id)
-        if not dataset_def.schema or len(dataset_def.schema) == 0:
+        if not dataset_def.dataset_schema or len(dataset_def.dataset_schema) == 0:
            raise ValueError(
                f"Dataset {dataset_id} does not have a schema defined. Please define a schema for the dataset."
            )

        for required_column in ["generated_answer", "expected_answer", "input_query"]:
-            if required_column not in dataset_def.schema:
+            if required_column not in dataset_def.dataset_schema:
                raise ValueError(
                    f"Dataset {dataset_id} does not have a '{required_column}' column."
                )
-            if dataset_def.schema[required_column].type != "string":
+            if dataset_def.dataset_schema[required_column].type != "string":
                raise ValueError(
                    f"Dataset {dataset_id} does not have a '{required_column}' column of type 'string'."
                )
--- a/llama_stack/providers/inline/scoring/llm_as_judge/scoring.py
+++ b/llama_stack/providers/inline/scoring/llm_as_judge/scoring.py
@ -67,17 +67,17 @@ class LlmAsJudgeScoringImpl(Scoring, ScoringFunctionsProtocolPrivate):

    async def validate_scoring_input_dataset_schema(self, dataset_id: str) -> None:
        dataset_def = await self.datasets_api.get_dataset(dataset_id=dataset_id)
-        if not dataset_def.schema or len(dataset_def.schema) == 0:
+        if not dataset_def.dataset_schema or len(dataset_def.dataset_schema) == 0:
            raise ValueError(
                f"Dataset {dataset_id} does not have a schema defined. Please define a schema for the dataset."
            )

        for required_column in ["generated_answer", "expected_answer", "input_query"]:
-            if required_column not in dataset_def.schema:
+            if required_column not in dataset_def.dataset_schema:
                raise ValueError(
                    f"Dataset {dataset_id} does not have a '{required_column}' column."
                )
-            if dataset_def.schema[required_column].type != "string":
+            if dataset_def.dataset_schema[required_column].type != "string":
                raise ValueError(
                    f"Dataset {dataset_id} does not have a '{required_column}' column of type 'string'."
                )