diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html
index e3c81ddb9..2cec07632 100644
--- a/docs/_static/llama-stack-spec.html
+++ b/docs/_static/llama-stack-spec.html
@@ -8160,7 +8160,7 @@
             },
             "description": "The rows in the current page."
           },
-          "next_index": {
+          "next_start_index": {
            "type": "integer",
            "description": "Index into dataset for the first row in the next page. None if there are no more rows."
          }
diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml
index a3d4dbcc9..843db26df 100644
--- a/docs/_static/llama-stack-spec.yaml
+++ b/docs/_static/llama-stack-spec.yaml
@@ -5557,7 +5557,7 @@ components:
             - type: array
             - type: object
           description: The rows in the current page.
-        next_index:
+        next_start_index:
          type: integer
          description: >-
            Index into dataset for the first row in the next page. None if there are
diff --git a/llama_stack/apis/datasetio/datasetio.py b/llama_stack/apis/datasetio/datasetio.py
index 6079e5b99..b1eaffa17 100644
--- a/llama_stack/apis/datasetio/datasetio.py
+++ b/llama_stack/apis/datasetio/datasetio.py
@@ -18,11 +18,11 @@ class IterrowsResponse(BaseModel):
     A paginated list of rows from a dataset.
 
     :param data: The rows in the current page.
-    :param next_index: Index into dataset for the first row in the next page. None if there are no more rows.
+    :param next_start_index: Index into dataset for the first row in the next page. None if there are no more rows.
     """
 
     data: List[Dict[str, Any]]
-    next_index: Optional[int] = None
+    next_start_index: Optional[int] = None
 
 
 class DatasetStore(Protocol):
@@ -46,9 +46,11 @@ class DatasetIO(Protocol):
 
         :param dataset_id: The ID of the dataset to get the rows from.
         :param start_index: Index into dataset for the first row to get. Get all rows if None.
-        :param limit: The number of rows to get per page.
+        :param limit: The number of rows to get.
         """
         ...
 
     @webmethod(route="/datasetio/append-rows/{dataset_id:path}", method="POST")
-    async def append_rows(self, dataset_id: str, rows: List[Dict[str, Any]]) -> None: ...
+    async def append_rows(
+        self, dataset_id: str, rows: List[Dict[str, Any]]
+    ) -> None: ...
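For reviewers, a minimal paging sketch (not part of the patch) showing how a caller walks a dataset after the rename. It assumes the protocol method above is named `iterrows` on some `DatasetIO` implementation; the `print` call is a stand-in for real per-row handling.

# Paging sketch, assuming `datasetio` is any DatasetIO implementation.
async def consume_all_rows(datasetio, dataset_id: str) -> None:
    start_index = 0
    while start_index is not None:
        page = await datasetio.iterrows(dataset_id, start_index=start_index, limit=100)
        for row in page.data:
            print(row)  # stand-in for real per-row handling
        # next_start_index is None once the final page has been returned
        start_index = page.next_start_index
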
diff --git a/llama_stack/providers/inline/datasetio/localfs/datasetio.py b/llama_stack/providers/inline/datasetio/localfs/datasetio.py
index 3b0d01edd..958c7d387 100644
--- a/llama_stack/providers/inline/datasetio/localfs/datasetio.py
+++ b/llama_stack/providers/inline/datasetio/localfs/datasetio.py
@@ -44,7 +44,9 @@ class PandasDataframeDataset:
         elif self.dataset_def.source.type == "rows":
             self.df = pandas.DataFrame(self.dataset_def.source.rows)
         else:
-            raise ValueError(f"Unsupported dataset source type: {self.dataset_def.source.type}")
+            raise ValueError(
+                f"Unsupported dataset source type: {self.dataset_def.source.type}"
+            )
 
         if self.df is None:
             raise ValueError(f"Failed to load dataset from {self.dataset_def.url}")
@@ -108,7 +110,7 @@ class LocalFSDatasetIOImpl(DatasetIO, DatasetsProtocolPrivate):
 
         return IterrowsResponse(
             data=rows,
-            next_index=end if end < len(dataset_impl) else None,
+            next_start_index=end if end < len(dataset_impl) else None,
         )
 
     async def append_rows(self, dataset_id: str, rows: List[Dict[str, Any]]) -> None:
@@ -117,4 +119,6 @@
         dataset_impl.load()
 
         new_rows_df = pandas.DataFrame(rows)
-        dataset_impl.df = pandas.concat([dataset_impl.df, new_rows_df], ignore_index=True)
+        dataset_impl.df = pandas.concat(
+            [dataset_impl.df, new_rows_df], ignore_index=True
+        )
diff --git a/llama_stack/providers/remote/datasetio/huggingface/huggingface.py b/llama_stack/providers/remote/datasetio/huggingface/huggingface.py
index 41ce747f7..db6edbce3 100644
--- a/llama_stack/providers/remote/datasetio/huggingface/huggingface.py
+++ b/llama_stack/providers/remote/datasetio/huggingface/huggingface.py
@@ -86,7 +86,7 @@ class HuggingfaceDatasetIOImpl(DatasetIO, DatasetsProtocolPrivate):
 
         return IterrowsResponse(
             data=rows,
-            next_index=end if end < len(loaded_dataset) else None,
+            next_start_index=end if end < len(loaded_dataset) else None,
         )
 
     async def append_rows(self, dataset_id: str, rows: List[Dict[str, Any]]) -> None:
@@ -98,9 +98,13 @@
         new_dataset = hf_datasets.Dataset.from_list(rows)
 
         # Concatenate the new rows with existing dataset
-        updated_dataset = hf_datasets.concatenate_datasets([loaded_dataset, new_dataset])
+        updated_dataset = hf_datasets.concatenate_datasets(
+            [loaded_dataset, new_dataset]
+        )
 
         if dataset_def.metadata.get("path", None):
             updated_dataset.push_to_hub(dataset_def.metadata["path"])
         else:
-            raise NotImplementedError("Uploading to URL-based datasets is not supported yet")
+            raise NotImplementedError(
+                "Uploading to URL-based datasets is not supported yet"
+            )
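Both providers compute the renamed field the same way: slice [start, end) out of the loaded dataset and report `end` as `next_start_index` only while rows remain. A standalone sketch of that arithmetic, with illustrative names not taken from the codebase, and treating a missing or negative limit as "everything from start onward" (an assumption; the patch does not show that branch):

from typing import Any, Dict, List, Optional, Tuple

def paginate(
    all_rows: List[Dict[str, Any]],
    start_index: Optional[int],
    limit: Optional[int],
) -> Tuple[List[Dict[str, Any]], Optional[int]]:
    start = start_index or 0
    # Assumed behavior: None/negative limit fetches all remaining rows.
    end = len(all_rows) if limit is None or limit < 0 else min(start + limit, len(all_rows))
    # next_start_index is reported only while rows remain past this page.
    return all_rows[start:end], (end if end < len(all_rows) else None)

With ten rows and limit=4, successive calls yield next_start_index values of 4, 8, and finally None, which is the stop condition callers test for.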