next_index -> next_start_index

Xi Yan 2025-03-17 14:14:05 -07:00
parent d9264a0925
commit 6a8bd19ba2
5 changed files with 22 additions and 12 deletions

View file

@@ -8160,7 +8160,7 @@
           },
           "description": "The rows in the current page."
         },
-        "next_index": {
+        "next_start_index": {
           "type": "integer",
           "description": "Index into dataset for the first row in the next page. None if there are no more rows."
         }
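
For concreteness, a response conforming to this schema might look like the sketch below, written as a Python literal (the row contents and the offset 100 are hypothetical):

    # Hypothetical IterrowsResponse payload under the renamed field.
    example_response = {
        "data": [
            {"input": "What is 2 + 2?", "expected_answer": "4"},
        ],
        # Offset of the first row of the next page; None on the last page.
        "next_start_index": 100,
    }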

View file

@@ -5557,7 +5557,7 @@ components:
               - type: array
               - type: object
           description: The rows in the current page.
-        next_index:
+        next_start_index:
           type: integer
           description: >-
             Index into dataset for the first row in the next page. None if there are

View file

@@ -18,11 +18,11 @@ class IterrowsResponse(BaseModel):
     A paginated list of rows from a dataset.
 
     :param data: The rows in the current page.
-    :param next_index: Index into dataset for the first row in the next page. None if there are no more rows.
+    :param next_start_index: Index into dataset for the first row in the next page. None if there are no more rows.
     """
 
     data: List[Dict[str, Any]]
-    next_index: Optional[int] = None
+    next_start_index: Optional[int] = None
 
 
 class DatasetStore(Protocol):
@@ -46,9 +46,11 @@ class DatasetIO(Protocol):
         :param dataset_id: The ID of the dataset to get the rows from.
         :param start_index: Index into dataset for the first row to get. Get all rows if None.
-        :param limit: The number of rows to get per page.
+        :param limit: The number of rows to get.
         """
         ...
 
     @webmethod(route="/datasetio/append-rows/{dataset_id:path}", method="POST")
-    async def append_rows(self, dataset_id: str, rows: List[Dict[str, Any]]) -> None: ...
+    async def append_rows(
+        self, dataset_id: str, rows: List[Dict[str, Any]]
+    ) -> None: ...
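
A minimal sketch of how a caller might page through a dataset against this protocol, following the renamed cursor field; `client` and `fetch_all_rows` are hypothetical names, while the `iterrows` parameters match the docstring above:

    from typing import Any, Dict, List

    async def fetch_all_rows(client, dataset_id: str, page_size: int = 100) -> List[Dict[str, Any]]:
        # Walk the dataset page by page, following next_start_index.
        rows: List[Dict[str, Any]] = []
        start_index = 0
        while start_index is not None:
            response = await client.iterrows(
                dataset_id=dataset_id,
                start_index=start_index,
                limit=page_size,
            )
            rows.extend(response.data)
            # next_start_index is None once the final page has been returned.
            start_index = response.next_start_index
        return rows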

View file

@@ -44,7 +44,9 @@ class PandasDataframeDataset:
         elif self.dataset_def.source.type == "rows":
             self.df = pandas.DataFrame(self.dataset_def.source.rows)
         else:
-            raise ValueError(f"Unsupported dataset source type: {self.dataset_def.source.type}")
+            raise ValueError(
+                f"Unsupported dataset source type: {self.dataset_def.source.type}"
+            )
 
         if self.df is None:
             raise ValueError(f"Failed to load dataset from {self.dataset_def.url}")
@@ -108,7 +110,7 @@ class LocalFSDatasetIOImpl(DatasetIO, DatasetsProtocolPrivate):
         return IterrowsResponse(
             data=rows,
-            next_index=end if end < len(dataset_impl) else None,
+            next_start_index=end if end < len(dataset_impl) else None,
         )
 
     async def append_rows(self, dataset_id: str, rows: List[Dict[str, Any]]) -> None:
@@ -117,4 +119,6 @@ class LocalFSDatasetIOImpl(DatasetIO, DatasetsProtocolPrivate):
         dataset_impl.load()
 
         new_rows_df = pandas.DataFrame(rows)
-        dataset_impl.df = pandas.concat([dataset_impl.df, new_rows_df], ignore_index=True)
+        dataset_impl.df = pandas.concat(
+            [dataset_impl.df, new_rows_df], ignore_index=True
+        )
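
The paging arithmetic in the `next_start_index=end if end < len(...)` hunk is easy to check in isolation; below is a minimal sketch using a plain Python list in place of the pandas-backed dataset (`page` is a hypothetical helper, not the provider's code):

    def page(dataset, start_index=None, limit=None):
        # Mirror the provider's slicing: default to the start, clamp the end.
        start = start_index or 0
        end = len(dataset) if limit is None else min(start + limit, len(dataset))
        rows = dataset[start:end]
        # Offset of the next page's first row, or None when the data is exhausted.
        next_start_index = end if end < len(dataset) else None
        return rows, next_start_index

    assert page(list(range(10)), start_index=0, limit=4) == ([0, 1, 2, 3], 4)
    assert page(list(range(10)), start_index=8, limit=5) == ([8, 9], None)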

View file

@@ -86,7 +86,7 @@ class HuggingfaceDatasetIOImpl(DatasetIO, DatasetsProtocolPrivate):
         return IterrowsResponse(
             data=rows,
-            next_index=end if end < len(loaded_dataset) else None,
+            next_start_index=end if end < len(loaded_dataset) else None,
         )
 
     async def append_rows(self, dataset_id: str, rows: List[Dict[str, Any]]) -> None:
@@ -98,9 +98,13 @@ class HuggingfaceDatasetIOImpl(DatasetIO, DatasetsProtocolPrivate):
         new_dataset = hf_datasets.Dataset.from_list(rows)
 
         # Concatenate the new rows with existing dataset
-        updated_dataset = hf_datasets.concatenate_datasets([loaded_dataset, new_dataset])
+        updated_dataset = hf_datasets.concatenate_datasets(
+            [loaded_dataset, new_dataset]
+        )
 
         if dataset_def.metadata.get("path", None):
             updated_dataset.push_to_hub(dataset_def.metadata["path"])
         else:
-            raise NotImplementedError("Uploading to URL-based datasets is not supported yet")
+            raise NotImplementedError(
+                "Uploading to URL-based datasets is not supported yet"
+            )
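
The append path above can be exercised standalone with the `datasets` library, since `Dataset.from_list` and `concatenate_datasets` are public APIs; a minimal sketch mirroring the concatenation step (the row contents are hypothetical, and `push_to_hub` is left out because it needs Hub credentials):

    import datasets as hf_datasets

    loaded_dataset = hf_datasets.Dataset.from_list([{"input": "1 + 1?", "output": "2"}])
    new_rows = [{"input": "2 + 2?", "output": "4"}]

    # Same shape as the provider code: build a Dataset from the new rows,
    # then concatenate it onto the already-loaded dataset.
    new_dataset = hf_datasets.Dataset.from_list(new_rows)
    updated_dataset = hf_datasets.concatenate_datasets([loaded_dataset, new_dataset])
    assert updated_dataset.num_rows == 2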