feat(api): (1.2/n) datasets.iterrows pagination api updates (#1656)

# What does this PR do?

- as title
- uses "cursor" pagination scheme for iterrows

[//]: # (If resolving an issue, uncomment and update the line below)
[//]: # (Closes #[issue-number])

## Test Plan

<img width="1226" alt="image" src="https://github.com/user-attachments/assets/3220eaac-7117-4d0a-b344-2bbb77a22065" />

[//]: # (## Documentation)
2025-03-15 13:58:47 -07:00 · 2025-03-15 13:58:47 -07:00 · 39f4dfbf50
commit 39f4dfbf50
parent c7d741d89e
3 changed files with 50 additions and 75 deletions
--- a/llama_stack/apis/datasetio/datasetio.py
+++ b/llama_stack/apis/datasetio/datasetio.py
@@ -13,19 +13,16 @@ from llama_stack.schema_utils import json_schema_type, webmethod


@json_schema_type
-class PaginatedRowsResult(BaseModel):
+class IterrowsResponse(BaseModel):
    """
    A paginated list of rows from a dataset.

-    :param rows: The rows in the current page.
-    :param total_count: The total number of rows in the dataset.
-    :param next_page_token: The token to get the next page of rows.
+    :param data: The rows in the current page.
+    :param next_index: Index into dataset for the first row in the next page. None if there are no more rows.
    """

-    # the rows obey the DatasetSchema for the given dataset
-    rows: List[Dict[str, Any]]
-    total_count: int
-    next_page_token: Optional[str] = None
+    data: List[Dict[str, Any]]
+    next_index: Optional[int] = None


 class DatasetStore(Protocol):
@@ -41,18 +38,18 @@ class DatasetIO(Protocol):
    async def iterrows(
        self,
        dataset_id: str,
-        rows_in_page: int,
-        page_token: Optional[str] = None,
-        filter_condition: Optional[str] = None,
-    ) -> PaginatedRowsResult:
-        """Get a paginated list of rows from a dataset.
+        start_index: Optional[int] = None,
+        limit: Optional[int] = None,
+    ) -> IterrowsResponse:
+        """Get a paginated list of rows from a dataset. Uses cursor-based pagination.

        :param dataset_id: The ID of the dataset to get the rows from.
-        :param rows_in_page: The number of rows to get per page.
-        :param page_token: The token to get the next page of rows.
-        :param filter_condition: (Optional) A condition to filter the rows by.
+        :param start_index: Index into dataset for the first row to get. Get all rows if None.
+        :param limit: The number of rows to get per page.
        """
        ...

-    @webmethod(route="/datasets/{dataset_id}/rows", method="POST")
-    async def append_rows(self, dataset_id: str, rows: List[Dict[str, Any]]) -> None: ...
+    @webmethod(route="/datasets/{dataset_id}/append-rows", method="POST")
+    async def append_rows(
+        self, dataset_id: str, rows: List[Dict[str, Any]]
+    ) -> None: ...