feat(api): (1.2/n) datasets.iterrows pagination api updates (#1656)

# What does this PR do?

- as title
- uses "cursor" pagination scheme for iterrows

[//]: # (If resolving an issue, uncomment and update the line below)
[//]: # (Closes #[issue-number])

## Test Plan

<img width="1226" alt="image" src="https://github.com/user-attachments/assets/3220eaac-7117-4d0a-b344-2bbb77a22065" />

[//]: # (## Documentation)
2025-03-15 13:58:47 -07:00 · 2025-03-15 13:58:47 -07:00 · 39f4dfbf50
commit 39f4dfbf50
parent c7d741d89e
3 changed files with 50 additions and 75 deletions
--- a/docs/_static/llama-stack-spec.html
+++ b/docs/_static/llama-stack-spec.html
@ -40,7 +40,7 @@
        }
    ],
    "paths": {
-        "/v1/datasets/{dataset_id}/rows": {
+        "/v1/datasets/{dataset_id}/append-rows": {
            "post": {
                "responses": {
                    "200": {
@ -2185,7 +2185,7 @@
                        "content": {
                            "application/json": {
                                "schema": {
-                                    "$ref": "#/components/schemas/PaginatedRowsResult"
+                                    "$ref": "#/components/schemas/IterrowsResponse"
                                }
                            }
                        }
@ -2206,7 +2206,7 @@
                "tags": [
                    "DatasetIO"
                ],
-                "description": "Get a paginated list of rows from a dataset.",
+                "description": "Get a paginated list of rows from a dataset. Uses cursor-based pagination.",
                "parameters": [
                    {
                        "name": "dataset_id",
@ -2218,30 +2218,21 @@
                        }
                    },
                    {
-                        "name": "rows_in_page",
+                        "name": "start_index",
                        "in": "query",
-                        "description": "The number of rows to get per page.",
+                        "description": "Index into dataset for the first row to get. Get all rows if None.",
-                        "required": true,
+                        "required": false,
                        "schema": {
                            "type": "integer"
                        }
                    },
                    {
-                        "name": "page_token",
+                        "name": "limit",
                        "in": "query",
-                        "description": "The token to get the next page of rows.",
+                        "description": "The number of rows to get per page.",
                        "required": false,
                        "schema": {
-                            "type": "string"
+                            "type": "integer"
                        }
                    },
                    {
                        "name": "filter_condition",
                        "in": "query",
                        "description": "(Optional) A condition to filter the rows by.",
                        "required": false,
                        "schema": {
                            "type": "string"
                        }
                    }
                ]
@ -8137,10 +8128,10 @@
                ],
                "title": "ToolInvocationResult"
            },
-            "PaginatedRowsResult": {
+            "IterrowsResponse": {
                "type": "object",
                "properties": {
-                    "rows": {
+                    "data": {
                        "type": "array",
                        "items": {
                            "type": "object",
@ -8169,21 +8160,16 @@
                        },
                        "description": "The rows in the current page."
                    },
-                    "total_count": {
+                    "next_index": {
                        "type": "integer",
-                        "description": "The total number of rows in the dataset."
+                        "description": "Index into dataset for the first row in the next page. None if there are no more rows."
                    },
                    "next_page_token": {
                        "type": "string",
                        "description": "The token to get the next page of rows."
                    }
                },
                "additionalProperties": false,
                "required": [
-                    "rows",
+                    "data"
                    "total_count"
                ],
-                "title": "PaginatedRowsResult",
+                "title": "IterrowsResponse",
                "description": "A paginated list of rows from a dataset."
            },
            "ListAgentSessionsResponse": {
--- a/docs/_static/llama-stack-spec.yaml
+++ b/docs/_static/llama-stack-spec.yaml
@ -10,7 +10,7 @@ info:
 servers:
  - url: http://any-hosted-llama-stack.com
 paths:
-  /v1/datasets/{dataset_id}/rows:
+  /v1/datasets/{dataset_id}/append-rows:
    post:
      responses:
        '200':
@ -1465,7 +1465,7 @@ paths:
          content:
            application/json:
              schema:
-                $ref: '#/components/schemas/PaginatedRowsResult'
+                $ref: '#/components/schemas/IterrowsResponse'
        '400':
          $ref: '#/components/responses/BadRequest400'
        '429':
@ -1479,7 +1479,7 @@ paths:
      tags:
        - DatasetIO
      description: >-
-        Get a paginated list of rows from a dataset.
+        Get a paginated list of rows from a dataset. Uses cursor-based pagination.
      parameters:
        - name: dataset_id
          in: path
@ -1488,25 +1488,19 @@ paths:
          required: true
          schema:
            type: string
-        - name: rows_in_page
+        - name: start_index
          in: query
          description: The number of rows to get per page.
          required: true
          schema:
            type: integer
        - name: page_token
          in: query
          description: The token to get the next page of rows.
          required: false
          schema:
            type: string
        - name: filter_condition
          in: query
          description: >-
-            (Optional) A condition to filter the rows by.
+            Index into dataset for the first row to get. Get all rows if None.
          required: false
          schema:
-            type: string
+            type: integer
        - name: limit
          in: query
          description: The number of rows to get per page.
          required: false
          schema:
            type: integer
  /v1/eval/benchmarks/{benchmark_id}/jobs/{job_id}:
    get:
      responses:
@ -5547,10 +5541,10 @@ components:
      required:
        - content
      title: ToolInvocationResult
-    PaginatedRowsResult:
+    IterrowsResponse:
      type: object
      properties:
-        rows:
+        data:
          type: array
          items:
            type: object
@ -5563,17 +5557,15 @@ components:
                - type: array
                - type: object
          description: The rows in the current page.
-        total_count:
+        next_index:
          type: integer
-          description: The total number of rows in the dataset.
+          description: >-
-        next_page_token:
+            Index into dataset for the first row in the next page. None if there are
-          type: string
+            no more rows.
          description: The token to get the next page of rows.
      additionalProperties: false
      required:
-        - rows
+        - data
-        - total_count
+      title: IterrowsResponse
      title: PaginatedRowsResult
      description: A paginated list of rows from a dataset.
    ListAgentSessionsResponse:
      type: object
--- a/llama_stack/apis/datasetio/datasetio.py
+++ b/llama_stack/apis/datasetio/datasetio.py
@ -13,19 +13,16 @@ from llama_stack.schema_utils import json_schema_type, webmethod
@json_schema_type
-class PaginatedRowsResult(BaseModel):
+class IterrowsResponse(BaseModel):
    """
    A paginated list of rows from a dataset.
-    :param rows: The rows in the current page.
+    :param data: The rows in the current page.
-    :param total_count: The total number of rows in the dataset.
+    :param next_index: Index into dataset for the first row in the next page. None if there are no more rows.
    :param next_page_token: The token to get the next page of rows.
    """
-    # the rows obey the DatasetSchema for the given dataset
+    data: List[Dict[str, Any]]
-    rows: List[Dict[str, Any]]
+    next_index: Optional[int] = None
    total_count: int
    next_page_token: Optional[str] = None
 class DatasetStore(Protocol):
@ -41,18 +38,18 @@ class DatasetIO(Protocol):
    async def iterrows(
        self,
        dataset_id: str,
-        rows_in_page: int,
+        start_index: Optional[int] = None,
-        page_token: Optional[str] = None,
+        limit: Optional[int] = None,
-        filter_condition: Optional[str] = None,
+    ) -> IterrowsResponse:
-    ) -> PaginatedRowsResult:
+        """Get a paginated list of rows from a dataset. Uses cursor-based pagination.
        """Get a paginated list of rows from a dataset.
        :param dataset_id: The ID of the dataset to get the rows from.
-        :param rows_in_page: The number of rows to get per page.
+        :param start_index: Index into dataset for the first row to get. Get all rows if None.
-        :param page_token: The token to get the next page of rows.
+        :param limit: The number of rows to get per page.
        :param filter_condition: (Optional) A condition to filter the rows by.
        """
        ...
-    @webmethod(route="/datasets/{dataset_id}/rows", method="POST")
+    @webmethod(route="/datasets/{dataset_id}/append-rows", method="POST")
-    async def append_rows(self, dataset_id: str, rows: List[Dict[str, Any]]) -> None: ...
+    async def append_rows(
        self, dataset_id: str, rows: List[Dict[str, Any]]
    ) -> None: ...