From 39f4dfbf508ec695495d582f3d25c6c230ac7aea Mon Sep 17 00:00:00 2001 From: Xi Yan Date: Sat, 15 Mar 2025 13:58:47 -0700 Subject: [PATCH] feat(api): (1.2/n) datasets.iterrows pagination api updates (#1656) # What does this PR do? - as title - uses "cursor" pagination scheme for iterrows [//]: # (If resolving an issue, uncomment and update the line below) [//]: # (Closes #[issue-number]) ## Test Plan image [//]: # (## Documentation) --- docs/_static/llama-stack-spec.html | 44 ++++++++--------------- docs/_static/llama-stack-spec.yaml | 48 +++++++++++-------------- llama_stack/apis/datasetio/datasetio.py | 33 ++++++++--------- 3 files changed, 50 insertions(+), 75 deletions(-) diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html index 743ff91c8..083678699 100644 --- a/docs/_static/llama-stack-spec.html +++ b/docs/_static/llama-stack-spec.html @@ -40,7 +40,7 @@ } ], "paths": { - "/v1/datasets/{dataset_id}/rows": { + "/v1/datasets/{dataset_id}/append-rows": { "post": { "responses": { "200": { @@ -2185,7 +2185,7 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/PaginatedRowsResult" + "$ref": "#/components/schemas/IterrowsResponse" } } } @@ -2206,7 +2206,7 @@ "tags": [ "DatasetIO" ], - "description": "Get a paginated list of rows from a dataset.", + "description": "Get a paginated list of rows from a dataset. Uses cursor-based pagination.", "parameters": [ { "name": "dataset_id", @@ -2218,30 +2218,21 @@ } }, { - "name": "rows_in_page", + "name": "start_index", "in": "query", - "description": "The number of rows to get per page.", - "required": true, + "description": "Index into dataset for the first row to get. 
Get all rows if None.", + "required": false, "schema": { "type": "integer" } }, { - "name": "page_token", + "name": "limit", "in": "query", - "description": "The token to get the next page of rows.", + "description": "The number of rows to get per page.", "required": false, "schema": { - "type": "string" - } - }, - { - "name": "filter_condition", - "in": "query", - "description": "(Optional) A condition to filter the rows by.", - "required": false, - "schema": { - "type": "string" + "type": "integer" } } ] @@ -8137,10 +8128,10 @@ ], "title": "ToolInvocationResult" }, - "PaginatedRowsResult": { + "IterrowsResponse": { "type": "object", "properties": { - "rows": { + "data": { "type": "array", "items": { "type": "object", @@ -8169,21 +8160,16 @@ }, "description": "The rows in the current page." }, - "total_count": { + "next_index": { "type": "integer", - "description": "The total number of rows in the dataset." - }, - "next_page_token": { - "type": "string", - "description": "The token to get the next page of rows." + "description": "Index into dataset for the first row in the next page. None if there are no more rows." } }, "additionalProperties": false, "required": [ - "rows", - "total_count" + "data" ], - "title": "PaginatedRowsResult", + "title": "IterrowsResponse", "description": "A paginated list of rows from a dataset." 
}, "ListAgentSessionsResponse": { diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml index a44ee35ea..43880dac8 100644 --- a/docs/_static/llama-stack-spec.yaml +++ b/docs/_static/llama-stack-spec.yaml @@ -10,7 +10,7 @@ info: servers: - url: http://any-hosted-llama-stack.com paths: - /v1/datasets/{dataset_id}/rows: + /v1/datasets/{dataset_id}/append-rows: post: responses: '200': @@ -1465,7 +1465,7 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/PaginatedRowsResult' + $ref: '#/components/schemas/IterrowsResponse' '400': $ref: '#/components/responses/BadRequest400' '429': @@ -1479,7 +1479,7 @@ paths: tags: - DatasetIO description: >- - Get a paginated list of rows from a dataset. + Get a paginated list of rows from a dataset. Uses cursor-based pagination. parameters: - name: dataset_id in: path @@ -1488,25 +1488,19 @@ paths: required: true schema: type: string - - name: rows_in_page - in: query - description: The number of rows to get per page. - required: true - schema: - type: integer - - name: page_token - in: query - description: The token to get the next page of rows. - required: false - schema: - type: string - - name: filter_condition + - name: start_index in: query description: >- - (Optional) A condition to filter the rows by. + Index into dataset for the first row to get. Get all rows if None. required: false schema: - type: string + type: integer + - name: limit + in: query + description: The number of rows to get per page. + required: false + schema: + type: integer /v1/eval/benchmarks/{benchmark_id}/jobs/{job_id}: get: responses: @@ -5547,10 +5541,10 @@ components: required: - content title: ToolInvocationResult - PaginatedRowsResult: + IterrowsResponse: type: object properties: - rows: + data: type: array items: type: object @@ -5563,17 +5557,15 @@ components: - type: array - type: object description: The rows in the current page. 
- total_count: + next_index: type: integer - description: The total number of rows in the dataset. - next_page_token: - type: string - description: The token to get the next page of rows. + description: >- + Index into dataset for the first row in the next page. None if there are + no more rows. additionalProperties: false required: - - rows - - total_count - title: PaginatedRowsResult + - data + title: IterrowsResponse description: A paginated list of rows from a dataset. ListAgentSessionsResponse: type: object diff --git a/llama_stack/apis/datasetio/datasetio.py b/llama_stack/apis/datasetio/datasetio.py index 9ce90da01..caa7c51df 100644 --- a/llama_stack/apis/datasetio/datasetio.py +++ b/llama_stack/apis/datasetio/datasetio.py @@ -13,19 +13,16 @@ from llama_stack.schema_utils import json_schema_type, webmethod @json_schema_type -class PaginatedRowsResult(BaseModel): +class IterrowsResponse(BaseModel): """ A paginated list of rows from a dataset. - :param rows: The rows in the current page. - :param total_count: The total number of rows in the dataset. - :param next_page_token: The token to get the next page of rows. + :param data: The rows in the current page. + :param next_index: Index into dataset for the first row in the next page. None if there are no more rows. """ - # the rows obey the DatasetSchema for the given dataset - rows: List[Dict[str, Any]] - total_count: int - next_page_token: Optional[str] = None + data: List[Dict[str, Any]] + next_index: Optional[int] = None class DatasetStore(Protocol): @@ -41,18 +38,18 @@ class DatasetIO(Protocol): async def iterrows( self, dataset_id: str, - rows_in_page: int, - page_token: Optional[str] = None, - filter_condition: Optional[str] = None, - ) -> PaginatedRowsResult: - """Get a paginated list of rows from a dataset. + start_index: Optional[int] = None, + limit: Optional[int] = None, + ) -> IterrowsResponse: + """Get a paginated list of rows from a dataset. Uses cursor-based pagination. 
:param dataset_id: The ID of the dataset to get the rows from. - :param rows_in_page: The number of rows to get per page. - :param page_token: The token to get the next page of rows. - :param filter_condition: (Optional) A condition to filter the rows by. + :param start_index: Index into dataset for the first row to get. Get all rows if None. + :param limit: The number of rows to get per page. """ ... - @webmethod(route="/datasets/{dataset_id}/rows", method="POST") - async def append_rows(self, dataset_id: str, rows: List[Dict[str, Any]]) -> None: ... + @webmethod(route="/datasets/{dataset_id}/append-rows", method="POST") + async def append_rows( + self, dataset_id: str, rows: List[Dict[str, Any]] + ) -> None: ...