feat(api): (1.2/n) datasets.iterrows pagination api updates (#1656)

# What does this PR do?
- As the title says: updates the pagination API of `datasets.iterrows`
- uses "cursor" pagination scheme for iterrows

[//]: # (If resolving an issue, uncomment and update the line below)
[//]: # (Closes #[issue-number])

## Test Plan
<img width="1226" alt="image"
src="https://github.com/user-attachments/assets/3220eaac-7117-4d0a-b344-2bbb77a22065"
/>


[//]: # (## Documentation)
This commit is contained in:
Xi Yan 2025-03-15 13:58:47 -07:00 committed by GitHub
parent c7d741d89e
commit 39f4dfbf50
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 50 additions and 75 deletions

View file

@ -40,7 +40,7 @@
} }
], ],
"paths": { "paths": {
"/v1/datasets/{dataset_id}/rows": { "/v1/datasets/{dataset_id}/append-rows": {
"post": { "post": {
"responses": { "responses": {
"200": { "200": {
@ -2185,7 +2185,7 @@
"content": { "content": {
"application/json": { "application/json": {
"schema": { "schema": {
"$ref": "#/components/schemas/PaginatedRowsResult" "$ref": "#/components/schemas/IterrowsResponse"
} }
} }
} }
@ -2206,7 +2206,7 @@
"tags": [ "tags": [
"DatasetIO" "DatasetIO"
], ],
"description": "Get a paginated list of rows from a dataset.", "description": "Get a paginated list of rows from a dataset. Uses cursor-based pagination.",
"parameters": [ "parameters": [
{ {
"name": "dataset_id", "name": "dataset_id",
@ -2218,30 +2218,21 @@
} }
}, },
{ {
"name": "rows_in_page", "name": "start_index",
"in": "query", "in": "query",
"description": "The number of rows to get per page.", "description": "Index into dataset for the first row to get. Get all rows if None.",
"required": true, "required": false,
"schema": { "schema": {
"type": "integer" "type": "integer"
} }
}, },
{ {
"name": "page_token", "name": "limit",
"in": "query", "in": "query",
"description": "The token to get the next page of rows.", "description": "The number of rows to get per page.",
"required": false, "required": false,
"schema": { "schema": {
"type": "string" "type": "integer"
}
},
{
"name": "filter_condition",
"in": "query",
"description": "(Optional) A condition to filter the rows by.",
"required": false,
"schema": {
"type": "string"
} }
} }
] ]
@ -8137,10 +8128,10 @@
], ],
"title": "ToolInvocationResult" "title": "ToolInvocationResult"
}, },
"PaginatedRowsResult": { "IterrowsResponse": {
"type": "object", "type": "object",
"properties": { "properties": {
"rows": { "data": {
"type": "array", "type": "array",
"items": { "items": {
"type": "object", "type": "object",
@ -8169,21 +8160,16 @@
}, },
"description": "The rows in the current page." "description": "The rows in the current page."
}, },
"total_count": { "next_index": {
"type": "integer", "type": "integer",
"description": "The total number of rows in the dataset." "description": "Index into dataset for the first row in the next page. None if there are no more rows."
},
"next_page_token": {
"type": "string",
"description": "The token to get the next page of rows."
} }
}, },
"additionalProperties": false, "additionalProperties": false,
"required": [ "required": [
"rows", "data"
"total_count"
], ],
"title": "PaginatedRowsResult", "title": "IterrowsResponse",
"description": "A paginated list of rows from a dataset." "description": "A paginated list of rows from a dataset."
}, },
"ListAgentSessionsResponse": { "ListAgentSessionsResponse": {

View file

@ -10,7 +10,7 @@ info:
servers: servers:
- url: http://any-hosted-llama-stack.com - url: http://any-hosted-llama-stack.com
paths: paths:
/v1/datasets/{dataset_id}/rows: /v1/datasets/{dataset_id}/append-rows:
post: post:
responses: responses:
'200': '200':
@ -1465,7 +1465,7 @@ paths:
content: content:
application/json: application/json:
schema: schema:
$ref: '#/components/schemas/PaginatedRowsResult' $ref: '#/components/schemas/IterrowsResponse'
'400': '400':
$ref: '#/components/responses/BadRequest400' $ref: '#/components/responses/BadRequest400'
'429': '429':
@ -1479,7 +1479,7 @@ paths:
tags: tags:
- DatasetIO - DatasetIO
description: >- description: >-
Get a paginated list of rows from a dataset. Get a paginated list of rows from a dataset. Uses cursor-based pagination.
parameters: parameters:
- name: dataset_id - name: dataset_id
in: path in: path
@ -1488,25 +1488,19 @@ paths:
required: true required: true
schema: schema:
type: string type: string
- name: rows_in_page - name: start_index
in: query
description: The number of rows to get per page.
required: true
schema:
type: integer
- name: page_token
in: query
description: The token to get the next page of rows.
required: false
schema:
type: string
- name: filter_condition
in: query in: query
description: >- description: >-
(Optional) A condition to filter the rows by. Index into dataset for the first row to get. Get all rows if None.
required: false required: false
schema: schema:
type: string type: integer
- name: limit
in: query
description: The number of rows to get per page.
required: false
schema:
type: integer
/v1/eval/benchmarks/{benchmark_id}/jobs/{job_id}: /v1/eval/benchmarks/{benchmark_id}/jobs/{job_id}:
get: get:
responses: responses:
@ -5547,10 +5541,10 @@ components:
required: required:
- content - content
title: ToolInvocationResult title: ToolInvocationResult
PaginatedRowsResult: IterrowsResponse:
type: object type: object
properties: properties:
rows: data:
type: array type: array
items: items:
type: object type: object
@ -5563,17 +5557,15 @@ components:
- type: array - type: array
- type: object - type: object
description: The rows in the current page. description: The rows in the current page.
total_count: next_index:
type: integer type: integer
description: The total number of rows in the dataset. description: >-
next_page_token: Index into dataset for the first row in the next page. None if there are
type: string no more rows.
description: The token to get the next page of rows.
additionalProperties: false additionalProperties: false
required: required:
- rows - data
- total_count title: IterrowsResponse
title: PaginatedRowsResult
description: A paginated list of rows from a dataset. description: A paginated list of rows from a dataset.
ListAgentSessionsResponse: ListAgentSessionsResponse:
type: object type: object

View file

@ -13,19 +13,16 @@ from llama_stack.schema_utils import json_schema_type, webmethod
@json_schema_type @json_schema_type
class PaginatedRowsResult(BaseModel): class IterrowsResponse(BaseModel):
""" """
A paginated list of rows from a dataset. A paginated list of rows from a dataset.
:param rows: The rows in the current page. :param data: The rows in the current page.
:param total_count: The total number of rows in the dataset. :param next_index: Index into dataset for the first row in the next page. None if there are no more rows.
:param next_page_token: The token to get the next page of rows.
""" """
# the rows obey the DatasetSchema for the given dataset data: List[Dict[str, Any]]
rows: List[Dict[str, Any]] next_index: Optional[int] = None
total_count: int
next_page_token: Optional[str] = None
class DatasetStore(Protocol): class DatasetStore(Protocol):
@ -41,18 +38,18 @@ class DatasetIO(Protocol):
async def iterrows( async def iterrows(
self, self,
dataset_id: str, dataset_id: str,
rows_in_page: int, start_index: Optional[int] = None,
page_token: Optional[str] = None, limit: Optional[int] = None,
filter_condition: Optional[str] = None, ) -> IterrowsResponse:
) -> PaginatedRowsResult: """Get a paginated list of rows from a dataset. Uses cursor-based pagination.
"""Get a paginated list of rows from a dataset.
:param dataset_id: The ID of the dataset to get the rows from. :param dataset_id: The ID of the dataset to get the rows from.
:param rows_in_page: The number of rows to get per page. :param start_index: Index into dataset for the first row to get. Get all rows if None.
:param page_token: The token to get the next page of rows. :param limit: The number of rows to get per page.
:param filter_condition: (Optional) A condition to filter the rows by.
""" """
... ...
@webmethod(route="/datasets/{dataset_id}/rows", method="POST") @webmethod(route="/datasets/{dataset_id}/append-rows", method="POST")
async def append_rows(self, dataset_id: str, rows: List[Dict[str, Any]]) -> None: ... async def append_rows(
self, dataset_id: str, rows: List[Dict[str, Any]]
) -> None: ...