llama-stack-mirror/llama_stack/providers/utils/datasetio/pagination.py
Sébastien Han 2ffa2b77ed
refactor: extract pagination logic into shared helper function (#1770)
# What does this PR do?

Move pagination logic from LocalFS and HuggingFace implementations into
a common helper function to ensure consistent pagination behavior across
providers. This reduces code duplication and centralizes pagination
logic in one place.


## Test Plan

Run this script:

```
from llama_stack_client import LlamaStackClient

# Initialize the client
client = LlamaStackClient(base_url="http://localhost:8321")

# Register a dataset
response = client.datasets.register(
    purpose="eval/messages-answer",  # or "eval/question-answer" or "post-training/messages"
    source={"type": "uri", "uri": "huggingface://datasets/llamastack/simpleqa?split=train"},
    dataset_id="my_dataset",  # optional, will be auto-generated if not provided
    metadata={"description": "My evaluation dataset"},  # optional
)

# Verify the dataset was registered by listing all datasets
datasets = client.datasets.list()
print(f"Registered datasets: {[d.identifier for d in datasets]}")

# You can then access the data using the datasetio API
# rows = client.datasets.iterrows(dataset_id="my_dataset", start_index=1, limit=2)
rows = client.datasets.iterrows(dataset_id="my_dataset")
print(f"Data: {rows.data}")
```

And play with `start_index` and `limit`.

[//]: # (## Documentation)

Signed-off-by: Sébastien Han <seb@redhat.com>
2025-03-31 13:08:29 -07:00

43 lines
1.4 KiB
Python

# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from typing import Any, Dict, List
from llama_stack.apis.common.responses import PaginatedResponse
def paginate_records(
records: List[Dict[str, Any]],
start_index: int | None = None,
limit: int | None = None,
) -> PaginatedResponse:
"""Helper function to handle pagination of records consistently across implementations.
Inspired by stripe's pagination: https://docs.stripe.com/api/pagination
:param records: List of records to paginate
:param start_index: The starting index (0-based). If None, starts from beginning.
:param limit: Number of items to return. If None or -1, returns all items.
:return: PaginatedResponse with the paginated data
"""
# Handle special case for fetching all rows
if limit is None or limit == -1:
return PaginatedResponse(
data=records,
has_more=False,
)
# Use offset-based pagination
start_index = start_index or 0
end_index = min(start_index + limit, len(records))
page_data = records[start_index:end_index]
# Calculate if there are more records
has_more = end_index < len(records)
return PaginatedResponse(
data=page_data,
has_more=has_more,
)