Add and test pagination for vector store files list

Signed-off-by: Ben Browning <bbrownin@redhat.com>
This commit is contained in:
Ben Browning 2025-06-18 15:50:48 -04:00
parent f0d56316a0
commit 866c0b0029
6 changed files with 204 additions and 38 deletions

View file

@ -177,6 +177,10 @@ class VectorStoreFileLastError(BaseModel):
message: str
# Closed set of states a vector store file can be in. Defined once as an alias so the
# file object's `status` field and the `filter` argument of the files-list call share it.
VectorStoreFileStatus = Literal["completed"] | Literal["in_progress"] | Literal["cancelled"] | Literal["failed"]
# Register the alias under an explicit schema name.
register_schema(VectorStoreFileStatus, name="VectorStoreFileStatus")
@json_schema_type
class VectorStoreFileObject(BaseModel):
"""OpenAI Vector Store File object."""
@ -187,7 +191,7 @@ class VectorStoreFileObject(BaseModel):
chunking_strategy: VectorStoreChunkingStrategy
created_at: int
last_error: VectorStoreFileLastError | None = None
status: Literal["completed"] | Literal["in_progress"] | Literal["cancelled"] | Literal["failed"]
status: VectorStoreFileStatus
usage_bytes: int = 0
vector_store_id: str
@ -198,6 +202,9 @@ class VectorStoreListFilesResponse(BaseModel):
object: str = "list"
data: list[VectorStoreFileObject]
first_id: str | None = None
last_id: str | None = None
has_more: bool = False
@json_schema_type
@ -399,6 +406,11 @@ class VectorIO(Protocol):
async def openai_list_files_in_vector_store(
self,
vector_store_id: str,
limit: int | None = 20,
order: str | None = "desc",
after: str | None = None,
before: str | None = None,
filter: VectorStoreFileStatus | None = None,
) -> VectorStoreListFilesResponse:
"""List files in a vector store.

View file

@ -26,6 +26,7 @@ from llama_stack.apis.vector_io.vector_io import (
VectorStoreFileContentsResponse,
VectorStoreFileDeleteResponse,
VectorStoreFileObject,
VectorStoreFileStatus,
VectorStoreListFilesResponse,
)
from llama_stack.log import get_logger
from llama_stack.providers.datatypes import HealthResponse, HealthStatus, RoutingTable
@ -287,12 +288,22 @@ class VectorIORouter(VectorIO):
async def openai_list_files_in_vector_store(
    self,
    vector_store_id: str,
    limit: int | None = 20,
    order: str | None = "desc",
    after: str | None = None,
    before: str | None = None,
    filter: VectorStoreFileStatus | None = None,
) -> VectorStoreListFilesResponse:
    """Forward a files-list request to the provider that owns the vector store.

    All pagination and filtering arguments are passed through untouched; the
    router itself does no slicing or sorting.

    Args:
        vector_store_id: ID of the vector store; also used to pick the provider.
        limit: Page size requested by the caller.
        order: Sort direction requested by the caller.
        after: Cursor file ID; forwarded as-is.
        before: Cursor file ID; forwarded as-is.
        filter: Optional file status to restrict the listing to.

    Returns:
        Whatever the provider returns for the same call. The return annotation
        matches the VectorIO protocol method of the same name.
    """
    logger.debug(f"VectorIORouter.openai_list_files_in_vector_store: {vector_store_id}")
    # Route based on vector store ID
    provider = self.routing_table.get_provider_impl(vector_store_id)
    return await provider.openai_list_files_in_vector_store(
        vector_store_id=vector_store_id,
        limit=limit,
        order=order,
        after=after,
        before=before,
        filter=filter,
    )
async def openai_retrieve_vector_store_file(

View file

@ -35,6 +35,7 @@ from llama_stack.apis.vector_io.vector_io import (
VectorStoreFileDeleteResponse,
VectorStoreFileLastError,
VectorStoreFileObject,
VectorStoreFileStatus,
VectorStoreListFilesResponse,
)
from llama_stack.providers.utils.memory.vector_store import content_from_data_and_mime_type, make_overlapped_chunks
@ -592,21 +593,56 @@ class OpenAIVectorStoreMixin(ABC):
async def openai_list_files_in_vector_store(
    self,
    vector_store_id: str,
    limit: int | None = 20,
    order: str | None = "desc",
    after: str | None = None,
    before: str | None = None,
    filter: VectorStoreFileStatus | None = None,
) -> VectorStoreListFilesResponse:
    """List files in a vector store, one page at a time.

    Args:
        vector_store_id: ID of the vector store whose files are listed.
        limit: Maximum number of files in the returned page. ``None`` (or 0)
            falls back to 20.
        order: ``"desc"`` sorts newest first by ``created_at``; any other
            value sorts oldest first. ``None`` falls back to ``"desc"``.
        after: File ID cursor; only files positioned after it in the sorted
            list are kept. An unknown ID is ignored.
        before: File ID cursor; only files positioned before it in the sorted
            list are kept. An unknown ID is ignored.
        filter: When given, only files with exactly this status are listed.

    Returns:
        A VectorStoreListFilesResponse whose ``data`` is the page, whose
        ``first_id``/``last_id`` are the IDs of the first and last file in
        that page (``None`` for an empty page), and whose ``has_more`` tells
        whether files remained beyond the page.

    Raises:
        ValueError: If ``vector_store_id`` is not a known vector store.
    """
    limit = limit or 20
    order = order or "desc"

    if vector_store_id not in self.openai_vector_stores:
        raise ValueError(f"Vector store {vector_store_id} not found")

    store_info = self.openai_vector_stores[vector_store_id]

    file_objects: list[VectorStoreFileObject] = []
    for file_id in store_info["file_ids"]:
        file_info = await self._load_openai_vector_store_file(vector_store_id, file_id)
        file_object = VectorStoreFileObject(**file_info)
        if filter and file_object.status != filter:
            continue
        file_objects.append(file_object)

    # Sort by created_at
    reverse_order = order == "desc"
    file_objects.sort(key=lambda x: x.created_at, reverse=reverse_order)

    # Apply cursor-based pagination
    if after:
        after_index = next((i for i, file in enumerate(file_objects) if file.id == after), -1)
        if after_index >= 0:
            file_objects = file_objects[after_index + 1 :]

    if before:
        before_index = next((i for i, file in enumerate(file_objects) if file.id == before), len(file_objects))
        file_objects = file_objects[:before_index]

    # Apply limit
    limited_files = file_objects[:limit]

    # Determine pagination info. The cursor IDs must come from the page that is
    # actually returned: if last_id pointed at the end of the untruncated list,
    # feeding it back as `after` would skip every file that was cut off by `limit`.
    has_more = len(file_objects) > limit
    first_id = limited_files[0].id if limited_files else None
    last_id = limited_files[-1].id if limited_files else None

    return VectorStoreListFilesResponse(
        data=limited_files,
        has_more=has_more,
        first_id=first_id,
        last_id=last_id,
    )
async def openai_retrieve_vector_store_file(