Add and test pagination for vector store files list

Signed-off-by: Ben Browning <bbrownin@redhat.com>
This commit is contained in:
Ben Browning 2025-06-18 15:50:48 -04:00
parent f0d56316a0
commit 866c0b0029
6 changed files with 204 additions and 38 deletions

View file

@ -3279,6 +3279,46 @@
"schema": { "schema": {
"type": "string" "type": "string"
} }
},
{
"name": "limit",
"in": "query",
"required": false,
"schema": {
"type": "integer"
}
},
{
"name": "order",
"in": "query",
"required": false,
"schema": {
"type": "string"
}
},
{
"name": "after",
"in": "query",
"required": false,
"schema": {
"type": "string"
}
},
{
"name": "before",
"in": "query",
"required": false,
"schema": {
"type": "string"
}
},
{
"name": "filter",
"in": "query",
"required": false,
"schema": {
"$ref": "#/components/schemas/VectorStoreFileStatus"
}
} }
] ]
}, },
@ -12357,24 +12397,7 @@
"$ref": "#/components/schemas/VectorStoreFileLastError" "$ref": "#/components/schemas/VectorStoreFileLastError"
}, },
"status": { "status": {
"oneOf": [ "$ref": "#/components/schemas/VectorStoreFileStatus"
{
"type": "string",
"const": "completed"
},
{
"type": "string",
"const": "in_progress"
},
{
"type": "string",
"const": "cancelled"
},
{
"type": "string",
"const": "failed"
}
]
}, },
"usage_bytes": { "usage_bytes": {
"type": "integer", "type": "integer",
@ -12398,6 +12421,26 @@
"title": "VectorStoreFileObject", "title": "VectorStoreFileObject",
"description": "OpenAI Vector Store File object." "description": "OpenAI Vector Store File object."
}, },
"VectorStoreFileStatus": {
"oneOf": [
{
"type": "string",
"const": "completed"
},
{
"type": "string",
"const": "in_progress"
},
{
"type": "string",
"const": "cancelled"
},
{
"type": "string",
"const": "failed"
}
]
},
"OpenAIJSONSchema": { "OpenAIJSONSchema": {
"type": "object", "type": "object",
"properties": { "properties": {
@ -13665,12 +13708,23 @@
"items": { "items": {
"$ref": "#/components/schemas/VectorStoreFileObject" "$ref": "#/components/schemas/VectorStoreFileObject"
} }
},
"first_id": {
"type": "string"
},
"last_id": {
"type": "string"
},
"has_more": {
"type": "boolean",
"default": false
} }
}, },
"additionalProperties": false, "additionalProperties": false,
"required": [ "required": [
"object", "object",
"data" "data",
"has_more"
], ],
"title": "VectorStoreListFilesResponse", "title": "VectorStoreListFilesResponse",
"description": "Response from listing vector stores." "description": "Response from listing vector stores."

View file

@ -2294,6 +2294,31 @@ paths:
required: true required: true
schema: schema:
type: string type: string
- name: limit
in: query
required: false
schema:
type: integer
- name: order
in: query
required: false
schema:
type: string
- name: after
in: query
required: false
schema:
type: string
- name: before
in: query
required: false
schema:
type: string
- name: filter
in: query
required: false
schema:
$ref: '#/components/schemas/VectorStoreFileStatus'
post: post:
responses: responses:
'200': '200':
@ -8641,15 +8666,7 @@ components:
last_error: last_error:
$ref: '#/components/schemas/VectorStoreFileLastError' $ref: '#/components/schemas/VectorStoreFileLastError'
status: status:
oneOf: $ref: '#/components/schemas/VectorStoreFileStatus'
- type: string
const: completed
- type: string
const: in_progress
- type: string
const: cancelled
- type: string
const: failed
usage_bytes: usage_bytes:
type: integer type: integer
default: 0 default: 0
@ -8667,6 +8684,16 @@ components:
- vector_store_id - vector_store_id
title: VectorStoreFileObject title: VectorStoreFileObject
description: OpenAI Vector Store File object. description: OpenAI Vector Store File object.
VectorStoreFileStatus:
oneOf:
- type: string
const: completed
- type: string
const: in_progress
- type: string
const: cancelled
- type: string
const: failed
OpenAIJSONSchema: OpenAIJSONSchema:
type: object type: object
properties: properties:
@ -9551,10 +9578,18 @@ components:
type: array type: array
items: items:
$ref: '#/components/schemas/VectorStoreFileObject' $ref: '#/components/schemas/VectorStoreFileObject'
first_id:
type: string
last_id:
type: string
has_more:
type: boolean
default: false
additionalProperties: false additionalProperties: false
required: required:
- object - object
- data - data
- has_more
title: VectorStoreListFilesResponse title: VectorStoreListFilesResponse
description: Response from listing vector stores. description: Response from listing vector stores.
OpenAIModel: OpenAIModel:

View file

@ -177,6 +177,10 @@ class VectorStoreFileLastError(BaseModel):
message: str message: str
# Closed set of status values a vector store file can carry: "completed",
# "in_progress", "cancelled" or "failed".
# The alias is registered under the explicit name "VectorStoreFileStatus".
# NOTE(review): presumably the registration is what lets the generated API spec
# reference this as one shared named schema instead of inlining the union at
# every use site - confirm against register_schema.
VectorStoreFileStatus = Literal["completed"] | Literal["in_progress"] | Literal["cancelled"] | Literal["failed"]
register_schema(VectorStoreFileStatus, name="VectorStoreFileStatus")
@json_schema_type @json_schema_type
class VectorStoreFileObject(BaseModel): class VectorStoreFileObject(BaseModel):
"""OpenAI Vector Store File object.""" """OpenAI Vector Store File object."""
@ -187,7 +191,7 @@ class VectorStoreFileObject(BaseModel):
chunking_strategy: VectorStoreChunkingStrategy chunking_strategy: VectorStoreChunkingStrategy
created_at: int created_at: int
last_error: VectorStoreFileLastError | None = None last_error: VectorStoreFileLastError | None = None
status: Literal["completed"] | Literal["in_progress"] | Literal["cancelled"] | Literal["failed"] status: VectorStoreFileStatus
usage_bytes: int = 0 usage_bytes: int = 0
vector_store_id: str vector_store_id: str
@ -198,6 +202,9 @@ class VectorStoreListFilesResponse(BaseModel):
object: str = "list" object: str = "list"
data: list[VectorStoreFileObject] data: list[VectorStoreFileObject]
first_id: str | None = None
last_id: str | None = None
has_more: bool = False
@json_schema_type @json_schema_type
@ -399,6 +406,11 @@ class VectorIO(Protocol):
async def openai_list_files_in_vector_store( async def openai_list_files_in_vector_store(
self, self,
vector_store_id: str, vector_store_id: str,
limit: int | None = 20,
order: str | None = "desc",
after: str | None = None,
before: str | None = None,
filter: VectorStoreFileStatus | None = None,
) -> VectorStoreListFilesResponse: ) -> VectorStoreListFilesResponse:
"""List files in a vector store. """List files in a vector store.

View file

@ -26,6 +26,7 @@ from llama_stack.apis.vector_io.vector_io import (
VectorStoreFileContentsResponse,
VectorStoreFileDeleteResponse,
VectorStoreFileObject,
VectorStoreFileStatus,
VectorStoreListFilesResponse,
)
from llama_stack.log import get_logger from llama_stack.log import get_logger
from llama_stack.providers.datatypes import HealthResponse, HealthStatus, RoutingTable from llama_stack.providers.datatypes import HealthResponse, HealthStatus, RoutingTable
@ -287,12 +288,22 @@ class VectorIORouter(VectorIO):
async def openai_list_files_in_vector_store(
    self,
    vector_store_id: str,
    limit: int | None = 20,
    order: str | None = "desc",
    after: str | None = None,
    before: str | None = None,
    filter: VectorStoreFileStatus | None = None,
) -> VectorStoreListFilesResponse:
    """Forward a list-files request to the provider that owns the vector store.

    The provider is looked up in the routing table by ``vector_store_id`` and
    every pagination/filter argument is passed through unchanged.

    Args:
        vector_store_id: ID of the vector store whose files are listed; also
            the routing key used to pick the provider.
        limit: Maximum number of files to return.
        order: Sort order requested from the provider (default ``"desc"``).
        after: File ID cursor; forwarded to the provider as-is.
        before: File ID cursor; forwarded to the provider as-is.
        filter: Optional file status to restrict the listing to.

    Returns:
        Whatever the provider returns. The annotation is the paginated
        ``VectorStoreListFilesResponse`` to match the ``VectorIO`` protocol
        this router implements, not a bare list of file objects.
    """
    logger.debug(f"VectorIORouter.openai_list_files_in_vector_store: {vector_store_id}")
    # Route based on vector store ID
    provider = self.routing_table.get_provider_impl(vector_store_id)
    return await provider.openai_list_files_in_vector_store(
        vector_store_id=vector_store_id,
        limit=limit,
        order=order,
        after=after,
        before=before,
        filter=filter,
    )
async def openai_retrieve_vector_store_file( async def openai_retrieve_vector_store_file(

View file

@ -35,6 +35,7 @@ from llama_stack.apis.vector_io.vector_io import (
VectorStoreFileDeleteResponse, VectorStoreFileDeleteResponse,
VectorStoreFileLastError, VectorStoreFileLastError,
VectorStoreFileObject, VectorStoreFileObject,
VectorStoreFileStatus,
VectorStoreListFilesResponse, VectorStoreListFilesResponse,
) )
from llama_stack.providers.utils.memory.vector_store import content_from_data_and_mime_type, make_overlapped_chunks from llama_stack.providers.utils.memory.vector_store import content_from_data_and_mime_type, make_overlapped_chunks
@ -592,21 +593,56 @@ class OpenAIVectorStoreMixin(ABC):
async def openai_list_files_in_vector_store(
    self,
    vector_store_id: str,
    limit: int | None = 20,
    order: str | None = "desc",
    after: str | None = None,
    before: str | None = None,
    filter: VectorStoreFileStatus | None = None,
) -> VectorStoreListFilesResponse:
    """List files in a vector store, one page at a time.

    Args:
        vector_store_id: ID of the vector store whose files are listed.
        limit: Maximum number of files in the returned page. ``None`` or ``0``
            falls back to 20.
        order: ``"desc"`` sorts by ``created_at`` descending; any other
            non-empty value sorts ascending. ``None``/empty falls back to
            ``"desc"``.
        after: File ID cursor. When it matches a file in the sorted list, only
            the files following it are kept; an unknown ID is ignored.
        before: File ID cursor. When it matches a file in the sorted list, only
            the files preceding it are kept; an unknown ID is ignored.
        filter: When set, only files whose ``status`` equals this value are
            listed.

    Returns:
        A ``VectorStoreListFilesResponse`` whose ``data`` is the requested
        page, ``has_more`` tells whether files remain beyond the page, and
        ``first_id``/``last_id`` are the IDs of the first and last entries of
        ``data`` (``None`` when the page is empty).

    Raises:
        ValueError: If ``vector_store_id`` is not a known vector store.
    """
    limit = limit or 20
    order = order or "desc"

    if vector_store_id not in self.openai_vector_stores:
        raise ValueError(f"Vector store {vector_store_id} not found")

    store_info = self.openai_vector_stores[vector_store_id]

    file_objects: list[VectorStoreFileObject] = []
    for file_id in store_info["file_ids"]:
        file_info = await self._load_openai_vector_store_file(vector_store_id, file_id)
        file_object = VectorStoreFileObject(**file_info)
        if filter and file_object.status != filter:
            continue
        file_objects.append(file_object)

    # Sort by created_at
    reverse_order = order == "desc"
    file_objects.sort(key=lambda x: x.created_at, reverse=reverse_order)

    # Apply cursor-based pagination
    if after:
        after_index = next((i for i, file in enumerate(file_objects) if file.id == after), -1)
        if after_index >= 0:
            file_objects = file_objects[after_index + 1 :]

    if before:
        before_index = next((i for i, file in enumerate(file_objects) if file.id == before), len(file_objects))
        file_objects = file_objects[:before_index]

    # Apply limit
    limited_files = file_objects[:limit]

    # has_more is measured against everything left after the cursors, while
    # first_id/last_id must describe the page actually returned: last_id is
    # what a client passes back as `after` to fetch the next page, so it has
    # to be the last entry of `data`, not the last file in the whole store.
    has_more = len(file_objects) > limit
    first_id = limited_files[0].id if limited_files else None
    last_id = limited_files[-1].id if limited_files else None

    return VectorStoreListFilesResponse(
        data=limited_files,
        has_more=has_more,
        first_id=first_id,
        last_id=last_id,
    )
async def openai_retrieve_vector_store_file( async def openai_retrieve_vector_store_file(

View file

@ -509,7 +509,9 @@ def test_openai_vector_store_attach_files_on_creation(compat_client_with_empty_s
valid_file_ids.append(file.id) valid_file_ids.append(file.id)
# include an invalid file ID so we can test failed status # include an invalid file ID so we can test failed status
file_ids = valid_file_ids + ["invalid_file_id"] failed_file_id = "invalid_file_id"
file_ids = valid_file_ids + [failed_file_id]
num_failed = len(file_ids) - len(valid_file_ids)
# Create a vector store # Create a vector store
vector_store = compat_client.vector_stores.create( vector_store = compat_client.vector_stores.create(
@ -520,7 +522,7 @@ def test_openai_vector_store_attach_files_on_creation(compat_client_with_empty_s
assert vector_store.file_counts.completed == len(valid_file_ids) assert vector_store.file_counts.completed == len(valid_file_ids)
assert vector_store.file_counts.total == len(file_ids) assert vector_store.file_counts.total == len(file_ids)
assert vector_store.file_counts.cancelled == 0 assert vector_store.file_counts.cancelled == 0
assert vector_store.file_counts.failed == len(file_ids) - len(valid_file_ids) assert vector_store.file_counts.failed == num_failed
assert vector_store.file_counts.in_progress == 0 assert vector_store.file_counts.in_progress == 0
files_list = compat_client.vector_stores.files.list(vector_store_id=vector_store.id) files_list = compat_client.vector_stores.files.list(vector_store_id=vector_store.id)
@ -532,11 +534,13 @@ def test_openai_vector_store_attach_files_on_creation(compat_client_with_empty_s
else: else:
assert file.status == "failed" assert file.status == "failed"
failed_list = compat_client.vector_stores.files.list(vector_store_id=vector_store.id, filter="failed")
assert len(failed_list.data) == num_failed
assert failed_file_id == failed_list.data[0].id
# Delete the invalid file # Delete the invalid file
delete_response = compat_client.vector_stores.files.delete( delete_response = compat_client.vector_stores.files.delete(vector_store_id=vector_store.id, file_id=failed_file_id)
vector_store_id=vector_store.id, file_id="invalid_file_id" assert delete_response.id == failed_file_id
)
assert delete_response.id == "invalid_file_id"
updated_vector_store = compat_client.vector_stores.retrieve(vector_store_id=vector_store.id) updated_vector_store = compat_client.vector_stores.retrieve(vector_store_id=vector_store.id)
assert updated_vector_store.file_counts.completed == len(valid_file_ids) assert updated_vector_store.file_counts.completed == len(valid_file_ids)
@ -573,6 +577,7 @@ def test_openai_vector_store_list_files(compat_client_with_empty_stores, client_
assert files_list assert files_list
assert files_list.object == "list" assert files_list.object == "list"
assert files_list.data assert files_list.data
assert not files_list.has_more
assert len(files_list.data) == 3 assert len(files_list.data) == 3
assert set(file_ids) == {file.id for file in files_list.data} assert set(file_ids) == {file.id for file in files_list.data}
assert files_list.data[0].object == "vector_store.file" assert files_list.data[0].object == "vector_store.file"
@ -580,8 +585,21 @@ def test_openai_vector_store_list_files(compat_client_with_empty_stores, client_
assert files_list.data[0].status == "completed" assert files_list.data[0].status == "completed"
assert files_list.data[0].chunking_strategy.type == "auto" assert files_list.data[0].chunking_strategy.type == "auto"
assert files_list.data[0].created_at > 0 assert files_list.data[0].created_at > 0
assert files_list.first_id == files_list.data[0].id
assert not files_list.data[0].last_error assert not files_list.data[0].last_error
first_page = compat_client.vector_stores.files.list(vector_store_id=vector_store.id, limit=2)
assert first_page.has_more
assert len(first_page.data) == 2
assert first_page.first_id == first_page.data[0].id
assert first_page.last_id != first_page.data[-1].id
next_page = compat_client.vector_stores.files.list(
vector_store_id=vector_store.id, limit=2, after=first_page.data[-1].id
)
assert not next_page.has_more
assert len(next_page.data) == 1
updated_vector_store = compat_client.vector_stores.retrieve(vector_store_id=vector_store.id) updated_vector_store = compat_client.vector_stores.retrieve(vector_store_id=vector_store.id)
assert updated_vector_store.file_counts.completed == 3 assert updated_vector_store.file_counts.completed == 3
assert updated_vector_store.file_counts.total == 3 assert updated_vector_store.file_counts.total == 3