From 866c0b0029bb9c79ba4b4569a8c96cc0bffa0f30 Mon Sep 17 00:00:00 2001 From: Ben Browning Date: Wed, 18 Jun 2025 15:50:48 -0400 Subject: [PATCH] Add and test pagination for vector store files list Signed-off-by: Ben Browning --- docs/_static/llama-stack-spec.html | 92 +++++++++++++++---- docs/_static/llama-stack-spec.yaml | 53 +++++++++-- llama_stack/apis/vector_io/vector_io.py | 14 ++- llama_stack/distribution/routers/vector_io.py | 11 +++ .../utils/memory/openai_vector_store_mixin.py | 42 ++++++++- .../vector_io/test_openai_vector_stores.py | 30 ++++-- 6 files changed, 204 insertions(+), 38 deletions(-) diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html index 2b576a1a9..0a5caa3d1 100644 --- a/docs/_static/llama-stack-spec.html +++ b/docs/_static/llama-stack-spec.html @@ -3279,6 +3279,46 @@ "schema": { "type": "string" } + }, + { + "name": "limit", + "in": "query", + "required": false, + "schema": { + "type": "integer" + } + }, + { + "name": "order", + "in": "query", + "required": false, + "schema": { + "type": "string" + } + }, + { + "name": "after", + "in": "query", + "required": false, + "schema": { + "type": "string" + } + }, + { + "name": "before", + "in": "query", + "required": false, + "schema": { + "type": "string" + } + }, + { + "name": "filter", + "in": "query", + "required": false, + "schema": { + "$ref": "#/components/schemas/VectorStoreFileStatus" + } } ] }, @@ -12357,24 +12397,7 @@ "$ref": "#/components/schemas/VectorStoreFileLastError" }, "status": { - "oneOf": [ - { - "type": "string", - "const": "completed" - }, - { - "type": "string", - "const": "in_progress" - }, - { - "type": "string", - "const": "cancelled" - }, - { - "type": "string", - "const": "failed" - } - ] + "$ref": "#/components/schemas/VectorStoreFileStatus" }, "usage_bytes": { "type": "integer", @@ -12398,6 +12421,26 @@ "title": "VectorStoreFileObject", "description": "OpenAI Vector Store File object." }, + "VectorStoreFileStatus": { + "oneOf": [ + { + "type": "string", + "const": "completed" + }, + { + "type": "string", + "const": "in_progress" + }, + { + "type": "string", + "const": "cancelled" + }, + { + "type": "string", + "const": "failed" + } + ] + }, "OpenAIJSONSchema": { "type": "object", "properties": { @@ -13665,12 +13708,23 @@ "items": { "$ref": "#/components/schemas/VectorStoreFileObject" } + }, + "first_id": { + "type": "string" + }, + "last_id": { + "type": "string" + }, + "has_more": { + "type": "boolean", + "default": false } }, "additionalProperties": false, "required": [ "object", - "data" + "data", + "has_more" ], "title": "VectorStoreListFilesResponse", "description": "Response from listing vector stores." 
diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml index 160193e6a..c115e1df2 100644 --- a/docs/_static/llama-stack-spec.yaml +++ b/docs/_static/llama-stack-spec.yaml @@ -2294,6 +2294,31 @@ paths: required: true schema: type: string + - name: limit + in: query + required: false + schema: + type: integer + - name: order + in: query + required: false + schema: + type: string + - name: after + in: query + required: false + schema: + type: string + - name: before + in: query + required: false + schema: + type: string + - name: filter + in: query + required: false + schema: + $ref: '#/components/schemas/VectorStoreFileStatus' post: responses: '200': @@ -8641,15 +8666,7 @@ components: last_error: $ref: '#/components/schemas/VectorStoreFileLastError' status: - oneOf: - - type: string - const: completed - - type: string - const: in_progress - - type: string - const: cancelled - - type: string - const: failed + $ref: '#/components/schemas/VectorStoreFileStatus' usage_bytes: type: integer default: 0 @@ -8667,6 +8684,16 @@ components: - vector_store_id title: VectorStoreFileObject description: OpenAI Vector Store File object. + VectorStoreFileStatus: + oneOf: + - type: string + const: completed + - type: string + const: in_progress + - type: string + const: cancelled + - type: string + const: failed OpenAIJSONSchema: type: object properties: @@ -9551,10 +9578,18 @@ components: type: array items: $ref: '#/components/schemas/VectorStoreFileObject' + first_id: + type: string + last_id: + type: string + has_more: + type: boolean + default: false additionalProperties: false required: - object - data + - has_more title: VectorStoreListFilesResponse description: Response from listing vector stores. OpenAIModel: diff --git a/llama_stack/apis/vector_io/vector_io.py b/llama_stack/apis/vector_io/vector_io.py index 6a674356d..dbea12d5f 100644 --- a/llama_stack/apis/vector_io/vector_io.py +++ b/llama_stack/apis/vector_io/vector_io.py @@ -177,6 +177,10 @@ class VectorStoreFileLastError(BaseModel): message: str +VectorStoreFileStatus = Literal["completed"] | Literal["in_progress"] | Literal["cancelled"] | Literal["failed"] +register_schema(VectorStoreFileStatus, name="VectorStoreFileStatus") + + @json_schema_type class VectorStoreFileObject(BaseModel): """OpenAI Vector Store File object.""" @@ -187,7 +191,7 @@ class VectorStoreFileObject(BaseModel): chunking_strategy: VectorStoreChunkingStrategy created_at: int last_error: VectorStoreFileLastError | None = None - status: Literal["completed"] | Literal["in_progress"] | Literal["cancelled"] | Literal["failed"] + status: VectorStoreFileStatus usage_bytes: int = 0 vector_store_id: str @@ -198,6 +202,9 @@ class VectorStoreListFilesResponse(BaseModel): object: str = "list" data: list[VectorStoreFileObject] + first_id: str | None = None + last_id: str | None = None + has_more: bool = False @json_schema_type @@ -399,6 +406,11 @@ class VectorIO(Protocol): async def openai_list_files_in_vector_store( self, vector_store_id: str, + limit: int | None = 20, + order: str | None = "desc", + after: str | None = None, + before: str | None = None, + filter: VectorStoreFileStatus | None = None, ) -> VectorStoreListFilesResponse: """List files in a vector store. 
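The new protocol parameters add cursor-based pagination and status filtering to the per-store file listing. As a rough sketch of the intended semantics (illustrative only, assuming in-memory file objects with `id`, `status`, and `created_at` attributes; the actual implementation is in the OpenAIVectorStoreMixin change below): sort by creation time, apply the optional status filter, slice at the `after`/`before` cursors, truncate to `limit`, and report `has_more` when entries remain beyond the page.

    # Illustrative sketch of the pagination semantics -- not the patched code itself.
    def paginate_file_objects(files, limit=20, order="desc", after=None, before=None, filter=None):
        # Optional status filter: "completed", "in_progress", "cancelled", or "failed".
        files = [f for f in files if filter is None or f.status == filter]
        # Newest first by default (order="desc").
        files.sort(key=lambda f: f.created_at, reverse=(order == "desc"))
        # Cursor slicing: keep entries strictly after `after` and strictly before `before`.
        if after is not None:
            idx = next((i for i, f in enumerate(files) if f.id == after), -1)
            if idx >= 0:
                files = files[idx + 1 :]
        if before is not None:
            idx = next((i for i, f in enumerate(files) if f.id == before), len(files))
            files = files[:idx]
        page = files[:limit]
        has_more = len(files) > limit
        return page, has_more
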
diff --git a/llama_stack/distribution/routers/vector_io.py b/llama_stack/distribution/routers/vector_io.py index d6baa389d..643029d60 100644 --- a/llama_stack/distribution/routers/vector_io.py +++ b/llama_stack/distribution/routers/vector_io.py @@ -26,6 +26,7 @@ from llama_stack.apis.vector_io.vector_io import ( VectorStoreFileContentsResponse, VectorStoreFileDeleteResponse, VectorStoreFileObject, + VectorStoreFileStatus, ) from llama_stack.log import get_logger from llama_stack.providers.datatypes import HealthResponse, HealthStatus, RoutingTable @@ -287,12 +288,22 @@ class VectorIORouter(VectorIO): async def openai_list_files_in_vector_store( self, vector_store_id: str, + limit: int | None = 20, + order: str | None = "desc", + after: str | None = None, + before: str | None = None, + filter: VectorStoreFileStatus | None = None, ) -> list[VectorStoreFileObject]: logger.debug(f"VectorIORouter.openai_list_files_in_vector_store: {vector_store_id}") # Route based on vector store ID provider = self.routing_table.get_provider_impl(vector_store_id) return await provider.openai_list_files_in_vector_store( vector_store_id=vector_store_id, + limit=limit, + order=order, + after=after, + before=before, + filter=filter, ) async def openai_retrieve_vector_store_file( diff --git a/llama_stack/providers/utils/memory/openai_vector_store_mixin.py b/llama_stack/providers/utils/memory/openai_vector_store_mixin.py index 2602acd4f..9c0e1dbe7 100644 --- a/llama_stack/providers/utils/memory/openai_vector_store_mixin.py +++ b/llama_stack/providers/utils/memory/openai_vector_store_mixin.py @@ -35,6 +35,7 @@ from llama_stack.apis.vector_io.vector_io import ( VectorStoreFileDeleteResponse, VectorStoreFileLastError, VectorStoreFileObject, + VectorStoreFileStatus, VectorStoreListFilesResponse, ) from llama_stack.providers.utils.memory.vector_store import content_from_data_and_mime_type, make_overlapped_chunks @@ -592,21 +593,56 @@ class OpenAIVectorStoreMixin(ABC): async def openai_list_files_in_vector_store( self, vector_store_id: str, + limit: int | None = 20, + order: str | None = "desc", + after: str | None = None, + before: str | None = None, + filter: VectorStoreFileStatus | None = None, ) -> VectorStoreListFilesResponse: """List files in a vector store.""" + limit = limit or 20 + order = order or "desc" if vector_store_id not in self.openai_vector_stores: raise ValueError(f"Vector store {vector_store_id} not found") store_info = self.openai_vector_stores[vector_store_id] - file_objects = [] + file_objects: list[VectorStoreFileObject] = [] for file_id in store_info["file_ids"]: file_info = await self._load_openai_vector_store_file(vector_store_id, file_id) - file_objects.append(VectorStoreFileObject(**file_info)) + file_object = VectorStoreFileObject(**file_info) + if filter and file_object.status != filter: + continue + file_objects.append(file_object) + + # Sort by created_at + reverse_order = order == "desc" + file_objects.sort(key=lambda x: x.created_at, reverse=reverse_order) + + # Apply cursor-based pagination + if after: + after_index = next((i for i, file in enumerate(file_objects) if file.id == after), -1) + if after_index >= 0: + file_objects = file_objects[after_index + 1 :] + + if before: + before_index = next((i for i, file in enumerate(file_objects) if file.id == before), len(file_objects)) + file_objects = file_objects[:before_index] + + # Apply limit + limited_files = file_objects[:limit] + + # Determine pagination info + has_more = len(file_objects) > limit + first_id = file_objects[0].id if 
file_objects else None + last_id = file_objects[-1].id if file_objects else None return VectorStoreListFilesResponse( - data=file_objects, + data=limited_files, + has_more=has_more, + first_id=first_id, + last_id=last_id, ) async def openai_retrieve_vector_store_file( diff --git a/tests/integration/vector_io/test_openai_vector_stores.py b/tests/integration/vector_io/test_openai_vector_stores.py index 0440cd21c..4856455c4 100644 --- a/tests/integration/vector_io/test_openai_vector_stores.py +++ b/tests/integration/vector_io/test_openai_vector_stores.py @@ -509,7 +509,9 @@ def test_openai_vector_store_attach_files_on_creation(compat_client_with_empty_s valid_file_ids.append(file.id) # include an invalid file ID so we can test failed status - file_ids = valid_file_ids + ["invalid_file_id"] + failed_file_id = "invalid_file_id" + file_ids = valid_file_ids + [failed_file_id] + num_failed = len(file_ids) - len(valid_file_ids) # Create a vector store vector_store = compat_client.vector_stores.create( @@ -520,7 +522,7 @@ def test_openai_vector_store_attach_files_on_creation(compat_client_with_empty_s assert vector_store.file_counts.completed == len(valid_file_ids) assert vector_store.file_counts.total == len(file_ids) assert vector_store.file_counts.cancelled == 0 - assert vector_store.file_counts.failed == len(file_ids) - len(valid_file_ids) + assert vector_store.file_counts.failed == num_failed assert vector_store.file_counts.in_progress == 0 files_list = compat_client.vector_stores.files.list(vector_store_id=vector_store.id) @@ -532,11 +534,13 @@ def test_openai_vector_store_attach_files_on_creation(compat_client_with_empty_s else: assert file.status == "failed" + failed_list = compat_client.vector_stores.files.list(vector_store_id=vector_store.id, filter="failed") + assert len(failed_list.data) == num_failed + assert failed_file_id == failed_list.data[0].id + # Delete the invalid file - delete_response = compat_client.vector_stores.files.delete( - vector_store_id=vector_store.id, file_id="invalid_file_id" - ) - assert delete_response.id == "invalid_file_id" + delete_response = compat_client.vector_stores.files.delete(vector_store_id=vector_store.id, file_id=failed_file_id) + assert delete_response.id == failed_file_id updated_vector_store = compat_client.vector_stores.retrieve(vector_store_id=vector_store.id) assert updated_vector_store.file_counts.completed == len(valid_file_ids) @@ -573,6 +577,7 @@ def test_openai_vector_store_list_files(compat_client_with_empty_stores, client_ assert files_list assert files_list.object == "list" assert files_list.data + assert not files_list.has_more assert len(files_list.data) == 3 assert set(file_ids) == {file.id for file in files_list.data} assert files_list.data[0].object == "vector_store.file" @@ -580,8 +585,21 @@ def test_openai_vector_store_list_files(compat_client_with_empty_stores, client_ assert files_list.data[0].status == "completed" assert files_list.data[0].chunking_strategy.type == "auto" assert files_list.data[0].created_at > 0 + assert files_list.first_id == files_list.data[0].id assert not files_list.data[0].last_error + first_page = compat_client.vector_stores.files.list(vector_store_id=vector_store.id, limit=2) + assert first_page.has_more + assert len(first_page.data) == 2 + assert first_page.first_id == first_page.data[0].id + assert first_page.last_id != first_page.data[-1].id + + next_page = compat_client.vector_stores.files.list( + vector_store_id=vector_store.id, limit=2, after=first_page.data[-1].id + ) + assert not 
next_page.has_more + assert len(next_page.data) == 1 + updated_vector_store = compat_client.vector_stores.retrieve(vector_store_id=vector_store.id) assert updated_vector_store.file_counts.completed == 3 assert updated_vector_store.file_counts.total == 3
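
For reference, a minimal client-side sketch of paging through a store's files with the new parameters, assuming an OpenAI-compatible client such as the compat_client used in the integration test (the loop itself is illustrative and not part of the patch):

    # Walk all files in the store, two per page, advancing the `after` cursor.
    after = None
    all_files = []
    while True:
        kwargs = {"vector_store_id": vector_store.id, "limit": 2}
        if after is not None:
            kwargs["after"] = after
        page = compat_client.vector_stores.files.list(**kwargs)
        all_files.extend(page.data)
        if not page.has_more:
            break
        after = page.data[-1].id

    # List only files whose ingestion failed.
    failed = compat_client.vector_stores.files.list(
        vector_store_id=vector_store.id, filter="failed"
    )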