From 4c0eb47fc70696469e8469bd82e308724cfb3b6f Mon Sep 17 00:00:00 2001 From: Francisco Javier Arceo Date: Fri, 1 Aug 2025 17:03:43 -0400 Subject: [PATCH] feat: Adding support for get, update, delete for Vector Stores API Signed-off-by: Francisco Javier Arceo --- docs/_static/llama-stack-spec.html | 454 ++++++++++++++++ docs/_static/llama-stack-spec.yaml | 324 +++++++++++ llama_stack/apis/vector_io/vector_io.py | 142 +++++ llama_stack/core/routers/vector_io.py | 64 +++ llama_stack/core/routing_tables/vector_dbs.py | 70 +++ .../utils/memory/openai_vector_store_mixin.py | 241 +++++++++ .../vector-stores/vector-store-detail.tsx | 17 +- .../test_vector_io_openai_vector_stores.py | 507 ++++++++++++++++++ 8 files changed, 1818 insertions(+), 1 deletion(-) diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html index f9af10165..2c8da2740 100644 --- a/docs/_static/llama-stack-spec.html +++ b/docs/_static/llama-stack-spec.html @@ -3793,6 +3793,195 @@ ] } }, + "/v1/openai/v1/vector_stores/{vector_store_id}/files/{file_id}/chunks/{chunk_id}": { + "get": { + "responses": { + "200": { + "description": "A VectorStoreChunkObject representing the chunk.", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/VectorStoreChunkObject" + } + } + } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" + } + }, + "tags": [ + "VectorIO" + ], + "description": "Retrieve a specific chunk from a vector store file.", + "parameters": [ + { + "name": "vector_store_id", + "in": "path", + "description": "The ID of the vector store containing the chunk.", + "required": true, + "schema": { + "type": "string" + } + }, + { + "name": "file_id", + "in": "path", + "description": "The ID of the file containing the chunk.", + "required": true, + "schema": { + "type": "string" + } + }, + { + "name": "chunk_id", + "in": "path", + "description": "The ID of the chunk to retrieve.", + "required": true, + "schema": { + "type": "string" + } + } + ] + }, + "post": { + "responses": { + "200": { + "description": "A VectorStoreChunkObject representing the updated chunk.", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/VectorStoreChunkObject" + } + } + } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" + } + }, + "tags": [ + "VectorIO" + ], + "description": "Update a specific chunk in a vector store file.", + "parameters": [ + { + "name": "vector_store_id", + "in": "path", + "description": "The ID of the vector store containing the chunk.", + "required": true, + "schema": { + "type": "string" + } + }, + { + "name": "file_id", + "in": "path", + "description": "The ID of the file containing the chunk.", + "required": true, + "schema": { + "type": "string" + } + }, + { + "name": "chunk_id", + "in": "path", + "description": "The ID of the chunk to update.", + "required": true, + "schema": { + "type": "string" + } + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/OpenaiUpdateVectorStoreChunkRequest" + } + } + }, + "required": true + } + }, + "delete": { + "responses": { + "200": { + "description": "A VectorStoreChunkDeleteResponse indicating the deletion status.", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/VectorStoreChunkDeleteResponse" + } + } + } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" + } + }, + "tags": [ + "VectorIO" + ], + "description": "Delete a specific chunk from a vector store file.", + "parameters": [ + { + "name": "vector_store_id", + "in": "path", + "description": "The ID of the vector store containing the chunk.", + "required": true, + "schema": { + "type": "string" + } + }, + { + "name": "file_id", + "in": "path", + "description": "The ID of the file containing the chunk.", + "required": true, + "schema": { + "type": "string" + } + }, + { + "name": "chunk_id", + "in": "path", + "description": "The ID of the chunk to delete.", + "required": true, + "schema": { + "type": "string" + } + } + ] + } + }, "/v1/openai/v1/vector_stores/{vector_store_id}/files/{file_id}": { "get": { "responses": { @@ -4155,6 +4344,94 @@ "parameters": [] } }, + "/v1/openai/v1/vector_stores/{vector_store_id}/files/{file_id}/chunks": { + "get": { + "responses": { + "200": { + "description": "A VectorStoreListChunksResponse with the list of chunks.", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/VectorStoreListChunksResponse" + } + } + } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" + } + }, + "tags": [ + "VectorIO" + ], + "description": "List chunks in a vector store file.", + "parameters": [ + { + "name": "vector_store_id", + "in": "path", + "description": "The ID of the vector store.", + "required": true, + "schema": { + "type": "string" + } + }, + { + "name": "file_id", + "in": "path", + "description": "The ID of the file.", + "required": true, + "schema": { + "type": "string" + } + }, + { + "name": "limit", + "in": "query", + "description": "Max number of chunks to return.", + "required": false, + "schema": { + "type": "integer" + } + }, + { + "name": "order", + "in": "query", + "description": "Sort order.", + "required": false, + "schema": { + "type": "string" + } + }, + { + "name": "after", + "in": "query", + "description": "Pagination cursor.", + "required": false, + "schema": { + "type": "string" + } + }, + { + "name": "before", + "in": "query", + "description": "Pagination cursor.", + "required": false, + "schema": { + "type": "string" + } + } + ] + } + }, "/v1/openai/v1/files/{file_id}/content": { "get": { "responses": { @@ -14351,6 +14628,33 @@ "title": "VectorStoreDeleteResponse", "description": "Response from deleting a vector store." }, + "VectorStoreChunkDeleteResponse": { + "type": "object", + "properties": { + "id": { + "type": "string", + "description": "Unique identifier of the deleted chunk" + }, + "object": { + "type": "string", + "default": "vector_store.file.chunk.deleted", + "description": "Object type identifier for the deletion response" + }, + "deleted": { + "type": "boolean", + "default": true, + "description": "Whether the deletion operation was successful" + } + }, + "additionalProperties": false, + "required": [ + "id", + "object", + "deleted" + ], + "title": "VectorStoreChunkDeleteResponse", + "description": "Response from deleting a vector store chunk." + }, "VectorStoreFileDeleteResponse": { "type": "object", "properties": { @@ -14691,6 +14995,119 @@ ], "title": "OpenAIListModelsResponse" }, + "VectorStoreChunkObject": { + "type": "object", + "properties": { + "id": { + "type": "string", + "description": "Unique identifier for the chunk" + }, + "object": { + "type": "string", + "default": "vector_store.file.chunk", + "description": "Object type identifier, always \"vector_store.file.chunk\"" + }, + "created_at": { + "type": "integer", + "description": "Timestamp when the chunk was created" + }, + "vector_store_id": { + "type": "string", + "description": "ID of the vector store containing this chunk" + }, + "file_id": { + "type": "string", + "description": "ID of the file containing this chunk" + }, + "content": { + "$ref": "#/components/schemas/InterleavedContent", + "description": "The content of the chunk, using the same format as Chunk class" + }, + "metadata": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "Metadata associated with the chunk" + }, + "embedding": { + "type": "array", + "items": { + "type": "number" + }, + "description": "The embedding vector for the chunk" + } + }, + "additionalProperties": false, + "required": [ + "id", + "object", + "created_at", + "vector_store_id", + "file_id", + "content", + "metadata" + ], + "title": "VectorStoreChunkObject", + "description": "OpenAI Vector Store Chunk object." + }, + "VectorStoreListChunksResponse": { + "type": "object", + "properties": { + "object": { + "type": "string", + "default": "list", + "description": "Object type identifier, always \"list\"" + }, + "data": { + "type": "array", + "items": { + "$ref": "#/components/schemas/VectorStoreChunkObject" + }, + "description": "List of vector store chunk objects" + }, + "first_id": { + "type": "string", + "description": "(Optional) ID of the first chunk in the list for pagination" + }, + "last_id": { + "type": "string", + "description": "(Optional) ID of the last chunk in the list for pagination" + }, + "has_more": { + "type": "boolean", + "default": false, + "description": "Whether there are more chunks available beyond this page" + } + }, + "additionalProperties": false, + "required": [ + "object", + "data", + "has_more" + ], + "title": "VectorStoreListChunksResponse", + "description": "Response from listing chunks in a vector store file." + }, "VectorStoreListResponse": { "type": "object", "properties": { @@ -15039,6 +15456,43 @@ "additionalProperties": false, "title": "OpenaiUpdateVectorStoreRequest" }, + "OpenaiUpdateVectorStoreChunkRequest": { + "type": "object", + "properties": { + "content": { + "$ref": "#/components/schemas/InterleavedContent", + "description": "Updated content for the chunk." + }, + "metadata": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "Updated metadata for the chunk." + } + }, + "additionalProperties": false, + "title": "OpenaiUpdateVectorStoreChunkRequest" + }, "OpenaiUpdateVectorStoreFileRequest": { "type": "object", "properties": { diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml index d2c41b2bf..453a0d8ed 100644 --- a/docs/_static/llama-stack-spec.yaml +++ b/docs/_static/llama-stack-spec.yaml @@ -2674,6 +2674,142 @@ paths: required: true schema: type: string + /v1/openai/v1/vector_stores/{vector_store_id}/files/{file_id}/chunks/{chunk_id}: + get: + responses: + '200': + description: >- + A VectorStoreChunkObject representing the chunk. + content: + application/json: + schema: + $ref: '#/components/schemas/VectorStoreChunkObject' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - VectorIO + description: >- + Retrieve a specific chunk from a vector store file. + parameters: + - name: vector_store_id + in: path + description: >- + The ID of the vector store containing the chunk. + required: true + schema: + type: string + - name: file_id + in: path + description: The ID of the file containing the chunk. + required: true + schema: + type: string + - name: chunk_id + in: path + description: The ID of the chunk to retrieve. + required: true + schema: + type: string + post: + responses: + '200': + description: >- + A VectorStoreChunkObject representing the updated chunk. + content: + application/json: + schema: + $ref: '#/components/schemas/VectorStoreChunkObject' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - VectorIO + description: >- + Update a specific chunk in a vector store file. + parameters: + - name: vector_store_id + in: path + description: >- + The ID of the vector store containing the chunk. + required: true + schema: + type: string + - name: file_id + in: path + description: The ID of the file containing the chunk. + required: true + schema: + type: string + - name: chunk_id + in: path + description: The ID of the chunk to update. + required: true + schema: + type: string + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/OpenaiUpdateVectorStoreChunkRequest' + required: true + delete: + responses: + '200': + description: >- + A VectorStoreChunkDeleteResponse indicating the deletion status. + content: + application/json: + schema: + $ref: '#/components/schemas/VectorStoreChunkDeleteResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - VectorIO + description: >- + Delete a specific chunk from a vector store file. + parameters: + - name: vector_store_id + in: path + description: >- + The ID of the vector store containing the chunk. + required: true + schema: + type: string + - name: file_id + in: path + description: The ID of the file containing the chunk. + required: true + schema: + type: string + - name: chunk_id + in: path + description: The ID of the chunk to delete. + required: true + schema: + type: string /v1/openai/v1/vector_stores/{vector_store_id}/files/{file_id}: get: responses: @@ -2947,6 +3083,66 @@ paths: - Models description: List models using the OpenAI API. parameters: [] + /v1/openai/v1/vector_stores/{vector_store_id}/files/{file_id}/chunks: + get: + responses: + '200': + description: >- + A VectorStoreListChunksResponse with the list of chunks. + content: + application/json: + schema: + $ref: '#/components/schemas/VectorStoreListChunksResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - VectorIO + description: List chunks in a vector store file. + parameters: + - name: vector_store_id + in: path + description: The ID of the vector store. + required: true + schema: + type: string + - name: file_id + in: path + description: The ID of the file. + required: true + schema: + type: string + - name: limit + in: query + description: Max number of chunks to return. + required: false + schema: + type: integer + - name: order + in: query + description: Sort order. + required: false + schema: + type: string + - name: after + in: query + description: Pagination cursor. + required: false + schema: + type: string + - name: before + in: query + description: Pagination cursor. + required: false + schema: + type: string /v1/openai/v1/files/{file_id}/content: get: responses: @@ -10609,6 +10805,30 @@ components: - deleted title: VectorStoreDeleteResponse description: Response from deleting a vector store. + VectorStoreChunkDeleteResponse: + type: object + properties: + id: + type: string + description: Unique identifier of the deleted chunk + object: + type: string + default: vector_store.file.chunk.deleted + description: >- + Object type identifier for the deletion response + deleted: + type: boolean + default: true + description: >- + Whether the deletion operation was successful + additionalProperties: false + required: + - id + - object + - deleted + title: VectorStoreChunkDeleteResponse + description: >- + Response from deleting a vector store chunk. VectorStoreFileDeleteResponse: type: object properties: @@ -10895,6 +11115,91 @@ components: required: - data title: OpenAIListModelsResponse + VectorStoreChunkObject: + type: object + properties: + id: + type: string + description: Unique identifier for the chunk + object: + type: string + default: vector_store.file.chunk + description: >- + Object type identifier, always "vector_store.file.chunk" + created_at: + type: integer + description: Timestamp when the chunk was created + vector_store_id: + type: string + description: >- + ID of the vector store containing this chunk + file_id: + type: string + description: ID of the file containing this chunk + content: + $ref: '#/components/schemas/InterleavedContent' + description: >- + The content of the chunk, using the same format as Chunk class + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: Metadata associated with the chunk + embedding: + type: array + items: + type: number + description: The embedding vector for the chunk + additionalProperties: false + required: + - id + - object + - created_at + - vector_store_id + - file_id + - content + - metadata + title: VectorStoreChunkObject + description: OpenAI Vector Store Chunk object. + VectorStoreListChunksResponse: + type: object + properties: + object: + type: string + default: list + description: Object type identifier, always "list" + data: + type: array + items: + $ref: '#/components/schemas/VectorStoreChunkObject' + description: List of vector store chunk objects + first_id: + type: string + description: >- + (Optional) ID of the first chunk in the list for pagination + last_id: + type: string + description: >- + (Optional) ID of the last chunk in the list for pagination + has_more: + type: boolean + default: false + description: >- + Whether there are more chunks available beyond this page + additionalProperties: false + required: + - object + - data + - has_more + title: VectorStoreListChunksResponse + description: >- + Response from listing chunks in a vector store file. VectorStoreListResponse: type: object properties: @@ -11141,6 +11446,25 @@ components: Set of 16 key-value pairs that can be attached to an object. additionalProperties: false title: OpenaiUpdateVectorStoreRequest + OpenaiUpdateVectorStoreChunkRequest: + type: object + properties: + content: + $ref: '#/components/schemas/InterleavedContent' + description: Updated content for the chunk. + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: Updated metadata for the chunk. + additionalProperties: false + title: OpenaiUpdateVectorStoreChunkRequest OpenaiUpdateVectorStoreFileRequest: type: object properties: diff --git a/llama_stack/apis/vector_io/vector_io.py b/llama_stack/apis/vector_io/vector_io.py index 3e8065cfb..355c0675b 100644 --- a/llama_stack/apis/vector_io/vector_io.py +++ b/llama_stack/apis/vector_io/vector_io.py @@ -426,6 +426,74 @@ class VectorStoreFileDeleteResponse(BaseModel): deleted: bool = True +@json_schema_type +class VectorStoreChunkObject(BaseModel): + """OpenAI Vector Store Chunk object. + + :param id: Unique identifier for the chunk + :param object: Object type identifier, always "vector_store.file.chunk" + :param created_at: Timestamp when the chunk was created + :param vector_store_id: ID of the vector store containing this chunk + :param file_id: ID of the file containing this chunk + :param content: The content of the chunk, using the same format as Chunk class + :param metadata: Metadata associated with the chunk + :param embedding: The embedding vector for the chunk + """ + + id: str + object: str = "vector_store.file.chunk" + created_at: int + vector_store_id: str + file_id: str + content: InterleavedContent + metadata: dict[str, Any] = Field(default_factory=dict) + embedding: list[float] | None = None + + +@json_schema_type +class VectorStoreListChunksResponse(BaseModel): + """Response from listing chunks in a vector store file. + + :param object: Object type identifier, always "list" + :param data: List of vector store chunk objects + :param first_id: (Optional) ID of the first chunk in the list for pagination + :param last_id: (Optional) ID of the last chunk in the list for pagination + :param has_more: Whether there are more chunks available beyond this page + """ + + object: str = "list" + data: list[VectorStoreChunkObject] + first_id: str | None = None + last_id: str | None = None + has_more: bool = False + + +@json_schema_type +class VectorStoreChunkUpdateRequest(BaseModel): + """Request to update a vector store chunk. + + :param content: Updated content for the chunk + :param metadata: Updated metadata for the chunk + """ + + content: InterleavedContent | None = None + metadata: dict[str, Any] | None = None + + +@json_schema_type +class VectorStoreChunkDeleteResponse(BaseModel): + """Response from deleting a vector store chunk. + + :param id: Unique identifier of the deleted chunk + :param object: Object type identifier for the deletion response + :param deleted: Whether the deletion operation was successful + """ + + id: str + object: str = "vector_store.file.chunk.deleted" + deleted: bool = True + + class VectorDBStore(Protocol): def get_vector_db(self, vector_db_id: str) -> VectorDB | None: ... @@ -638,6 +706,28 @@ class VectorIO(Protocol): """ ... + @webmethod(route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}/chunks", method="GET") + async def openai_list_vector_store_chunks( + self, + vector_store_id: str, + file_id: str, + limit: int | None = 20, + order: str | None = "desc", + after: str | None = None, + before: str | None = None, + ) -> VectorStoreListChunksResponse: + """List chunks in a vector store file. + + :param vector_store_id: The ID of the vector store. + :param file_id: The ID of the file. + :param limit: Max number of chunks to return. + :param order: Sort order. + :param after: Pagination cursor. + :param before: Pagination cursor. + :returns: A VectorStoreListChunksResponse with the list of chunks. + """ + ... + @webmethod(route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}/content", method="GET") async def openai_retrieve_vector_store_file_contents( self, @@ -681,3 +771,55 @@ class VectorIO(Protocol): :returns: A VectorStoreFileDeleteResponse indicating the deletion status. """ ... + + @webmethod(route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}/chunks/{chunk_id}", method="GET") + async def openai_retrieve_vector_store_chunk( + self, + vector_store_id: str, + file_id: str, + chunk_id: str, + ) -> VectorStoreChunkObject: + """Retrieve a specific chunk from a vector store file. + + :param vector_store_id: The ID of the vector store containing the chunk. + :param file_id: The ID of the file containing the chunk. + :param chunk_id: The ID of the chunk to retrieve. + :returns: A VectorStoreChunkObject representing the chunk. + """ + ... + + @webmethod(route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}/chunks/{chunk_id}", method="POST") + async def openai_update_vector_store_chunk( + self, + vector_store_id: str, + file_id: str, + chunk_id: str, + content: InterleavedContent | None = None, + metadata: dict[str, Any] | None = None, + ) -> VectorStoreChunkObject: + """Update a specific chunk in a vector store file. + + :param vector_store_id: The ID of the vector store containing the chunk. + :param file_id: The ID of the file containing the chunk. + :param chunk_id: The ID of the chunk to update. + :param content: Updated content for the chunk. + :param metadata: Updated metadata for the chunk. + :returns: A VectorStoreChunkObject representing the updated chunk. + """ + ... + + @webmethod(route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}/chunks/{chunk_id}", method="DELETE") + async def openai_delete_vector_store_chunk( + self, + vector_store_id: str, + file_id: str, + chunk_id: str, + ) -> VectorStoreChunkDeleteResponse: + """Delete a specific chunk from a vector store file. + + :param vector_store_id: The ID of the vector store containing the chunk. + :param file_id: The ID of the file containing the chunk. + :param chunk_id: The ID of the chunk to delete. + :returns: A VectorStoreChunkDeleteResponse indicating the deletion status. + """ + ... diff --git a/llama_stack/core/routers/vector_io.py b/llama_stack/core/routers/vector_io.py index 3d0996c49..d32289b4d 100644 --- a/llama_stack/core/routers/vector_io.py +++ b/llama_stack/core/routers/vector_io.py @@ -17,7 +17,9 @@ from llama_stack.apis.vector_io import ( QueryChunksResponse, SearchRankingOptions, VectorIO, + VectorStoreChunkDeleteResponse, VectorStoreChunkingStrategy, + VectorStoreChunkObject, VectorStoreDeleteResponse, VectorStoreFileContentsResponse, VectorStoreFileDeleteResponse, @@ -341,6 +343,68 @@ class VectorIORouter(VectorIO): file_id=file_id, ) + async def openai_retrieve_vector_store_chunk( + self, + vector_store_id: str, + file_id: str, + chunk_id: str, + ) -> VectorStoreChunkObject: + logger.debug(f"VectorIORouter.openai_retrieve_vector_store_chunk: {vector_store_id}, {file_id}, {chunk_id}") + return await self.routing_table.openai_retrieve_vector_store_chunk( + vector_store_id=vector_store_id, + file_id=file_id, + chunk_id=chunk_id, + ) + + async def openai_update_vector_store_chunk( + self, + vector_store_id: str, + file_id: str, + chunk_id: str, + content: InterleavedContent | None = None, + metadata: dict[str, Any] | None = None, + ) -> VectorStoreChunkObject: + logger.debug(f"VectorIORouter.openai_update_vector_store_chunk: {vector_store_id}, {file_id}, {chunk_id}") + return await self.routing_table.openai_update_vector_store_chunk( + vector_store_id=vector_store_id, + file_id=file_id, + chunk_id=chunk_id, + content=content, + metadata=metadata, + ) + + async def openai_delete_vector_store_chunk( + self, + vector_store_id: str, + file_id: str, + chunk_id: str, + ) -> VectorStoreChunkDeleteResponse: + logger.debug(f"VectorIORouter.openai_delete_vector_store_chunk: {vector_store_id}, {file_id}, {chunk_id}") + return await self.routing_table.openai_delete_vector_store_chunk( + vector_store_id=vector_store_id, + file_id=file_id, + chunk_id=chunk_id, + ) + + async def openai_list_vector_store_chunks( + self, + vector_store_id: str, + file_id: str, + limit: int | None = 20, + order: str | None = "desc", + after: str | None = None, + before: str | None = None, + ): + logger.debug(f"VectorIORouter.openai_list_vector_store_chunks: {vector_store_id}, {file_id}") + return await self.routing_table.openai_list_vector_store_chunks( + vector_store_id=vector_store_id, + file_id=file_id, + limit=limit, + order=order, + after=after, + before=before, + ) + async def health(self) -> dict[str, HealthResponse]: health_statuses = {} timeout = 1 # increasing the timeout to 1 second for health checks diff --git a/llama_stack/core/routing_tables/vector_dbs.py b/llama_stack/core/routing_tables/vector_dbs.py index c81a27a3b..6d5c14920 100644 --- a/llama_stack/core/routing_tables/vector_dbs.py +++ b/llama_stack/core/routing_tables/vector_dbs.py @@ -13,13 +13,17 @@ from llama_stack.apis.models import ModelType from llama_stack.apis.resource import ResourceType from llama_stack.apis.vector_dbs import ListVectorDBsResponse, VectorDB, VectorDBs from llama_stack.apis.vector_io.vector_io import ( + InterleavedContent, SearchRankingOptions, + VectorStoreChunkDeleteResponse, VectorStoreChunkingStrategy, + VectorStoreChunkObject, VectorStoreDeleteResponse, VectorStoreFileContentsResponse, VectorStoreFileDeleteResponse, VectorStoreFileObject, VectorStoreFileStatus, + VectorStoreListChunksResponse, VectorStoreObject, VectorStoreSearchResponsePage, ) @@ -227,3 +231,69 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl, VectorDBs): vector_store_id=vector_store_id, file_id=file_id, ) + + async def openai_retrieve_vector_store_chunk( + self, + vector_store_id: str, + file_id: str, + chunk_id: str, + ) -> VectorStoreChunkObject: + await self.assert_action_allowed("read", "vector_db", vector_store_id) + provider = await self.get_provider_impl(vector_store_id) + return await provider.openai_retrieve_vector_store_chunk( + vector_store_id=vector_store_id, + file_id=file_id, + chunk_id=chunk_id, + ) + + async def openai_update_vector_store_chunk( + self, + vector_store_id: str, + file_id: str, + chunk_id: str, + content: InterleavedContent | None = None, + metadata: dict[str, Any] | None = None, + ) -> VectorStoreChunkObject: + await self.assert_action_allowed("update", "vector_db", vector_store_id) + provider = await self.get_provider_impl(vector_store_id) + return await provider.openai_update_vector_store_chunk( + vector_store_id=vector_store_id, + file_id=file_id, + chunk_id=chunk_id, + content=content, + metadata=metadata, + ) + + async def openai_delete_vector_store_chunk( + self, + vector_store_id: str, + file_id: str, + chunk_id: str, + ) -> VectorStoreChunkDeleteResponse: + await self.assert_action_allowed("delete", "vector_db", vector_store_id) + provider = await self.get_provider_impl(vector_store_id) + return await provider.openai_delete_vector_store_chunk( + vector_store_id=vector_store_id, + file_id=file_id, + chunk_id=chunk_id, + ) + + async def openai_list_vector_store_chunks( + self, + vector_store_id: str, + file_id: str, + limit: int | None = 20, + order: str | None = "desc", + after: str | None = None, + before: str | None = None, + ) -> VectorStoreListChunksResponse: + await self.assert_action_allowed("read", "vector_db", vector_store_id) + provider = await self.get_provider_impl(vector_store_id) + return await provider.openai_list_vector_store_chunks( + vector_store_id=vector_store_id, + file_id=file_id, + limit=limit, + order=order, + after=after, + before=before, + ) diff --git a/llama_stack/providers/utils/memory/openai_vector_store_mixin.py b/llama_stack/providers/utils/memory/openai_vector_store_mixin.py index c0b3175b0..d2e34575e 100644 --- a/llama_stack/providers/utils/memory/openai_vector_store_mixin.py +++ b/llama_stack/providers/utils/memory/openai_vector_store_mixin.py @@ -15,14 +15,17 @@ from typing import Any from llama_stack.apis.common.errors import VectorStoreNotFoundError from llama_stack.apis.files import Files, OpenAIFileObject +from llama_stack.apis.inference import InterleavedContent from llama_stack.apis.vector_dbs import VectorDB from llama_stack.apis.vector_io import ( Chunk, QueryChunksResponse, SearchRankingOptions, + VectorStoreChunkDeleteResponse, VectorStoreChunkingStrategy, VectorStoreChunkingStrategyAuto, VectorStoreChunkingStrategyStatic, + VectorStoreChunkObject, VectorStoreContent, VectorStoreDeleteResponse, VectorStoreFileContentsResponse, @@ -31,6 +34,7 @@ from llama_stack.apis.vector_io import ( VectorStoreFileLastError, VectorStoreFileObject, VectorStoreFileStatus, + VectorStoreListChunksResponse, VectorStoreListFilesResponse, VectorStoreListResponse, VectorStoreObject, @@ -109,7 +113,14 @@ class OpenAIVectorStoreMixin(ABC): assert self.kvstore meta_key = f"{OPENAI_VECTOR_STORES_FILES_PREFIX}{store_id}:{file_id}" await self.kvstore.set(key=meta_key, value=json.dumps(file_info)) + + # delete old file data to properly update content contents_prefix = f"{OPENAI_VECTOR_STORES_FILES_CONTENTS_PREFIX}{store_id}:{file_id}:" + end_key = f"{contents_prefix}\xff" + old_keys = await self.kvstore.keys_in_range(contents_prefix, end_key) + for old_key in old_keys: + await self.kvstore.delete(old_key) + for idx, chunk in enumerate(file_contents): await self.kvstore.set(key=f"{contents_prefix}{idx}", value=json.dumps(chunk)) @@ -791,3 +802,233 @@ class OpenAIVectorStoreMixin(ABC): id=file_id, deleted=True, ) + + async def openai_retrieve_vector_store_chunk( + self, + vector_store_id: str, + file_id: str, + chunk_id: str, + ) -> VectorStoreChunkObject: + """Retrieve a specific chunk from a vector store file.""" + if vector_store_id not in self.openai_vector_stores: + raise VectorStoreNotFoundError(vector_store_id) + + store_info = self.openai_vector_stores[vector_store_id] + if file_id not in store_info["file_ids"]: + raise ValueError(f"File {file_id} not found in vector store {vector_store_id}") + + dict_chunks = await self._load_openai_vector_store_file_contents(vector_store_id, file_id) + chunks = [Chunk.model_validate(c) for c in dict_chunks] + + target_chunk = None + for chunk in chunks: + if chunk.chunk_id == chunk_id: + target_chunk = chunk + break + + if target_chunk is None: + raise ValueError(f"Chunk {chunk_id} not found in file {file_id}") + + file_info = await self._load_openai_vector_store_file(vector_store_id, file_id) + + return VectorStoreChunkObject( + id=chunk_id, + created_at=file_info.get("created_at", int(time.time())), + vector_store_id=vector_store_id, + file_id=file_id, + content=target_chunk.content, + metadata=target_chunk.metadata, + embedding=target_chunk.embedding, + ) + + async def openai_update_vector_store_chunk( + self, + vector_store_id: str, + file_id: str, + chunk_id: str, + content: InterleavedContent | None = None, + metadata: dict[str, Any] | None = None, + ) -> VectorStoreChunkObject: + """Update a specific chunk in a vector store file.""" + if vector_store_id not in self.openai_vector_stores: + raise VectorStoreNotFoundError(vector_store_id) + + store_info = self.openai_vector_stores[vector_store_id] + if file_id not in store_info["file_ids"]: + raise ValueError(f"File {file_id} not found in vector store {vector_store_id}") + + dict_chunks = await self._load_openai_vector_store_file_contents(vector_store_id, file_id) + chunks = [Chunk.model_validate(c) for c in dict_chunks] + + target_chunk_index = None + for i, chunk in enumerate(chunks): + if chunk.chunk_id == chunk_id: + target_chunk_index = i + break + + if target_chunk_index is None: + raise ValueError(f"Chunk {chunk_id} not found in file {file_id}") + + # updating content + target_chunk = chunks[target_chunk_index] + if content is not None: + target_chunk.content = content + # delete old chunk and update + await self.delete_chunks(vector_store_id, [chunk_id]) + await self.insert_chunks(vector_store_id, [target_chunk]) + + if metadata is not None: + target_chunk.metadata.update(metadata) + + chunks[target_chunk_index] = target_chunk + dict_chunks = [c.model_dump() for c in chunks] + file_info = await self._load_openai_vector_store_file(vector_store_id, file_id) + await self._save_openai_vector_store_file(vector_store_id, file_id, file_info, dict_chunks) + + return VectorStoreChunkObject( + id=chunk_id, + created_at=file_info.get("created_at", int(time.time())), + vector_store_id=vector_store_id, + file_id=file_id, + content=target_chunk.content, + metadata=target_chunk.metadata, + embedding=target_chunk.embedding, + ) + + async def openai_delete_vector_store_chunk( + self, + vector_store_id: str, + file_id: str, + chunk_id: str, + ) -> VectorStoreChunkDeleteResponse: + """Delete a specific chunk from a vector store file.""" + if vector_store_id not in self.openai_vector_stores: + raise VectorStoreNotFoundError(vector_store_id) + + store_info = self.openai_vector_stores[vector_store_id] + if file_id not in store_info["file_ids"]: + raise ValueError(f"File {file_id} not found in vector store {vector_store_id}") + + dict_chunks = await self._load_openai_vector_store_file_contents(vector_store_id, file_id) + chunks = [Chunk.model_validate(c) for c in dict_chunks] + + target_chunk_index = None + for i, chunk in enumerate(chunks): + if chunk.chunk_id == chunk_id: + target_chunk_index = i + break + + if target_chunk_index is None: + raise ValueError(f"Chunk {chunk_id} not found in file {file_id}") + + await self.delete_chunks(vector_store_id, [chunk_id]) + + dict_chunks.pop(target_chunk_index) + file_info = await self._load_openai_vector_store_file(vector_store_id, file_id) + await self._save_openai_vector_store_file(vector_store_id, file_id, file_info, dict_chunks) + + return VectorStoreChunkDeleteResponse( + id=chunk_id, + deleted=True, + ) + + async def openai_list_vector_store_chunks( + self, + vector_store_id: str, + file_id: str, + limit: int | None = 20, + order: str | None = "desc", + after: str | None = None, + before: str | None = None, + ) -> VectorStoreListChunksResponse: + """List chunks in a vector store file.""" + if vector_store_id not in self.openai_vector_stores: + raise VectorStoreNotFoundError(vector_store_id) + + store_info = self.openai_vector_stores[vector_store_id] + if file_id not in store_info["file_ids"]: + raise ValueError(f"File {file_id} not found in vector store {vector_store_id}") + + dict_chunks = await self._load_openai_vector_store_file_contents(vector_store_id, file_id) + file_info = await self._load_openai_vector_store_file(vector_store_id, file_id) + + chunk_ids = [] + for dict_chunk in dict_chunks: + chunk = Chunk.model_validate(dict_chunk) + if chunk.chunk_id: + chunk_ids.append(chunk.chunk_id) + + # TODO: Add abstract method query_all_chunks() to properly filter by file_id and vector_db_id, this is a temporary hack + chunks = [] + if chunk_ids: + try: + file_filter = {"type": "eq", "key": "file_id", "value": file_id} + + query_result = await self.query_chunks( + vector_db_id=vector_store_id, + query="*", # wildcard query to get all chunks + params={ + "max_chunks": len(chunk_ids) * 2, + "score_threshold": 0.0, + "filters": file_filter, + }, + ) + + chunk_id_set = set(chunk_ids) + chunks = [chunk for chunk in query_result.chunks if chunk.chunk_id in chunk_id_set] + except Exception as e: + logger.warning(f"Failed to query vector database for chunks: {e}") + # Fallback to KV store chunks if vector DB query fails + chunks = [Chunk.model_validate(c) for c in dict_chunks] + + chunk_objects = [] + for chunk in chunks: + chunk_obj = VectorStoreChunkObject( + id=chunk.chunk_id, + created_at=file_info.get("created_at", int(time.time())), + vector_store_id=vector_store_id, + file_id=file_id, + content=chunk.content, + metadata=chunk.metadata, + embedding=chunk.embedding, + ) + chunk_objects.append(chunk_obj) + + if order == "desc": + chunk_objects.sort(key=lambda x: x.created_at, reverse=True) + else: + chunk_objects.sort(key=lambda x: x.created_at) + + start_idx = 0 + end_idx = len(chunk_objects) + + if after: + # find index after 'after' chunk + for i, chunk_obj in enumerate(chunk_objects): + if chunk_obj.id == after: + start_idx = i + 1 + break + + if before: + # find index before 'before' chunk + for i, chunk_obj in enumerate(chunk_objects): + if chunk_obj.id == before: + end_idx = i + break + + if limit: + if end_idx - start_idx > limit: + end_idx = start_idx + limit + + paginated_chunks = chunk_objects[start_idx:end_idx] + + first_id = paginated_chunks[0].id if paginated_chunks else None + last_id = paginated_chunks[-1].id if paginated_chunks else None + has_more = end_idx < len(chunk_objects) + + return VectorStoreListChunksResponse( + data=paginated_chunks, + first_id=first_id, + last_id=last_id, + has_more=has_more, + ) diff --git a/llama_stack/ui/components/vector-stores/vector-store-detail.tsx b/llama_stack/ui/components/vector-stores/vector-store-detail.tsx index 7c5c91dd3..bbabe13d8 100644 --- a/llama_stack/ui/components/vector-stores/vector-store-detail.tsx +++ b/llama_stack/ui/components/vector-stores/vector-store-detail.tsx @@ -1,9 +1,11 @@ "use client"; +import { useRouter } from "next/navigation"; import type { VectorStore } from "llama-stack-client/resources/vector-stores/vector-stores"; import type { VectorStoreFile } from "llama-stack-client/resources/vector-stores/files"; import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card"; import { Skeleton } from "@/components/ui/skeleton"; +import { Button } from "@/components/ui/button"; import { DetailLoadingView, DetailErrorView, @@ -42,6 +44,11 @@ export function VectorStoreDetailView({ id, }: VectorStoreDetailViewProps) { const title = "Vector Store Details"; + const router = useRouter(); + + const handleFileClick = (fileId: string) => { + router.push(`/logs/vector-stores/${id}/files/${fileId}`); + }; if (errorStore) { return ; @@ -80,7 +87,15 @@ export function VectorStoreDetailView({ {files.map((file) => ( - {file.id} + + + {file.status} {new Date(file.created_at * 1000).toLocaleString()} diff --git a/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py b/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py index 98889f38e..9a2bf436f 100644 --- a/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py +++ b/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py @@ -11,6 +11,7 @@ from unittest.mock import AsyncMock import numpy as np import pytest +from llama_stack.apis.common.errors import VectorStoreNotFoundError from llama_stack.apis.vector_dbs import VectorDB from llama_stack.apis.vector_io import Chunk, QueryChunksResponse from llama_stack.providers.remote.vector_io.milvus.milvus import VECTOR_DBS_PREFIX @@ -294,3 +295,509 @@ async def test_delete_openai_vector_store_file_from_storage(vector_io_adapter, t assert loaded_file_info == {} loaded_contents = await vector_io_adapter._load_openai_vector_store_file_contents(store_id, file_id) assert loaded_contents == [] + + +async def test_openai_retrieve_vector_store_chunk(vector_io_adapter): + """Test retrieving a specific chunk from a vector store file.""" + store_id = "vs_1234" + file_id = "file_1234" + chunk_id = "chunk_001" + + store_info = { + "id": store_id, + "file_ids": [file_id], + "created_at": int(time.time()), + } + vector_io_adapter.openai_vector_stores[store_id] = store_info + + file_info = { + "id": file_id, + "status": "completed", + "vector_store_id": store_id, + "filename": "test_file.txt", + "created_at": int(time.time()), + } + + file_contents = [ + { + "content": "First chunk content", + "stored_chunk_id": chunk_id, + "metadata": {"file_id": file_id, "position": 0}, + "chunk_metadata": {"chunk_id": chunk_id}, + }, + { + "content": "Second chunk content", + "stored_chunk_id": "chunk_002", + "metadata": {"file_id": file_id, "position": 1}, + "chunk_metadata": {"chunk_id": "chunk_002"}, + }, + ] + + await vector_io_adapter._save_openai_vector_store_file(store_id, file_id, file_info, file_contents) + + chunk_object = await vector_io_adapter.openai_retrieve_vector_store_chunk( + vector_store_id=store_id, file_id=file_id, chunk_id=chunk_id + ) + + assert chunk_object.id == chunk_id + assert chunk_object.vector_store_id == store_id + assert chunk_object.file_id == file_id + assert chunk_object.object == "vector_store.file.chunk" + assert len(chunk_object.content) > 0 + assert chunk_object.content[0].type == "text" + assert chunk_object.content[0].text == "First chunk content" + assert chunk_object.metadata["file_id"] == file_id + assert chunk_object.metadata["position"] == 0 + + +async def test_openai_retrieve_vector_store_chunk_not_found(vector_io_adapter): + """Test retrieving a non-existent chunk raises appropriate error.""" + store_id = "vs_1234" + file_id = "file_1234" + chunk_id = "nonexistent_chunk" + + store_info = { + "id": store_id, + "file_ids": [file_id], + "created_at": int(time.time()), + } + vector_io_adapter.openai_vector_stores[store_id] = store_info + + file_info = {"id": file_id, "created_at": int(time.time())} + await vector_io_adapter._save_openai_vector_store_file(store_id, file_id, file_info, []) + + with pytest.raises(ValueError, match="Chunk nonexistent_chunk not found"): + await vector_io_adapter.openai_retrieve_vector_store_chunk( + vector_store_id=store_id, file_id=file_id, chunk_id=chunk_id + ) + + +async def test_openai_update_vector_store_chunk_metadata_only(vector_io_adapter): + """Test updating only the metadata of a chunk.""" + store_id = "vs_1234" + file_id = "file_1234" + chunk_id = "chunk_001" + + store_info = { + "id": store_id, + "file_ids": [file_id], + "created_at": int(time.time()), + } + vector_io_adapter.openai_vector_stores[store_id] = store_info + + file_info = { + "id": file_id, + "status": "completed", + "vector_store_id": store_id, + "filename": "test_file.txt", + "created_at": int(time.time()), + } + + original_content = "Original chunk content" + file_contents = [ + { + "content": original_content, + "stored_chunk_id": chunk_id, + "metadata": {"file_id": file_id, "version": 1}, + "chunk_metadata": {"chunk_id": chunk_id}, + } + ] + + await vector_io_adapter._save_openai_vector_store_file(store_id, file_id, file_info, file_contents) + + vector_io_adapter.delete_chunks = AsyncMock() + vector_io_adapter.insert_chunks = AsyncMock() + + new_metadata = {"file_id": file_id, "version": 2, "updated": True} + updated_chunk = await vector_io_adapter.openai_update_vector_store_chunk( + vector_store_id=store_id, file_id=file_id, chunk_id=chunk_id, metadata=new_metadata + ) + + vector_io_adapter.delete_chunks.assert_not_called() + vector_io_adapter.insert_chunks.assert_not_called() + + assert updated_chunk.id == chunk_id + assert updated_chunk.metadata["version"] == 2 + assert updated_chunk.metadata["updated"] is True + assert updated_chunk.content[0].text == original_content + + +async def test_openai_update_vector_store_chunk_content(vector_io_adapter): + """Test updating the content of a chunk.""" + store_id = "vs_1234" + file_id = "file_1234" + chunk_id = "chunk_001" + + store_info = { + "id": store_id, + "file_ids": [file_id], + "created_at": int(time.time()), + } + vector_io_adapter.openai_vector_stores[store_id] = store_info + + file_info = { + "id": file_id, + "status": "completed", + "vector_store_id": store_id, + "filename": "test_file.txt", + "created_at": int(time.time()), + } + + file_contents = [ + { + "content": "Original chunk content", + "stored_chunk_id": chunk_id, + "metadata": {"file_id": file_id}, + "chunk_metadata": {"chunk_id": chunk_id}, + } + ] + + await vector_io_adapter._save_openai_vector_store_file(store_id, file_id, file_info, file_contents) + + vector_io_adapter.delete_chunks = AsyncMock() + vector_io_adapter.insert_chunks = AsyncMock() + + new_content = "Updated chunk content" + updated_chunk = await vector_io_adapter.openai_update_vector_store_chunk( + vector_store_id=store_id, file_id=file_id, chunk_id=chunk_id, content=new_content + ) + + vector_io_adapter.delete_chunks.assert_awaited_once_with(store_id, [chunk_id]) + vector_io_adapter.insert_chunks.assert_awaited_once() + + assert updated_chunk.id == chunk_id + assert updated_chunk.content[0].text == new_content + + +async def test_openai_update_vector_store_chunk_both_content_and_metadata(vector_io_adapter): + """Test updating both content and metadata of a chunk.""" + store_id = "vs_1234" + file_id = "file_1234" + chunk_id = "chunk_001" + + store_info = { + "id": store_id, + "file_ids": [file_id], + "created_at": int(time.time()), + } + vector_io_adapter.openai_vector_stores[store_id] = store_info + + file_info = { + "id": file_id, + "status": "completed", + "vector_store_id": store_id, + "filename": "test_file.txt", + "created_at": int(time.time()), + } + + file_contents = [ + { + "content": "Original chunk content", + "stored_chunk_id": chunk_id, + "metadata": {"file_id": file_id, "version": 1}, + "chunk_metadata": {"chunk_id": chunk_id}, + } + ] + + await vector_io_adapter._save_openai_vector_store_file(store_id, file_id, file_info, file_contents) + + vector_io_adapter.delete_chunks = AsyncMock() + vector_io_adapter.insert_chunks = AsyncMock() + + new_content = "Updated chunk content" + new_metadata = {"file_id": file_id, "version": 2, "updated": True} + updated_chunk = await vector_io_adapter.openai_update_vector_store_chunk( + vector_store_id=store_id, file_id=file_id, chunk_id=chunk_id, content=new_content, metadata=new_metadata + ) + + vector_io_adapter.delete_chunks.assert_awaited_once_with(store_id, [chunk_id]) + vector_io_adapter.insert_chunks.assert_awaited_once() + + assert updated_chunk.id == chunk_id + assert updated_chunk.content[0].text == new_content + assert updated_chunk.metadata["version"] == 2 + assert updated_chunk.metadata["updated"] is True + + +async def test_openai_delete_vector_store_chunk(vector_io_adapter): + """Test deleting a specific chunk from a vector store file.""" + store_id = "vs_1234" + file_id = "file_1234" + chunk_id_to_delete = "chunk_001" + chunk_id_to_keep = "chunk_002" + + store_info = { + "id": store_id, + "file_ids": [file_id], + "created_at": int(time.time()), + } + vector_io_adapter.openai_vector_stores[store_id] = store_info + + file_info = { + "id": file_id, + "status": "completed", + "vector_store_id": store_id, + "filename": "test_file.txt", + "created_at": int(time.time()), + } + + file_contents = [ + { + "content": "First chunk content", + "stored_chunk_id": chunk_id_to_delete, + "metadata": {"file_id": file_id, "position": 0}, + "chunk_metadata": {"chunk_id": chunk_id_to_delete}, + }, + { + "content": "Second chunk content", + "stored_chunk_id": chunk_id_to_keep, + "metadata": {"file_id": file_id, "position": 1}, + "chunk_metadata": {"chunk_id": chunk_id_to_keep}, + }, + ] + + await vector_io_adapter._save_openai_vector_store_file(store_id, file_id, file_info, file_contents) + + vector_io_adapter.delete_chunks = AsyncMock() + + delete_response = await vector_io_adapter.openai_delete_vector_store_chunk( + vector_store_id=store_id, file_id=file_id, chunk_id=chunk_id_to_delete + ) + + vector_io_adapter.delete_chunks.assert_awaited_once_with(store_id, [chunk_id_to_delete]) + + assert delete_response.id == chunk_id_to_delete + assert delete_response.object == "vector_store.file.chunk.deleted" + assert delete_response.deleted is True + + remaining_contents = await vector_io_adapter._load_openai_vector_store_file_contents(store_id, file_id) + assert len(remaining_contents) == 1 + assert remaining_contents[0]["stored_chunk_id"] == chunk_id_to_keep + + +async def test_openai_delete_vector_store_chunk_not_found(vector_io_adapter): + """Test deleting a non-existent chunk raises appropriate error.""" + store_id = "vs_1234" + file_id = "file_1234" + chunk_id = "nonexistent_chunk" + + store_info = { + "id": store_id, + "file_ids": [file_id], + "created_at": int(time.time()), + } + vector_io_adapter.openai_vector_stores[store_id] = store_info + + file_info = {"id": file_id, "created_at": int(time.time())} + await vector_io_adapter._save_openai_vector_store_file(store_id, file_id, file_info, []) + + with pytest.raises(ValueError, match="Chunk nonexistent_chunk not found"): + await vector_io_adapter.openai_delete_vector_store_chunk( + vector_store_id=store_id, file_id=file_id, chunk_id=chunk_id + ) + + +async def test_chunk_operations_with_nonexistent_vector_store(vector_io_adapter): + """Test that chunk operations raise errors for non-existent vector stores.""" + + store_id = "nonexistent_store" + file_id = "file_1234" + chunk_id = "chunk_001" + + with pytest.raises(VectorStoreNotFoundError): + await vector_io_adapter.openai_retrieve_vector_store_chunk( + vector_store_id=store_id, file_id=file_id, chunk_id=chunk_id + ) + + with pytest.raises(VectorStoreNotFoundError): + await vector_io_adapter.openai_update_vector_store_chunk( + vector_store_id=store_id, file_id=file_id, chunk_id=chunk_id, metadata={"test": "value"} + ) + + with pytest.raises(VectorStoreNotFoundError): + await vector_io_adapter.openai_delete_vector_store_chunk( + vector_store_id=store_id, file_id=file_id, chunk_id=chunk_id + ) + + +async def test_chunk_operations_with_nonexistent_file(vector_io_adapter): + """Test that chunk operations raise errors for non-existent files.""" + store_id = "vs_1234" + file_id = "nonexistent_file" + chunk_id = "chunk_001" + + store_info = { + "id": store_id, + "file_ids": [], + "created_at": int(time.time()), + } + vector_io_adapter.openai_vector_stores[store_id] = store_info + + with pytest.raises(ValueError, match=f"File {file_id} not found in vector store"): + await vector_io_adapter.openai_retrieve_vector_store_chunk( + vector_store_id=store_id, file_id=file_id, chunk_id=chunk_id + ) + + with pytest.raises(ValueError, match=f"File {file_id} not found in vector store"): + await vector_io_adapter.openai_update_vector_store_chunk( + vector_store_id=store_id, file_id=file_id, chunk_id=chunk_id, metadata={"test": "value"} + ) + + with pytest.raises(ValueError, match=f"File {file_id} not found in vector store"): + await vector_io_adapter.openai_delete_vector_store_chunk( + vector_store_id=store_id, file_id=file_id, chunk_id=chunk_id + ) + + with pytest.raises(ValueError, match=f"File {file_id} not found in vector store"): + await vector_io_adapter.openai_list_vector_store_chunks(vector_store_id=store_id, file_id=file_id) + + +async def test_openai_list_vector_store_chunks(vector_io_adapter): + """Test listing chunks in a vector store file.""" + store_id = "test_store_123" + await vector_io_adapter.openai_create_vector_store( + vector_store_id=store_id, + name="Test Store", + embedding_model="test_model", + embedding_dimension=512, + ) + + test_content = "This is test content for chunk listing." + test_metadata = {"source": "test_file", "chunk_number": 1} + test_embedding = [0.1] * 512 + + chunk1 = Chunk( + content=test_content + " First chunk.", + metadata={**test_metadata, "chunk_id": 1}, + embedding=test_embedding, + chunk_id="chunk_1", + ) + chunk2 = Chunk( + content=test_content + " Second chunk.", + metadata={**test_metadata, "chunk_id": 2}, + embedding=[0.2] * 512, + chunk_id="chunk_2", + ) + chunk3 = Chunk( + content=test_content + " Third chunk.", + metadata={**test_metadata, "chunk_id": 3}, + embedding=[0.3] * 512, + chunk_id="chunk_3", + ) + + await vector_io_adapter.insert_chunks(store_id, [chunk1, chunk2, chunk3]) + + file_id = "test_file_456" + file_info = { + "id": file_id, + "object": "vector_store.file", + "created_at": int(time.time()), + "vector_store_id": store_id, + "status": "completed", + "usage_bytes": 1024, + "chunking_strategy": {"type": "static", "static": {"max_chunk_size_tokens": 800, "chunk_overlap_tokens": 400}}, + "filename": "test_file.txt", + } + + dict_chunks = [chunk1.model_dump(), chunk2.model_dump(), chunk3.model_dump()] + await vector_io_adapter._save_openai_vector_store_file(store_id, file_id, file_info, dict_chunks) + + vector_io_adapter.openai_vector_stores[store_id]["file_ids"].append(file_id) + + response = await vector_io_adapter.openai_list_vector_store_chunks(vector_store_id=store_id, file_id=file_id) + + assert response.object == "list" + assert len(response.data) == 3 + assert response.has_more is False + assert response.first_id is not None + assert response.last_id is not None + + chunk_ids = [chunk.id for chunk in response.data] + assert "chunk_1" in chunk_ids + assert "chunk_2" in chunk_ids + assert "chunk_3" in chunk_ids + + for chunk in response.data: + assert chunk.embedding is not None + assert len(chunk.embedding) == 512 + assert chunk.vector_store_id == store_id + assert chunk.file_id == file_id + + limited_response = await vector_io_adapter.openai_list_vector_store_chunks( + vector_store_id=store_id, file_id=file_id, limit=2 + ) + + assert len(limited_response.data) == 2 + assert limited_response.has_more is True + + desc_response = await vector_io_adapter.openai_list_vector_store_chunks( + vector_store_id=store_id, file_id=file_id, order="desc" + ) + + assert len(desc_response.data) == 3 + + asc_response = await vector_io_adapter.openai_list_vector_store_chunks( + vector_store_id=store_id, file_id=file_id, order="asc" + ) + + assert len(asc_response.data) == 3 + + first_chunk_id = response.data[0].id + after_response = await vector_io_adapter.openai_list_vector_store_chunks( + vector_store_id=store_id, file_id=file_id, after=first_chunk_id + ) + + assert len(after_response.data) <= 2 + after_chunk_ids = [chunk.id for chunk in after_response.data] + assert first_chunk_id not in after_chunk_ids + + +async def test_openai_list_vector_store_chunks_empty_file(vector_io_adapter): + """Test listing chunks in an empty file.""" + store_id = "test_store_empty" + await vector_io_adapter.openai_create_vector_store( + vector_store_id=store_id, + name="Test Store", + embedding_model="test_model", + embedding_dimension=512, + ) + + file_id = "empty_file" + file_info = { + "id": file_id, + "object": "vector_store.file", + "created_at": int(time.time()), + "vector_store_id": store_id, + "status": "completed", + "usage_bytes": 0, + "chunking_strategy": {"type": "static", "static": {"max_chunk_size_tokens": 800, "chunk_overlap_tokens": 400}}, + "filename": "empty_file.txt", + } + + await vector_io_adapter._save_openai_vector_store_file(store_id, file_id, file_info, []) + + vector_io_adapter.openai_vector_stores[store_id]["file_ids"].append(file_id) + + response = await vector_io_adapter.openai_list_vector_store_chunks(vector_store_id=store_id, file_id=file_id) + + assert response.object == "list" + assert len(response.data) == 0 + assert response.has_more is False + assert response.first_id is None + assert response.last_id is None + + +async def test_openai_list_vector_store_chunks_nonexistent_resources(vector_io_adapter): + with pytest.raises(VectorStoreNotFoundError): + await vector_io_adapter.openai_list_vector_store_chunks(vector_store_id="nonexistent_store", file_id="any_file") + + store_id = "test_store_list" + await vector_io_adapter.openai_create_vector_store( + vector_store_id=store_id, + name="Test Store", + embedding_model="test_model", + embedding_dimension=512, + ) + + with pytest.raises(ValueError, match="File nonexistent_file not found in vector store"): + await vector_io_adapter.openai_list_vector_store_chunks(vector_store_id=store_id, file_id="nonexistent_file")