diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html
index d480ff592..ac06ec56e 100644
--- a/docs/_static/llama-stack-spec.html
+++ b/docs/_static/llama-stack-spec.html
@@ -3827,6 +3827,195 @@
]
}
},
+ "/v1/openai/v1/vector_stores/{vector_store_id}/files/{file_id}/chunks/{chunk_id}": {
+ "get": {
+ "responses": {
+ "200": {
+ "description": "A VectorStoreChunkObject representing the chunk.",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/VectorStoreChunkObject"
+ }
+ }
+ }
+ },
+ "400": {
+ "$ref": "#/components/responses/BadRequest400"
+ },
+ "429": {
+ "$ref": "#/components/responses/TooManyRequests429"
+ },
+ "500": {
+ "$ref": "#/components/responses/InternalServerError500"
+ },
+ "default": {
+ "$ref": "#/components/responses/DefaultError"
+ }
+ },
+ "tags": [
+ "VectorIO"
+ ],
+ "description": "Retrieve a specific chunk from a vector store file.",
+ "parameters": [
+ {
+ "name": "vector_store_id",
+ "in": "path",
+ "description": "The ID of the vector store containing the chunk.",
+ "required": true,
+ "schema": {
+ "type": "string"
+ }
+ },
+ {
+ "name": "file_id",
+ "in": "path",
+ "description": "The ID of the file containing the chunk.",
+ "required": true,
+ "schema": {
+ "type": "string"
+ }
+ },
+ {
+ "name": "chunk_id",
+ "in": "path",
+ "description": "The ID of the chunk to retrieve.",
+ "required": true,
+ "schema": {
+ "type": "string"
+ }
+ }
+ ]
+ },
+ "post": {
+ "responses": {
+ "200": {
+ "description": "A VectorStoreChunkObject representing the updated chunk.",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/VectorStoreChunkObject"
+ }
+ }
+ }
+ },
+ "400": {
+ "$ref": "#/components/responses/BadRequest400"
+ },
+ "429": {
+ "$ref": "#/components/responses/TooManyRequests429"
+ },
+ "500": {
+ "$ref": "#/components/responses/InternalServerError500"
+ },
+ "default": {
+ "$ref": "#/components/responses/DefaultError"
+ }
+ },
+ "tags": [
+ "VectorIO"
+ ],
+ "description": "Update a specific chunk in a vector store file.",
+ "parameters": [
+ {
+ "name": "vector_store_id",
+ "in": "path",
+ "description": "The ID of the vector store containing the chunk.",
+ "required": true,
+ "schema": {
+ "type": "string"
+ }
+ },
+ {
+ "name": "file_id",
+ "in": "path",
+ "description": "The ID of the file containing the chunk.",
+ "required": true,
+ "schema": {
+ "type": "string"
+ }
+ },
+ {
+ "name": "chunk_id",
+ "in": "path",
+ "description": "The ID of the chunk to update.",
+ "required": true,
+ "schema": {
+ "type": "string"
+ }
+ }
+ ],
+ "requestBody": {
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/OpenaiUpdateVectorStoreChunkRequest"
+ }
+ }
+ },
+ "required": true
+ }
+ },
+ "delete": {
+ "responses": {
+ "200": {
+ "description": "A VectorStoreChunkDeleteResponse indicating the deletion status.",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/VectorStoreChunkDeleteResponse"
+ }
+ }
+ }
+ },
+ "400": {
+ "$ref": "#/components/responses/BadRequest400"
+ },
+ "429": {
+ "$ref": "#/components/responses/TooManyRequests429"
+ },
+ "500": {
+ "$ref": "#/components/responses/InternalServerError500"
+ },
+ "default": {
+ "$ref": "#/components/responses/DefaultError"
+ }
+ },
+ "tags": [
+ "VectorIO"
+ ],
+ "description": "Delete a specific chunk from a vector store file.",
+ "parameters": [
+ {
+ "name": "vector_store_id",
+ "in": "path",
+ "description": "The ID of the vector store containing the chunk.",
+ "required": true,
+ "schema": {
+ "type": "string"
+ }
+ },
+ {
+ "name": "file_id",
+ "in": "path",
+ "description": "The ID of the file containing the chunk.",
+ "required": true,
+ "schema": {
+ "type": "string"
+ }
+ },
+ {
+ "name": "chunk_id",
+ "in": "path",
+ "description": "The ID of the chunk to delete.",
+ "required": true,
+ "schema": {
+ "type": "string"
+ }
+ }
+ ]
+ }
+ },
"/v1/openai/v1/vector_stores/{vector_store_id}/files/{file_id}": {
"get": {
"responses": {
@@ -4189,6 +4378,94 @@
"parameters": []
}
},
+ "/v1/openai/v1/vector_stores/{vector_store_id}/files/{file_id}/chunks": {
+ "get": {
+ "responses": {
+ "200": {
+ "description": "A VectorStoreListChunksResponse with the list of chunks.",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/VectorStoreListChunksResponse"
+ }
+ }
+ }
+ },
+ "400": {
+ "$ref": "#/components/responses/BadRequest400"
+ },
+ "429": {
+ "$ref": "#/components/responses/TooManyRequests429"
+ },
+ "500": {
+ "$ref": "#/components/responses/InternalServerError500"
+ },
+ "default": {
+ "$ref": "#/components/responses/DefaultError"
+ }
+ },
+ "tags": [
+ "VectorIO"
+ ],
+ "description": "List chunks in a vector store file.",
+ "parameters": [
+ {
+ "name": "vector_store_id",
+ "in": "path",
+ "description": "The ID of the vector store.",
+ "required": true,
+ "schema": {
+ "type": "string"
+ }
+ },
+ {
+ "name": "file_id",
+ "in": "path",
+ "description": "The ID of the file.",
+ "required": true,
+ "schema": {
+ "type": "string"
+ }
+ },
+ {
+ "name": "limit",
+ "in": "query",
+ "description": "Max number of chunks to return.",
+ "required": false,
+ "schema": {
+ "type": "integer"
+ }
+ },
+ {
+ "name": "order",
+ "in": "query",
+            "description": "Sort order of chunks by created_at timestamp (\"asc\" or \"desc\").",
+ "required": false,
+ "schema": {
+ "type": "string"
+ }
+ },
+ {
+ "name": "after",
+ "in": "query",
+            "description": "A cursor for pagination; return chunks after this chunk ID.",
+ "required": false,
+ "schema": {
+ "type": "string"
+ }
+ },
+ {
+ "name": "before",
+ "in": "query",
+            "description": "A cursor for pagination; return chunks before this chunk ID.",
+ "required": false,
+ "schema": {
+ "type": "string"
+ }
+ }
+ ]
+ }
+ },
"/v1/openai/v1/files/{file_id}/content": {
"get": {
"responses": {
@@ -14428,6 +14705,33 @@
"title": "VectorStoreDeleteResponse",
"description": "Response from deleting a vector store."
},
+ "VectorStoreChunkDeleteResponse": {
+ "type": "object",
+ "properties": {
+ "id": {
+ "type": "string",
+ "description": "Unique identifier of the deleted chunk"
+ },
+ "object": {
+ "type": "string",
+ "default": "vector_store.file.chunk.deleted",
+ "description": "Object type identifier for the deletion response"
+ },
+ "deleted": {
+ "type": "boolean",
+ "default": true,
+ "description": "Whether the deletion operation was successful"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "id",
+ "object",
+ "deleted"
+ ],
+ "title": "VectorStoreChunkDeleteResponse",
+ "description": "Response from deleting a vector store chunk."
+ },
"VectorStoreFileDeleteResponse": {
"type": "object",
"properties": {
@@ -14768,6 +15072,119 @@
],
"title": "OpenAIListModelsResponse"
},
+ "VectorStoreChunkObject": {
+ "type": "object",
+ "properties": {
+ "id": {
+ "type": "string",
+ "description": "Unique identifier for the chunk"
+ },
+ "object": {
+ "type": "string",
+ "default": "vector_store.file.chunk",
+ "description": "Object type identifier, always \"vector_store.file.chunk\""
+ },
+ "created_at": {
+ "type": "integer",
+ "description": "Timestamp when the chunk was created"
+ },
+ "vector_store_id": {
+ "type": "string",
+ "description": "ID of the vector store containing this chunk"
+ },
+ "file_id": {
+ "type": "string",
+ "description": "ID of the file containing this chunk"
+ },
+ "content": {
+ "$ref": "#/components/schemas/InterleavedContent",
+ "description": "The content of the chunk, using the same format as Chunk class"
+ },
+ "metadata": {
+ "type": "object",
+ "additionalProperties": {
+ "oneOf": [
+ {
+ "type": "null"
+ },
+ {
+ "type": "boolean"
+ },
+ {
+ "type": "number"
+ },
+ {
+ "type": "string"
+ },
+ {
+ "type": "array"
+ },
+ {
+ "type": "object"
+ }
+ ]
+ },
+ "description": "Metadata associated with the chunk"
+ },
+ "embedding": {
+ "type": "array",
+ "items": {
+ "type": "number"
+ },
+ "description": "The embedding vector for the chunk"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "id",
+ "object",
+ "created_at",
+ "vector_store_id",
+ "file_id",
+ "content",
+ "metadata"
+ ],
+ "title": "VectorStoreChunkObject",
+ "description": "OpenAI Vector Store Chunk object."
+ },
+ "VectorStoreListChunksResponse": {
+ "type": "object",
+ "properties": {
+ "object": {
+ "type": "string",
+ "default": "list",
+ "description": "Object type identifier, always \"list\""
+ },
+ "data": {
+ "type": "array",
+ "items": {
+ "$ref": "#/components/schemas/VectorStoreChunkObject"
+ },
+ "description": "List of vector store chunk objects"
+ },
+ "first_id": {
+ "type": "string",
+ "description": "(Optional) ID of the first chunk in the list for pagination"
+ },
+ "last_id": {
+ "type": "string",
+ "description": "(Optional) ID of the last chunk in the list for pagination"
+ },
+ "has_more": {
+ "type": "boolean",
+ "default": false,
+ "description": "Whether there are more chunks available beyond this page"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "object",
+ "data",
+ "has_more"
+ ],
+ "title": "VectorStoreListChunksResponse",
+ "description": "Response from listing chunks in a vector store file."
+ },
"VectorStoreListResponse": {
"type": "object",
"properties": {
@@ -15116,6 +15533,43 @@
"additionalProperties": false,
"title": "OpenaiUpdateVectorStoreRequest"
},
+ "OpenaiUpdateVectorStoreChunkRequest": {
+ "type": "object",
+ "properties": {
+ "content": {
+ "$ref": "#/components/schemas/InterleavedContent",
+ "description": "Updated content for the chunk."
+ },
+ "metadata": {
+ "type": "object",
+ "additionalProperties": {
+ "oneOf": [
+ {
+ "type": "null"
+ },
+ {
+ "type": "boolean"
+ },
+ {
+ "type": "number"
+ },
+ {
+ "type": "string"
+ },
+ {
+ "type": "array"
+ },
+ {
+ "type": "object"
+ }
+ ]
+ },
+ "description": "Updated metadata for the chunk."
+ }
+ },
+ "additionalProperties": false,
+ "title": "OpenaiUpdateVectorStoreChunkRequest"
+ },
"OpenaiUpdateVectorStoreFileRequest": {
"type": "object",
"properties": {
diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml
index 9c0fba554..f00c1d636 100644
--- a/docs/_static/llama-stack-spec.yaml
+++ b/docs/_static/llama-stack-spec.yaml
@@ -2699,6 +2699,142 @@ paths:
required: true
schema:
type: string
+ /v1/openai/v1/vector_stores/{vector_store_id}/files/{file_id}/chunks/{chunk_id}:
+ get:
+ responses:
+ '200':
+ description: >-
+ A VectorStoreChunkObject representing the chunk.
+ content:
+ application/json:
+ schema:
+ $ref: '#/components/schemas/VectorStoreChunkObject'
+ '400':
+ $ref: '#/components/responses/BadRequest400'
+ '429':
+ $ref: >-
+ #/components/responses/TooManyRequests429
+ '500':
+ $ref: >-
+ #/components/responses/InternalServerError500
+ default:
+ $ref: '#/components/responses/DefaultError'
+ tags:
+ - VectorIO
+ description: >-
+ Retrieve a specific chunk from a vector store file.
+ parameters:
+ - name: vector_store_id
+ in: path
+ description: >-
+ The ID of the vector store containing the chunk.
+ required: true
+ schema:
+ type: string
+ - name: file_id
+ in: path
+ description: The ID of the file containing the chunk.
+ required: true
+ schema:
+ type: string
+ - name: chunk_id
+ in: path
+ description: The ID of the chunk to retrieve.
+ required: true
+ schema:
+ type: string
+ post:
+ responses:
+ '200':
+ description: >-
+ A VectorStoreChunkObject representing the updated chunk.
+ content:
+ application/json:
+ schema:
+ $ref: '#/components/schemas/VectorStoreChunkObject'
+ '400':
+ $ref: '#/components/responses/BadRequest400'
+ '429':
+ $ref: >-
+ #/components/responses/TooManyRequests429
+ '500':
+ $ref: >-
+ #/components/responses/InternalServerError500
+ default:
+ $ref: '#/components/responses/DefaultError'
+ tags:
+ - VectorIO
+ description: >-
+ Update a specific chunk in a vector store file.
+ parameters:
+ - name: vector_store_id
+ in: path
+ description: >-
+ The ID of the vector store containing the chunk.
+ required: true
+ schema:
+ type: string
+ - name: file_id
+ in: path
+ description: The ID of the file containing the chunk.
+ required: true
+ schema:
+ type: string
+ - name: chunk_id
+ in: path
+ description: The ID of the chunk to update.
+ required: true
+ schema:
+ type: string
+ requestBody:
+ content:
+ application/json:
+ schema:
+ $ref: '#/components/schemas/OpenaiUpdateVectorStoreChunkRequest'
+ required: true
+ delete:
+ responses:
+ '200':
+ description: >-
+ A VectorStoreChunkDeleteResponse indicating the deletion status.
+ content:
+ application/json:
+ schema:
+ $ref: '#/components/schemas/VectorStoreChunkDeleteResponse'
+ '400':
+ $ref: '#/components/responses/BadRequest400'
+ '429':
+ $ref: >-
+ #/components/responses/TooManyRequests429
+ '500':
+ $ref: >-
+ #/components/responses/InternalServerError500
+ default:
+ $ref: '#/components/responses/DefaultError'
+ tags:
+ - VectorIO
+ description: >-
+ Delete a specific chunk from a vector store file.
+ parameters:
+ - name: vector_store_id
+ in: path
+ description: >-
+ The ID of the vector store containing the chunk.
+ required: true
+ schema:
+ type: string
+ - name: file_id
+ in: path
+ description: The ID of the file containing the chunk.
+ required: true
+ schema:
+ type: string
+ - name: chunk_id
+ in: path
+ description: The ID of the chunk to delete.
+ required: true
+ schema:
+ type: string
/v1/openai/v1/vector_stores/{vector_store_id}/files/{file_id}:
get:
responses:
@@ -2972,6 +3108,66 @@ paths:
- Models
description: List models using the OpenAI API.
parameters: []
+ /v1/openai/v1/vector_stores/{vector_store_id}/files/{file_id}/chunks:
+ get:
+ responses:
+ '200':
+ description: >-
+ A VectorStoreListChunksResponse with the list of chunks.
+ content:
+ application/json:
+ schema:
+ $ref: '#/components/schemas/VectorStoreListChunksResponse'
+ '400':
+ $ref: '#/components/responses/BadRequest400'
+ '429':
+ $ref: >-
+ #/components/responses/TooManyRequests429
+ '500':
+ $ref: >-
+ #/components/responses/InternalServerError500
+ default:
+ $ref: '#/components/responses/DefaultError'
+ tags:
+ - VectorIO
+ description: List chunks in a vector store file.
+ parameters:
+ - name: vector_store_id
+ in: path
+ description: The ID of the vector store.
+ required: true
+ schema:
+ type: string
+ - name: file_id
+ in: path
+ description: The ID of the file.
+ required: true
+ schema:
+ type: string
+ - name: limit
+ in: query
+ description: Max number of chunks to return.
+ required: false
+ schema:
+ type: integer
+ - name: order
+ in: query
+          description: Sort order of chunks by created_at timestamp ("asc" or "desc").
+ required: false
+ schema:
+ type: string
+ - name: after
+ in: query
+          description: A cursor for pagination; return chunks after this chunk ID.
+ required: false
+ schema:
+ type: string
+ - name: before
+ in: query
+          description: A cursor for pagination; return chunks before this chunk ID.
+ required: false
+ schema:
+ type: string
/v1/openai/v1/files/{file_id}/content:
get:
responses:
@@ -10664,6 +10860,30 @@ components:
- deleted
title: VectorStoreDeleteResponse
description: Response from deleting a vector store.
+ VectorStoreChunkDeleteResponse:
+ type: object
+ properties:
+ id:
+ type: string
+ description: Unique identifier of the deleted chunk
+ object:
+ type: string
+ default: vector_store.file.chunk.deleted
+ description: >-
+ Object type identifier for the deletion response
+ deleted:
+ type: boolean
+ default: true
+ description: >-
+ Whether the deletion operation was successful
+ additionalProperties: false
+ required:
+ - id
+ - object
+ - deleted
+ title: VectorStoreChunkDeleteResponse
+ description: >-
+ Response from deleting a vector store chunk.
VectorStoreFileDeleteResponse:
type: object
properties:
@@ -10950,6 +11170,91 @@ components:
required:
- data
title: OpenAIListModelsResponse
+ VectorStoreChunkObject:
+ type: object
+ properties:
+ id:
+ type: string
+ description: Unique identifier for the chunk
+ object:
+ type: string
+ default: vector_store.file.chunk
+ description: >-
+ Object type identifier, always "vector_store.file.chunk"
+ created_at:
+ type: integer
+ description: Timestamp when the chunk was created
+ vector_store_id:
+ type: string
+ description: >-
+ ID of the vector store containing this chunk
+ file_id:
+ type: string
+ description: ID of the file containing this chunk
+ content:
+ $ref: '#/components/schemas/InterleavedContent'
+ description: >-
+ The content of the chunk, using the same format as Chunk class
+ metadata:
+ type: object
+ additionalProperties:
+ oneOf:
+ - type: 'null'
+ - type: boolean
+ - type: number
+ - type: string
+ - type: array
+ - type: object
+ description: Metadata associated with the chunk
+ embedding:
+ type: array
+ items:
+ type: number
+ description: The embedding vector for the chunk
+ additionalProperties: false
+ required:
+ - id
+ - object
+ - created_at
+ - vector_store_id
+ - file_id
+ - content
+ - metadata
+ title: VectorStoreChunkObject
+ description: OpenAI Vector Store Chunk object.
+ VectorStoreListChunksResponse:
+ type: object
+ properties:
+ object:
+ type: string
+ default: list
+ description: Object type identifier, always "list"
+ data:
+ type: array
+ items:
+ $ref: '#/components/schemas/VectorStoreChunkObject'
+ description: List of vector store chunk objects
+ first_id:
+ type: string
+ description: >-
+ (Optional) ID of the first chunk in the list for pagination
+ last_id:
+ type: string
+ description: >-
+ (Optional) ID of the last chunk in the list for pagination
+ has_more:
+ type: boolean
+ default: false
+ description: >-
+ Whether there are more chunks available beyond this page
+ additionalProperties: false
+ required:
+ - object
+ - data
+ - has_more
+ title: VectorStoreListChunksResponse
+ description: >-
+ Response from listing chunks in a vector store file.
VectorStoreListResponse:
type: object
properties:
@@ -11196,6 +11501,25 @@ components:
Set of 16 key-value pairs that can be attached to an object.
additionalProperties: false
title: OpenaiUpdateVectorStoreRequest
+ OpenaiUpdateVectorStoreChunkRequest:
+ type: object
+ properties:
+ content:
+ $ref: '#/components/schemas/InterleavedContent'
+ description: Updated content for the chunk.
+ metadata:
+ type: object
+ additionalProperties:
+ oneOf:
+ - type: 'null'
+ - type: boolean
+ - type: number
+ - type: string
+ - type: array
+ - type: object
+ description: Updated metadata for the chunk.
+ additionalProperties: false
+ title: OpenaiUpdateVectorStoreChunkRequest
OpenaiUpdateVectorStoreFileRequest:
type: object
properties:
diff --git a/llama_stack/apis/vector_io/vector_io.py b/llama_stack/apis/vector_io/vector_io.py
index 3e8065cfb..355c0675b 100644
--- a/llama_stack/apis/vector_io/vector_io.py
+++ b/llama_stack/apis/vector_io/vector_io.py
@@ -426,6 +426,74 @@ class VectorStoreFileDeleteResponse(BaseModel):
deleted: bool = True
+@json_schema_type
+class VectorStoreChunkObject(BaseModel):
+ """OpenAI Vector Store Chunk object.
+
+ :param id: Unique identifier for the chunk
+ :param object: Object type identifier, always "vector_store.file.chunk"
+ :param created_at: Timestamp when the chunk was created
+ :param vector_store_id: ID of the vector store containing this chunk
+ :param file_id: ID of the file containing this chunk
+ :param content: The content of the chunk, using the same format as Chunk class
+ :param metadata: Metadata associated with the chunk
+ :param embedding: The embedding vector for the chunk
+ """
+
+ id: str
+ object: str = "vector_store.file.chunk"
+ created_at: int
+ vector_store_id: str
+ file_id: str
+ content: InterleavedContent
+ metadata: dict[str, Any] = Field(default_factory=dict)
+ embedding: list[float] | None = None
+
+
+@json_schema_type
+class VectorStoreListChunksResponse(BaseModel):
+ """Response from listing chunks in a vector store file.
+
+ :param object: Object type identifier, always "list"
+ :param data: List of vector store chunk objects
+ :param first_id: (Optional) ID of the first chunk in the list for pagination
+ :param last_id: (Optional) ID of the last chunk in the list for pagination
+ :param has_more: Whether there are more chunks available beyond this page
+ """
+
+ object: str = "list"
+ data: list[VectorStoreChunkObject]
+ first_id: str | None = None
+ last_id: str | None = None
+ has_more: bool = False
+
+
+@json_schema_type
+class VectorStoreChunkUpdateRequest(BaseModel):
+ """Request to update a vector store chunk.
+
+ :param content: Updated content for the chunk
+ :param metadata: Updated metadata for the chunk
+ """
+
+ content: InterleavedContent | None = None
+ metadata: dict[str, Any] | None = None
+
+
+@json_schema_type
+class VectorStoreChunkDeleteResponse(BaseModel):
+ """Response from deleting a vector store chunk.
+
+ :param id: Unique identifier of the deleted chunk
+ :param object: Object type identifier for the deletion response
+ :param deleted: Whether the deletion operation was successful
+ """
+
+ id: str
+ object: str = "vector_store.file.chunk.deleted"
+ deleted: bool = True
+
+
class VectorDBStore(Protocol):
def get_vector_db(self, vector_db_id: str) -> VectorDB | None: ...
@@ -638,6 +706,28 @@ class VectorIO(Protocol):
"""
...
+ @webmethod(route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}/chunks", method="GET")
+ async def openai_list_vector_store_chunks(
+ self,
+ vector_store_id: str,
+ file_id: str,
+ limit: int | None = 20,
+ order: str | None = "desc",
+ after: str | None = None,
+ before: str | None = None,
+ ) -> VectorStoreListChunksResponse:
+ """List chunks in a vector store file.
+
+ :param vector_store_id: The ID of the vector store.
+ :param file_id: The ID of the file.
+ :param limit: Max number of chunks to return.
+        :param order: Sort order of chunks by created_at timestamp ("asc" or "desc").
+        :param after: A cursor for pagination; return chunks after this chunk ID.
+        :param before: A cursor for pagination; return chunks before this chunk ID.
+ :returns: A VectorStoreListChunksResponse with the list of chunks.
+ """
+ ...
+
@webmethod(route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}/content", method="GET")
async def openai_retrieve_vector_store_file_contents(
self,
@@ -681,3 +771,55 @@ class VectorIO(Protocol):
:returns: A VectorStoreFileDeleteResponse indicating the deletion status.
"""
...
+
+ @webmethod(route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}/chunks/{chunk_id}", method="GET")
+ async def openai_retrieve_vector_store_chunk(
+ self,
+ vector_store_id: str,
+ file_id: str,
+ chunk_id: str,
+ ) -> VectorStoreChunkObject:
+ """Retrieve a specific chunk from a vector store file.
+
+ :param vector_store_id: The ID of the vector store containing the chunk.
+ :param file_id: The ID of the file containing the chunk.
+ :param chunk_id: The ID of the chunk to retrieve.
+ :returns: A VectorStoreChunkObject representing the chunk.
+ """
+ ...
+
+ @webmethod(route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}/chunks/{chunk_id}", method="POST")
+ async def openai_update_vector_store_chunk(
+ self,
+ vector_store_id: str,
+ file_id: str,
+ chunk_id: str,
+ content: InterleavedContent | None = None,
+ metadata: dict[str, Any] | None = None,
+ ) -> VectorStoreChunkObject:
+ """Update a specific chunk in a vector store file.
+
+ :param vector_store_id: The ID of the vector store containing the chunk.
+ :param file_id: The ID of the file containing the chunk.
+ :param chunk_id: The ID of the chunk to update.
+ :param content: Updated content for the chunk.
+ :param metadata: Updated metadata for the chunk.
+ :returns: A VectorStoreChunkObject representing the updated chunk.
+ """
+ ...
+
+ @webmethod(route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}/chunks/{chunk_id}", method="DELETE")
+ async def openai_delete_vector_store_chunk(
+ self,
+ vector_store_id: str,
+ file_id: str,
+ chunk_id: str,
+ ) -> VectorStoreChunkDeleteResponse:
+ """Delete a specific chunk from a vector store file.
+
+ :param vector_store_id: The ID of the vector store containing the chunk.
+ :param file_id: The ID of the file containing the chunk.
+ :param chunk_id: The ID of the chunk to delete.
+ :returns: A VectorStoreChunkDeleteResponse indicating the deletion status.
+ """
+ ...
diff --git a/llama_stack/core/routers/vector_io.py b/llama_stack/core/routers/vector_io.py
index 3d0996c49..d32289b4d 100644
--- a/llama_stack/core/routers/vector_io.py
+++ b/llama_stack/core/routers/vector_io.py
@@ -17,7 +17,9 @@ from llama_stack.apis.vector_io import (
QueryChunksResponse,
SearchRankingOptions,
VectorIO,
+ VectorStoreChunkDeleteResponse,
VectorStoreChunkingStrategy,
+ VectorStoreChunkObject,
VectorStoreDeleteResponse,
VectorStoreFileContentsResponse,
VectorStoreFileDeleteResponse,
@@ -341,6 +343,68 @@ class VectorIORouter(VectorIO):
file_id=file_id,
)
+ async def openai_retrieve_vector_store_chunk(
+ self,
+ vector_store_id: str,
+ file_id: str,
+ chunk_id: str,
+ ) -> VectorStoreChunkObject:
+ logger.debug(f"VectorIORouter.openai_retrieve_vector_store_chunk: {vector_store_id}, {file_id}, {chunk_id}")
+ return await self.routing_table.openai_retrieve_vector_store_chunk(
+ vector_store_id=vector_store_id,
+ file_id=file_id,
+ chunk_id=chunk_id,
+ )
+
+ async def openai_update_vector_store_chunk(
+ self,
+ vector_store_id: str,
+ file_id: str,
+ chunk_id: str,
+ content: InterleavedContent | None = None,
+ metadata: dict[str, Any] | None = None,
+ ) -> VectorStoreChunkObject:
+ logger.debug(f"VectorIORouter.openai_update_vector_store_chunk: {vector_store_id}, {file_id}, {chunk_id}")
+ return await self.routing_table.openai_update_vector_store_chunk(
+ vector_store_id=vector_store_id,
+ file_id=file_id,
+ chunk_id=chunk_id,
+ content=content,
+ metadata=metadata,
+ )
+
+ async def openai_delete_vector_store_chunk(
+ self,
+ vector_store_id: str,
+ file_id: str,
+ chunk_id: str,
+ ) -> VectorStoreChunkDeleteResponse:
+ logger.debug(f"VectorIORouter.openai_delete_vector_store_chunk: {vector_store_id}, {file_id}, {chunk_id}")
+ return await self.routing_table.openai_delete_vector_store_chunk(
+ vector_store_id=vector_store_id,
+ file_id=file_id,
+ chunk_id=chunk_id,
+ )
+
+ async def openai_list_vector_store_chunks(
+ self,
+ vector_store_id: str,
+ file_id: str,
+ limit: int | None = 20,
+ order: str | None = "desc",
+ after: str | None = None,
+ before: str | None = None,
+ ):
+ logger.debug(f"VectorIORouter.openai_list_vector_store_chunks: {vector_store_id}, {file_id}")
+ return await self.routing_table.openai_list_vector_store_chunks(
+ vector_store_id=vector_store_id,
+ file_id=file_id,
+ limit=limit,
+ order=order,
+ after=after,
+ before=before,
+ )
+
async def health(self) -> dict[str, HealthResponse]:
health_statuses = {}
timeout = 1 # increasing the timeout to 1 second for health checks
diff --git a/llama_stack/core/routing_tables/vector_dbs.py b/llama_stack/core/routing_tables/vector_dbs.py
index c81a27a3b..6d5c14920 100644
--- a/llama_stack/core/routing_tables/vector_dbs.py
+++ b/llama_stack/core/routing_tables/vector_dbs.py
@@ -13,13 +13,17 @@ from llama_stack.apis.models import ModelType
from llama_stack.apis.resource import ResourceType
from llama_stack.apis.vector_dbs import ListVectorDBsResponse, VectorDB, VectorDBs
from llama_stack.apis.vector_io.vector_io import (
+ InterleavedContent,
SearchRankingOptions,
+ VectorStoreChunkDeleteResponse,
VectorStoreChunkingStrategy,
+ VectorStoreChunkObject,
VectorStoreDeleteResponse,
VectorStoreFileContentsResponse,
VectorStoreFileDeleteResponse,
VectorStoreFileObject,
VectorStoreFileStatus,
+ VectorStoreListChunksResponse,
VectorStoreObject,
VectorStoreSearchResponsePage,
)
@@ -227,3 +231,69 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl, VectorDBs):
vector_store_id=vector_store_id,
file_id=file_id,
)
+
+ async def openai_retrieve_vector_store_chunk(
+ self,
+ vector_store_id: str,
+ file_id: str,
+ chunk_id: str,
+ ) -> VectorStoreChunkObject:
+ await self.assert_action_allowed("read", "vector_db", vector_store_id)
+ provider = await self.get_provider_impl(vector_store_id)
+ return await provider.openai_retrieve_vector_store_chunk(
+ vector_store_id=vector_store_id,
+ file_id=file_id,
+ chunk_id=chunk_id,
+ )
+
+ async def openai_update_vector_store_chunk(
+ self,
+ vector_store_id: str,
+ file_id: str,
+ chunk_id: str,
+ content: InterleavedContent | None = None,
+ metadata: dict[str, Any] | None = None,
+ ) -> VectorStoreChunkObject:
+ await self.assert_action_allowed("update", "vector_db", vector_store_id)
+ provider = await self.get_provider_impl(vector_store_id)
+ return await provider.openai_update_vector_store_chunk(
+ vector_store_id=vector_store_id,
+ file_id=file_id,
+ chunk_id=chunk_id,
+ content=content,
+ metadata=metadata,
+ )
+
+ async def openai_delete_vector_store_chunk(
+ self,
+ vector_store_id: str,
+ file_id: str,
+ chunk_id: str,
+ ) -> VectorStoreChunkDeleteResponse:
+ await self.assert_action_allowed("delete", "vector_db", vector_store_id)
+ provider = await self.get_provider_impl(vector_store_id)
+ return await provider.openai_delete_vector_store_chunk(
+ vector_store_id=vector_store_id,
+ file_id=file_id,
+ chunk_id=chunk_id,
+ )
+
+ async def openai_list_vector_store_chunks(
+ self,
+ vector_store_id: str,
+ file_id: str,
+ limit: int | None = 20,
+ order: str | None = "desc",
+ after: str | None = None,
+ before: str | None = None,
+ ) -> VectorStoreListChunksResponse:
+ await self.assert_action_allowed("read", "vector_db", vector_store_id)
+ provider = await self.get_provider_impl(vector_store_id)
+ return await provider.openai_list_vector_store_chunks(
+ vector_store_id=vector_store_id,
+ file_id=file_id,
+ limit=limit,
+ order=order,
+ after=after,
+ before=before,
+ )
diff --git a/llama_stack/providers/utils/memory/openai_vector_store_mixin.py b/llama_stack/providers/utils/memory/openai_vector_store_mixin.py
index 7b6e69df1..d9c0278da 100644
--- a/llama_stack/providers/utils/memory/openai_vector_store_mixin.py
+++ b/llama_stack/providers/utils/memory/openai_vector_store_mixin.py
@@ -15,14 +15,17 @@ from typing import Any
from llama_stack.apis.common.errors import VectorStoreNotFoundError
from llama_stack.apis.files import Files, OpenAIFileObject
+from llama_stack.apis.inference import InterleavedContent
from llama_stack.apis.vector_dbs import VectorDB
from llama_stack.apis.vector_io import (
Chunk,
QueryChunksResponse,
SearchRankingOptions,
+ VectorStoreChunkDeleteResponse,
VectorStoreChunkingStrategy,
VectorStoreChunkingStrategyAuto,
VectorStoreChunkingStrategyStatic,
+ VectorStoreChunkObject,
VectorStoreContent,
VectorStoreDeleteResponse,
VectorStoreFileContentsResponse,
@@ -31,6 +34,7 @@ from llama_stack.apis.vector_io import (
VectorStoreFileLastError,
VectorStoreFileObject,
VectorStoreFileStatus,
+ VectorStoreListChunksResponse,
VectorStoreListFilesResponse,
VectorStoreListResponse,
VectorStoreObject,
@@ -109,7 +113,14 @@ class OpenAIVectorStoreMixin(ABC):
assert self.kvstore
meta_key = f"{OPENAI_VECTOR_STORES_FILES_PREFIX}{store_id}:{file_id}"
await self.kvstore.set(key=meta_key, value=json.dumps(file_info))
+
+ # delete old file data to properly update content
contents_prefix = f"{OPENAI_VECTOR_STORES_FILES_CONTENTS_PREFIX}{store_id}:{file_id}:"
+ end_key = f"{contents_prefix}\xff"
+ old_keys = await self.kvstore.keys_in_range(contents_prefix, end_key)
+ for old_key in old_keys:
+ await self.kvstore.delete(old_key)
+
for idx, chunk in enumerate(file_contents):
await self.kvstore.set(key=f"{contents_prefix}{idx}", value=json.dumps(chunk))
@@ -787,3 +798,233 @@ class OpenAIVectorStoreMixin(ABC):
id=file_id,
deleted=True,
)
+
+ async def openai_retrieve_vector_store_chunk(
+ self,
+ vector_store_id: str,
+ file_id: str,
+ chunk_id: str,
+ ) -> VectorStoreChunkObject:
+ """Retrieve a specific chunk from a vector store file."""
+ if vector_store_id not in self.openai_vector_stores:
+ raise VectorStoreNotFoundError(vector_store_id)
+
+ store_info = self.openai_vector_stores[vector_store_id]
+ if file_id not in store_info["file_ids"]:
+ raise ValueError(f"File {file_id} not found in vector store {vector_store_id}")
+
+ dict_chunks = await self._load_openai_vector_store_file_contents(vector_store_id, file_id)
+ chunks = [Chunk.model_validate(c) for c in dict_chunks]
+
+ target_chunk = None
+ for chunk in chunks:
+ if chunk.chunk_id == chunk_id:
+ target_chunk = chunk
+ break
+
+ if target_chunk is None:
+ raise ValueError(f"Chunk {chunk_id} not found in file {file_id}")
+
+ file_info = await self._load_openai_vector_store_file(vector_store_id, file_id)
+
+ return VectorStoreChunkObject(
+ id=chunk_id,
+ created_at=file_info.get("created_at", int(time.time())),
+ vector_store_id=vector_store_id,
+ file_id=file_id,
+ content=target_chunk.content,
+ metadata=target_chunk.metadata,
+ embedding=target_chunk.embedding,
+ )
+
+ async def openai_update_vector_store_chunk(
+ self,
+ vector_store_id: str,
+ file_id: str,
+ chunk_id: str,
+ content: InterleavedContent | None = None,
+ metadata: dict[str, Any] | None = None,
+ ) -> VectorStoreChunkObject:
+ """Update a specific chunk in a vector store file."""
+ if vector_store_id not in self.openai_vector_stores:
+ raise VectorStoreNotFoundError(vector_store_id)
+
+ store_info = self.openai_vector_stores[vector_store_id]
+ if file_id not in store_info["file_ids"]:
+ raise ValueError(f"File {file_id} not found in vector store {vector_store_id}")
+
+ dict_chunks = await self._load_openai_vector_store_file_contents(vector_store_id, file_id)
+ chunks = [Chunk.model_validate(c) for c in dict_chunks]
+
+ target_chunk_index = None
+ for i, chunk in enumerate(chunks):
+ if chunk.chunk_id == chunk_id:
+ target_chunk_index = i
+ break
+
+ if target_chunk_index is None:
+ raise ValueError(f"Chunk {chunk_id} not found in file {file_id}")
+
+        target_chunk = chunks[target_chunk_index]
+        # apply metadata first so a content re-insert carries the updated metadata
+        if metadata is not None:
+            target_chunk.metadata.update(metadata)
+
+        # updating content requires re-embedding: delete the old chunk, then re-insert
+        if content is not None:
+            target_chunk.content = content
+            await self.delete_chunks(vector_store_id, [chunk_id])
+            await self.insert_chunks(vector_store_id, [target_chunk])
+
+ chunks[target_chunk_index] = target_chunk
+ dict_chunks = [c.model_dump() for c in chunks]
+ file_info = await self._load_openai_vector_store_file(vector_store_id, file_id)
+ await self._save_openai_vector_store_file(vector_store_id, file_id, file_info, dict_chunks)
+
+ return VectorStoreChunkObject(
+ id=chunk_id,
+ created_at=file_info.get("created_at", int(time.time())),
+ vector_store_id=vector_store_id,
+ file_id=file_id,
+ content=target_chunk.content,
+ metadata=target_chunk.metadata,
+ embedding=target_chunk.embedding,
+ )
+
+ async def openai_delete_vector_store_chunk(
+ self,
+ vector_store_id: str,
+ file_id: str,
+ chunk_id: str,
+ ) -> VectorStoreChunkDeleteResponse:
+ """Delete a specific chunk from a vector store file."""
+ if vector_store_id not in self.openai_vector_stores:
+ raise VectorStoreNotFoundError(vector_store_id)
+
+ store_info = self.openai_vector_stores[vector_store_id]
+ if file_id not in store_info["file_ids"]:
+ raise ValueError(f"File {file_id} not found in vector store {vector_store_id}")
+
+ dict_chunks = await self._load_openai_vector_store_file_contents(vector_store_id, file_id)
+ chunks = [Chunk.model_validate(c) for c in dict_chunks]
+
+ target_chunk_index = None
+ for i, chunk in enumerate(chunks):
+ if chunk.chunk_id == chunk_id:
+ target_chunk_index = i
+ break
+
+ if target_chunk_index is None:
+ raise ValueError(f"Chunk {chunk_id} not found in file {file_id}")
+
+ await self.delete_chunks(vector_store_id, [chunk_id])
+
+ dict_chunks.pop(target_chunk_index)
+ file_info = await self._load_openai_vector_store_file(vector_store_id, file_id)
+ await self._save_openai_vector_store_file(vector_store_id, file_id, file_info, dict_chunks)
+
+ return VectorStoreChunkDeleteResponse(
+ id=chunk_id,
+ deleted=True,
+ )
+
+ async def openai_list_vector_store_chunks(
+ self,
+ vector_store_id: str,
+ file_id: str,
+ limit: int | None = 20,
+ order: str | None = "desc",
+ after: str | None = None,
+ before: str | None = None,
+ ) -> VectorStoreListChunksResponse:
+ """List chunks in a vector store file."""
+ if vector_store_id not in self.openai_vector_stores:
+ raise VectorStoreNotFoundError(vector_store_id)
+
+ store_info = self.openai_vector_stores[vector_store_id]
+ if file_id not in store_info["file_ids"]:
+ raise ValueError(f"File {file_id} not found in vector store {vector_store_id}")
+
+ dict_chunks = await self._load_openai_vector_store_file_contents(vector_store_id, file_id)
+ file_info = await self._load_openai_vector_store_file(vector_store_id, file_id)
+
+ chunk_ids = []
+ for dict_chunk in dict_chunks:
+ chunk = Chunk.model_validate(dict_chunk)
+ if chunk.chunk_id:
+ chunk_ids.append(chunk.chunk_id)
+
+ # TODO: Add abstract method query_all_chunks() to properly filter by file_id and vector_db_id, this is a temporary hack
+ chunks = []
+ if chunk_ids:
+ try:
+ file_filter = {"type": "eq", "key": "file_id", "value": file_id}
+
+ query_result = await self.query_chunks(
+ vector_db_id=vector_store_id,
+ query="*", # wildcard query to get all chunks
+ params={
+ "max_chunks": len(chunk_ids) * 2,
+ "score_threshold": 0.0,
+ "filters": file_filter,
+ },
+ )
+
+                chunk_id_set = set(chunk_ids)
+                matched = [chunk for chunk in query_result.chunks if chunk.chunk_id in chunk_id_set]
+                chunks = matched if len(matched) == len(chunk_id_set) else [Chunk.model_validate(c) for c in dict_chunks]
+            except Exception as e:
+                logger.warning(f"Failed to query vector database for chunks: {e}")
+                chunks = [Chunk.model_validate(c) for c in dict_chunks]
+
+ chunk_objects = []
+ for chunk in chunks:
+ chunk_obj = VectorStoreChunkObject(
+ id=chunk.chunk_id,
+ created_at=file_info.get("created_at", int(time.time())),
+ vector_store_id=vector_store_id,
+ file_id=file_id,
+ content=chunk.content,
+ metadata=chunk.metadata,
+ embedding=chunk.embedding,
+ )
+ chunk_objects.append(chunk_obj)
+
+        if order == "desc":
+            chunk_objects.sort(key=lambda x: (x.created_at, x.id), reverse=True)
+        else:
+            chunk_objects.sort(key=lambda x: (x.created_at, x.id))
+
+ start_idx = 0
+ end_idx = len(chunk_objects)
+
+ if after:
+ # find index after 'after' chunk
+ for i, chunk_obj in enumerate(chunk_objects):
+ if chunk_obj.id == after:
+ start_idx = i + 1
+ break
+
+ if before:
+ # find index before 'before' chunk
+ for i, chunk_obj in enumerate(chunk_objects):
+ if chunk_obj.id == before:
+ end_idx = i
+ break
+
+ if limit:
+ if end_idx - start_idx > limit:
+ end_idx = start_idx + limit
+
+ paginated_chunks = chunk_objects[start_idx:end_idx]
+
+ first_id = paginated_chunks[0].id if paginated_chunks else None
+ last_id = paginated_chunks[-1].id if paginated_chunks else None
+ has_more = end_idx < len(chunk_objects)
+
+ return VectorStoreListChunksResponse(
+ data=paginated_chunks,
+ first_id=first_id,
+ last_id=last_id,
+ has_more=has_more,
+ )
diff --git a/llama_stack/ui/components/vector-stores/vector-store-detail.tsx b/llama_stack/ui/components/vector-stores/vector-store-detail.tsx
index 7c5c91dd3..bbabe13d8 100644
--- a/llama_stack/ui/components/vector-stores/vector-store-detail.tsx
+++ b/llama_stack/ui/components/vector-stores/vector-store-detail.tsx
@@ -1,9 +1,11 @@
"use client";
+import { useRouter } from "next/navigation";
import type { VectorStore } from "llama-stack-client/resources/vector-stores/vector-stores";
import type { VectorStoreFile } from "llama-stack-client/resources/vector-stores/files";
import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card";
import { Skeleton } from "@/components/ui/skeleton";
+import { Button } from "@/components/ui/button";
import {
DetailLoadingView,
DetailErrorView,
@@ -42,6 +44,11 @@ export function VectorStoreDetailView({
id,
}: VectorStoreDetailViewProps) {
const title = "Vector Store Details";
+ const router = useRouter();
+
+ const handleFileClick = (fileId: string) => {
+ router.push(`/logs/vector-stores/${id}/files/${fileId}`);
+ };
if (errorStore) {
return ;
@@ -80,7 +87,15 @@ export function VectorStoreDetailView({
{files.map((file) => (
- {file.id}
+
+
+
{file.status}
{new Date(file.created_at * 1000).toLocaleString()}
diff --git a/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py b/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py
index 98889f38e..dd9783c2d 100644
--- a/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py
+++ b/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py
@@ -11,6 +11,7 @@ from unittest.mock import AsyncMock
import numpy as np
import pytest
+from llama_stack.apis.common.errors import VectorStoreNotFoundError
from llama_stack.apis.vector_dbs import VectorDB
from llama_stack.apis.vector_io import Chunk, QueryChunksResponse
from llama_stack.providers.remote.vector_io.milvus.milvus import VECTOR_DBS_PREFIX
@@ -294,3 +295,510 @@ async def test_delete_openai_vector_store_file_from_storage(vector_io_adapter, t
assert loaded_file_info == {}
loaded_contents = await vector_io_adapter._load_openai_vector_store_file_contents(store_id, file_id)
assert loaded_contents == []
+
+
+async def test_openai_retrieve_vector_store_chunk(vector_io_adapter):
+ """Test retrieving a specific chunk from a vector store file."""
+ store_id = "vs_1234"
+ file_id = "file_1234"
+ chunk_id = "chunk_001"
+
+ store_info = {
+ "id": store_id,
+ "file_ids": [file_id],
+ "created_at": int(time.time()),
+ }
+ vector_io_adapter.openai_vector_stores[store_id] = store_info
+
+ file_info = {
+ "id": file_id,
+ "status": "completed",
+ "vector_store_id": store_id,
+ "filename": "test_file.txt",
+ "created_at": int(time.time()),
+ }
+
+ file_contents = [
+ {
+ "content": {"type": "text", "text": "First chunk content"},
+ "stored_chunk_id": chunk_id,
+ "metadata": {"file_id": file_id, "position": 0},
+ "chunk_metadata": {"chunk_id": chunk_id},
+ },
+ {
+ "content": {"type": "text", "text": "Second chunk content"},
+ "stored_chunk_id": "chunk_002",
+ "metadata": {"file_id": file_id, "position": 1},
+ "chunk_metadata": {"chunk_id": "chunk_002"},
+ },
+ ]
+
+ await vector_io_adapter._save_openai_vector_store_file(store_id, file_id, file_info, file_contents)
+
+ chunk_object = await vector_io_adapter.openai_retrieve_vector_store_chunk(
+ vector_store_id=store_id, file_id=file_id, chunk_id=chunk_id
+ )
+
+ assert chunk_object.id == chunk_id
+ assert chunk_object.vector_store_id == store_id
+ assert chunk_object.file_id == file_id
+ assert chunk_object.object == "vector_store.file.chunk"
+ assert chunk_object.content.type == "text"
+ assert chunk_object.content.text == "First chunk content"
+ assert chunk_object.metadata["file_id"] == file_id
+ assert chunk_object.metadata["position"] == 0
+
+
+async def test_openai_retrieve_vector_store_chunk_not_found(vector_io_adapter):
+ """Test retrieving a non-existent chunk raises appropriate error."""
+ store_id = "vs_1234"
+ file_id = "file_1234"
+ chunk_id = "nonexistent_chunk"
+
+ store_info = {
+ "id": store_id,
+ "file_ids": [file_id],
+ "created_at": int(time.time()),
+ }
+ vector_io_adapter.openai_vector_stores[store_id] = store_info
+
+ file_info = {"id": file_id, "created_at": int(time.time())}
+ await vector_io_adapter._save_openai_vector_store_file(store_id, file_id, file_info, [])
+
+ with pytest.raises(ValueError, match="Chunk nonexistent_chunk not found"):
+ await vector_io_adapter.openai_retrieve_vector_store_chunk(
+ vector_store_id=store_id, file_id=file_id, chunk_id=chunk_id
+ )
+
+
+async def test_openai_update_vector_store_chunk_metadata_only(vector_io_adapter):
+ """Test updating only the metadata of a chunk."""
+ store_id = "vs_1234"
+ file_id = "file_1234"
+ chunk_id = "chunk_001"
+
+ store_info = {
+ "id": store_id,
+ "file_ids": [file_id],
+ "created_at": int(time.time()),
+ }
+ vector_io_adapter.openai_vector_stores[store_id] = store_info
+
+ file_info = {
+ "id": file_id,
+ "status": "completed",
+ "vector_store_id": store_id,
+ "filename": "test_file.txt",
+ "created_at": int(time.time()),
+ }
+
+ original_content = "Original chunk content"
+ file_contents = [
+ {
+ "content": {"type": "text", "text": original_content},
+ "stored_chunk_id": chunk_id,
+ "metadata": {"file_id": file_id, "version": 1},
+ "chunk_metadata": {"chunk_id": chunk_id},
+ }
+ ]
+
+ await vector_io_adapter._save_openai_vector_store_file(store_id, file_id, file_info, file_contents)
+
+ vector_io_adapter.delete_chunks = AsyncMock()
+ vector_io_adapter.insert_chunks = AsyncMock()
+
+ new_metadata = {"file_id": file_id, "version": 2, "updated": True}
+ updated_chunk = await vector_io_adapter.openai_update_vector_store_chunk(
+ vector_store_id=store_id, file_id=file_id, chunk_id=chunk_id, metadata=new_metadata
+ )
+
+ vector_io_adapter.delete_chunks.assert_not_called()
+ vector_io_adapter.insert_chunks.assert_not_called()
+
+ assert updated_chunk.id == chunk_id
+ assert updated_chunk.metadata["version"] == 2
+ assert updated_chunk.metadata["updated"] is True
+ assert updated_chunk.content.text == original_content
+
+
+async def test_openai_update_vector_store_chunk_content(vector_io_adapter):
+ """Test updating the content of a chunk."""
+ store_id = "vs_1234"
+ file_id = "file_1234"
+ chunk_id = "chunk_001"
+
+ store_info = {
+ "id": store_id,
+ "file_ids": [file_id],
+ "created_at": int(time.time()),
+ }
+ vector_io_adapter.openai_vector_stores[store_id] = store_info
+
+ file_info = {
+ "id": file_id,
+ "status": "completed",
+ "vector_store_id": store_id,
+ "filename": "test_file.txt",
+ "created_at": int(time.time()),
+ }
+
+ file_contents = [
+ {
+ "content": {"type": "text", "text": "Original chunk content"},
+ "stored_chunk_id": chunk_id,
+ "metadata": {"file_id": file_id},
+ "chunk_metadata": {"chunk_id": chunk_id},
+ }
+ ]
+
+ await vector_io_adapter._save_openai_vector_store_file(store_id, file_id, file_info, file_contents)
+
+ vector_io_adapter.delete_chunks = AsyncMock()
+ vector_io_adapter.insert_chunks = AsyncMock()
+
+ new_content = {"type": "text", "text": "Updated chunk content"}
+ updated_chunk = await vector_io_adapter.openai_update_vector_store_chunk(
+ vector_store_id=store_id, file_id=file_id, chunk_id=chunk_id, content=new_content
+ )
+
+ vector_io_adapter.delete_chunks.assert_awaited_once_with(store_id, [chunk_id])
+ vector_io_adapter.insert_chunks.assert_awaited_once()
+
+ assert updated_chunk.id == chunk_id
+ assert updated_chunk.content.text == "Updated chunk content"
+
+
+async def test_openai_update_vector_store_chunk_both_content_and_metadata(vector_io_adapter):
+ """Test updating both content and metadata of a chunk."""
+ store_id = "vs_1234"
+ file_id = "file_1234"
+ chunk_id = "chunk_001"
+
+ store_info = {
+ "id": store_id,
+ "file_ids": [file_id],
+ "created_at": int(time.time()),
+ }
+ vector_io_adapter.openai_vector_stores[store_id] = store_info
+
+ file_info = {
+ "id": file_id,
+ "status": "completed",
+ "vector_store_id": store_id,
+ "filename": "test_file.txt",
+ "created_at": int(time.time()),
+ }
+
+ file_contents = [
+ {
+ "content": {"type": "text", "text": "Original chunk content"},
+ "stored_chunk_id": chunk_id,
+ "metadata": {"file_id": file_id, "version": 1},
+ "chunk_metadata": {"chunk_id": chunk_id},
+ }
+ ]
+
+ await vector_io_adapter._save_openai_vector_store_file(store_id, file_id, file_info, file_contents)
+
+ vector_io_adapter.delete_chunks = AsyncMock()
+ vector_io_adapter.insert_chunks = AsyncMock()
+
+ new_content = {"type": "text", "text": "Updated chunk content"}
+ new_metadata = {"file_id": file_id, "version": 2, "updated": True}
+ updated_chunk = await vector_io_adapter.openai_update_vector_store_chunk(
+ vector_store_id=store_id, file_id=file_id, chunk_id=chunk_id, content=new_content, metadata=new_metadata
+ )
+
+ vector_io_adapter.delete_chunks.assert_awaited_once_with(store_id, [chunk_id])
+ vector_io_adapter.insert_chunks.assert_awaited_once()
+
+ assert updated_chunk.id == chunk_id
+ assert updated_chunk.content.text == "Updated chunk content"
+ assert updated_chunk.metadata["version"] == 2
+ assert updated_chunk.metadata["updated"] is True
+
+
+async def test_openai_delete_vector_store_chunk(vector_io_adapter):
+ """Test deleting a specific chunk from a vector store file."""
+ store_id = "vs_1234"
+ file_id = "file_1234"
+ chunk_id_to_delete = "chunk_001"
+ chunk_id_to_keep = "chunk_002"
+
+ store_info = {
+ "id": store_id,
+ "file_ids": [file_id],
+ "created_at": int(time.time()),
+ }
+ vector_io_adapter.openai_vector_stores[store_id] = store_info
+
+ file_info = {
+ "id": file_id,
+ "status": "completed",
+ "vector_store_id": store_id,
+ "filename": "test_file.txt",
+ "created_at": int(time.time()),
+ }
+
+ file_contents = [
+ {
+ "content": {"type": "text", "text": "First chunk content"},
+ "stored_chunk_id": chunk_id_to_delete,
+ "metadata": {"file_id": file_id, "position": 0},
+ "chunk_metadata": {"chunk_id": chunk_id_to_delete},
+ },
+ {
+ "content": {"type": "text", "text": "Second chunk content"},
+ "stored_chunk_id": chunk_id_to_keep,
+ "metadata": {"file_id": file_id, "position": 1},
+ "chunk_metadata": {"chunk_id": chunk_id_to_keep},
+ },
+ ]
+
+ await vector_io_adapter._save_openai_vector_store_file(store_id, file_id, file_info, file_contents)
+
+ vector_io_adapter.delete_chunks = AsyncMock()
+
+ delete_response = await vector_io_adapter.openai_delete_vector_store_chunk(
+ vector_store_id=store_id, file_id=file_id, chunk_id=chunk_id_to_delete
+ )
+
+ vector_io_adapter.delete_chunks.assert_awaited_once_with(store_id, [chunk_id_to_delete])
+
+ assert delete_response.id == chunk_id_to_delete
+ assert delete_response.object == "vector_store.file.chunk.deleted"
+ assert delete_response.deleted is True
+
+ remaining_contents = await vector_io_adapter._load_openai_vector_store_file_contents(store_id, file_id)
+ assert len(remaining_contents) == 1
+ assert remaining_contents[0]["stored_chunk_id"] == chunk_id_to_keep
+
+
+async def test_openai_delete_vector_store_chunk_not_found(vector_io_adapter):
+ """Test deleting a non-existent chunk raises appropriate error."""
+ store_id = "vs_1234"
+ file_id = "file_1234"
+ chunk_id = "nonexistent_chunk"
+
+ store_info = {
+ "id": store_id,
+ "file_ids": [file_id],
+ "created_at": int(time.time()),
+ }
+ vector_io_adapter.openai_vector_stores[store_id] = store_info
+
+ file_info = {"id": file_id, "created_at": int(time.time())}
+ await vector_io_adapter._save_openai_vector_store_file(store_id, file_id, file_info, [])
+
+ with pytest.raises(ValueError, match="Chunk nonexistent_chunk not found"):
+ await vector_io_adapter.openai_delete_vector_store_chunk(
+ vector_store_id=store_id, file_id=file_id, chunk_id=chunk_id
+ )
+
+
+async def test_chunk_operations_with_nonexistent_vector_store(vector_io_adapter):
+ """Test that chunk operations raise errors for non-existent vector stores."""
+
+ store_id = "nonexistent_store"
+ file_id = "file_1234"
+ chunk_id = "chunk_001"
+
+ with pytest.raises(VectorStoreNotFoundError):
+ await vector_io_adapter.openai_retrieve_vector_store_chunk(
+ vector_store_id=store_id, file_id=file_id, chunk_id=chunk_id
+ )
+
+ with pytest.raises(VectorStoreNotFoundError):
+ await vector_io_adapter.openai_update_vector_store_chunk(
+ vector_store_id=store_id, file_id=file_id, chunk_id=chunk_id, metadata={"test": "value"}
+ )
+
+ with pytest.raises(VectorStoreNotFoundError):
+ await vector_io_adapter.openai_delete_vector_store_chunk(
+ vector_store_id=store_id, file_id=file_id, chunk_id=chunk_id
+ )
+
+
+async def test_chunk_operations_with_nonexistent_file(vector_io_adapter):
+ """Test that chunk operations raise errors for non-existent files."""
+ store_id = "vs_1234"
+ file_id = "nonexistent_file"
+ chunk_id = "chunk_001"
+
+ store_info = {
+ "id": store_id,
+ "file_ids": [],
+ "created_at": int(time.time()),
+ }
+ vector_io_adapter.openai_vector_stores[store_id] = store_info
+
+ with pytest.raises(ValueError, match=f"File {file_id} not found in vector store"):
+ await vector_io_adapter.openai_retrieve_vector_store_chunk(
+ vector_store_id=store_id, file_id=file_id, chunk_id=chunk_id
+ )
+
+ with pytest.raises(ValueError, match=f"File {file_id} not found in vector store"):
+ await vector_io_adapter.openai_update_vector_store_chunk(
+ vector_store_id=store_id, file_id=file_id, chunk_id=chunk_id, metadata={"test": "value"}
+ )
+
+ with pytest.raises(ValueError, match=f"File {file_id} not found in vector store"):
+ await vector_io_adapter.openai_delete_vector_store_chunk(
+ vector_store_id=store_id, file_id=file_id, chunk_id=chunk_id
+ )
+
+ with pytest.raises(ValueError, match=f"File {file_id} not found in vector store"):
+ await vector_io_adapter.openai_list_vector_store_chunks(vector_store_id=store_id, file_id=file_id)
+
+
+async def test_openai_list_vector_store_chunks(vector_io_adapter):
+ """Test listing chunks in a vector store file."""
+ store_id = "test_store_123"
+ await vector_io_adapter.openai_create_vector_store(
+ provider_vector_db_id=store_id,
+ name="Test Store",
+ embedding_model="test_model",
+ embedding_dimension=512,
+ provider_id="test_provider",
+ )
+
+ test_content = "This is test content for chunk listing."
+ file_id = "test_file_456"
+ test_metadata = {"source": "test_file", "chunk_number": "1", "file_id": file_id}
+ test_embedding = [0.1] * 512
+
+ chunk1 = Chunk(
+ content=test_content + " First chunk.",
+ metadata={**test_metadata, "chunk_id": "1"},
+ embedding=test_embedding,
+ chunk_id="chunk_1",
+ )
+ chunk2 = Chunk(
+ content=test_content + " Second chunk.",
+ metadata={**test_metadata, "chunk_id": "2"},
+ embedding=[0.2] * 512,
+ chunk_id="chunk_2",
+ )
+ chunk3 = Chunk(
+ content=test_content + " Third chunk.",
+ metadata={**test_metadata, "chunk_id": "3"},
+ embedding=[0.3] * 512,
+ chunk_id="chunk_3",
+ )
+
+ await vector_io_adapter.insert_chunks(store_id, [chunk1, chunk2, chunk3])
+ file_info = {
+ "id": file_id,
+ "object": "vector_store.file",
+ "created_at": int(time.time()),
+ "vector_store_id": store_id,
+ "status": "completed",
+ "usage_bytes": 1024,
+ "chunking_strategy": {"type": "static", "static": {"max_chunk_size_tokens": 800, "chunk_overlap_tokens": 400}},
+ "filename": "test_file.txt",
+ }
+
+ dict_chunks = [chunk1.model_dump(), chunk2.model_dump(), chunk3.model_dump()]
+ await vector_io_adapter._save_openai_vector_store_file(store_id, file_id, file_info, dict_chunks)
+
+ vector_io_adapter.openai_vector_stores[store_id]["file_ids"].append(file_id)
+
+ response = await vector_io_adapter.openai_list_vector_store_chunks(vector_store_id=store_id, file_id=file_id)
+
+ assert response.object == "list"
+ assert len(response.data) == 3
+ assert response.has_more is False
+ assert response.first_id is not None
+ assert response.last_id is not None
+
+ chunk_ids = [chunk.id for chunk in response.data]
+ expected_chunk_ids = {"chunk_1", "chunk_2", "chunk_3", "1", "2", "3"} # Accept either format
+ for chunk_id in chunk_ids:
+ assert chunk_id in expected_chunk_ids, f"Unexpected chunk_id: {chunk_id}"
+
+ for chunk in response.data:
+ assert chunk.embedding is not None
+ assert len(chunk.embedding) == 512
+ assert chunk.vector_store_id == store_id
+ assert chunk.file_id == file_id
+
+ limited_response = await vector_io_adapter.openai_list_vector_store_chunks(
+ vector_store_id=store_id, file_id=file_id, limit=2
+ )
+
+ assert len(limited_response.data) == 2
+ assert limited_response.has_more is True
+
+ desc_response = await vector_io_adapter.openai_list_vector_store_chunks(
+ vector_store_id=store_id, file_id=file_id, order="desc"
+ )
+
+ assert len(desc_response.data) == 3
+
+ asc_response = await vector_io_adapter.openai_list_vector_store_chunks(
+ vector_store_id=store_id, file_id=file_id, order="asc"
+ )
+
+ assert len(asc_response.data) == 3
+
+ first_chunk_id = response.data[0].id
+ after_response = await vector_io_adapter.openai_list_vector_store_chunks(
+ vector_store_id=store_id, file_id=file_id, after=first_chunk_id
+ )
+
+ assert len(after_response.data) <= 2
+ after_chunk_ids = [chunk.id for chunk in after_response.data]
+ assert first_chunk_id not in after_chunk_ids
+
+
+async def test_openai_list_vector_store_chunks_empty_file(vector_io_adapter):
+ """Test listing chunks in an empty file."""
+ store_id = "test_store_empty"
+ await vector_io_adapter.openai_create_vector_store(
+ provider_vector_db_id=store_id,
+ name="Test Store",
+ embedding_model="test_model",
+ embedding_dimension=512,
+ provider_id="test_provider",
+ )
+
+ file_id = "empty_file"
+ file_info = {
+ "id": file_id,
+ "object": "vector_store.file",
+ "created_at": int(time.time()),
+ "vector_store_id": store_id,
+ "status": "completed",
+ "usage_bytes": 0,
+ "chunking_strategy": {"type": "static", "static": {"max_chunk_size_tokens": 800, "chunk_overlap_tokens": 400}},
+ "filename": "empty_file.txt",
+ }
+
+ await vector_io_adapter._save_openai_vector_store_file(store_id, file_id, file_info, [])
+
+ vector_io_adapter.openai_vector_stores[store_id]["file_ids"].append(file_id)
+
+ response = await vector_io_adapter.openai_list_vector_store_chunks(vector_store_id=store_id, file_id=file_id)
+
+ assert response.object == "list"
+ assert len(response.data) == 0
+ assert response.has_more is False
+ assert response.first_id is None
+ assert response.last_id is None
+
+
+async def test_openai_list_vector_store_chunks_nonexistent_resources(vector_io_adapter):
+ with pytest.raises(VectorStoreNotFoundError):
+ await vector_io_adapter.openai_list_vector_store_chunks(vector_store_id="nonexistent_store", file_id="any_file")
+
+ store_id = "test_store_list"
+ await vector_io_adapter.openai_create_vector_store(
+ provider_vector_db_id=store_id,
+ name="Test Store",
+ embedding_model="test_model",
+ embedding_dimension=512,
+ provider_id="test_provider",
+ )
+
+ with pytest.raises(ValueError, match="File nonexistent_file not found in vector store"):
+ await vector_io_adapter.openai_list_vector_store_chunks(vector_store_id=store_id, file_id="nonexistent_file")