mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-08-15 14:08:00 +00:00
feat: Adding support for get, update, delete for Vector Stores API
Signed-off-by: Francisco Javier Arceo <farceo@redhat.com>
This commit is contained in:
parent
0527c0fb15
commit
4c0eb47fc7
8 changed files with 1818 additions and 1 deletions
454
docs/_static/llama-stack-spec.html
vendored
454
docs/_static/llama-stack-spec.html
vendored
|
@ -3793,6 +3793,195 @@
|
|||
]
|
||||
}
|
||||
},
|
||||
"/v1/openai/v1/vector_stores/{vector_store_id}/files/{file_id}/chunks/{chunk_id}": {
|
||||
"get": {
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "A VectorStoreChunkObject representing the chunk.",
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/VectorStoreChunkObject"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"400": {
|
||||
"$ref": "#/components/responses/BadRequest400"
|
||||
},
|
||||
"429": {
|
||||
"$ref": "#/components/responses/TooManyRequests429"
|
||||
},
|
||||
"500": {
|
||||
"$ref": "#/components/responses/InternalServerError500"
|
||||
},
|
||||
"default": {
|
||||
"$ref": "#/components/responses/DefaultError"
|
||||
}
|
||||
},
|
||||
"tags": [
|
||||
"VectorIO"
|
||||
],
|
||||
"description": "Retrieve a specific chunk from a vector store file.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "vector_store_id",
|
||||
"in": "path",
|
||||
"description": "The ID of the vector store containing the chunk.",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "file_id",
|
||||
"in": "path",
|
||||
"description": "The ID of the file containing the chunk.",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "chunk_id",
|
||||
"in": "path",
|
||||
"description": "The ID of the chunk to retrieve.",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
"post": {
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "A VectorStoreChunkObject representing the updated chunk.",
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/VectorStoreChunkObject"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"400": {
|
||||
"$ref": "#/components/responses/BadRequest400"
|
||||
},
|
||||
"429": {
|
||||
"$ref": "#/components/responses/TooManyRequests429"
|
||||
},
|
||||
"500": {
|
||||
"$ref": "#/components/responses/InternalServerError500"
|
||||
},
|
||||
"default": {
|
||||
"$ref": "#/components/responses/DefaultError"
|
||||
}
|
||||
},
|
||||
"tags": [
|
||||
"VectorIO"
|
||||
],
|
||||
"description": "Update a specific chunk in a vector store file.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "vector_store_id",
|
||||
"in": "path",
|
||||
"description": "The ID of the vector store containing the chunk.",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "file_id",
|
||||
"in": "path",
|
||||
"description": "The ID of the file containing the chunk.",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "chunk_id",
|
||||
"in": "path",
|
||||
"description": "The ID of the chunk to update.",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
],
|
||||
"requestBody": {
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/OpenaiUpdateVectorStoreChunkRequest"
|
||||
}
|
||||
}
|
||||
},
|
||||
"required": true
|
||||
}
|
||||
},
|
||||
"delete": {
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "A VectorStoreChunkDeleteResponse indicating the deletion status.",
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/VectorStoreChunkDeleteResponse"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"400": {
|
||||
"$ref": "#/components/responses/BadRequest400"
|
||||
},
|
||||
"429": {
|
||||
"$ref": "#/components/responses/TooManyRequests429"
|
||||
},
|
||||
"500": {
|
||||
"$ref": "#/components/responses/InternalServerError500"
|
||||
},
|
||||
"default": {
|
||||
"$ref": "#/components/responses/DefaultError"
|
||||
}
|
||||
},
|
||||
"tags": [
|
||||
"VectorIO"
|
||||
],
|
||||
"description": "Delete a specific chunk from a vector store file.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "vector_store_id",
|
||||
"in": "path",
|
||||
"description": "The ID of the vector store containing the chunk.",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "file_id",
|
||||
"in": "path",
|
||||
"description": "The ID of the file containing the chunk.",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "chunk_id",
|
||||
"in": "path",
|
||||
"description": "The ID of the chunk to delete.",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"/v1/openai/v1/vector_stores/{vector_store_id}/files/{file_id}": {
|
||||
"get": {
|
||||
"responses": {
|
||||
|
@ -4155,6 +4344,94 @@
|
|||
"parameters": []
|
||||
}
|
||||
},
|
||||
"/v1/openai/v1/vector_stores/{vector_store_id}/files/{file_id}/chunks": {
|
||||
"get": {
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "A VectorStoreListChunksResponse with the list of chunks.",
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/VectorStoreListChunksResponse"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"400": {
|
||||
"$ref": "#/components/responses/BadRequest400"
|
||||
},
|
||||
"429": {
|
||||
"$ref": "#/components/responses/TooManyRequests429"
|
||||
},
|
||||
"500": {
|
||||
"$ref": "#/components/responses/InternalServerError500"
|
||||
},
|
||||
"default": {
|
||||
"$ref": "#/components/responses/DefaultError"
|
||||
}
|
||||
},
|
||||
"tags": [
|
||||
"VectorIO"
|
||||
],
|
||||
"description": "List chunks in a vector store file.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "vector_store_id",
|
||||
"in": "path",
|
||||
"description": "The ID of the vector store.",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "file_id",
|
||||
"in": "path",
|
||||
"description": "The ID of the file.",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "limit",
|
||||
"in": "query",
|
||||
"description": "Max number of chunks to return.",
|
||||
"required": false,
|
||||
"schema": {
|
||||
"type": "integer"
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "order",
|
||||
"in": "query",
|
||||
"description": "Sort order.",
|
||||
"required": false,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "after",
|
||||
"in": "query",
|
||||
"description": "Pagination cursor.",
|
||||
"required": false,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "before",
|
||||
"in": "query",
|
||||
"description": "Pagination cursor.",
|
||||
"required": false,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"/v1/openai/v1/files/{file_id}/content": {
|
||||
"get": {
|
||||
"responses": {
|
||||
|
@ -14351,6 +14628,33 @@
|
|||
"title": "VectorStoreDeleteResponse",
|
||||
"description": "Response from deleting a vector store."
|
||||
},
|
||||
"VectorStoreChunkDeleteResponse": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"id": {
|
||||
"type": "string",
|
||||
"description": "Unique identifier of the deleted chunk"
|
||||
},
|
||||
"object": {
|
||||
"type": "string",
|
||||
"default": "vector_store.file.chunk.deleted",
|
||||
"description": "Object type identifier for the deletion response"
|
||||
},
|
||||
"deleted": {
|
||||
"type": "boolean",
|
||||
"default": true,
|
||||
"description": "Whether the deletion operation was successful"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"id",
|
||||
"object",
|
||||
"deleted"
|
||||
],
|
||||
"title": "VectorStoreChunkDeleteResponse",
|
||||
"description": "Response from deleting a vector store chunk."
|
||||
},
|
||||
"VectorStoreFileDeleteResponse": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
|
@ -14691,6 +14995,119 @@
|
|||
],
|
||||
"title": "OpenAIListModelsResponse"
|
||||
},
|
||||
"VectorStoreChunkObject": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"id": {
|
||||
"type": "string",
|
||||
"description": "Unique identifier for the chunk"
|
||||
},
|
||||
"object": {
|
||||
"type": "string",
|
||||
"default": "vector_store.file.chunk",
|
||||
"description": "Object type identifier, always \"vector_store.file.chunk\""
|
||||
},
|
||||
"created_at": {
|
||||
"type": "integer",
|
||||
"description": "Timestamp when the chunk was created"
|
||||
},
|
||||
"vector_store_id": {
|
||||
"type": "string",
|
||||
"description": "ID of the vector store containing this chunk"
|
||||
},
|
||||
"file_id": {
|
||||
"type": "string",
|
||||
"description": "ID of the file containing this chunk"
|
||||
},
|
||||
"content": {
|
||||
"$ref": "#/components/schemas/InterleavedContent",
|
||||
"description": "The content of the chunk, using the same format as Chunk class"
|
||||
},
|
||||
"metadata": {
|
||||
"type": "object",
|
||||
"additionalProperties": {
|
||||
"oneOf": [
|
||||
{
|
||||
"type": "null"
|
||||
},
|
||||
{
|
||||
"type": "boolean"
|
||||
},
|
||||
{
|
||||
"type": "number"
|
||||
},
|
||||
{
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"type": "array"
|
||||
},
|
||||
{
|
||||
"type": "object"
|
||||
}
|
||||
]
|
||||
},
|
||||
"description": "Metadata associated with the chunk"
|
||||
},
|
||||
"embedding": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "number"
|
||||
},
|
||||
"description": "The embedding vector for the chunk"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"id",
|
||||
"object",
|
||||
"created_at",
|
||||
"vector_store_id",
|
||||
"file_id",
|
||||
"content",
|
||||
"metadata"
|
||||
],
|
||||
"title": "VectorStoreChunkObject",
|
||||
"description": "OpenAI Vector Store Chunk object."
|
||||
},
|
||||
"VectorStoreListChunksResponse": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"object": {
|
||||
"type": "string",
|
||||
"default": "list",
|
||||
"description": "Object type identifier, always \"list\""
|
||||
},
|
||||
"data": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/components/schemas/VectorStoreChunkObject"
|
||||
},
|
||||
"description": "List of vector store chunk objects"
|
||||
},
|
||||
"first_id": {
|
||||
"type": "string",
|
||||
"description": "(Optional) ID of the first chunk in the list for pagination"
|
||||
},
|
||||
"last_id": {
|
||||
"type": "string",
|
||||
"description": "(Optional) ID of the last chunk in the list for pagination"
|
||||
},
|
||||
"has_more": {
|
||||
"type": "boolean",
|
||||
"default": false,
|
||||
"description": "Whether there are more chunks available beyond this page"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"object",
|
||||
"data",
|
||||
"has_more"
|
||||
],
|
||||
"title": "VectorStoreListChunksResponse",
|
||||
"description": "Response from listing chunks in a vector store file."
|
||||
},
|
||||
"VectorStoreListResponse": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
|
@ -15039,6 +15456,43 @@
|
|||
"additionalProperties": false,
|
||||
"title": "OpenaiUpdateVectorStoreRequest"
|
||||
},
|
||||
"OpenaiUpdateVectorStoreChunkRequest": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"content": {
|
||||
"$ref": "#/components/schemas/InterleavedContent",
|
||||
"description": "Updated content for the chunk."
|
||||
},
|
||||
"metadata": {
|
||||
"type": "object",
|
||||
"additionalProperties": {
|
||||
"oneOf": [
|
||||
{
|
||||
"type": "null"
|
||||
},
|
||||
{
|
||||
"type": "boolean"
|
||||
},
|
||||
{
|
||||
"type": "number"
|
||||
},
|
||||
{
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"type": "array"
|
||||
},
|
||||
{
|
||||
"type": "object"
|
||||
}
|
||||
]
|
||||
},
|
||||
"description": "Updated metadata for the chunk."
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"title": "OpenaiUpdateVectorStoreChunkRequest"
|
||||
},
|
||||
"OpenaiUpdateVectorStoreFileRequest": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
|
|
324
docs/_static/llama-stack-spec.yaml
vendored
324
docs/_static/llama-stack-spec.yaml
vendored
|
@ -2674,6 +2674,142 @@ paths:
|
|||
required: true
|
||||
schema:
|
||||
type: string
|
||||
/v1/openai/v1/vector_stores/{vector_store_id}/files/{file_id}/chunks/{chunk_id}:
|
||||
get:
|
||||
responses:
|
||||
'200':
|
||||
description: >-
|
||||
A VectorStoreChunkObject representing the chunk.
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/VectorStoreChunkObject'
|
||||
'400':
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
'429':
|
||||
$ref: >-
|
||||
#/components/responses/TooManyRequests429
|
||||
'500':
|
||||
$ref: >-
|
||||
#/components/responses/InternalServerError500
|
||||
default:
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- VectorIO
|
||||
description: >-
|
||||
Retrieve a specific chunk from a vector store file.
|
||||
parameters:
|
||||
- name: vector_store_id
|
||||
in: path
|
||||
description: >-
|
||||
The ID of the vector store containing the chunk.
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
- name: file_id
|
||||
in: path
|
||||
description: The ID of the file containing the chunk.
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
- name: chunk_id
|
||||
in: path
|
||||
description: The ID of the chunk to retrieve.
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
post:
|
||||
responses:
|
||||
'200':
|
||||
description: >-
|
||||
A VectorStoreChunkObject representing the updated chunk.
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/VectorStoreChunkObject'
|
||||
'400':
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
'429':
|
||||
$ref: >-
|
||||
#/components/responses/TooManyRequests429
|
||||
'500':
|
||||
$ref: >-
|
||||
#/components/responses/InternalServerError500
|
||||
default:
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- VectorIO
|
||||
description: >-
|
||||
Update a specific chunk in a vector store file.
|
||||
parameters:
|
||||
- name: vector_store_id
|
||||
in: path
|
||||
description: >-
|
||||
The ID of the vector store containing the chunk.
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
- name: file_id
|
||||
in: path
|
||||
description: The ID of the file containing the chunk.
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
- name: chunk_id
|
||||
in: path
|
||||
description: The ID of the chunk to update.
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
requestBody:
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/OpenaiUpdateVectorStoreChunkRequest'
|
||||
required: true
|
||||
delete:
|
||||
responses:
|
||||
'200':
|
||||
description: >-
|
||||
A VectorStoreChunkDeleteResponse indicating the deletion status.
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/VectorStoreChunkDeleteResponse'
|
||||
'400':
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
'429':
|
||||
$ref: >-
|
||||
#/components/responses/TooManyRequests429
|
||||
'500':
|
||||
$ref: >-
|
||||
#/components/responses/InternalServerError500
|
||||
default:
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- VectorIO
|
||||
description: >-
|
||||
Delete a specific chunk from a vector store file.
|
||||
parameters:
|
||||
- name: vector_store_id
|
||||
in: path
|
||||
description: >-
|
||||
The ID of the vector store containing the chunk.
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
- name: file_id
|
||||
in: path
|
||||
description: The ID of the file containing the chunk.
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
- name: chunk_id
|
||||
in: path
|
||||
description: The ID of the chunk to delete.
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
/v1/openai/v1/vector_stores/{vector_store_id}/files/{file_id}:
|
||||
get:
|
||||
responses:
|
||||
|
@ -2947,6 +3083,66 @@ paths:
|
|||
- Models
|
||||
description: List models using the OpenAI API.
|
||||
parameters: []
|
||||
/v1/openai/v1/vector_stores/{vector_store_id}/files/{file_id}/chunks:
|
||||
get:
|
||||
responses:
|
||||
'200':
|
||||
description: >-
|
||||
A VectorStoreListChunksResponse with the list of chunks.
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/VectorStoreListChunksResponse'
|
||||
'400':
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
'429':
|
||||
$ref: >-
|
||||
#/components/responses/TooManyRequests429
|
||||
'500':
|
||||
$ref: >-
|
||||
#/components/responses/InternalServerError500
|
||||
default:
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- VectorIO
|
||||
description: List chunks in a vector store file.
|
||||
parameters:
|
||||
- name: vector_store_id
|
||||
in: path
|
||||
description: The ID of the vector store.
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
- name: file_id
|
||||
in: path
|
||||
description: The ID of the file.
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
- name: limit
|
||||
in: query
|
||||
description: Max number of chunks to return.
|
||||
required: false
|
||||
schema:
|
||||
type: integer
|
||||
- name: order
|
||||
in: query
|
||||
description: Sort order.
|
||||
required: false
|
||||
schema:
|
||||
type: string
|
||||
- name: after
|
||||
in: query
|
||||
description: Pagination cursor.
|
||||
required: false
|
||||
schema:
|
||||
type: string
|
||||
- name: before
|
||||
in: query
|
||||
description: Pagination cursor.
|
||||
required: false
|
||||
schema:
|
||||
type: string
|
||||
/v1/openai/v1/files/{file_id}/content:
|
||||
get:
|
||||
responses:
|
||||
|
@ -10609,6 +10805,30 @@ components:
|
|||
- deleted
|
||||
title: VectorStoreDeleteResponse
|
||||
description: Response from deleting a vector store.
|
||||
VectorStoreChunkDeleteResponse:
|
||||
type: object
|
||||
properties:
|
||||
id:
|
||||
type: string
|
||||
description: Unique identifier of the deleted chunk
|
||||
object:
|
||||
type: string
|
||||
default: vector_store.file.chunk.deleted
|
||||
description: >-
|
||||
Object type identifier for the deletion response
|
||||
deleted:
|
||||
type: boolean
|
||||
default: true
|
||||
description: >-
|
||||
Whether the deletion operation was successful
|
||||
additionalProperties: false
|
||||
required:
|
||||
- id
|
||||
- object
|
||||
- deleted
|
||||
title: VectorStoreChunkDeleteResponse
|
||||
description: >-
|
||||
Response from deleting a vector store chunk.
|
||||
VectorStoreFileDeleteResponse:
|
||||
type: object
|
||||
properties:
|
||||
|
@ -10895,6 +11115,91 @@ components:
|
|||
required:
|
||||
- data
|
||||
title: OpenAIListModelsResponse
|
||||
VectorStoreChunkObject:
|
||||
type: object
|
||||
properties:
|
||||
id:
|
||||
type: string
|
||||
description: Unique identifier for the chunk
|
||||
object:
|
||||
type: string
|
||||
default: vector_store.file.chunk
|
||||
description: >-
|
||||
Object type identifier, always "vector_store.file.chunk"
|
||||
created_at:
|
||||
type: integer
|
||||
description: Timestamp when the chunk was created
|
||||
vector_store_id:
|
||||
type: string
|
||||
description: >-
|
||||
ID of the vector store containing this chunk
|
||||
file_id:
|
||||
type: string
|
||||
description: ID of the file containing this chunk
|
||||
content:
|
||||
$ref: '#/components/schemas/InterleavedContent'
|
||||
description: >-
|
||||
The content of the chunk, using the same format as Chunk class
|
||||
metadata:
|
||||
type: object
|
||||
additionalProperties:
|
||||
oneOf:
|
||||
- type: 'null'
|
||||
- type: boolean
|
||||
- type: number
|
||||
- type: string
|
||||
- type: array
|
||||
- type: object
|
||||
description: Metadata associated with the chunk
|
||||
embedding:
|
||||
type: array
|
||||
items:
|
||||
type: number
|
||||
description: The embedding vector for the chunk
|
||||
additionalProperties: false
|
||||
required:
|
||||
- id
|
||||
- object
|
||||
- created_at
|
||||
- vector_store_id
|
||||
- file_id
|
||||
- content
|
||||
- metadata
|
||||
title: VectorStoreChunkObject
|
||||
description: OpenAI Vector Store Chunk object.
|
||||
VectorStoreListChunksResponse:
|
||||
type: object
|
||||
properties:
|
||||
object:
|
||||
type: string
|
||||
default: list
|
||||
description: Object type identifier, always "list"
|
||||
data:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/VectorStoreChunkObject'
|
||||
description: List of vector store chunk objects
|
||||
first_id:
|
||||
type: string
|
||||
description: >-
|
||||
(Optional) ID of the first chunk in the list for pagination
|
||||
last_id:
|
||||
type: string
|
||||
description: >-
|
||||
(Optional) ID of the last chunk in the list for pagination
|
||||
has_more:
|
||||
type: boolean
|
||||
default: false
|
||||
description: >-
|
||||
Whether there are more chunks available beyond this page
|
||||
additionalProperties: false
|
||||
required:
|
||||
- object
|
||||
- data
|
||||
- has_more
|
||||
title: VectorStoreListChunksResponse
|
||||
description: >-
|
||||
Response from listing chunks in a vector store file.
|
||||
VectorStoreListResponse:
|
||||
type: object
|
||||
properties:
|
||||
|
@ -11141,6 +11446,25 @@ components:
|
|||
Set of 16 key-value pairs that can be attached to an object.
|
||||
additionalProperties: false
|
||||
title: OpenaiUpdateVectorStoreRequest
|
||||
OpenaiUpdateVectorStoreChunkRequest:
|
||||
type: object
|
||||
properties:
|
||||
content:
|
||||
$ref: '#/components/schemas/InterleavedContent'
|
||||
description: Updated content for the chunk.
|
||||
metadata:
|
||||
type: object
|
||||
additionalProperties:
|
||||
oneOf:
|
||||
- type: 'null'
|
||||
- type: boolean
|
||||
- type: number
|
||||
- type: string
|
||||
- type: array
|
||||
- type: object
|
||||
description: Updated metadata for the chunk.
|
||||
additionalProperties: false
|
||||
title: OpenaiUpdateVectorStoreChunkRequest
|
||||
OpenaiUpdateVectorStoreFileRequest:
|
||||
type: object
|
||||
properties:
|
||||
|
|
|
@ -426,6 +426,74 @@ class VectorStoreFileDeleteResponse(BaseModel):
|
|||
deleted: bool = True
|
||||
|
||||
|
||||
# Response model for a single chunk of a vector store file.
# The docstring text below also appears verbatim as `description` values in the
# committed OpenAPI spec (docs/_static/llama-stack-spec.*); the spec appears to
# be derived from it, so regenerate the spec whenever this wording changes.
@json_schema_type
class VectorStoreChunkObject(BaseModel):
    """OpenAI Vector Store Chunk object.

    :param id: Unique identifier for the chunk
    :param object: Object type identifier, always "vector_store.file.chunk"
    :param created_at: Timestamp when the chunk was created
    :param vector_store_id: ID of the vector store containing this chunk
    :param file_id: ID of the file containing this chunk
    :param content: The content of the chunk, using the same format as Chunk class
    :param metadata: Metadata associated with the chunk
    :param embedding: The embedding vector for the chunk
    """

    id: str
    # Declared as a plain `str` with a default; the "always" in the docstring is
    # a convention, not something the type enforces.
    object: str = "vector_store.file.chunk"
    # NOTE(review): the unit is not stated here - presumably Unix epoch seconds; confirm.
    created_at: int
    vector_store_id: str
    file_id: str
    content: InterleavedContent
    # default_factory gives every instance its own empty dict.
    # NOTE(review): the committed spec lists `metadata` under `required` even
    # though it has a default here - confirm that is intended.
    metadata: dict[str, Any] = Field(default_factory=dict)
    # Optional, defaults to None; the committed spec leaves it out of `required`.
    embedding: list[float] | None = None
|
||||
|
||||
|
||||
# One page of chunks, as returned by the "list chunks" endpoint.
# The docstring text below also appears verbatim as `description` values in the
# committed OpenAPI spec, so regenerate the spec whenever this wording changes.
@json_schema_type
class VectorStoreListChunksResponse(BaseModel):
    """Response from listing chunks in a vector store file.

    :param object: Object type identifier, always "list"
    :param data: List of vector store chunk objects
    :param first_id: (Optional) ID of the first chunk in the list for pagination
    :param last_id: (Optional) ID of the last chunk in the list for pagination
    :param has_more: Whether there are more chunks available beyond this page
    """

    # Declared as a plain `str` with a default; "always" is not enforced by the type.
    object: str = "list"
    data: list[VectorStoreChunkObject]
    # Both cursor ids default to None.
    # NOTE(review): presumably None when `data` is empty - confirm with providers.
    first_id: str | None = None
    last_id: str | None = None
    has_more: bool = False
|
||||
|
||||
|
||||
# NOTE(review): nothing visible in this diff references this model. The update
# webmethod takes `content` and `metadata` as plain parameters, and the request
# body schema in the committed spec is named OpenaiUpdateVectorStoreChunkRequest.
# Confirm this class is used somewhere, or drop it.
@json_schema_type
class VectorStoreChunkUpdateRequest(BaseModel):
    """Request to update a vector store chunk.

    :param content: Updated content for the chunk
    :param metadata: Updated metadata for the chunk
    """

    # Both fields are optional and default to None.
    # NOTE(review): presumably None means "leave this part unchanged" - confirm.
    content: InterleavedContent | None = None
    metadata: dict[str, Any] | None = None
|
||||
|
||||
|
||||
# Result model for the chunk DELETE endpoint; its field layout matches the
# VectorStoreFileDeleteResponse hunk context directly above it in this file
# (`deleted: bool = True`).
# The docstring text below also appears verbatim as `description` values in the
# committed OpenAPI spec, so regenerate the spec whenever this wording changes.
@json_schema_type
class VectorStoreChunkDeleteResponse(BaseModel):
    """Response from deleting a vector store chunk.

    :param id: Unique identifier of the deleted chunk
    :param object: Object type identifier for the deletion response
    :param deleted: Whether the deletion operation was successful
    """

    id: str
    # Plain `str` with a default value; the type does not pin it to this literal.
    object: str = "vector_store.file.chunk.deleted"
    # Defaults to True, so a provider must set False explicitly to report failure.
    deleted: bool = True
|
||||
|
||||
|
||||
class VectorDBStore(Protocol):
|
||||
def get_vector_db(self, vector_db_id: str) -> VectorDB | None: ...
|
||||
|
||||
|
@ -638,6 +706,28 @@ class VectorIO(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
# Protocol stub: the body is `...`; providers supply the implementation.
# The docstring wording below is reproduced in the committed OpenAPI spec
# (operation and parameter descriptions), so keep the two in sync.
@webmethod(route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}/chunks", method="GET")
async def openai_list_vector_store_chunks(
    self,
    vector_store_id: str,
    file_id: str,
    # NOTE(review): the defaults 20 / "desc" are not reflected in the committed
    # spec's parameter schemas - confirm whether they should be.
    limit: int | None = 20,
    # NOTE(review): accepted values are not constrained here - presumably
    # "asc" or "desc"; confirm against the provider implementations.
    order: str | None = "desc",
    # NOTE(review): presumably chunk ids used as page cursors - confirm.
    after: str | None = None,
    before: str | None = None,
) -> VectorStoreListChunksResponse:
    """List chunks in a vector store file.

    :param vector_store_id: The ID of the vector store.
    :param file_id: The ID of the file.
    :param limit: Max number of chunks to return.
    :param order: Sort order.
    :param after: Pagination cursor.
    :param before: Pagination cursor.
    :returns: A VectorStoreListChunksResponse with the list of chunks.
    """
    ...
|
||||
|
||||
@webmethod(route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}/content", method="GET")
|
||||
async def openai_retrieve_vector_store_file_contents(
|
||||
self,
|
||||
|
@ -681,3 +771,55 @@ class VectorIO(Protocol):
|
|||
:returns: A VectorStoreFileDeleteResponse indicating the deletion status.
|
||||
"""
|
||||
...
|
||||
|
||||
# Protocol stub: the body is `...`; providers supply the implementation.
# The docstring wording below is reproduced in the committed OpenAPI spec
# (operation and parameter descriptions), so keep the two in sync.
@webmethod(route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}/chunks/{chunk_id}", method="GET")
async def openai_retrieve_vector_store_chunk(
    self,
    vector_store_id: str,
    file_id: str,
    chunk_id: str,
) -> VectorStoreChunkObject:
    """Retrieve a specific chunk from a vector store file.

    :param vector_store_id: The ID of the vector store containing the chunk.
    :param file_id: The ID of the file containing the chunk.
    :param chunk_id: The ID of the chunk to retrieve.
    :returns: A VectorStoreChunkObject representing the chunk.
    """
    ...
|
||||
|
||||
# Protocol stub: the body is `...`; providers supply the implementation.
# Updates are exposed as POST on the same route as retrieve (GET) and delete (DELETE).
# The docstring wording below is reproduced in the committed OpenAPI spec
# (operation and parameter descriptions), so keep the two in sync.
@webmethod(route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}/chunks/{chunk_id}", method="POST")
async def openai_update_vector_store_chunk(
    self,
    vector_store_id: str,
    file_id: str,
    chunk_id: str,
    # Both update fields are optional.
    # NOTE(review): presumably None means "leave this part of the chunk
    # unchanged" - confirm against the provider implementations.
    content: InterleavedContent | None = None,
    metadata: dict[str, Any] | None = None,
) -> VectorStoreChunkObject:
    """Update a specific chunk in a vector store file.

    :param vector_store_id: The ID of the vector store containing the chunk.
    :param file_id: The ID of the file containing the chunk.
    :param chunk_id: The ID of the chunk to update.
    :param content: Updated content for the chunk.
    :param metadata: Updated metadata for the chunk.
    :returns: A VectorStoreChunkObject representing the updated chunk.
    """
    ...
|
||||
|
||||
# Protocol stub: the body is `...`; providers supply the implementation.
# The docstring wording below is reproduced in the committed OpenAPI spec
# (operation and parameter descriptions), so keep the two in sync.
@webmethod(route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}/chunks/{chunk_id}", method="DELETE")
async def openai_delete_vector_store_chunk(
    self,
    vector_store_id: str,
    file_id: str,
    chunk_id: str,
) -> VectorStoreChunkDeleteResponse:
    """Delete a specific chunk from a vector store file.

    :param vector_store_id: The ID of the vector store containing the chunk.
    :param file_id: The ID of the file containing the chunk.
    :param chunk_id: The ID of the chunk to delete.
    :returns: A VectorStoreChunkDeleteResponse indicating the deletion status.
    """
    ...
|
||||
|
|
|
@ -17,7 +17,9 @@ from llama_stack.apis.vector_io import (
|
|||
QueryChunksResponse,
|
||||
SearchRankingOptions,
|
||||
VectorIO,
|
||||
VectorStoreChunkDeleteResponse,
|
||||
VectorStoreChunkingStrategy,
|
||||
VectorStoreChunkObject,
|
||||
VectorStoreDeleteResponse,
|
||||
VectorStoreFileContentsResponse,
|
||||
VectorStoreFileDeleteResponse,
|
||||
|
@ -341,6 +343,68 @@ class VectorIORouter(VectorIO):
|
|||
file_id=file_id,
|
||||
)
|
||||
|
||||
async def openai_retrieve_vector_store_chunk(
    self,
    vector_store_id: str,
    file_id: str,
    chunk_id: str,
) -> VectorStoreChunkObject:
    """Log the request and forward it unchanged to the routing table.

    :param vector_store_id: ID of the vector store, passed through as-is.
    :param file_id: ID of the file, passed through as-is.
    :param chunk_id: ID of the chunk, passed through as-is.
    :returns: Whatever the routing table's method of the same name returns.
    """
    logger.debug(f"VectorIORouter.openai_retrieve_vector_store_chunk: {vector_store_id}, {file_id}, {chunk_id}")
    chunk = await self.routing_table.openai_retrieve_vector_store_chunk(
        vector_store_id=vector_store_id, file_id=file_id, chunk_id=chunk_id
    )
    return chunk
|
||||
|
||||
async def openai_update_vector_store_chunk(
    self,
    vector_store_id: str,
    file_id: str,
    chunk_id: str,
    content: InterleavedContent | None = None,
    metadata: dict[str, Any] | None = None,
) -> VectorStoreChunkObject:
    """Log the request and forward it unchanged to the routing table.

    Only the three ids are logged; `content` and `metadata` are passed on
    without being written to the log.

    :param vector_store_id: ID of the vector store, passed through as-is.
    :param file_id: ID of the file, passed through as-is.
    :param chunk_id: ID of the chunk, passed through as-is.
    :param content: Replacement content, passed through as-is (may be None).
    :param metadata: Replacement metadata, passed through as-is (may be None).
    :returns: Whatever the routing table's method of the same name returns.
    """
    logger.debug(f"VectorIORouter.openai_update_vector_store_chunk: {vector_store_id}, {file_id}, {chunk_id}")
    target = {"vector_store_id": vector_store_id, "file_id": file_id, "chunk_id": chunk_id}
    updated_chunk = await self.routing_table.openai_update_vector_store_chunk(
        **target,
        content=content,
        metadata=metadata,
    )
    return updated_chunk
|
||||
|
||||
async def openai_delete_vector_store_chunk(
    self,
    vector_store_id: str,
    file_id: str,
    chunk_id: str,
) -> VectorStoreChunkDeleteResponse:
    """Log the request and forward it unchanged to the routing table.

    :param vector_store_id: ID of the vector store, passed through as-is.
    :param file_id: ID of the file, passed through as-is.
    :param chunk_id: ID of the chunk, passed through as-is.
    :returns: Whatever the routing table's method of the same name returns.
    """
    logger.debug(f"VectorIORouter.openai_delete_vector_store_chunk: {vector_store_id}, {file_id}, {chunk_id}")
    outcome = await self.routing_table.openai_delete_vector_store_chunk(
        vector_store_id=vector_store_id, file_id=file_id, chunk_id=chunk_id
    )
    return outcome
|
||||
|
||||
# NOTE(review): unlike the sibling chunk methods in this class, this one has no
# return annotation. The protocol declares `-> VectorStoreListChunksResponse`,
# but that name is not among the imports visible in this file's excerpt, so
# adding the annotation also means adding the import.
async def openai_list_vector_store_chunks(
    self,
    vector_store_id: str,
    file_id: str,
    limit: int | None = 20,
    order: str | None = "desc",
    after: str | None = None,
    before: str | None = None,
):
    """Log the request and forward it unchanged to the routing table.

    Only the vector store id and file id are logged; the paging arguments are
    passed on without being written to the log.

    :param vector_store_id: ID of the vector store, passed through as-is.
    :param file_id: ID of the file, passed through as-is.
    :param limit: Page size, passed through as-is.
    :param order: Sort order, passed through as-is.
    :param after: Pagination cursor, passed through as-is.
    :param before: Pagination cursor, passed through as-is.
    :returns: Whatever the routing table's method of the same name returns.
    """
    logger.debug(f"VectorIORouter.openai_list_vector_store_chunks: {vector_store_id}, {file_id}")
    paging = {"limit": limit, "order": order, "after": after, "before": before}
    page = await self.routing_table.openai_list_vector_store_chunks(
        vector_store_id=vector_store_id,
        file_id=file_id,
        **paging,
    )
    return page
|
||||
|
||||
async def health(self) -> dict[str, HealthResponse]:
|
||||
health_statuses = {}
|
||||
timeout = 1 # increasing the timeout to 1 second for health checks
|
||||
|
|
|
@ -13,13 +13,17 @@ from llama_stack.apis.models import ModelType
|
|||
from llama_stack.apis.resource import ResourceType
|
||||
from llama_stack.apis.vector_dbs import ListVectorDBsResponse, VectorDB, VectorDBs
|
||||
from llama_stack.apis.vector_io.vector_io import (
|
||||
InterleavedContent,
|
||||
SearchRankingOptions,
|
||||
VectorStoreChunkDeleteResponse,
|
||||
VectorStoreChunkingStrategy,
|
||||
VectorStoreChunkObject,
|
||||
VectorStoreDeleteResponse,
|
||||
VectorStoreFileContentsResponse,
|
||||
VectorStoreFileDeleteResponse,
|
||||
VectorStoreFileObject,
|
||||
VectorStoreFileStatus,
|
||||
VectorStoreListChunksResponse,
|
||||
VectorStoreObject,
|
||||
VectorStoreSearchResponsePage,
|
||||
)
|
||||
|
@ -227,3 +231,69 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl, VectorDBs):
|
|||
vector_store_id=vector_store_id,
|
||||
file_id=file_id,
|
||||
)
|
||||
|
||||
async def openai_retrieve_vector_store_chunk(
    self,
    vector_store_id: str,
    file_id: str,
    chunk_id: str,
) -> VectorStoreChunkObject:
    """Check read access, resolve the provider for the store, and fetch one chunk.

    :param vector_store_id: ID of the vector store; also used to pick the provider.
    :param file_id: ID of the file that holds the chunk.
    :param chunk_id: ID of the chunk to retrieve.
    :returns: The chunk object produced by the provider.
    """
    # Access check runs before any provider lookup
    # (presumably raises when "read" is not allowed -- confirm in assert_action_allowed).
    await self.assert_action_allowed("read", "vector_db", vector_store_id)
    backend = await self.get_provider_impl(vector_store_id)
    chunk = await backend.openai_retrieve_vector_store_chunk(
        vector_store_id=vector_store_id,
        file_id=file_id,
        chunk_id=chunk_id,
    )
    return chunk
|
||||
|
||||
async def openai_update_vector_store_chunk(
    self,
    vector_store_id: str,
    file_id: str,
    chunk_id: str,
    content: InterleavedContent | None = None,
    metadata: dict[str, Any] | None = None,
) -> VectorStoreChunkObject:
    """Check update access, resolve the provider for the store, and update one chunk.

    :param vector_store_id: ID of the vector store; also used to pick the provider.
    :param file_id: ID of the file that holds the chunk.
    :param chunk_id: ID of the chunk to update.
    :param content: Replacement content, forwarded to the provider unchanged.
    :param metadata: Metadata for the update, forwarded to the provider unchanged.
    :returns: The chunk object produced by the provider.
    """
    # Access check runs before any provider lookup
    # (presumably raises when "update" is not allowed -- confirm in assert_action_allowed).
    await self.assert_action_allowed("update", "vector_db", vector_store_id)
    backend = await self.get_provider_impl(vector_store_id)
    updated_chunk = await backend.openai_update_vector_store_chunk(
        vector_store_id=vector_store_id,
        file_id=file_id,
        chunk_id=chunk_id,
        content=content,
        metadata=metadata,
    )
    return updated_chunk
|
||||
|
||||
async def openai_delete_vector_store_chunk(
    self,
    vector_store_id: str,
    file_id: str,
    chunk_id: str,
) -> VectorStoreChunkDeleteResponse:
    """Check delete access, resolve the provider for the store, and delete one chunk.

    :param vector_store_id: ID of the vector store; also used to pick the provider.
    :param file_id: ID of the file that holds the chunk.
    :param chunk_id: ID of the chunk to delete.
    :returns: The deletion response produced by the provider.
    """
    # Access check runs before any provider lookup
    # (presumably raises when "delete" is not allowed -- confirm in assert_action_allowed).
    await self.assert_action_allowed("delete", "vector_db", vector_store_id)
    backend = await self.get_provider_impl(vector_store_id)
    deletion = await backend.openai_delete_vector_store_chunk(
        vector_store_id=vector_store_id,
        file_id=file_id,
        chunk_id=chunk_id,
    )
    return deletion
|
||||
|
||||
async def openai_list_vector_store_chunks(
    self,
    vector_store_id: str,
    file_id: str,
    limit: int | None = 20,
    order: str | None = "desc",
    after: str | None = None,
    before: str | None = None,
) -> VectorStoreListChunksResponse:
    """Check read access, resolve the provider for the store, and list a file's chunks.

    :param vector_store_id: ID of the vector store; also used to pick the provider.
    :param file_id: ID of the file whose chunks are listed.
    :param limit: Page size, forwarded to the provider unchanged.
    :param order: Sort direction, forwarded to the provider unchanged.
    :param after: Pagination cursor, forwarded to the provider unchanged.
    :param before: Pagination cursor, forwarded to the provider unchanged.
    :returns: The list response produced by the provider.
    """
    # Access check runs before any provider lookup
    # (presumably raises when "read" is not allowed -- confirm in assert_action_allowed).
    await self.assert_action_allowed("read", "vector_db", vector_store_id)
    backend = await self.get_provider_impl(vector_store_id)
    page = await backend.openai_list_vector_store_chunks(
        vector_store_id=vector_store_id,
        file_id=file_id,
        limit=limit,
        order=order,
        after=after,
        before=before,
    )
    return page
|
||||
|
|
|
@ -15,14 +15,17 @@ from typing import Any
|
|||
|
||||
from llama_stack.apis.common.errors import VectorStoreNotFoundError
|
||||
from llama_stack.apis.files import Files, OpenAIFileObject
|
||||
from llama_stack.apis.inference import InterleavedContent
|
||||
from llama_stack.apis.vector_dbs import VectorDB
|
||||
from llama_stack.apis.vector_io import (
|
||||
Chunk,
|
||||
QueryChunksResponse,
|
||||
SearchRankingOptions,
|
||||
VectorStoreChunkDeleteResponse,
|
||||
VectorStoreChunkingStrategy,
|
||||
VectorStoreChunkingStrategyAuto,
|
||||
VectorStoreChunkingStrategyStatic,
|
||||
VectorStoreChunkObject,
|
||||
VectorStoreContent,
|
||||
VectorStoreDeleteResponse,
|
||||
VectorStoreFileContentsResponse,
|
||||
|
@ -31,6 +34,7 @@ from llama_stack.apis.vector_io import (
|
|||
VectorStoreFileLastError,
|
||||
VectorStoreFileObject,
|
||||
VectorStoreFileStatus,
|
||||
VectorStoreListChunksResponse,
|
||||
VectorStoreListFilesResponse,
|
||||
VectorStoreListResponse,
|
||||
VectorStoreObject,
|
||||
|
@ -109,7 +113,14 @@ class OpenAIVectorStoreMixin(ABC):
|
|||
assert self.kvstore
|
||||
meta_key = f"{OPENAI_VECTOR_STORES_FILES_PREFIX}{store_id}:{file_id}"
|
||||
await self.kvstore.set(key=meta_key, value=json.dumps(file_info))
|
||||
|
||||
# delete old file data to properly update content
|
||||
contents_prefix = f"{OPENAI_VECTOR_STORES_FILES_CONTENTS_PREFIX}{store_id}:{file_id}:"
|
||||
end_key = f"{contents_prefix}\xff"
|
||||
old_keys = await self.kvstore.keys_in_range(contents_prefix, end_key)
|
||||
for old_key in old_keys:
|
||||
await self.kvstore.delete(old_key)
|
||||
|
||||
for idx, chunk in enumerate(file_contents):
|
||||
await self.kvstore.set(key=f"{contents_prefix}{idx}", value=json.dumps(chunk))
|
||||
|
||||
|
@ -791,3 +802,233 @@ class OpenAIVectorStoreMixin(ABC):
|
|||
id=file_id,
|
||||
deleted=True,
|
||||
)
|
||||
|
||||
async def openai_retrieve_vector_store_chunk(
    self,
    vector_store_id: str,
    file_id: str,
    chunk_id: str,
) -> VectorStoreChunkObject:
    """Return one chunk of a vector store file, read from the stored file contents.

    :param vector_store_id: ID of the vector store that holds the file.
    :param file_id: ID of the file that holds the chunk.
    :param chunk_id: ID of the chunk to return.
    :returns: The chunk's content, metadata and embedding; ``created_at`` is taken
        from the file record (current time when the record has none).
    :raises VectorStoreNotFoundError: If the vector store is unknown.
    :raises ValueError: If the file is not in the store or the chunk is not in the file.
    """
    if vector_store_id not in self.openai_vector_stores:
        raise VectorStoreNotFoundError(vector_store_id)

    attached_file_ids = self.openai_vector_stores[vector_store_id]["file_ids"]
    if file_id not in attached_file_ids:
        raise ValueError(f"File {file_id} not found in vector store {vector_store_id}")

    raw_chunks = await self._load_openai_vector_store_file_contents(vector_store_id, file_id)
    # Every stored entry is validated up front, so a malformed entry fails the
    # request even when it is not the chunk being asked for.
    parsed_chunks = [Chunk.model_validate(raw) for raw in raw_chunks]

    # First chunk with a matching ID wins.
    match = next((candidate for candidate in parsed_chunks if candidate.chunk_id == chunk_id), None)
    if match is None:
        raise ValueError(f"Chunk {chunk_id} not found in file {file_id}")

    file_info = await self._load_openai_vector_store_file(vector_store_id, file_id)
    created_at = file_info.get("created_at", int(time.time()))

    return VectorStoreChunkObject(
        id=chunk_id,
        created_at=created_at,
        vector_store_id=vector_store_id,
        file_id=file_id,
        content=match.content,
        metadata=match.metadata,
        embedding=match.embedding,
    )
|
||||
|
||||
async def openai_update_vector_store_chunk(
    self,
    vector_store_id: str,
    file_id: str,
    chunk_id: str,
    content: InterleavedContent | None = None,
    metadata: dict[str, Any] | None = None,
) -> VectorStoreChunkObject:
    """Update the content and/or metadata of one chunk in a vector store file.

    The stored file contents are always rewritten with the updated chunk. The
    vector index is only touched when ``content`` is given: the old chunk is
    deleted and the updated chunk is inserted. A metadata-only update leaves
    the index untouched.

    :param vector_store_id: ID of the vector store that holds the file.
    :param file_id: ID of the file that holds the chunk.
    :param chunk_id: ID of the chunk to update.
    :param content: New content; ``None`` keeps the current content.
    :param metadata: Keys to merge into the chunk's metadata; ``None`` keeps it as is.
    :returns: The chunk after the update; ``created_at`` is taken from the file
        record (current time when the record has none).
    :raises VectorStoreNotFoundError: If the vector store is unknown.
    :raises ValueError: If the file is not in the store or the chunk is not in the file.
    """
    if vector_store_id not in self.openai_vector_stores:
        raise VectorStoreNotFoundError(vector_store_id)

    store_info = self.openai_vector_stores[vector_store_id]
    if file_id not in store_info["file_ids"]:
        raise ValueError(f"File {file_id} not found in vector store {vector_store_id}")

    dict_chunks = await self._load_openai_vector_store_file_contents(vector_store_id, file_id)
    chunks = [Chunk.model_validate(c) for c in dict_chunks]

    target_chunk = next((c for c in chunks if c.chunk_id == chunk_id), None)
    if target_chunk is None:
        raise ValueError(f"Chunk {chunk_id} not found in file {file_id}")

    # Merge metadata BEFORE re-indexing: when content and metadata are updated
    # together, the chunk written to the vector index must already carry the
    # new metadata, otherwise the index and the stored file contents disagree.
    if metadata is not None:
        target_chunk.metadata.update(metadata)

    if content is not None:
        target_chunk.content = content
        # NOTE(review): target_chunk.embedding is still the value stored for the
        # previous content and is re-inserted as-is -- confirm whether it should
        # be recomputed for the new content.
        # Replace the indexed chunk: remove the old entry, then insert the new one.
        await self.delete_chunks(vector_store_id, [chunk_id])
        await self.insert_chunks(vector_store_id, [target_chunk])

    # target_chunk was mutated in place, so `chunks` already reflects the update.
    dict_chunks = [c.model_dump() for c in chunks]
    file_info = await self._load_openai_vector_store_file(vector_store_id, file_id)
    await self._save_openai_vector_store_file(vector_store_id, file_id, file_info, dict_chunks)

    return VectorStoreChunkObject(
        id=chunk_id,
        created_at=file_info.get("created_at", int(time.time())),
        vector_store_id=vector_store_id,
        file_id=file_id,
        content=target_chunk.content,
        metadata=target_chunk.metadata,
        embedding=target_chunk.embedding,
    )
|
||||
|
||||
async def openai_delete_vector_store_chunk(
    self,
    vector_store_id: str,
    file_id: str,
    chunk_id: str,
) -> VectorStoreChunkDeleteResponse:
    """Remove one chunk from the vector index and from the stored file contents.

    :param vector_store_id: ID of the vector store that holds the file.
    :param file_id: ID of the file that holds the chunk.
    :param chunk_id: ID of the chunk to delete.
    :returns: A delete response with the chunk ID and ``deleted=True``.
    :raises VectorStoreNotFoundError: If the vector store is unknown.
    :raises ValueError: If the file is not in the store or the chunk is not in the file.
    """
    if vector_store_id not in self.openai_vector_stores:
        raise VectorStoreNotFoundError(vector_store_id)

    attached_file_ids = self.openai_vector_stores[vector_store_id]["file_ids"]
    if file_id not in attached_file_ids:
        raise ValueError(f"File {file_id} not found in vector store {vector_store_id}")

    raw_chunks = await self._load_openai_vector_store_file_contents(vector_store_id, file_id)
    # Every stored entry is validated before searching, so a malformed entry
    # fails the request even when it is not the chunk being deleted.
    parsed_chunks = [Chunk.model_validate(raw) for raw in raw_chunks]

    # Position of the first chunk with a matching ID, or None.
    position = next(
        (idx for idx, candidate in enumerate(parsed_chunks) if candidate.chunk_id == chunk_id),
        None,
    )
    if position is None:
        raise ValueError(f"Chunk {chunk_id} not found in file {file_id}")

    # Index first, stored file contents second.
    await self.delete_chunks(vector_store_id, [chunk_id])

    del raw_chunks[position]
    file_info = await self._load_openai_vector_store_file(vector_store_id, file_id)
    await self._save_openai_vector_store_file(vector_store_id, file_id, file_info, raw_chunks)

    return VectorStoreChunkDeleteResponse(id=chunk_id, deleted=True)
|
||||
|
||||
async def openai_list_vector_store_chunks(
    self,
    vector_store_id: str,
    file_id: str,
    limit: int | None = 20,
    order: str | None = "desc",
    after: str | None = None,
    before: str | None = None,
) -> VectorStoreListChunksResponse:
    """List the chunks of a vector store file, one page at a time.

    The stored file contents define which chunks exist and in what order, so
    repeated calls page over a stable sequence. For each chunk the copy
    returned by the vector index is preferred when the index query returns
    it; otherwise the stored copy is used. If the index query fails, all
    stored chunks are listed instead.

    :param vector_store_id: ID of the vector store that holds the file.
    :param file_id: ID of the file whose chunks are listed.
    :param limit: Maximum number of chunks to return; ``None`` or ``0`` means no limit.
    :param order: ``"desc"`` sorts by ``created_at`` descending, anything else
        ascending. Every chunk reports the file's ``created_at`` and the sort is
        stable, so this does not currently change the relative order.
    :param after: Start right after the chunk with this ID; ignored if not found.
    :param before: Stop right before the chunk with this ID; ignored if not found.
    :returns: The page of chunks with ``first_id``, ``last_id`` and ``has_more``.
    :raises VectorStoreNotFoundError: If the vector store is unknown.
    :raises ValueError: If the file is not in the store.
    """
    if vector_store_id not in self.openai_vector_stores:
        raise VectorStoreNotFoundError(vector_store_id)

    store_info = self.openai_vector_stores[vector_store_id]
    if file_id not in store_info["file_ids"]:
        raise ValueError(f"File {file_id} not found in vector store {vector_store_id}")

    dict_chunks = await self._load_openai_vector_store_file_contents(vector_store_id, file_id)
    file_info = await self._load_openai_vector_store_file(vector_store_id, file_id)
    # Resolved once so every chunk on the page reports the same timestamp.
    created_at = file_info.get("created_at", int(time.time()))

    # Validate the stored chunks a single time; the result is reused below.
    stored_chunks = [Chunk.model_validate(c) for c in dict_chunks]
    listed_chunks = [c for c in stored_chunks if c.chunk_id]

    # TODO: Add abstract method query_all_chunks() to properly filter by file_id and vector_db_id, this is a temporary hack
    chunks = []
    if listed_chunks:
        try:
            file_filter = {"type": "eq", "key": "file_id", "value": file_id}

            query_result = await self.query_chunks(
                vector_db_id=vector_store_id,
                query="*",  # wildcard query to get all chunks
                params={
                    "max_chunks": len(listed_chunks) * 2,
                    "score_threshold": 0.0,
                    "filters": file_filter,
                },
            )

            # The query is a similarity search, so it may omit chunks and its
            # ranking is not a stable listing order. Keep the stored order and
            # only swap in the indexed copy of each chunk when one came back.
            indexed_by_id = {c.chunk_id: c for c in query_result.chunks}
            chunks = [indexed_by_id.get(c.chunk_id, c) for c in listed_chunks]
        except Exception as e:
            logger.warning(f"Failed to query vector database for chunks: {e}")
            # Fallback to KV store chunks if vector DB query fails
            chunks = stored_chunks

    chunk_objects = [
        VectorStoreChunkObject(
            id=chunk.chunk_id,
            created_at=created_at,
            vector_store_id=vector_store_id,
            file_id=file_id,
            content=chunk.content,
            metadata=chunk.metadata,
            embedding=chunk.embedding,
        )
        for chunk in chunks
    ]

    chunk_objects.sort(key=lambda x: x.created_at, reverse=(order == "desc"))

    start_idx = 0
    end_idx = len(chunk_objects)

    if after:
        # Page starts right after the 'after' chunk.
        for i, chunk_obj in enumerate(chunk_objects):
            if chunk_obj.id == after:
                start_idx = i + 1
                break

    if before:
        # Page ends right before the 'before' chunk.
        for i, chunk_obj in enumerate(chunk_objects):
            if chunk_obj.id == before:
                end_idx = i
                break

    if limit and end_idx - start_idx > limit:
        end_idx = start_idx + limit

    paginated_chunks = chunk_objects[start_idx:end_idx]

    first_id = paginated_chunks[0].id if paginated_chunks else None
    last_id = paginated_chunks[-1].id if paginated_chunks else None
    has_more = end_idx < len(chunk_objects)

    return VectorStoreListChunksResponse(
        data=paginated_chunks,
        first_id=first_id,
        last_id=last_id,
        has_more=has_more,
    )
|
||||
|
|
|
@ -1,9 +1,11 @@
|
|||
"use client";
|
||||
|
||||
import { useRouter } from "next/navigation";
|
||||
import type { VectorStore } from "llama-stack-client/resources/vector-stores/vector-stores";
|
||||
import type { VectorStoreFile } from "llama-stack-client/resources/vector-stores/files";
|
||||
import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card";
|
||||
import { Skeleton } from "@/components/ui/skeleton";
|
||||
import { Button } from "@/components/ui/button";
|
||||
import {
|
||||
DetailLoadingView,
|
||||
DetailErrorView,
|
||||
|
@ -42,6 +44,11 @@ export function VectorStoreDetailView({
|
|||
id,
|
||||
}: VectorStoreDetailViewProps) {
|
||||
const title = "Vector Store Details";
|
||||
const router = useRouter();
|
||||
|
||||
// Navigate to the detail page of the given file within this vector store.
const handleFileClick = (fileId: string) => {
  const fileDetailPath = `/logs/vector-stores/${id}/files/${fileId}`;
  router.push(fileDetailPath);
};
|
||||
|
||||
if (errorStore) {
|
||||
return <DetailErrorView title={title} id={id} error={errorStore} />;
|
||||
|
@ -80,7 +87,15 @@ export function VectorStoreDetailView({
|
|||
<TableBody>
|
||||
{files.map((file) => (
|
||||
<TableRow key={file.id}>
|
||||
<TableCell>{file.id}</TableCell>
|
||||
<TableCell>
|
||||
<Button
|
||||
variant="link"
|
||||
className="p-0 h-auto font-mono text-blue-600 hover:text-blue-800 dark:text-blue-400 dark:hover:text-blue-300"
|
||||
onClick={() => handleFileClick(file.id)}
|
||||
>
|
||||
{file.id}
|
||||
</Button>
|
||||
</TableCell>
|
||||
<TableCell>{file.status}</TableCell>
|
||||
<TableCell>
|
||||
{new Date(file.created_at * 1000).toLocaleString()}
|
||||
|
|
|
@ -11,6 +11,7 @@ from unittest.mock import AsyncMock
|
|||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from llama_stack.apis.common.errors import VectorStoreNotFoundError
|
||||
from llama_stack.apis.vector_dbs import VectorDB
|
||||
from llama_stack.apis.vector_io import Chunk, QueryChunksResponse
|
||||
from llama_stack.providers.remote.vector_io.milvus.milvus import VECTOR_DBS_PREFIX
|
||||
|
@ -294,3 +295,509 @@ async def test_delete_openai_vector_store_file_from_storage(vector_io_adapter, t
|
|||
assert loaded_file_info == {}
|
||||
loaded_contents = await vector_io_adapter._load_openai_vector_store_file_contents(store_id, file_id)
|
||||
assert loaded_contents == []
|
||||
|
||||
|
||||
async def test_openai_retrieve_vector_store_chunk(vector_io_adapter):
    """Retrieving an existing chunk returns its IDs, object type, content and metadata."""
    store_id, file_id, chunk_id = "vs_1234", "file_1234", "chunk_001"

    # Register the store directly in the adapter's in-memory registry.
    vector_io_adapter.openai_vector_stores[store_id] = {
        "id": store_id,
        "file_ids": [file_id],
        "created_at": int(time.time()),
    }

    file_record = {
        "id": file_id,
        "status": "completed",
        "vector_store_id": store_id,
        "filename": "test_file.txt",
        "created_at": int(time.time()),
    }

    # Two stored chunks; only the first one is requested below.
    seeded = [(chunk_id, "First chunk content"), ("chunk_002", "Second chunk content")]
    stored_chunks = [
        {
            "content": text,
            "stored_chunk_id": cid,
            "metadata": {"file_id": file_id, "position": position},
            "chunk_metadata": {"chunk_id": cid},
        }
        for position, (cid, text) in enumerate(seeded)
    ]

    await vector_io_adapter._save_openai_vector_store_file(store_id, file_id, file_record, stored_chunks)

    retrieved = await vector_io_adapter.openai_retrieve_vector_store_chunk(
        vector_store_id=store_id,
        file_id=file_id,
        chunk_id=chunk_id,
    )

    # Identity fields.
    assert retrieved.id == chunk_id
    assert retrieved.vector_store_id == store_id
    assert retrieved.file_id == file_id
    assert retrieved.object == "vector_store.file.chunk"

    # Content comes back as a non-empty list whose first item is the text.
    assert len(retrieved.content) > 0
    first_item = retrieved.content[0]
    assert first_item.type == "text"
    assert first_item.text == "First chunk content"

    # Metadata of the first stored chunk.
    assert retrieved.metadata["file_id"] == file_id
    assert retrieved.metadata["position"] == 0
|
||||
|
||||
|
||||
async def test_openai_retrieve_vector_store_chunk_not_found(vector_io_adapter):
    """Retrieving a chunk ID that the file does not contain raises ValueError."""
    store_id, file_id, chunk_id = "vs_1234", "file_1234", "nonexistent_chunk"

    vector_io_adapter.openai_vector_stores[store_id] = {
        "id": store_id,
        "file_ids": [file_id],
        "created_at": int(time.time()),
    }

    # The file exists but has no chunks at all.
    file_record = {"id": file_id, "created_at": int(time.time())}
    await vector_io_adapter._save_openai_vector_store_file(store_id, file_id, file_record, [])

    lookup = dict(vector_store_id=store_id, file_id=file_id, chunk_id=chunk_id)
    with pytest.raises(ValueError, match="Chunk nonexistent_chunk not found"):
        await vector_io_adapter.openai_retrieve_vector_store_chunk(**lookup)
|
||||
|
||||
|
||||
async def test_openai_update_vector_store_chunk_metadata_only(vector_io_adapter):
    """A metadata-only update changes metadata, keeps content, and never touches the index."""
    store_id, file_id, chunk_id = "vs_1234", "file_1234", "chunk_001"

    vector_io_adapter.openai_vector_stores[store_id] = {
        "id": store_id,
        "file_ids": [file_id],
        "created_at": int(time.time()),
    }

    file_record = {
        "id": file_id,
        "status": "completed",
        "vector_store_id": store_id,
        "filename": "test_file.txt",
        "created_at": int(time.time()),
    }

    original_content = "Original chunk content"
    seeded_chunk = {
        "content": original_content,
        "stored_chunk_id": chunk_id,
        "metadata": {"file_id": file_id, "version": 1},
        "chunk_metadata": {"chunk_id": chunk_id},
    }

    await vector_io_adapter._save_openai_vector_store_file(store_id, file_id, file_record, [seeded_chunk])

    # Replace the index operations with mocks so their (non-)use can be checked.
    vector_io_adapter.delete_chunks = AsyncMock()
    vector_io_adapter.insert_chunks = AsyncMock()

    result = await vector_io_adapter.openai_update_vector_store_chunk(
        vector_store_id=store_id,
        file_id=file_id,
        chunk_id=chunk_id,
        metadata={"file_id": file_id, "version": 2, "updated": True},
    )

    # No content change means no re-indexing.
    vector_io_adapter.delete_chunks.assert_not_called()
    vector_io_adapter.insert_chunks.assert_not_called()

    assert result.id == chunk_id
    assert result.metadata["version"] == 2
    assert result.metadata["updated"] is True
    assert result.content[0].text == original_content
|
||||
|
||||
|
||||
async def test_openai_update_vector_store_chunk_content(vector_io_adapter):
    """A content update replaces the text and re-indexes the chunk exactly once."""
    store_id, file_id, chunk_id = "vs_1234", "file_1234", "chunk_001"

    vector_io_adapter.openai_vector_stores[store_id] = {
        "id": store_id,
        "file_ids": [file_id],
        "created_at": int(time.time()),
    }

    file_record = {
        "id": file_id,
        "status": "completed",
        "vector_store_id": store_id,
        "filename": "test_file.txt",
        "created_at": int(time.time()),
    }

    seeded_chunk = {
        "content": "Original chunk content",
        "stored_chunk_id": chunk_id,
        "metadata": {"file_id": file_id},
        "chunk_metadata": {"chunk_id": chunk_id},
    }

    await vector_io_adapter._save_openai_vector_store_file(store_id, file_id, file_record, [seeded_chunk])

    # Replace the index operations with mocks so the calls can be inspected.
    vector_io_adapter.delete_chunks = AsyncMock()
    vector_io_adapter.insert_chunks = AsyncMock()

    new_content = "Updated chunk content"
    result = await vector_io_adapter.openai_update_vector_store_chunk(
        vector_store_id=store_id,
        file_id=file_id,
        chunk_id=chunk_id,
        content=new_content,
    )

    # Old entry removed by ID, new entry inserted once.
    vector_io_adapter.delete_chunks.assert_awaited_once_with(store_id, [chunk_id])
    vector_io_adapter.insert_chunks.assert_awaited_once()

    assert result.id == chunk_id
    assert result.content[0].text == new_content
|
||||
|
||||
|
||||
async def test_openai_update_vector_store_chunk_both_content_and_metadata(vector_io_adapter):
    """Updating content and metadata together applies both and re-indexes exactly once."""
    store_id, file_id, chunk_id = "vs_1234", "file_1234", "chunk_001"

    vector_io_adapter.openai_vector_stores[store_id] = {
        "id": store_id,
        "file_ids": [file_id],
        "created_at": int(time.time()),
    }

    file_record = {
        "id": file_id,
        "status": "completed",
        "vector_store_id": store_id,
        "filename": "test_file.txt",
        "created_at": int(time.time()),
    }

    seeded_chunk = {
        "content": "Original chunk content",
        "stored_chunk_id": chunk_id,
        "metadata": {"file_id": file_id, "version": 1},
        "chunk_metadata": {"chunk_id": chunk_id},
    }

    await vector_io_adapter._save_openai_vector_store_file(store_id, file_id, file_record, [seeded_chunk])

    # Replace the index operations with mocks so the calls can be inspected.
    vector_io_adapter.delete_chunks = AsyncMock()
    vector_io_adapter.insert_chunks = AsyncMock()

    new_content = "Updated chunk content"
    result = await vector_io_adapter.openai_update_vector_store_chunk(
        vector_store_id=store_id,
        file_id=file_id,
        chunk_id=chunk_id,
        content=new_content,
        metadata={"file_id": file_id, "version": 2, "updated": True},
    )

    # The content change triggers one delete and one insert.
    vector_io_adapter.delete_chunks.assert_awaited_once_with(store_id, [chunk_id])
    vector_io_adapter.insert_chunks.assert_awaited_once()

    assert result.id == chunk_id
    assert result.content[0].text == new_content
    assert result.metadata["version"] == 2
    assert result.metadata["updated"] is True
|
||||
|
||||
|
||||
async def test_openai_delete_vector_store_chunk(vector_io_adapter):
    """Deleting one of two chunks removes it from the index and from the stored contents."""
    store_id, file_id = "vs_1234", "file_1234"
    chunk_id_to_delete = "chunk_001"
    chunk_id_to_keep = "chunk_002"

    vector_io_adapter.openai_vector_stores[store_id] = {
        "id": store_id,
        "file_ids": [file_id],
        "created_at": int(time.time()),
    }

    file_record = {
        "id": file_id,
        "status": "completed",
        "vector_store_id": store_id,
        "filename": "test_file.txt",
        "created_at": int(time.time()),
    }

    seeded = [(chunk_id_to_delete, "First chunk content"), (chunk_id_to_keep, "Second chunk content")]
    stored_chunks = [
        {
            "content": text,
            "stored_chunk_id": cid,
            "metadata": {"file_id": file_id, "position": position},
            "chunk_metadata": {"chunk_id": cid},
        }
        for position, (cid, text) in enumerate(seeded)
    ]

    await vector_io_adapter._save_openai_vector_store_file(store_id, file_id, file_record, stored_chunks)

    # Replace the index deletion with a mock so the call can be inspected.
    vector_io_adapter.delete_chunks = AsyncMock()

    outcome = await vector_io_adapter.openai_delete_vector_store_chunk(
        vector_store_id=store_id,
        file_id=file_id,
        chunk_id=chunk_id_to_delete,
    )

    vector_io_adapter.delete_chunks.assert_awaited_once_with(store_id, [chunk_id_to_delete])

    assert outcome.id == chunk_id_to_delete
    assert outcome.object == "vector_store.file.chunk.deleted"
    assert outcome.deleted is True

    # Only the other chunk is left in the stored file contents.
    leftover = await vector_io_adapter._load_openai_vector_store_file_contents(store_id, file_id)
    assert len(leftover) == 1
    assert leftover[0]["stored_chunk_id"] == chunk_id_to_keep
|
||||
|
||||
|
||||
async def test_openai_delete_vector_store_chunk_not_found(vector_io_adapter):
    """Deleting a chunk ID that the file does not contain raises ValueError."""
    store_id, file_id, chunk_id = "vs_1234", "file_1234", "nonexistent_chunk"

    vector_io_adapter.openai_vector_stores[store_id] = {
        "id": store_id,
        "file_ids": [file_id],
        "created_at": int(time.time()),
    }

    # The file exists but has no chunks at all.
    file_record = {"id": file_id, "created_at": int(time.time())}
    await vector_io_adapter._save_openai_vector_store_file(store_id, file_id, file_record, [])

    target = dict(vector_store_id=store_id, file_id=file_id, chunk_id=chunk_id)
    with pytest.raises(ValueError, match="Chunk nonexistent_chunk not found"):
        await vector_io_adapter.openai_delete_vector_store_chunk(**target)
|
||||
|
||||
|
||||
async def test_chunk_operations_with_nonexistent_vector_store(vector_io_adapter):
    """Retrieve, update and delete all raise VectorStoreNotFoundError for an unknown store."""
    store_id, file_id, chunk_id = "nonexistent_store", "file_1234", "chunk_001"

    # Identifiers shared by all three calls; the store was never registered.
    ids = dict(vector_store_id=store_id, file_id=file_id, chunk_id=chunk_id)

    with pytest.raises(VectorStoreNotFoundError):
        await vector_io_adapter.openai_retrieve_vector_store_chunk(**ids)

    with pytest.raises(VectorStoreNotFoundError):
        await vector_io_adapter.openai_update_vector_store_chunk(**ids, metadata={"test": "value"})

    with pytest.raises(VectorStoreNotFoundError):
        await vector_io_adapter.openai_delete_vector_store_chunk(**ids)
|
||||
|
||||
|
||||
async def test_chunk_operations_with_nonexistent_file(vector_io_adapter):
    """Retrieve, update, delete and list all raise ValueError for a file the store lacks."""
    store_id, file_id, chunk_id = "vs_1234", "nonexistent_file", "chunk_001"

    # The store exists but has no files attached.
    vector_io_adapter.openai_vector_stores[store_id] = {
        "id": store_id,
        "file_ids": [],
        "created_at": int(time.time()),
    }

    expected_error = f"File {file_id} not found in vector store"
    ids = dict(vector_store_id=store_id, file_id=file_id, chunk_id=chunk_id)

    with pytest.raises(ValueError, match=expected_error):
        await vector_io_adapter.openai_retrieve_vector_store_chunk(**ids)

    with pytest.raises(ValueError, match=expected_error):
        await vector_io_adapter.openai_update_vector_store_chunk(**ids, metadata={"test": "value"})

    with pytest.raises(ValueError, match=expected_error):
        await vector_io_adapter.openai_delete_vector_store_chunk(**ids)

    with pytest.raises(ValueError, match=expected_error):
        await vector_io_adapter.openai_list_vector_store_chunks(vector_store_id=store_id, file_id=file_id)
|
||||
|
||||
|
||||
async def test_openai_list_vector_store_chunks(vector_io_adapter):
    """Listing a three-chunk file: full page, limit, both sort orders, and the after cursor."""
    store_id = "test_store_123"
    await vector_io_adapter.openai_create_vector_store(
        vector_store_id=store_id,
        name="Test Store",
        embedding_model="test_model",
        embedding_dimension=512,
    )

    test_content = "This is test content for chunk listing."
    test_metadata = {"source": "test_file", "chunk_number": 1}
    test_embedding = [0.1] * 512

    # (content suffix, metadata chunk number, embedding, chunk ID) per chunk.
    chunk_specs = [
        (" First chunk.", 1, test_embedding, "chunk_1"),
        (" Second chunk.", 2, [0.2] * 512, "chunk_2"),
        (" Third chunk.", 3, [0.3] * 512, "chunk_3"),
    ]
    seeded_chunks = [
        Chunk(
            content=test_content + suffix,
            metadata={**test_metadata, "chunk_id": number},
            embedding=embedding,
            chunk_id=cid,
        )
        for suffix, number, embedding, cid in chunk_specs
    ]

    # Put the chunks into the vector index ...
    await vector_io_adapter.insert_chunks(store_id, seeded_chunks)

    # ... and into the stored file contents, then attach the file to the store.
    file_id = "test_file_456"
    file_record = {
        "id": file_id,
        "object": "vector_store.file",
        "created_at": int(time.time()),
        "vector_store_id": store_id,
        "status": "completed",
        "usage_bytes": 1024,
        "chunking_strategy": {"type": "static", "static": {"max_chunk_size_tokens": 800, "chunk_overlap_tokens": 400}},
        "filename": "test_file.txt",
    }
    serialized_chunks = [chunk.model_dump() for chunk in seeded_chunks]
    await vector_io_adapter._save_openai_vector_store_file(store_id, file_id, file_record, serialized_chunks)
    vector_io_adapter.openai_vector_stores[store_id]["file_ids"].append(file_id)

    # Default listing returns all three chunks in one page.
    response = await vector_io_adapter.openai_list_vector_store_chunks(vector_store_id=store_id, file_id=file_id)

    assert response.object == "list"
    assert len(response.data) == 3
    assert response.has_more is False
    assert response.first_id is not None
    assert response.last_id is not None

    listed_ids = [item.id for item in response.data]
    assert "chunk_1" in listed_ids
    assert "chunk_2" in listed_ids
    assert "chunk_3" in listed_ids

    for item in response.data:
        assert item.embedding is not None
        assert len(item.embedding) == 512
        assert item.vector_store_id == store_id
        assert item.file_id == file_id

    # A smaller limit truncates the page and reports that more is available.
    limited_response = await vector_io_adapter.openai_list_vector_store_chunks(
        vector_store_id=store_id,
        file_id=file_id,
        limit=2,
    )
    assert len(limited_response.data) == 2
    assert limited_response.has_more is True

    # Both sort directions still return every chunk.
    desc_response = await vector_io_adapter.openai_list_vector_store_chunks(
        vector_store_id=store_id,
        file_id=file_id,
        order="desc",
    )
    assert len(desc_response.data) == 3

    asc_response = await vector_io_adapter.openai_list_vector_store_chunks(
        vector_store_id=store_id,
        file_id=file_id,
        order="asc",
    )
    assert len(asc_response.data) == 3

    # The 'after' cursor excludes the cursor chunk itself.
    first_chunk_id = response.data[0].id
    after_response = await vector_io_adapter.openai_list_vector_store_chunks(
        vector_store_id=store_id,
        file_id=file_id,
        after=first_chunk_id,
    )
    assert len(after_response.data) <= 2
    ids_after_cursor = [item.id for item in after_response.data]
    assert first_chunk_id not in ids_after_cursor
|
||||
|
||||
|
||||
async def test_openai_list_vector_store_chunks_empty_file(vector_io_adapter):
    """A file stored with zero chunks lists as an empty, cursor-less page."""
    store_id = "test_store_empty"
    file_id = "empty_file"

    # Create the store that will own the (chunk-less) file.
    await vector_io_adapter.openai_create_vector_store(
        vector_store_id=store_id,
        name="Test Store",
        embedding_model="test_model",
        embedding_dimension=512,
    )

    static_settings = {"max_chunk_size_tokens": 800, "chunk_overlap_tokens": 400}
    file_info = {
        "id": file_id,
        "object": "vector_store.file",
        "created_at": int(time.time()),
        "vector_store_id": store_id,
        "status": "completed",
        "usage_bytes": 0,
        "chunking_strategy": {"type": "static", "static": static_settings},
        "filename": "empty_file.txt",
    }

    # Persist the file record with an empty chunk list, then register the
    # file id on the in-memory store entry so the listing call can find it.
    no_chunks = []
    await vector_io_adapter._save_openai_vector_store_file(store_id, file_id, file_info, no_chunks)
    vector_io_adapter.openai_vector_stores[store_id]["file_ids"].append(file_id)

    page = await vector_io_adapter.openai_list_vector_store_chunks(
        vector_store_id=store_id,
        file_id=file_id,
    )

    # An empty listing is still a well-formed list object with no cursors.
    assert page.object == "list"
    assert len(page.data) == 0
    assert page.has_more is False
    assert page.first_id is None
    assert page.last_id is None
|
||||
|
||||
|
||||
async def test_openai_list_vector_store_chunks_nonexistent_resources(vector_io_adapter):
    """Listing chunks is expected to fail for an unknown store or an unknown file.

    An unknown vector store id should raise VectorStoreNotFoundError; an
    unknown file id inside an existing store should raise ValueError with a
    "not found in vector store" message.
    """
    # Case 1: the vector store itself does not exist.
    with pytest.raises(VectorStoreNotFoundError):
        await vector_io_adapter.openai_list_vector_store_chunks(
            vector_store_id="nonexistent_store",
            file_id="any_file",
        )

    # Case 2: the store exists, but the requested file was never added to it.
    existing_store = "test_store_list"
    await vector_io_adapter.openai_create_vector_store(
        vector_store_id=existing_store,
        name="Test Store",
        embedding_model="test_model",
        embedding_dimension=512,
    )

    expected_message = "File nonexistent_file not found in vector store"
    with pytest.raises(ValueError, match=expected_message):
        await vector_io_adapter.openai_list_vector_store_chunks(
            vector_store_id=existing_store,
            file_id="nonexistent_file",
        )
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue