feat: Enable ingestion of precomputed embeddings (#2317)

This commit is contained in:
Francisco Arceo 2025-05-31 04:03:37 -06:00 committed by GitHub
parent 31ce208bda
commit f328436831
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 366 additions and 15 deletions

View file

@ -10020,7 +10020,8 @@
"type": "object",
"properties": {
"content": {
"$ref": "#/components/schemas/InterleavedContent"
"$ref": "#/components/schemas/InterleavedContent",
"description": "The content of the chunk, which can be interleaved text, images, or other types."
},
"metadata": {
"type": "object",
@ -10045,7 +10046,15 @@
"type": "object"
}
]
}
},
"description": "Metadata associated with the chunk, such as document ID, source, or other relevant information."
},
"embedding": {
"type": "array",
"items": {
"type": "number"
},
"description": "Optional embedding for the chunk. If not provided, it will be computed later."
}
},
"additionalProperties": false,
@ -10053,9 +10062,10 @@
"content",
"metadata"
],
"title": "Chunk"
"title": "Chunk",
"description": "A chunk of content that can be inserted into a vector database."
},
"description": "The chunks to insert."
"description": "The chunks to insert. Each `Chunk` should contain content, which can be interleaved text, images, or other types. `metadata`: `dict[str, Any]` and `embedding`: `List[float]` are optional. If `metadata` is provided, it configures how Llama Stack formats the chunk during generation. If `embedding` is not provided, it will be computed later."
},
"ttl_seconds": {
"type": "integer",
@ -12285,7 +12295,8 @@
"type": "object",
"properties": {
"content": {
"$ref": "#/components/schemas/InterleavedContent"
"$ref": "#/components/schemas/InterleavedContent",
"description": "The content of the chunk, which can be interleaved text, images, or other types."
},
"metadata": {
"type": "object",
@ -12310,7 +12321,15 @@
"type": "object"
}
]
}
},
"description": "Metadata associated with the chunk, such as document ID, source, or other relevant information."
},
"embedding": {
"type": "array",
"items": {
"type": "number"
},
"description": "Optional embedding for the chunk. If not provided, it will be computed later."
}
},
"additionalProperties": false,
@ -12318,7 +12337,8 @@
"content",
"metadata"
],
"title": "Chunk"
"title": "Chunk",
"description": "A chunk of content that can be inserted into a vector database."
}
},
"scores": {

View file

@ -7024,6 +7024,9 @@ components:
properties:
content:
$ref: '#/components/schemas/InterleavedContent'
description: >-
The content of the chunk, which can be interleaved text, images,
or other types.
metadata:
type: object
additionalProperties:
@ -7034,12 +7037,29 @@ components:
- type: string
- type: array
- type: object
description: >-
Metadata associated with the chunk, such as document ID, source,
or other relevant information.
embedding:
type: array
items:
type: number
description: >-
Optional embedding for the chunk. If not provided, it will be computed
later.
additionalProperties: false
required:
- content
- metadata
title: Chunk
description: The chunks to insert.
description: >-
A chunk of content that can be inserted into a vector database.
description: >-
The chunks to insert. Each `Chunk` should contain content, which can be
interleaved text, images, or other types. `metadata`: `dict[str, Any]`
and `embedding`: `List[float]` are optional. If `metadata` is provided,
it configures how Llama Stack formats the chunk during generation. If
`embedding` is not provided, it will be computed later.
ttl_seconds:
type: integer
description: The time to live of the chunks.
@ -8537,6 +8557,9 @@ components:
properties:
content:
$ref: '#/components/schemas/InterleavedContent'
description: >-
The content of the chunk, which can be interleaved text, images,
or other types.
metadata:
type: object
additionalProperties:
@ -8547,11 +8570,23 @@ components:
- type: string
- type: array
- type: object
description: >-
Metadata associated with the chunk, such as document ID, source,
or other relevant information.
embedding:
type: array
items:
type: number
description: >-
Optional embedding for the chunk. If not provided, it will be computed
later.
additionalProperties: false
required:
- content
- metadata
title: Chunk
description: >-
A chunk of content that can be inserted into a vector database.
scores:
type: array
items:

View file

@ -57,6 +57,31 @@ chunks = [
]
client.vector_io.insert(vector_db_id=vector_db_id, chunks=chunks)
```
#### Using Precomputed Embeddings
If you decide to precompute embeddings for your documents, you can insert them directly into the vector database by
including the embedding vectors in the chunk data. This is useful if you have a separate embedding service or if you
want to customize the ingestion process.
```python
chunks_with_embeddings = [
{
"content": "First chunk of text",
"mime_type": "text/plain",
"embedding": [0.1, 0.2, 0.3, ...], # Your precomputed embedding vector
"metadata": {"document_id": "doc1", "section": "introduction"},
},
{
"content": "Second chunk of text",
"mime_type": "text/plain",
"embedding": [0.2, 0.3, 0.4, ...], # Your precomputed embedding vector
"metadata": {"document_id": "doc1", "section": "methodology"},
},
]
client.vector_io.insert(vector_db_id=vector_db_id, chunks=chunks_with_embeddings)
```
When providing precomputed embeddings, ensure that the length of each embedding vector matches the
`embedding_dimension` specified when registering the vector database.
### Retrieval
You can query the vector database to retrieve documents based on their embeddings.
```python