forked from phoenix-oss/llama-stack-mirror
feat: Enable ingestion of precomputed embeddings (#2317)
This commit is contained in:
parent
31ce208bda
commit
f328436831
9 changed files with 366 additions and 15 deletions
34
docs/_static/llama-stack-spec.html
vendored
34
docs/_static/llama-stack-spec.html
vendored
|
@ -10020,7 +10020,8 @@
|
|||
"type": "object",
|
||||
"properties": {
|
||||
"content": {
|
||||
"$ref": "#/components/schemas/InterleavedContent"
|
||||
"$ref": "#/components/schemas/InterleavedContent",
|
||||
"description": "The content of the chunk, which can be interleaved text, images, or other types."
|
||||
},
|
||||
"metadata": {
|
||||
"type": "object",
|
||||
|
@ -10045,7 +10046,15 @@
|
|||
"type": "object"
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"description": "Metadata associated with the chunk, such as document ID, source, or other relevant information."
|
||||
},
|
||||
"embedding": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "number"
|
||||
},
|
||||
"description": "Optional embedding for the chunk. If not provided, it will be computed later."
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
|
@ -10053,9 +10062,10 @@
|
|||
"content",
|
||||
"metadata"
|
||||
],
|
||||
"title": "Chunk"
|
||||
"title": "Chunk",
|
||||
"description": "A chunk of content that can be inserted into a vector database."
|
||||
},
|
||||
"description": "The chunks to insert."
|
||||
"description": "The chunks to insert. Each `Chunk` should contain content which can be interleaved text, images, or other types. `metadata`: `dict[str, Any]` and `embedding`: `List[float]` are optional. If `metadata` is provided, you configure how Llama Stack formats the chunk during generation. If `embedding` is not provided, it will be computed later."
|
||||
},
|
||||
"ttl_seconds": {
|
||||
"type": "integer",
|
||||
|
@ -12285,7 +12295,8 @@
|
|||
"type": "object",
|
||||
"properties": {
|
||||
"content": {
|
||||
"$ref": "#/components/schemas/InterleavedContent"
|
||||
"$ref": "#/components/schemas/InterleavedContent",
|
||||
"description": "The content of the chunk, which can be interleaved text, images, or other types."
|
||||
},
|
||||
"metadata": {
|
||||
"type": "object",
|
||||
|
@ -12310,7 +12321,15 @@
|
|||
"type": "object"
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"description": "Metadata associated with the chunk, such as document ID, source, or other relevant information."
|
||||
},
|
||||
"embedding": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "number"
|
||||
},
|
||||
"description": "Optional embedding for the chunk. If not provided, it will be computed later."
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
|
@ -12318,7 +12337,8 @@
|
|||
"content",
|
||||
"metadata"
|
||||
],
|
||||
"title": "Chunk"
|
||||
"title": "Chunk",
|
||||
"description": "A chunk of content that can be inserted into a vector database."
|
||||
}
|
||||
},
|
||||
"scores": {
|
||||
|
|
37
docs/_static/llama-stack-spec.yaml
vendored
37
docs/_static/llama-stack-spec.yaml
vendored
|
@ -7024,6 +7024,9 @@ components:
|
|||
properties:
|
||||
content:
|
||||
$ref: '#/components/schemas/InterleavedContent'
|
||||
description: >-
|
||||
The content of the chunk, which can be interleaved text, images,
|
||||
or other types.
|
||||
metadata:
|
||||
type: object
|
||||
additionalProperties:
|
||||
|
@ -7034,12 +7037,29 @@ components:
|
|||
- type: string
|
||||
- type: array
|
||||
- type: object
|
||||
description: >-
|
||||
Metadata associated with the chunk, such as document ID, source,
|
||||
or other relevant information.
|
||||
embedding:
|
||||
type: array
|
||||
items:
|
||||
type: number
|
||||
description: >-
|
||||
Optional embedding for the chunk. If not provided, it will be computed
|
||||
later.
|
||||
additionalProperties: false
|
||||
required:
|
||||
- content
|
||||
- metadata
|
||||
title: Chunk
|
||||
description: The chunks to insert.
|
||||
description: >-
|
||||
A chunk of content that can be inserted into a vector database.
|
||||
description: >-
|
||||
The chunks to insert. Each `Chunk` should contain content which can be
|
||||
interleaved text, images, or other types. `metadata`: `dict[str, Any]`
|
||||
and `embedding`: `List[float]` are optional. If `metadata` is provided,
|
||||
you can configure how Llama Stack formats the chunk during generation. If
|
||||
`embedding` is not provided, it will be computed later.
|
||||
ttl_seconds:
|
||||
type: integer
|
||||
description: The time to live of the chunks.
|
||||
|
@ -8537,6 +8557,9 @@ components:
|
|||
properties:
|
||||
content:
|
||||
$ref: '#/components/schemas/InterleavedContent'
|
||||
description: >-
|
||||
The content of the chunk, which can be interleaved text, images,
|
||||
or other types.
|
||||
metadata:
|
||||
type: object
|
||||
additionalProperties:
|
||||
|
@ -8547,11 +8570,23 @@ components:
|
|||
- type: string
|
||||
- type: array
|
||||
- type: object
|
||||
description: >-
|
||||
Metadata associated with the chunk, such as document ID, source,
|
||||
or other relevant information.
|
||||
embedding:
|
||||
type: array
|
||||
items:
|
||||
type: number
|
||||
description: >-
|
||||
Optional embedding for the chunk. If not provided, it will be computed
|
||||
later.
|
||||
additionalProperties: false
|
||||
required:
|
||||
- content
|
||||
- metadata
|
||||
title: Chunk
|
||||
description: >-
|
||||
A chunk of content that can be inserted into a vector database.
|
||||
scores:
|
||||
type: array
|
||||
items:
|
||||
|
|
|
@ -57,6 +57,31 @@ chunks = [
|
|||
]
|
||||
client.vector_io.insert(vector_db_id=vector_db_id, chunks=chunks)
|
||||
```
|
||||
|
||||
#### Using Precomputed Embeddings
|
||||
If you decide to precompute embeddings for your documents, you can insert them directly into the vector database by
|
||||
including the embedding vectors in the chunk data. This is useful if you have a separate embedding service or if you
|
||||
want to customize the ingestion process.
|
||||
```python
|
||||
chunks_with_embeddings = [
|
||||
{
|
||||
"content": "First chunk of text",
|
||||
"mime_type": "text/plain",
|
||||
"embedding": [0.1, 0.2, 0.3, ...], # Your precomputed embedding vector
|
||||
"metadata": {"document_id": "doc1", "section": "introduction"},
|
||||
},
|
||||
{
|
||||
"content": "Second chunk of text",
|
||||
"mime_type": "text/plain",
|
||||
"embedding": [0.2, 0.3, 0.4, ...], # Your precomputed embedding vector
|
||||
"metadata": {"document_id": "doc1", "section": "methodology"},
|
||||
},
|
||||
]
|
||||
client.vector_io.insert(vector_db_id=vector_db_id, chunks=chunks_with_embeddings)
|
||||
```
|
||||
When providing precomputed embeddings, ensure the embedding dimension matches the `embedding_dimension` specified when
|
||||
registering the vector database.
|
||||
|
||||
### Retrieval
|
||||
You can query the vector database to retrieve documents based on their embeddings.
|
||||
```python
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue