feat: Enable ingestion of precomputed embeddings (#2317)

2025-05-31 04:03:37 -06:00 · 2025-05-31 04:03:37 -06:00 · f328436831
commit f328436831
parent 31ce208bda
9 changed files with 366 additions and 15 deletions
--- a/docs/_static/llama-stack-spec.html
+++ b/docs/_static/llama-stack-spec.html
@ -10020,7 +10020,8 @@
                            "type": "object",
                            "properties": {
                                "content": {
-                                    "$ref": "#/components/schemas/InterleavedContent"
+                                    "$ref": "#/components/schemas/InterleavedContent",
+                                    "description": "The content of the chunk, which can be interleaved text, images, or other types."
                                },
                                "metadata": {
                                    "type": "object",
@ -10045,7 +10046,15 @@
                                                "type": "object"
                                            }
                                        ]
-                                    }
+                                    },
+                                    "description": "Metadata associated with the chunk, such as document ID, source, or other relevant information."
+                                },
+                                "embedding": {
+                                    "type": "array",
+                                    "items": {
+                                        "type": "number"
+                                    },
+                                    "description": "Optional embedding for the chunk. If not provided, it will be computed later."
                                }
                            },
                            "additionalProperties": false,
@ -10053,9 +10062,10 @@
                                "content",
                                "metadata"
                            ],
-                            "title": "Chunk"
+                            "title": "Chunk",
+                            "description": "A chunk of content that can be inserted into a vector database."
                        },
-                        "description": "The chunks to insert."
+                        "description": "The chunks to insert. Each `Chunk` should contain content which can be interleaved text, images, or other types. `metadata` (`dict[str, Any]`) is required and configures how Llama Stack formats the chunk during generation. `embedding` (`List[float]`) is optional; if it is not provided, it will be computed later."
                    },
                    "ttl_seconds": {
                        "type": "integer",
@ -12285,7 +12295,8 @@
                            "type": "object",
                            "properties": {
                                "content": {
-                                    "$ref": "#/components/schemas/InterleavedContent"
+                                    "$ref": "#/components/schemas/InterleavedContent",
+                                    "description": "The content of the chunk, which can be interleaved text, images, or other types."
                                },
                                "metadata": {
                                    "type": "object",
@ -12310,7 +12321,15 @@
                                                "type": "object"
                                            }
                                        ]
-                                    }
+                                    },
+                                    "description": "Metadata associated with the chunk, such as document ID, source, or other relevant information."
+                                },
+                                "embedding": {
+                                    "type": "array",
+                                    "items": {
+                                        "type": "number"
+                                    },
+                                    "description": "Optional embedding for the chunk. If not provided, it will be computed later."
                                }
                            },
                            "additionalProperties": false,
@ -12318,7 +12337,8 @@
                                "content",
                                "metadata"
                            ],
-                            "title": "Chunk"
+                            "title": "Chunk",
+                            "description": "A chunk of content that can be inserted into a vector database."
                        }
                    },
                    "scores": {
--- a/docs/_static/llama-stack-spec.yaml
+++ b/docs/_static/llama-stack-spec.yaml
@ -7024,6 +7024,9 @@ components:
            properties:
              content:
                $ref: '#/components/schemas/InterleavedContent'
+                description: >-
+                  The content of the chunk, which can be interleaved text, images,
+                  or other types.
              metadata:
                type: object
                additionalProperties:
@ -7034,12 +7037,29 @@ components:
                    - type: string
                    - type: array
                    - type: object
+                description: >-
+                  Metadata associated with the chunk, such as document ID, source,
+                  or other relevant information.
+              embedding:
+                type: array
+                items:
+                  type: number
+                description: >-
+                  Optional embedding for the chunk. If not provided, it will be computed
+                  later.
            additionalProperties: false
            required:
              - content
              - metadata
            title: Chunk
-          description: The chunks to insert.
+            description: >-
+              A chunk of content that can be inserted into a vector database.
+          description: >-
+            The chunks to insert. Each `Chunk` should contain content which can be
+            interleaved text, images, or other types. `metadata` (`dict[str, Any]`)
+            is required and configures how Llama Stack formats the chunk during
+            generation. `embedding` (`List[float]`) is optional; if it is not
+            provided, it will be computed later.
        ttl_seconds:
          type: integer
          description: The time to live of the chunks.
@ -8537,6 +8557,9 @@ components:
            properties:
              content:
                $ref: '#/components/schemas/InterleavedContent'
+                description: >-
+                  The content of the chunk, which can be interleaved text, images,
+                  or other types.
              metadata:
                type: object
                additionalProperties:
@ -8547,11 +8570,23 @@ components:
                    - type: string
                    - type: array
                    - type: object
+                description: >-
+                  Metadata associated with the chunk, such as document ID, source,
+                  or other relevant information.
+              embedding:
+                type: array
+                items:
+                  type: number
+                description: >-
+                  Optional embedding for the chunk. If not provided, it will be computed
+                  later.
            additionalProperties: false
            required:
              - content
              - metadata
            title: Chunk
+            description: >-
+              A chunk of content that can be inserted into a vector database.
        scores:
          type: array
          items:
--- a/docs/source/building_applications/rag.md
+++ b/docs/source/building_applications/rag.md
@ -57,6 +57,31 @@ chunks = [
 ]
 client.vector_io.insert(vector_db_id=vector_db_id, chunks=chunks)
 ```
+
+#### Using Precomputed Embeddings
+If you decide to precompute embeddings for your documents, you can insert them directly into the vector database by
+including the embedding vectors in the chunk data. This is useful if you have a separate embedding service or if you
+want to customize the ingestion process.
+```python
+chunks_with_embeddings = [
+    {
+        "content": "First chunk of text",
+        "mime_type": "text/plain",
+        "embedding": [0.1, 0.2, 0.3, ...],  # Your precomputed embedding vector
+        "metadata": {"document_id": "doc1", "section": "introduction"},
+    },
+    {
+        "content": "Second chunk of text",
+        "mime_type": "text/plain",
+        "embedding": [0.2, 0.3, 0.4, ...],  # Your precomputed embedding vector
+        "metadata": {"document_id": "doc1", "section": "methodology"},
+    },
+]
+client.vector_io.insert(vector_db_id=vector_db_id, chunks=chunks_with_embeddings)
+```
+When providing precomputed embeddings, ensure that the length of each embedding vector matches the
+`embedding_dimension` specified when registering the vector database.
+
 ### Retrieval
 You can query the vector database to retrieve documents based on their embeddings.
 ```python