updated docstring

Signed-off-by: Francisco Javier Arceo <farceo@redhat.com>
This commit is contained in:
Francisco Javier Arceo 2025-02-14 13:36:07 -05:00
parent 2f017b8590
commit 4b34603995

View file

@@ -72,8 +72,9 @@ class SQLiteVecIndex(EmbeddingIndex):
async def add_chunks(self, chunks: List[Chunk], embeddings: NDArray): async def add_chunks(self, chunks: List[Chunk], embeddings: NDArray):
""" """
Add new chunks along with their embeddings using batch inserts. Add new chunks along with their embeddings using batch inserts.
First inserts all chunk metadata in a batch, then inserts all embeddings in a batch, For each chunk, we insert its JSON into the metadata table and then insert its
using the assigned rowids. If any insert fails, the transaction is rolled back. embedding (serialized to raw bytes) into the virtual table using the assigned rowid.
If any insert fails, the transaction is rolled back.
""" """
cur = self.connection.cursor() cur = self.connection.cursor()
try: try:
@@ -89,7 +90,7 @@ class SQLiteVecIndex(EmbeddingIndex):
# Insert embeddings using the retrieved row IDs # Insert embeddings using the retrieved row IDs
embedding_data = [ embedding_data = [
(row_id, serialize_vector(emb.tolist() if isinstance(emb, np.ndarray) else list(emb))) (row_id, serialize_vector(emb.tolist() if isinstance(emb, np.ndarray) else list(emb)))
for row_id, emb in zip(row_ids, embeddings) for row_id, emb in zip(row_ids, embeddings, strict=True)
] ]
cur.executemany(f"INSERT INTO {self.vector_table} (rowid, embedding) VALUES (?, ?)", embedding_data) cur.executemany(f"INSERT INTO {self.vector_table} (rowid, embedding) VALUES (?, ?)", embedding_data)
# Commit transaction if all inserts succeed # Commit transaction if all inserts succeed