From 30be1fd8b7fb454d528647cde7cc12be4e32dba8 Mon Sep 17 00:00:00 2001
From: Sergey Yedrikov <48031344+syedriko@users.noreply.github.com>
Date: Wed, 16 Jul 2025 11:25:44 -0400
Subject: [PATCH] fix: SQLiteVecIndex.create(..., bank_id="test_bank.123") -
 bank_id with a dot - leads to sqlite3.OperationalError (#2770) (#2771)

# What does this PR do?
Resolves https://github.com/meta-llama/llama-stack/issues/2770. It
replaces every character in a SQLite table name that is not
alphanumeric or an underscore with an underscore, and quotes the table
names with square brackets in SQL statements.

Closes #2770

## Test Plan
I added a ".123" suffix to the bank_id on the following line
```
    index = await SQLiteVecIndex.create(dimension=embedding_dimension, db_path=db_path, bank_id="test_bank.123")
```
in tests/unit/providers/vector_io/test_sqlite_vec.py, which, without the
fix in place, demonstrates the issue.
---
 .../inline/vector_io/sqlite_vec/sqlite_vec.py | 41 +++++++++++--------
 .../providers/vector_io/test_sqlite_vec.py    |  4 +-
 2 files changed, 25 insertions(+), 20 deletions(-)

diff --git a/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py b/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py
index 771ffa607..060b5b15c 100644
--- a/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py
+++ b/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py
@@ -7,6 +7,7 @@
 import asyncio
 import json
 import logging
+import re
 import sqlite3
 import struct
 from typing import Any
@@ -117,6 +118,10 @@ def _rrf_rerank(
     return rrf_scores
 
 
+def _make_sql_identifier(name: str) -> str:
+    return re.sub(r"[^a-zA-Z0-9_]", "_", name)
+
+
 class SQLiteVecIndex(EmbeddingIndex):
     """
     An index implementation that stores embeddings in a SQLite virtual table using sqlite-vec.
@@ -130,9 +135,9 @@ class SQLiteVecIndex(EmbeddingIndex):
         self.dimension = dimension
         self.db_path = db_path
         self.bank_id = bank_id
-        self.metadata_table = f"chunks_{bank_id}".replace("-", "_")
-        self.vector_table = f"vec_chunks_{bank_id}".replace("-", "_")
-        self.fts_table = f"fts_chunks_{bank_id}".replace("-", "_")
+        self.metadata_table = _make_sql_identifier(f"chunks_{bank_id}")
+        self.vector_table = _make_sql_identifier(f"vec_chunks_{bank_id}")
+        self.fts_table = _make_sql_identifier(f"fts_chunks_{bank_id}")
         self.kvstore = kvstore
 
     @classmethod
@@ -148,14 +153,14 @@ class SQLiteVecIndex(EmbeddingIndex):
             try:
                 # Create the table to store chunk metadata.
                 cur.execute(f"""
-                    CREATE TABLE IF NOT EXISTS {self.metadata_table} (
+                    CREATE TABLE IF NOT EXISTS [{self.metadata_table}] (
                         id TEXT PRIMARY KEY,
                         chunk TEXT
                     );
                 """)
                 # Create the virtual table for embeddings.
                 cur.execute(f"""
-                    CREATE VIRTUAL TABLE IF NOT EXISTS {self.vector_table}
+                    CREATE VIRTUAL TABLE IF NOT EXISTS [{self.vector_table}]
                     USING vec0(embedding FLOAT[{self.dimension}], id TEXT);
                 """)
                 connection.commit()
@@ -163,7 +168,7 @@ class SQLiteVecIndex(EmbeddingIndex):
                 # based on query. Implementation of the change on client side will allow passing the search_mode option
                 # during initialization to make it easier to create the table that is required.
                 cur.execute(f"""
-                            CREATE VIRTUAL TABLE IF NOT EXISTS {self.fts_table}
+                            CREATE VIRTUAL TABLE IF NOT EXISTS [{self.fts_table}]
                             USING fts5(id, content);
                         """)
                 connection.commit()
@@ -178,9 +183,9 @@ class SQLiteVecIndex(EmbeddingIndex):
             connection = _create_sqlite_connection(self.db_path)
             cur = connection.cursor()
             try:
-                cur.execute(f"DROP TABLE IF EXISTS {self.metadata_table};")
-                cur.execute(f"DROP TABLE IF EXISTS {self.vector_table};")
-                cur.execute(f"DROP TABLE IF EXISTS {self.fts_table};")
+                cur.execute(f"DROP TABLE IF EXISTS [{self.metadata_table}];")
+                cur.execute(f"DROP TABLE IF EXISTS [{self.vector_table}];")
+                cur.execute(f"DROP TABLE IF EXISTS [{self.fts_table}];")
                 connection.commit()
             finally:
                 cur.close()
@@ -212,7 +217,7 @@ class SQLiteVecIndex(EmbeddingIndex):
                     metadata_data = [(chunk.chunk_id, chunk.model_dump_json()) for chunk in batch_chunks]
                     cur.executemany(
                         f"""
-                        INSERT INTO {self.metadata_table} (id, chunk)
+                        INSERT INTO [{self.metadata_table}] (id, chunk)
                         VALUES (?, ?)
                         ON CONFLICT(id) DO UPDATE SET chunk = excluded.chunk;
                         """,
@@ -230,7 +235,7 @@ class SQLiteVecIndex(EmbeddingIndex):
                         for chunk, emb in zip(batch_chunks, batch_embeddings, strict=True)
                     ]
                     cur.executemany(
-                        f"INSERT INTO {self.vector_table} (id, embedding) VALUES (?, ?);",
+                        f"INSERT INTO [{self.vector_table}] (id, embedding) VALUES (?, ?);",
                         embedding_data,
                     )
 
@@ -238,13 +243,13 @@ class SQLiteVecIndex(EmbeddingIndex):
                     fts_data = [(chunk.chunk_id, chunk.content) for chunk in batch_chunks]
                     # DELETE existing entries with same IDs (FTS5 doesn't support ON CONFLICT)
                     cur.executemany(
-                        f"DELETE FROM {self.fts_table} WHERE id = ?;",
+                        f"DELETE FROM [{self.fts_table}] WHERE id = ?;",
                         [(row[0],) for row in fts_data],
                     )
 
                     # INSERT new entries
                     cur.executemany(
-                        f"INSERT INTO {self.fts_table} (id, content) VALUES (?, ?);",
+                        f"INSERT INTO [{self.fts_table}] (id, content) VALUES (?, ?);",
                         fts_data,
                     )
 
@@ -280,8 +285,8 @@ class SQLiteVecIndex(EmbeddingIndex):
                 emb_blob = serialize_vector(emb_list)
                 query_sql = f"""
                     SELECT m.id, m.chunk, v.distance
-                    FROM {self.vector_table} AS v
-                    JOIN {self.metadata_table} AS m ON m.id = v.id
+                    FROM [{self.vector_table}] AS v
+                    JOIN [{self.metadata_table}] AS m ON m.id = v.id
                     WHERE v.embedding MATCH ? AND k = ?
                     ORDER BY v.distance;
                 """
@@ -322,9 +327,9 @@ class SQLiteVecIndex(EmbeddingIndex):
             cur = connection.cursor()
             try:
                 query_sql = f"""
-                    SELECT DISTINCT m.id, m.chunk, bm25({self.fts_table}) AS score
-                    FROM {self.fts_table} AS f
-                    JOIN {self.metadata_table} AS m ON m.id = f.id
+                    SELECT DISTINCT m.id, m.chunk, bm25([{self.fts_table}]) AS score
+                    FROM [{self.fts_table}] AS f
+                    JOIN [{self.metadata_table}] AS m ON m.id = f.id
                     WHERE f.content MATCH ?
                     ORDER BY score ASC
                     LIMIT ?;
diff --git a/tests/unit/providers/vector_io/test_sqlite_vec.py b/tests/unit/providers/vector_io/test_sqlite_vec.py
index a61eeeeca..23c4d6ff6 100644
--- a/tests/unit/providers/vector_io/test_sqlite_vec.py
+++ b/tests/unit/providers/vector_io/test_sqlite_vec.py
@@ -37,7 +37,7 @@ def loop():
 async def sqlite_vec_index(embedding_dimension, tmp_path_factory):
     temp_dir = tmp_path_factory.getbasetemp()
     db_path = str(temp_dir / "test_sqlite.db")
-    index = await SQLiteVecIndex.create(dimension=embedding_dimension, db_path=db_path, bank_id="test_bank")
+    index = await SQLiteVecIndex.create(dimension=embedding_dimension, db_path=db_path, bank_id="test_bank.123")
     yield index
     await index.delete()
 
@@ -110,7 +110,7 @@ async def test_chunk_id_conflict(sqlite_vec_index, sample_chunks, embedding_dime
     cur = connection.cursor()
 
     # Retrieve all chunk IDs to check for duplicates
-    cur.execute(f"SELECT id FROM {sqlite_vec_index.metadata_table}")
+    cur.execute(f"SELECT id FROM [{sqlite_vec_index.metadata_table}]")
     chunk_ids = [row[0] for row in cur.fetchall()]
     cur.close()
     connection.close()