chore: Moving vector store and vector store files helper methods to openai_vector_store_mixin (#2863)

# What does this PR do?
Moving vector store and vector store files helper methods to
`openai_vector_store_mixin.py`

## Test Plan
The tests are already covered in CI, which exercises the inline providers as well as the existing integration tests.

Note that the `vector_index` fixture will test `milvus_vec_adapter`,
`faiss_vec_adapter`, and `sqlite_vec_adapter` in
`tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py`.
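
A rough sketch of how such a parametrized fixture can fan out over the three inline adapters is below; the adapter fixture names come from the note above, but the dispatch via `request.getfixturevalue` is an assumption about the test layout, not a copy of the repo's fixture.

```python
import pytest


# Hypothetical shape of the vector_index fixture: each param names an adapter
# fixture (assumed to exist elsewhere in the suite) and is resolved lazily.
@pytest.fixture(params=["milvus_vec_adapter", "faiss_vec_adapter", "sqlite_vec_adapter"])
def vector_index(request):
    return request.getfixturevalue(request.param)
```

Any test that takes `vector_index` then runs once per adapter, which is how the unit file above can exercise all three inline providers.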

Additionally, the integration tests in `integration-vector-io-tests.yml`
run `tests/integration/vector_io` against the following providers:
```yaml
vector-io-provider: ["inline::faiss", "inline::sqlite-vec", "inline::milvus", "remote::chromadb", "remote::pgvector"]
```

Signed-off-by: Francisco Javier Arceo <farceo@redhat.com>
Francisco Arceo 2025-07-23 13:35:48 -04:00 committed by GitHub
parent e1ed152779
commit 2aba2c1236
5 changed files with 38 additions and 504 deletions

`openai_vector_store_mixin.py`

```diff
@@ -66,7 +66,7 @@ class OpenAIVectorStoreMixin(ABC):
     async def _save_openai_vector_store(self, store_id: str, store_info: dict[str, Any]) -> None:
         """Save vector store metadata to persistent storage."""
-        assert self.kvstore is not None
+        assert self.kvstore
         key = f"{OPENAI_VECTOR_STORES_PREFIX}{store_id}"
         await self.kvstore.set(key=key, value=json.dumps(store_info))
         # update in-memory cache
@@ -74,7 +74,7 @@ class OpenAIVectorStoreMixin(ABC):
     async def _load_openai_vector_stores(self) -> dict[str, dict[str, Any]]:
         """Load all vector store metadata from persistent storage."""
-        assert self.kvstore is not None
+        assert self.kvstore
         start_key = OPENAI_VECTOR_STORES_PREFIX
         end_key = f"{OPENAI_VECTOR_STORES_PREFIX}\xff"
         stored_data = await self.kvstore.values_in_range(start_key, end_key)
@@ -87,7 +87,7 @@ class OpenAIVectorStoreMixin(ABC):
     async def _update_openai_vector_store(self, store_id: str, store_info: dict[str, Any]) -> None:
         """Update vector store metadata in persistent storage."""
-        assert self.kvstore is not None
+        assert self.kvstore
         key = f"{OPENAI_VECTOR_STORES_PREFIX}{store_id}"
         await self.kvstore.set(key=key, value=json.dumps(store_info))
         # update in-memory cache
@@ -95,38 +95,62 @@ class OpenAIVectorStoreMixin(ABC):
     async def _delete_openai_vector_store_from_storage(self, store_id: str) -> None:
         """Delete vector store metadata from persistent storage."""
-        assert self.kvstore is not None
+        assert self.kvstore
         key = f"{OPENAI_VECTOR_STORES_PREFIX}{store_id}"
         await self.kvstore.delete(key)
         # remove from in-memory cache
         self.openai_vector_stores.pop(store_id, None)

-    @abstractmethod
     async def _save_openai_vector_store_file(
         self, store_id: str, file_id: str, file_info: dict[str, Any], file_contents: list[dict[str, Any]]
     ) -> None:
         """Save vector store file metadata to persistent storage."""
-        pass
+        assert self.kvstore
+        meta_key = f"{OPENAI_VECTOR_STORES_FILES_PREFIX}{store_id}:{file_id}"
+        await self.kvstore.set(key=meta_key, value=json.dumps(file_info))
+        contents_prefix = f"{OPENAI_VECTOR_STORES_FILES_CONTENTS_PREFIX}{store_id}:{file_id}:"
+        for idx, chunk in enumerate(file_contents):
+            await self.kvstore.set(key=f"{contents_prefix}{idx}", value=json.dumps(chunk))

-    @abstractmethod
     async def _load_openai_vector_store_file(self, store_id: str, file_id: str) -> dict[str, Any]:
         """Load vector store file metadata from persistent storage."""
-        pass
+        assert self.kvstore
+        key = f"{OPENAI_VECTOR_STORES_FILES_PREFIX}{store_id}:{file_id}"
+        stored_data = await self.kvstore.get(key)
+        return json.loads(stored_data) if stored_data else {}

-    @abstractmethod
     async def _load_openai_vector_store_file_contents(self, store_id: str, file_id: str) -> list[dict[str, Any]]:
         """Load vector store file contents from persistent storage."""
-        pass
+        assert self.kvstore
+        prefix = f"{OPENAI_VECTOR_STORES_FILES_CONTENTS_PREFIX}{store_id}:{file_id}:"
+        end_key = f"{prefix}\xff"
+        raw_items = await self.kvstore.values_in_range(prefix, end_key)
+        return [json.loads(item) for item in raw_items]

-    @abstractmethod
     async def _update_openai_vector_store_file(self, store_id: str, file_id: str, file_info: dict[str, Any]) -> None:
         """Update vector store file metadata in persistent storage."""
-        pass
+        assert self.kvstore
+        key = f"{OPENAI_VECTOR_STORES_FILES_PREFIX}{store_id}:{file_id}"
+        await self.kvstore.set(key=key, value=json.dumps(file_info))

-    @abstractmethod
     async def _delete_openai_vector_store_file_from_storage(self, store_id: str, file_id: str) -> None:
         """Delete vector store file metadata from persistent storage."""
-        pass
+        assert self.kvstore
+        meta_key = f"{OPENAI_VECTOR_STORES_FILES_PREFIX}{store_id}:{file_id}"
+        await self.kvstore.delete(meta_key)
+        contents_prefix = f"{OPENAI_VECTOR_STORES_FILES_CONTENTS_PREFIX}{store_id}:{file_id}:"
+        end_key = f"{contents_prefix}\xff"
+        # load all stored chunk values (values_in_range is implemented by all backends)
+        raw_items = await self.kvstore.values_in_range(contents_prefix, end_key)
+        # delete each chunk by its index suffix
+        for idx in range(len(raw_items)):
+            await self.kvstore.delete(f"{contents_prefix}{idx}")
+
+    async def initialize_openai_vector_stores(self) -> None:
+        """Load existing OpenAI vector stores into the in-memory cache."""
+        self.openai_vector_stores = await self._load_openai_vector_stores()

     @abstractmethod
     async def register_vector_db(self, vector_db: VectorDB) -> None:
@@ -138,10 +162,6 @@ class OpenAIVectorStoreMixin(ABC):
         """Unregister a vector database (provider-specific implementation)."""
         pass

-    async def initialize_openai_vector_stores(self) -> None:
-        """Load existing OpenAI vector stores into the in-memory cache."""
-        self.openai_vector_stores = await self._load_openai_vector_stores()
-
     @abstractmethod
     async def insert_chunks(
         self,
```
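
The file helpers above all hang off the same kvstore key scheme: one metadata key per `(store_id, file_id)` pair, one key per chunk index, and `\xff` as the upper bound for prefix range scans. The standalone sketch below mirrors that scheme with a plain dict standing in for a real kvstore backend; the prefix string is illustrative, not the module's actual constant.

```python
import json

kv: dict[str, str] = {}  # stand-in for the kvstore backend
prefix = "openai_vector_stores_files_contents:vs_1:file_1:"  # assumed key shape

# save chunks under index-suffixed keys, as _save_openai_vector_store_file does
for idx, chunk in enumerate([{"text": "a"}, {"text": "b"}, {"text": "c"}]):
    kv[f"{prefix}{idx}"] = json.dumps(chunk)

# emulate values_in_range(prefix, prefix + "\xff"), as the load/delete helpers do
raw_items = [v for k, v in sorted(kv.items()) if prefix <= k < prefix + "\xff"]
chunks = [json.loads(item) for item in raw_items]
assert [c["text"] for c in chunks] == ["a", "b", "c"]

# delete each chunk by its index suffix, mirroring _delete_openai_vector_store_file_from_storage
for idx in range(len(raw_items)):
    kv.pop(f"{prefix}{idx}", None)
assert not kv
```

Because the chunk keys are derived purely from the store/file ids and an index, deletion can recompute them from the range-scan result instead of keeping a separate manifest, which is the pattern the moved helpers rely on.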