From 3d4b32db0a15108561e9643e7a484ddf47f3fbd3 Mon Sep 17 00:00:00 2001
From: Derek Higgins
Date: Wed, 9 Jul 2025 12:19:50 +0100
Subject: [PATCH 1/6] feat: implement chunk deletion for vector stores

Add support for deleting individual chunks from vector stores

- Add abstract delete_chunk() method to EmbeddingIndex base class
- Implement chunk deletion for Faiss provider with index tracking
- Add chunk_ids list to maintain chunk order in Faiss index
- Integrate chunk deletion into OpenAI vector store file deletion flow
- Add placeholder implementations for SQLite and Milvus providers

Closes: #2477

Signed-off-by: Derek Higgins
---
 .../providers/inline/vector_io/faiss/faiss.py | 35 ++++++++++++++++++-
 .../inline/vector_io/sqlite_vec/sqlite_vec.py |  4 +++
 .../remote/vector_io/milvus/milvus.py         |  4 +++
 .../utils/memory/openai_vector_store_mixin.py | 11 ++++++
 .../providers/utils/memory/vector_store.py    |  4 +++
 5 files changed, 57 insertions(+), 1 deletion(-)

diff --git a/llama_stack/providers/inline/vector_io/faiss/faiss.py b/llama_stack/providers/inline/vector_io/faiss/faiss.py
index a57b4a4ee..f7052c6e5 100644
--- a/llama_stack/providers/inline/vector_io/faiss/faiss.py
+++ b/llama_stack/providers/inline/vector_io/faiss/faiss.py
@@ -55,6 +55,11 @@ class FaissIndex(EmbeddingIndex):
         self.kvstore = kvstore
         self.bank_id = bank_id

+        # A list of chunk id's in the same order as they are in the index,
+        # must be updated when chunks are added or removed
+        self.chunk_id_lock = asyncio.Lock()
+        self.chunk_ids: list[Any] = []
+
     @classmethod
     async def create(cls, dimension: int, kvstore: KVStore | None = None, bank_id: str | None = None):
         instance = cls(dimension, kvstore, bank_id)
@@ -75,6 +80,7 @@ class FaissIndex(EmbeddingIndex):
         buffer = io.BytesIO(base64.b64decode(data["faiss_index"]))
         try:
             self.index = faiss.deserialize_index(np.load(buffer, allow_pickle=False))
+            self.chunk_ids = [chunk.chunk_id for chunk in self.chunk_by_index.values()]
         except Exception as e:
             logger.debug(e, exc_info=True)
             raise ValueError(
@@ -114,11 +120,33 @@ class FaissIndex(EmbeddingIndex):
         for i, chunk in enumerate(chunks):
             self.chunk_by_index[indexlen + i] = chunk

-        self.index.add(np.array(embeddings).astype(np.float32))
+        async with self.chunk_id_lock:
+            self.index.add(np.array(embeddings).astype(np.float32))
+            self.chunk_ids.extend([chunk.chunk_id for chunk in chunks])

         # Save updated index
         await self._save_index()

+    async def delete_chunk(self, chunk_id: str) -> None:
+        if chunk_id not in self.chunk_ids:
+            return
+
+        async with self.chunk_id_lock:
+            index = self.chunk_ids.index(chunk_id)
+            self.index.remove_ids(np.array([index]))
+
+            new_chunk_by_index = {}
+            for idx, chunk in self.chunk_by_index.items():
+                # Shift all chunks after the removed chunk to the left
+                if idx > index:
+                    new_chunk_by_index[idx - 1] = chunk
+                else:
+                    new_chunk_by_index[idx] = chunk
+            self.chunk_by_index = new_chunk_by_index
+            self.chunk_ids.pop(index)
+
+        await self._save_index()
+
     async def query_vector(
         self,
         embedding: NDArray,
@@ -260,3 +288,8 @@ class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPr
             raise ValueError(f"Vector DB {vector_db_id} not found")

         return await index.query_chunks(query, params)
+
+    async def _delete_openai_chunk_from_vector_store(self, store_id: str, chunk_id: str) -> None:
+        """Delete a chunk from a faiss index"""
+        faiss_index = self.cache[store_id].index
+        await faiss_index.delete_chunk(chunk_id)
diff --git a/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py b/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py
index f2598cc7c..a8331e65a 100644
--- a/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py
+++ b/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py
@@ -520,3 +520,7 @@ class SQLiteVecVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtoc
         if not index:
             raise ValueError(f"Vector DB {vector_db_id} not found")
         return await index.query_chunks(query, params)
+
+    async def _delete_openai_chunk_from_vector_store(self, store_id: str, chunk_id: str) -> None:
+        """Delete a chunk from a sqlite_vec index."""
+        pass  # TODO
diff --git a/llama_stack/providers/remote/vector_io/milvus/milvus.py b/llama_stack/providers/remote/vector_io/milvus/milvus.py
index dc4852821..d41c7eb8d 100644
--- a/llama_stack/providers/remote/vector_io/milvus/milvus.py
+++ b/llama_stack/providers/remote/vector_io/milvus/milvus.py
@@ -369,3 +369,7 @@ class MilvusVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP
         )

         return await index.query_chunks(query, params)
+
+    async def _delete_openai_chunk_from_vector_store(self, store_id: str, chunk_id: str) -> None:
+        """Delete a chunk from a milvus vector store."""
+        pass  # TODO
diff --git a/llama_stack/providers/utils/memory/openai_vector_store_mixin.py b/llama_stack/providers/utils/memory/openai_vector_store_mixin.py
index f178e9299..294da69a1 100644
--- a/llama_stack/providers/utils/memory/openai_vector_store_mixin.py
+++ b/llama_stack/providers/utils/memory/openai_vector_store_mixin.py
@@ -152,6 +152,11 @@ class OpenAIVectorStoreMixin(ABC):
         """Load existing OpenAI vector stores into the in-memory cache."""
         self.openai_vector_stores = await self._load_openai_vector_stores()

+    @abstractmethod
+    async def _delete_openai_chunk_from_vector_store(self, store_id: str, chunk_id: str) -> None:
+        """Delete a chunk from a vector store."""
+        pass
+
     @abstractmethod
     async def register_vector_db(self, vector_db: VectorDB) -> None:
         """Register a vector database (provider-specific implementation)."""
@@ -763,6 +768,12 @@ class OpenAIVectorStoreMixin(ABC):
         if vector_store_id not in self.openai_vector_stores:
             raise ValueError(f"Vector store {vector_store_id} not found")

+        dict_chunks = await self._load_openai_vector_store_file_contents(vector_store_id, file_id)
+        chunks = [Chunk.model_validate(c) for c in dict_chunks]
+        for c in chunks:
+            if c.chunk_id:
+                await self._delete_openai_chunk_from_vector_store(vector_store_id, str(c.chunk_id))
+
         store_info = self.openai_vector_stores[vector_store_id].copy()

         file = await self.openai_retrieve_vector_store_file(vector_store_id, file_id)
diff --git a/llama_stack/providers/utils/memory/vector_store.py b/llama_stack/providers/utils/memory/vector_store.py
index f892d33c6..4a8749cba 100644
--- a/llama_stack/providers/utils/memory/vector_store.py
+++ b/llama_stack/providers/utils/memory/vector_store.py
@@ -231,6 +231,10 @@ class EmbeddingIndex(ABC):
     async def add_chunks(self, chunks: list[Chunk], embeddings: NDArray):
         raise NotImplementedError()

+    @abstractmethod
+    async def delete_chunk(self, chunk_id: str):
+        raise NotImplementedError()
+
     @abstractmethod
     async def query_vector(self, embedding: NDArray, k: int, score_threshold: float) -> QueryChunksResponse:
         raise NotImplementedError()
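The surface introduced by this patch is deliberately small: an EmbeddingIndex keeps whatever bookkeeping it needs to map a chunk_id back to a stored vector, delete_chunk(chunk_id) removes both the embedding and the chunk payload, and the OpenAI file-deletion flow calls _delete_openai_chunk_from_vector_store() once per chunk of the deleted file. A minimal, self-contained sketch of that contract (a toy in-memory index for illustration only; it is not the Faiss implementation above and all names are hypothetical):

    import asyncio


    class ToyIndex:
        """Illustrative only: mirrors the delete_chunk() contract added to EmbeddingIndex."""

        def __init__(self) -> None:
            self.embeddings: dict[str, list[float]] = {}  # chunk_id -> vector
            self.chunks: dict[str, dict] = {}  # chunk_id -> chunk payload
            self._lock = asyncio.Lock()

        async def add_chunk(self, chunk_id: str, vector: list[float], payload: dict) -> None:
            async with self._lock:
                self.embeddings[chunk_id] = vector
                self.chunks[chunk_id] = payload

        async def delete_chunk(self, chunk_id: str) -> None:
            # Deleting an unknown id is a no-op, matching the Faiss implementation above.
            async with self._lock:
                self.embeddings.pop(chunk_id, None)
                self.chunks.pop(chunk_id, None)


    async def main() -> None:
        index = ToyIndex()
        await index.add_chunk("chunk-1", [0.1, 0.2], {"content": "hello"})
        await index.delete_chunk("chunk-1")
        assert "chunk-1" not in index.chunks


    asyncio.run(main())

The Faiss implementation needs the extra bookkeeping because its vectors are addressed by position: remove_ids() shifts every later vector down by one, which is why chunk_by_index is re-keyed and chunk_ids is kept in index order under chunk_id_lock.
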
From e44a29012eb5676453b78f16164ca97c6b2971c7 Mon Sep 17 00:00:00 2001
From: Derek Higgins
Date: Fri, 18 Jul 2025 16:39:23 +0100
Subject: [PATCH 2/6] feat(vector-io): implement chunk deletion for SQLite vector provider

Add delete_chunk() method with transaction-safe deletion
Delete from metadata, vector, and FTS tables
Implement _delete_openai_chunk_from_vector_store() for OpenAI compatibility

Signed-off-by: Derek Higgins
---
 .../inline/vector_io/sqlite_vec/sqlite_vec.py | 36 ++++++++++++++++++-
 1 file changed, 35 insertions(+), 1 deletion(-)

diff --git a/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py b/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py
index a8331e65a..3f31fa4ab 100644
--- a/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py
+++ b/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py
@@ -425,6 +425,35 @@ class SQLiteVecIndex(EmbeddingIndex):

         return QueryChunksResponse(chunks=chunks, scores=scores)

+    async def delete_chunk(self, chunk_id: str) -> None:
+        """Remove a chunk from the SQLite vector store."""
+
+        def _delete_chunk():
+            connection = _create_sqlite_connection(self.db_path)
+            cur = connection.cursor()
+            try:
+                cur.execute("BEGIN TRANSACTION")
+
+                # Delete from metadata table
+                cur.execute(f"DELETE FROM {self.metadata_table} WHERE id = ?", (chunk_id,))
+
+                # Delete from vector table
+                cur.execute(f"DELETE FROM {self.vector_table} WHERE id = ?", (chunk_id,))
+
+                # Delete from FTS table
+                cur.execute(f"DELETE FROM {self.fts_table} WHERE id = ?", (chunk_id,))
+
+                connection.commit()
+            except Exception as e:
+                connection.rollback()
+                logger.error(f"Error deleting chunk {chunk_id}: {e}")
+                raise
+            finally:
+                cur.close()
+                connection.close()
+
+        await asyncio.to_thread(_delete_chunk)
+

 class SQLiteVecVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPrivate):
     """
@@ -523,4 +552,9 @@ class SQLiteVecVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtoc

     async def _delete_openai_chunk_from_vector_store(self, store_id: str, chunk_id: str) -> None:
         """Delete a chunk from a sqlite_vec index."""
-        pass  # TODO
+        index = await self._get_and_cache_vector_db_index(store_id)
+        if not index:
+            raise ValueError(f"Vector DB {store_id} not found")
+
+        # Use the index's delete_chunk method
+        await index.index.delete_chunk(chunk_id)
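The explicit transaction above matters because sqlite_vec stores each chunk three ways: a metadata row, a vector row, and an FTS row, and a partial delete would leave keyword and vector search disagreeing about which chunks exist. A stripped-down, standalone illustration of the same pattern (plain sqlite3 with placeholder table names and schema, not the provider's actual tables):

    import sqlite3
    import tempfile


    def delete_chunk(db_path: str, chunk_id: str) -> None:
        connection = sqlite3.connect(db_path)
        cur = connection.cursor()
        try:
            cur.execute("BEGIN TRANSACTION")
            # All three tables key their rows by chunk id, so one id drives every delete.
            cur.execute("DELETE FROM chunk_metadata WHERE id = ?", (chunk_id,))
            cur.execute("DELETE FROM chunk_vectors WHERE id = ?", (chunk_id,))
            cur.execute("DELETE FROM chunk_fts WHERE id = ?", (chunk_id,))
            connection.commit()
        except Exception:
            connection.rollback()  # any failure leaves all three tables untouched
            raise
        finally:
            cur.close()
            connection.close()


    # Tiny demo against throwaway tables.
    path = tempfile.mktemp(suffix=".db")
    setup = sqlite3.connect(path)
    for table in ("chunk_metadata", "chunk_vectors", "chunk_fts"):
        setup.execute(f"CREATE TABLE {table} (id TEXT PRIMARY KEY, payload TEXT)")
        setup.execute(f"INSERT INTO {table} VALUES ('chunk-1', 'x')")
    setup.commit()
    setup.close()
    delete_chunk(path, "chunk-1")

In the provider itself the same three statements run against self.metadata_table, self.vector_table and self.fts_table, and the whole closure is pushed to a worker thread with asyncio.to_thread() so SQLite I/O does not block the event loop.
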
From ccafee36c42d4ca6df9d3b599fce234b945842b8 Mon Sep 17 00:00:00 2001
From: Derek Higgins
Date: Fri, 18 Jul 2025 16:51:14 +0100
Subject: [PATCH 3/6] feat(vector-io): implement chunk deletion for Milvus provider

Add delete_chunk() method using Milvus delete with chunk_id filter
Implement _delete_openai_chunk_from_vector_store() for OpenAI compatibility
---
 llama_stack/providers/remote/vector_io/milvus/milvus.py | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/llama_stack/providers/remote/vector_io/milvus/milvus.py b/llama_stack/providers/remote/vector_io/milvus/milvus.py
index d41c7eb8d..71a3571f0 100644
--- a/llama_stack/providers/remote/vector_io/milvus/milvus.py
+++ b/llama_stack/providers/remote/vector_io/milvus/milvus.py
@@ -247,6 +247,16 @@ class MilvusIndex(EmbeddingIndex):
     ) -> QueryChunksResponse:
         raise NotImplementedError("Hybrid search is not supported in Milvus")

+    async def delete_chunk(self, chunk_id: str) -> None:
+        """Remove a chunk from the Milvus collection."""
+        try:
+            await asyncio.to_thread(
+                self.client.delete, collection_name=self.collection_name, filter=f'chunk_id == "{chunk_id}"'
+            )
+        except Exception as e:
+            logger.error(f"Error deleting chunk {chunk_id} from Milvus collection {self.collection_name}: {e}")
+            raise
+

 class MilvusVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPrivate):
     def __init__(
@@ -372,4 +382,9 @@ class MilvusVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP

     async def _delete_openai_chunk_from_vector_store(self, store_id: str, chunk_id: str) -> None:
         """Delete a chunk from a milvus vector store."""
-        pass  # TODO
+        index = await self._get_and_cache_vector_db_index(store_id)
+        if not index:
+            raise ValueError(f"Vector DB {store_id} not found")
+
+        # Use the index's delete_chunk method
+        await index.index.delete_chunk(chunk_id)
From 6238833185f5e79c716feab8c65d195f230cd832 Mon Sep 17 00:00:00 2001
From: Derek Higgins
Date: Mon, 21 Jul 2025 11:13:21 +0100
Subject: [PATCH 4/6] feat(vector-io): implement chunk deletion for PGVector provider

Implement delete_chunk() method with SQL DELETE operation
Add _delete_openai_chunk_from_vector_store() for OpenAI compatibility
Fix chunk_id generation to use the actual chunk.chunk_id instead of
generated IDs (so chunks can be removed by id)
Mark Api.files as an optional dependency of the pgvector provider
Update remote_provider_spec() to support an optional_api_dependencies parameter

This allows pgvector to work in configurations without a files provider
while still supporting file operations when the files API is available.

Signed-off-by: Derek Higgins
---
 llama_stack/providers/datatypes.py            |  6 +++++-
 llama_stack/providers/registry/vector_io.py   |  1 +
 .../remote/vector_io/pgvector/__init__.py     |  2 +-
 .../remote/vector_io/pgvector/pgvector.py     | 16 +++++++++++++++-
 4 files changed, 22 insertions(+), 3 deletions(-)

diff --git a/llama_stack/providers/datatypes.py b/llama_stack/providers/datatypes.py
index 424380324..24b3b6547 100644
--- a/llama_stack/providers/datatypes.py
+++ b/llama_stack/providers/datatypes.py
@@ -226,7 +226,10 @@ API responses, specify the adapter here.


 def remote_provider_spec(
-    api: Api, adapter: AdapterSpec, api_dependencies: list[Api] | None = None
+    api: Api,
+    adapter: AdapterSpec,
+    api_dependencies: list[Api] | None = None,
+    optional_api_dependencies: list[Api] | None = None,
 ) -> RemoteProviderSpec:
     return RemoteProviderSpec(
         api=api,
@@ -234,6 +237,7 @@ def remote_provider_spec(
         config_class=adapter.config_class,
         adapter=adapter,
         api_dependencies=api_dependencies or [],
+        optional_api_dependencies=optional_api_dependencies or [],
     )


diff --git a/llama_stack/providers/registry/vector_io.py b/llama_stack/providers/registry/vector_io.py
index e391341b4..063b382df 100644
--- a/llama_stack/providers/registry/vector_io.py
+++ b/llama_stack/providers/registry/vector_io.py
@@ -410,6 +410,7 @@ See [PGVector's documentation](https://github.com/pgvector/pgvector) for more de
 """,
         ),
         api_dependencies=[Api.inference],
+        optional_api_dependencies=[Api.files],
     ),
     remote_provider_spec(
         Api.vector_io,
diff --git a/llama_stack/providers/remote/vector_io/pgvector/__init__.py b/llama_stack/providers/remote/vector_io/pgvector/__init__.py
index 9f528db74..59eef4c81 100644
--- a/llama_stack/providers/remote/vector_io/pgvector/__init__.py
+++ b/llama_stack/providers/remote/vector_io/pgvector/__init__.py
@@ -12,6 +12,6 @@ from .config import PGVectorVectorIOConfig
 async def get_adapter_impl(config: PGVectorVectorIOConfig, deps: dict[Api, ProviderSpec]):
     from .pgvector import PGVectorVectorIOAdapter

-    impl = PGVectorVectorIOAdapter(config, deps[Api.inference])
+    impl = PGVectorVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files, None))
     await impl.initialize()
     return impl
diff --git a/llama_stack/providers/remote/vector_io/pgvector/pgvector.py b/llama_stack/providers/remote/vector_io/pgvector/pgvector.py
index 3aeb3f30d..7f01ffce2 100644
--- a/llama_stack/providers/remote/vector_io/pgvector/pgvector.py
+++ b/llama_stack/providers/remote/vector_io/pgvector/pgvector.py
@@ -99,7 +99,7 @@ class PGVectorIndex(EmbeddingIndex):
         for i, chunk in enumerate(chunks):
             values.append(
                 (
-                    f"{chunk.metadata['document_id']}:chunk-{i}",
+                    f"{chunk.chunk_id}",
                     Json(chunk.model_dump()),
                     embeddings[i].tolist(),
                 )
@@ -159,6 +159,11 @@ class PGVectorIndex(EmbeddingIndex):
         with self.conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
             cur.execute(f"DROP TABLE IF EXISTS {self.table_name}")

+    async def delete_chunk(self, chunk_id: str) -> None:
+        """Remove a chunk from the PostgreSQL table."""
+        with self.conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
+            cur.execute(f"DELETE FROM {self.table_name} WHERE id = %s", (chunk_id,))
+

 class PGVectorVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPrivate):
     def __init__(
@@ -265,3 +270,12 @@ class PGVectorVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtoco
         index = PGVectorIndex(vector_db, vector_db.embedding_dimension, self.conn)
         self.cache[vector_db_id] = VectorDBWithIndex(vector_db, index, self.inference_api)
         return self.cache[vector_db_id]
+
+    async def _delete_openai_chunk_from_vector_store(self, store_id: str, chunk_id: str) -> None:
+        """Delete a chunk from a PostgreSQL vector store."""
+        index = await self._get_and_cache_vector_db_index(store_id)
+        if not index:
+            raise ValueError(f"Vector DB {store_id} not found")
+
+        # Use the index's delete_chunk method
+        await index.index.delete_chunk(chunk_id)
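Beyond pgvector itself, this patch makes optional API dependencies expressible for remote providers in general: remote_provider_spec() now forwards optional_api_dependencies, the registry marks Api.files as optional for pgvector, and get_adapter_impl() passes deps.get(Api.files, None) so the adapter receives None when no files provider is configured. A hedged sketch of a registry entry using the new parameter (the AdapterSpec values below are placeholders for a hypothetical provider, not the real pgvector entry):

    from llama_stack.providers.datatypes import AdapterSpec, Api, remote_provider_spec

    spec = remote_provider_spec(
        api=Api.vector_io,
        adapter=AdapterSpec(
            adapter_type="my_vector_db",
            pip_packages=["my-vector-db-client"],
            module="llama_stack.providers.remote.vector_io.my_vector_db",
            config_class="llama_stack.providers.remote.vector_io.my_vector_db.config.MyVectorDBConfig",
            description="Example remote vector_io provider (placeholder).",
        ),
        api_dependencies=[Api.inference],
        # Resolved and injected only when a files provider is configured in the stack;
        # otherwise the adapter's deps dict simply has no Api.files entry.
        optional_api_dependencies=[Api.files],
    )
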
From 239c2a9eef36aaeb54e2b6805d4bb28b6bfc832f Mon Sep 17 00:00:00 2001
From: Derek Higgins
Date: Mon, 21 Jul 2025 11:16:40 +0100
Subject: [PATCH 5/6] feat(vector-io): add delete_chunk stub methods for unsupported providers

Add NotImplementedError placeholders for delete_chunk and
_delete_openai_chunk_from_vector_store methods in Chroma, Qdrant, and
Weaviate providers that don't yet support the File API
---
 llama_stack/providers/remote/vector_io/chroma/chroma.py     | 6 ++++++
 llama_stack/providers/remote/vector_io/qdrant/qdrant.py     | 6 ++++++
 llama_stack/providers/remote/vector_io/weaviate/weaviate.py | 6 ++++++
 3 files changed, 18 insertions(+)

diff --git a/llama_stack/providers/remote/vector_io/chroma/chroma.py b/llama_stack/providers/remote/vector_io/chroma/chroma.py
index bd968d96d..596f288fa 100644
--- a/llama_stack/providers/remote/vector_io/chroma/chroma.py
+++ b/llama_stack/providers/remote/vector_io/chroma/chroma.py
@@ -112,6 +112,9 @@ class ChromaIndex(EmbeddingIndex):
     ) -> QueryChunksResponse:
         raise NotImplementedError("Keyword search is not supported in Chroma")

+    async def delete_chunk(self, chunk_id: str) -> None:
+        raise NotImplementedError("delete_chunk is not supported in Chroma")
+
     async def query_hybrid(
         self,
         embedding: NDArray,
@@ -208,3 +211,6 @@ class ChromaVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP
         index = VectorDBWithIndex(vector_db, ChromaIndex(self.client, collection), self.inference_api)
         self.cache[vector_db_id] = index
         return index
+
+    async def _delete_openai_chunk_from_vector_store(self, store_id: str, chunk_id: str) -> None:
+        raise NotImplementedError("OpenAI Vector Stores API is not supported in Chroma")
diff --git a/llama_stack/providers/remote/vector_io/qdrant/qdrant.py b/llama_stack/providers/remote/vector_io/qdrant/qdrant.py
index 5bdea0ce8..cbaba0543 100644
--- a/llama_stack/providers/remote/vector_io/qdrant/qdrant.py
+++ b/llama_stack/providers/remote/vector_io/qdrant/qdrant.py
@@ -82,6 +82,9 @@ class QdrantIndex(EmbeddingIndex):

         await self.client.upsert(collection_name=self.collection_name, points=points)

+    async def delete_chunk(self, chunk_id: str) -> None:
+        raise NotImplementedError("delete_chunk is not supported in Qdrant")
+
     async def query_vector(self, embedding: NDArray, k: int, score_threshold: float) -> QueryChunksResponse:
         results = (
             await self.client.query_points(
@@ -307,3 +310,6 @@ class QdrantVectorIOAdapter(VectorIO, VectorDBsProtocolPrivate):
         file_id: str,
     ) -> VectorStoreFileObject:
         raise NotImplementedError("OpenAI Vector Stores API is not supported in Qdrant")
+
+    async def _delete_openai_chunk_from_vector_store(self, store_id: str, chunk_id: str) -> None:
+        raise NotImplementedError("OpenAI Vector Stores API is not supported in Qdrant")
diff --git a/llama_stack/providers/remote/vector_io/weaviate/weaviate.py b/llama_stack/providers/remote/vector_io/weaviate/weaviate.py
index 35bb40454..6929aeb54 100644
--- a/llama_stack/providers/remote/vector_io/weaviate/weaviate.py
+++ b/llama_stack/providers/remote/vector_io/weaviate/weaviate.py
@@ -66,6 +66,9 @@ class WeaviateIndex(EmbeddingIndex):
         # TODO: make this async friendly
         collection.data.insert_many(data_objects)

+    async def delete_chunk(self, chunk_id: str) -> None:
+        raise NotImplementedError("delete_chunk is not supported in Weaviate")
+
     async def query_vector(self, embedding: NDArray, k: int, score_threshold: float) -> QueryChunksResponse:
         collection = self.client.collections.get(self.collection_name)

@@ -264,3 +267,6 @@ class WeaviateVectorIOAdapter(

     async def _delete_openai_vector_store_file_from_storage(self, store_id: str, file_id: str) -> None:
         raise NotImplementedError("OpenAI Vector Stores API is not supported in Weaviate")
+
+    async def _delete_openai_chunk_from_vector_store(self, store_id: str, chunk_id: str) -> None:
+        raise NotImplementedError("OpenAI Vector Stores API is not supported in Weaviate")
From 05d3fffbdfa0c7f88f70b111d55f9c37ad9b5087 Mon Sep 17 00:00:00 2001
From: Derek Higgins
Date: Fri, 18 Jul 2025 16:53:41 +0100
Subject: [PATCH 6/6] test(vector-io): enable chunk deletion test for vector stores

Remove xfail marker from
test_openai_vector_store_delete_file_removes_from_vector_store now that
chunk deletion functionality has been implemented

Signed-off-by: Derek Higgins
---
 .../providers/utils/memory/openai_vector_store_mixin.py  | 6 ------
 tests/integration/vector_io/test_openai_vector_stores.py | 2 --
 2 files changed, 8 deletions(-)

diff --git a/llama_stack/providers/utils/memory/openai_vector_store_mixin.py b/llama_stack/providers/utils/memory/openai_vector_store_mixin.py
index 294da69a1..45f586823 100644
--- a/llama_stack/providers/utils/memory/openai_vector_store_mixin.py
+++ b/llama_stack/providers/utils/memory/openai_vector_store_mixin.py
@@ -779,12 +779,6 @@ class OpenAIVectorStoreMixin(ABC):
         file = await self.openai_retrieve_vector_store_file(vector_store_id, file_id)
         await self._delete_openai_vector_store_file_from_storage(vector_store_id, file_id)

-        # TODO: We need to actually delete the embeddings from the underlying vector store...
-        # Also uncomment the corresponding integration test marked as xfail
-        #
-        # test_openai_vector_store_delete_file_removes_from_vector_store in
-        # tests/integration/vector_io/test_openai_vector_stores.py
-
         # Update in-memory cache
         store_info["file_ids"].remove(file_id)
         store_info["file_counts"][file.status] -= 1
diff --git a/tests/integration/vector_io/test_openai_vector_stores.py b/tests/integration/vector_io/test_openai_vector_stores.py
index 71d2bc55e..a3342d732 100644
--- a/tests/integration/vector_io/test_openai_vector_stores.py
+++ b/tests/integration/vector_io/test_openai_vector_stores.py
@@ -735,8 +735,6 @@ def test_openai_vector_store_delete_file(compat_client_with_empty_stores, client
     assert updated_vector_store.file_counts.in_progress == 0


-# TODO: Remove this xfail once we have a way to remove embeddings from vector store
-@pytest.mark.xfail(reason="Vector Store Files delete doesn't remove embeddings from vector store", strict=True)
 def test_openai_vector_store_delete_file_removes_from_vector_store(compat_client_with_empty_stores, client_with_models):
     """Test OpenAI vector store delete file removes from vector store."""
     skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
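With every provider now either implementing chunk deletion or rejecting it explicitly, the behaviour the re-enabled test verifies is: deleting a vector store file also removes that file's chunks, so later searches cannot return them. A rough client-side sketch of that flow, assuming a recent OpenAI-compatible client pointed at a running stack (the base URL, file name, and query are placeholders, and the real test uses its own fixtures and helpers):

    from openai import OpenAI

    # Placeholder endpoint: llama-stack exposes an OpenAI-compatible surface, but the
    # exact base URL depends on how the server is configured.
    client = OpenAI(base_url="http://localhost:8321/v1/openai/v1", api_key="none")

    store = client.vector_stores.create(name="docs")
    uploaded = client.files.create(file=open("example.txt", "rb"), purpose="assistants")
    client.vector_stores.files.create(vector_store_id=store.id, file_id=uploaded.id)
    # (The real test also waits for the file to finish processing before searching.)

    # Deleting the file now also deletes its chunks/embeddings from the store...
    client.vector_stores.files.delete(vector_store_id=store.id, file_id=uploaded.id)

    # ...so content that existed only in that file should no longer be returned.
    results = client.vector_stores.search(vector_store_id=store.id, query="text that was only in example.txt")
    assert len(results.data) == 0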