This commit is contained in:
Derek Higgins 2025-07-24 16:09:59 -07:00 committed by GitHub
commit 754fb32c59
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
13 changed files with 146 additions and 12 deletions

View file

@ -152,6 +152,11 @@ class OpenAIVectorStoreMixin(ABC):
"""Load existing OpenAI vector stores into the in-memory cache."""
self.openai_vector_stores = await self._load_openai_vector_stores()
@abstractmethod
async def _delete_openai_chunk_from_vector_store(self, store_id: str, chunk_id: str) -> None:
"""Delete a single chunk from a vector store (provider-specific implementation).

Abstract hook with no behavior of its own. The file-deletion path of this
mixin awaits it once for every loaded chunk that has a chunk_id, before the
file itself is removed from storage.

:param store_id: ID of the vector store the chunk belongs to.
:param chunk_id: ID of the chunk to delete, passed as a string.
"""
pass
@abstractmethod
async def register_vector_db(self, vector_db: VectorDB) -> None:
"""Register a vector database (provider-specific implementation)."""
@ -763,17 +768,17 @@ class OpenAIVectorStoreMixin(ABC):
if vector_store_id not in self.openai_vector_stores:
raise ValueError(f"Vector store {vector_store_id} not found")
dict_chunks = await self._load_openai_vector_store_file_contents(vector_store_id, file_id)
chunks = [Chunk.model_validate(c) for c in dict_chunks]
for c in chunks:
if c.chunk_id:
await self._delete_openai_chunk_from_vector_store(vector_store_id, str(c.chunk_id))
store_info = self.openai_vector_stores[vector_store_id].copy()
file = await self.openai_retrieve_vector_store_file(vector_store_id, file_id)
await self._delete_openai_vector_store_file_from_storage(vector_store_id, file_id)
# TODO: We need to actually delete the embeddings from the underlying vector store...
# Also uncomment the corresponding integration test marked as xfail
#
# test_openai_vector_store_delete_file_removes_from_vector_store in
# tests/integration/vector_io/test_openai_vector_stores.py
# Update in-memory cache
store_info["file_ids"].remove(file_id)
store_info["file_counts"][file.status] -= 1

View file

@ -231,6 +231,10 @@ class EmbeddingIndex(ABC):
async def add_chunks(self, chunks: list[Chunk], embeddings: NDArray):
raise NotImplementedError()
@abstractmethod
async def delete_chunk(self, chunk_id: str):
"""Delete the chunk identified by ``chunk_id`` from this index.

Abstract: this base implementation only raises NotImplementedError, so
concrete index classes must override it.

:param chunk_id: ID of the chunk to remove.
:raises NotImplementedError: always, in this base implementation.
"""
raise NotImplementedError()
@abstractmethod
async def query_vector(self, embedding: NDArray, k: int, score_threshold: float) -> QueryChunksResponse:
"""Query the index using an embedding vector.

Abstract: this base implementation only raises NotImplementedError, so
concrete index classes must override it.

:param embedding: the query embedding.
:param k: presumably the maximum number of chunks to return -- TODO confirm against implementations.
:param score_threshold: presumably the minimum score a chunk needs to be included -- TODO confirm against implementations.
:returns: a QueryChunksResponse.
:raises NotImplementedError: always, in this base implementation.
"""
raise NotImplementedError()