fix(vector-io): handle missing document_id in insert_chunks

Fixed KeyError when chunks don't have document_id in metadata or chunk_metadata.
Updated logging to safely extract document_id using getattr, and updated RAG memory
to handle different document_id locations. Added a test for missing document_id scenarios.

Fixes issue #3494 where /v1/vector-io/insert would crash with KeyError.
This commit is contained in:
skamenan7 2025-09-22 16:54:51 -04:00
parent a50b63906c
commit a14f79a362
3 changed files with 29 additions and 3 deletions

View file

@@ -101,11 +101,15 @@ class VectorIORouter(VectorIO):
chunks: list[Chunk],
ttl_seconds: int | None = None,
) -> None:
doc_ids = [
getattr(chunk.chunk_metadata, "document_id", None) if chunk.chunk_metadata else None for chunk in chunks[:3]
]
logger.debug(
f"VectorIORouter.insert_chunks: {vector_db_id}, {len(chunks)} chunks, ttl_seconds={ttl_seconds}, chunk_ids={[chunk.metadata['document_id'] for chunk in chunks[:3]]}{' and more...' if len(chunks) > 3 else ''}",
f"VectorIORouter.insert_chunks: {vector_db_id}, {len(chunks)} chunks, "
f"ttl_seconds={ttl_seconds}, chunk_ids={doc_ids}{' and more...' if len(chunks) > 3 else ''}"
)
provider = await self.routing_table.get_provider_impl(vector_db_id)
return await provider.insert_chunks(vector_db_id, chunks, ttl_seconds)
await provider.insert_chunks(vector_db_id, chunks, ttl_seconds)
async def query_chunks(
self,