From a14f79a362d2cb79cac5a0c7fe71d873bd9467d6 Mon Sep 17 00:00:00 2001 From: skamenan7 Date: Mon, 22 Sep 2025 16:54:51 -0400 Subject: [PATCH 1/3] fix(vector-io): handle missing document_id in insert_chunks Fixed KeyError when chunks don't have document_id in metadata or chunk_metadata. Updated logging to safely extract document_id using getattr and RAG memory to handle different document_id locations. Added test for missing document_id scenarios. Fixes issue #3494 where /v1/vector-io/insert would crash with KeyError. --- llama_stack/core/routers/vector_io.py | 8 ++++++-- .../inline/tool_runtime/rag/memory.py | 5 ++++- .../test_vector_io_openai_vector_stores.py | 19 +++++++++++++++++++ 3 files changed, 29 insertions(+), 3 deletions(-) diff --git a/llama_stack/core/routers/vector_io.py b/llama_stack/core/routers/vector_io.py index 786b0e391..ebf99bba9 100644 --- a/llama_stack/core/routers/vector_io.py +++ b/llama_stack/core/routers/vector_io.py @@ -101,11 +101,15 @@ class VectorIORouter(VectorIO): chunks: list[Chunk], ttl_seconds: int | None = None, ) -> None: + doc_ids = [ + getattr(chunk.chunk_metadata, "document_id", None) if chunk.chunk_metadata else None for chunk in chunks[:3] + ] logger.debug( - f"VectorIORouter.insert_chunks: {vector_db_id}, {len(chunks)} chunks, ttl_seconds={ttl_seconds}, chunk_ids={[chunk.metadata['document_id'] for chunk in chunks[:3]]}{' and more...' if len(chunks) > 3 else ''}", + f"VectorIORouter.insert_chunks: {vector_db_id}, {len(chunks)} chunks, " + f"ttl_seconds={ttl_seconds}, chunk_ids={doc_ids}{' and more...' if len(chunks) > 3 else ''}" ) provider = await self.routing_table.get_provider_impl(vector_db_id) - return await provider.insert_chunks(vector_db_id, chunks, ttl_seconds) + await provider.insert_chunks(vector_db_id, chunks, ttl_seconds) async def query_chunks( self, diff --git a/llama_stack/providers/inline/tool_runtime/rag/memory.py b/llama_stack/providers/inline/tool_runtime/rag/memory.py index bc68f198d..80eb47573 100644 --- a/llama_stack/providers/inline/tool_runtime/rag/memory.py +++ b/llama_stack/providers/inline/tool_runtime/rag/memory.py @@ -279,7 +279,10 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRunti return RAGQueryResult( content=picked, metadata={ - "document_ids": [c.metadata["document_id"] for c in chunks[: len(picked)]], + "document_ids": [ + c.metadata.get("document_id") or (c.chunk_metadata.document_id if c.chunk_metadata else None) + for c in chunks[: len(picked)] + ], "chunks": [c.content for c in chunks[: len(picked)]], "scores": scores[: len(picked)], "vector_db_ids": [c.metadata["vector_db_id"] for c in chunks[: len(picked)]], diff --git a/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py b/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py index 98889f38e..12f1fb946 100644 --- a/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py +++ b/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py @@ -113,6 +113,25 @@ async def test_insert_chunks_missing_db_raises(vector_io_adapter): await vector_io_adapter.insert_chunks("db_not_exist", []) +async def test_insert_chunks_with_missing_document_id(vector_io_adapter): + """Ensure no KeyError when document_id is missing or in different places.""" + from llama_stack.apis.vector_io import Chunk, ChunkMetadata + + fake_index = AsyncMock() + vector_io_adapter.cache["db1"] = fake_index + + # Various document_id scenarios that shouldn't crash + chunks = [ + Chunk(content="has doc_id in metadata", metadata={"document_id": "doc-1"}), + Chunk(content="no doc_id anywhere", metadata={"source": "test"}), + Chunk(content="doc_id in chunk_metadata", chunk_metadata=ChunkMetadata(document_id="doc-3")), + ] + + # Should work without KeyError + await vector_io_adapter.insert_chunks("db1", chunks) + fake_index.insert_chunks.assert_awaited_once() + + async def test_query_chunks_calls_underlying_index_and_returns(vector_io_adapter): expected = QueryChunksResponse(chunks=[Chunk(content="c1")], scores=[0.1]) fake_index = AsyncMock(query_chunks=AsyncMock(return_value=expected)) From 2510bd349eb4b7a2e6516e26d65e1ae4b6828611 Mon Sep 17 00:00:00 2001 From: skamenan7 Date: Wed, 24 Sep 2025 16:07:47 -0400 Subject: [PATCH 2/3] Address PR feedback: optimize logging and encapsulate document_id access - Gate debug logging behind isEnabledFor check to avoid unnecessary computation - Add Chunk.document_id property to safely handle metadata/chunk_metadata extraction - Simplify RAG memory code using new property --- llama_stack/apis/vector_io/vector_io.py | 16 ++++++++++++++++ llama_stack/core/routers/vector_io.py | 14 +++++++------- .../providers/inline/tool_runtime/rag/memory.py | 5 +---- 3 files changed, 24 insertions(+), 11 deletions(-) diff --git a/llama_stack/apis/vector_io/vector_io.py b/llama_stack/apis/vector_io/vector_io.py index 3e8065cfb..4a09a5230 100644 --- a/llama_stack/apis/vector_io/vector_io.py +++ b/llama_stack/apis/vector_io/vector_io.py @@ -91,6 +91,22 @@ class Chunk(BaseModel): return generate_chunk_id(str(uuid.uuid4()), str(self.content)) + @property + def document_id(self) -> str | None: + """Returns the document_id from either metadata or chunk_metadata, with metadata taking precedence.""" + # Check metadata first (takes precedence) + doc_id = self.metadata.get("document_id") + if isinstance(doc_id, str): + return doc_id + + # Fall back to chunk_metadata if available + if self.chunk_metadata is not None: + chunk_doc_id = getattr(self.chunk_metadata, "document_id", None) + if isinstance(chunk_doc_id, str): + return chunk_doc_id + + return None + @json_schema_type class QueryChunksResponse(BaseModel): diff --git a/llama_stack/core/routers/vector_io.py b/llama_stack/core/routers/vector_io.py index ebf99bba9..7d4248107 100644 --- a/llama_stack/core/routers/vector_io.py +++ b/llama_stack/core/routers/vector_io.py @@ -5,6 +5,7 @@ # the root directory of this source tree. import asyncio +import logging import uuid from typing import Any @@ -101,13 +102,12 @@ class VectorIORouter(VectorIO): chunks: list[Chunk], ttl_seconds: int | None = None, ) -> None: - doc_ids = [ - getattr(chunk.chunk_metadata, "document_id", None) if chunk.chunk_metadata else None for chunk in chunks[:3] - ] - logger.debug( - f"VectorIORouter.insert_chunks: {vector_db_id}, {len(chunks)} chunks, " - f"ttl_seconds={ttl_seconds}, chunk_ids={doc_ids}{' and more...' if len(chunks) > 3 else ''}" - ) + if logger.isEnabledFor(logging.DEBUG): + doc_ids = [chunk.document_id for chunk in chunks[:3]] + logger.debug( + f"VectorIORouter.insert_chunks: {vector_db_id}, {len(chunks)} chunks, " + f"ttl_seconds={ttl_seconds}, chunk_ids={doc_ids}{' and more...' if len(chunks) > 3 else ''}" + ) provider = await self.routing_table.get_provider_impl(vector_db_id) await provider.insert_chunks(vector_db_id, chunks, ttl_seconds) diff --git a/llama_stack/providers/inline/tool_runtime/rag/memory.py b/llama_stack/providers/inline/tool_runtime/rag/memory.py index 80eb47573..29685f5bf 100644 --- a/llama_stack/providers/inline/tool_runtime/rag/memory.py +++ b/llama_stack/providers/inline/tool_runtime/rag/memory.py @@ -279,10 +279,7 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRunti return RAGQueryResult( content=picked, metadata={ - "document_ids": [ - c.metadata.get("document_id") or (c.chunk_metadata.document_id if c.chunk_metadata else None) - for c in chunks[: len(picked)] - ], + "document_ids": [c.document_id for c in chunks[: len(picked)]], "chunks": [c.content for c in chunks[: len(picked)]], "scores": scores[: len(picked)], "vector_db_ids": [c.metadata["vector_db_id"] for c in chunks[: len(picked)]], From 1e5780b8dc2a8187b718c8bd383f791fb2da89a6 Mon Sep 17 00:00:00 2001 From: skamenan7 Date: Thu, 25 Sep 2025 14:45:49 -0400 Subject: [PATCH 3/3] fix: add allow-direct-logging comment for debug check Required by project logging rules to use logging.DEBUG constant --- llama_stack/core/routers/vector_io.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llama_stack/core/routers/vector_io.py b/llama_stack/core/routers/vector_io.py index 7d4248107..26469db2d 100644 --- a/llama_stack/core/routers/vector_io.py +++ b/llama_stack/core/routers/vector_io.py @@ -5,7 +5,7 @@ # the root directory of this source tree. import asyncio -import logging +import logging # allow-direct-logging import uuid from typing import Any