diff --git a/llama_stack/apis/vector_io/vector_io.py b/llama_stack/apis/vector_io/vector_io.py index 4a09a5230..1b16d2832 100644 --- a/llama_stack/apis/vector_io/vector_io.py +++ b/llama_stack/apis/vector_io/vector_io.py @@ -96,14 +96,14 @@ class Chunk(BaseModel): """Returns the document_id from either metadata or chunk_metadata, with metadata taking precedence.""" # Check metadata first (takes precedence) doc_id = self.metadata.get("document_id") - if isinstance(doc_id, str): + if doc_id is not None: + if not isinstance(doc_id, str): + raise TypeError(f"metadata['document_id'] must be a string, got {type(doc_id).__name__}: {doc_id!r}") return doc_id - # Fall back to chunk_metadata if available + # Fall back to chunk_metadata if available (Pydantic ensures type safety) if self.chunk_metadata is not None: - chunk_doc_id = getattr(self.chunk_metadata, "document_id", None) - if isinstance(chunk_doc_id, str): - return chunk_doc_id + return self.chunk_metadata.document_id return None diff --git a/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py b/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py index 12f1fb946..3c1981f49 100644 --- a/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py +++ b/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py @@ -132,6 +132,18 @@ async def test_insert_chunks_with_missing_document_id(vector_io_adapter): fake_index.insert_chunks.assert_awaited_once() +async def test_document_id_with_invalid_type_raises_error(): + """Ensure TypeError is raised when document_id is not a string.""" + from llama_stack.apis.vector_io import Chunk + + # Integer document_id should raise TypeError + chunk = Chunk(content="test", metadata={"document_id": 12345}) + with pytest.raises(TypeError) as exc_info: + _ = chunk.document_id + assert "metadata['document_id'] must be a string" in str(exc_info.value) + assert "got int" in str(exc_info.value) + + async def test_query_chunks_calls_underlying_index_and_returns(vector_io_adapter): expected = QueryChunksResponse(chunks=[Chunk(content="c1")], scores=[0.1]) fake_index = AsyncMock(query_chunks=AsyncMock(return_value=expected))