mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-12-12 12:06:04 +00:00
Add type validation for document_id in metadata
Address PR feedback to validate document_id type when accessed from metadata dict. Since metadata is dict[str, Any], we now fail fast with a clear TypeError when document_id exists but isn't a string, rather than silently returning None. Also removed redundant isinstance check for chunk_metadata.document_id since Pydantic already guarantees it's str | None. Added test coverage for the new validation behavior.
This commit is contained in:
parent
fb4dfef14d
commit
54e06be75d
2 changed files with 17 additions and 5 deletions
|
|
@ -96,14 +96,14 @@ class Chunk(BaseModel):
|
|||
"""Returns the document_id from either metadata or chunk_metadata, with metadata taking precedence."""
|
||||
# Check metadata first (takes precedence)
|
||||
doc_id = self.metadata.get("document_id")
|
||||
if isinstance(doc_id, str):
|
||||
if doc_id is not None:
|
||||
if not isinstance(doc_id, str):
|
||||
raise TypeError(f"metadata['document_id'] must be a string, got {type(doc_id).__name__}: {doc_id!r}")
|
||||
return doc_id
|
||||
|
||||
# Fall back to chunk_metadata if available
|
||||
# Fall back to chunk_metadata if available (Pydantic ensures type safety)
|
||||
if self.chunk_metadata is not None:
|
||||
chunk_doc_id = getattr(self.chunk_metadata, "document_id", None)
|
||||
if isinstance(chunk_doc_id, str):
|
||||
return chunk_doc_id
|
||||
return self.chunk_metadata.document_id
|
||||
|
||||
return None
|
||||
|
||||
|
|
|
|||
|
|
@ -132,6 +132,18 @@ async def test_insert_chunks_with_missing_document_id(vector_io_adapter):
|
|||
fake_index.insert_chunks.assert_awaited_once()
|
||||
|
||||
|
||||
async def test_document_id_with_invalid_type_raises_error():
|
||||
"""Ensure TypeError is raised when document_id is not a string."""
|
||||
from llama_stack.apis.vector_io import Chunk
|
||||
|
||||
# Integer document_id should raise TypeError
|
||||
chunk = Chunk(content="test", metadata={"document_id": 12345})
|
||||
with pytest.raises(TypeError) as exc_info:
|
||||
_ = chunk.document_id
|
||||
assert "metadata['document_id'] must be a string" in str(exc_info.value)
|
||||
assert "got int" in str(exc_info.value)
|
||||
|
||||
|
||||
async def test_query_chunks_calls_underlying_index_and_returns(vector_io_adapter):
|
||||
expected = QueryChunksResponse(chunks=[Chunk(content="c1")], scores=[0.1])
|
||||
fake_index = AsyncMock(query_chunks=AsyncMock(return_value=expected))
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue