Add exception handling for serializing the metadata

Signed-off-by: Francisco Javier Arceo <farceo@redhat.com>
This commit is contained in:
Francisco Javier Arceo 2025-05-10 21:25:17 -04:00
parent 54d28d13e9
commit dc8e25c02b
2 changed files with 25 additions and 1 deletions

View file

@ -144,7 +144,12 @@ def make_overlapped_chunks(
) -> list[Chunk]:
tokenizer = Tokenizer.get_instance()
tokens = tokenizer.encode(text, bos=False, eos=False)
metadata_tokens = tokenizer.encode(str(metadata), bos=False, eos=False)
try:
metadata_string = str(metadata)
except Exception as e:
raise ValueError("Failed to serialize metadata to string") from e
metadata_tokens = tokenizer.encode(metadata_string, bos=False, eos=False)
chunks = []
for i in range(0, len(tokens), window_len - overlap_len):

View file

@ -108,3 +108,22 @@ class TestVectorStore:
assert isinstance(chunk.metadata["token_count"], int)
assert chunk.metadata["token_count"] > 0
assert chunk.metadata["metadata_token_count"] == len_metadata_tokens
def test_raise_overlapped_chunks_metadata_serialization_error(self):
    """Unstringifiable metadata must surface as a chained ``ValueError``.

    Passes ``make_overlapped_chunks`` a metadata dict holding a value whose
    ``__repr__`` raises ``TypeError``, then checks that the call raises a
    ``ValueError`` with the expected message and that the original
    ``TypeError`` is preserved as its ``__cause__``.
    """

    class BadMetadata:
        # str() of the enclosing dict calls repr() on each value, so raising
        # here makes the metadata impossible to convert to a string.
        def __repr__(self):
            raise TypeError("Cannot convert to string")

    problematic_metadata = {"bad_metadata_example": BadMetadata()}
    document_id, text = "test_doc_ex", "Some text"
    window_len, overlap_len = 5, 2

    with pytest.raises(ValueError) as excinfo:
        make_overlapped_chunks(document_id, text, window_len, overlap_len, problematic_metadata)

    # Inspect the wrapper first, then the exception it was raised from.
    raised = excinfo.value
    assert str(raised) == "Failed to serialize metadata to string"
    root_cause = raised.__cause__
    assert isinstance(root_cause, TypeError)
    assert str(root_cause) == "Cannot convert to string"