mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-08-01 16:24:44 +00:00
Add exception handling for serializing the metadata
Signed-off-by: Francisco Javier Arceo <farceo@redhat.com>
This commit is contained in:
parent
54d28d13e9
commit
dc8e25c02b
2 changed files with 25 additions and 1 deletions
|
@@ -144,7 +144,12 @@ def make_overlapped_chunks(
|
|||
) -> list[Chunk]:
|
||||
tokenizer = Tokenizer.get_instance()
|
||||
tokens = tokenizer.encode(text, bos=False, eos=False)
|
||||
metadata_tokens = tokenizer.encode(str(metadata), bos=False, eos=False)
|
||||
try:
|
||||
metadata_string = str(metadata)
|
||||
except Exception as e:
|
||||
raise ValueError("Failed to serialize metadata to string") from e
|
||||
|
||||
metadata_tokens = tokenizer.encode(metadata_string, bos=False, eos=False)
|
||||
|
||||
chunks = []
|
||||
for i in range(0, len(tokens), window_len - overlap_len):
|
||||
|
|
|
@@ -108,3 +108,22 @@ class TestVectorStore:
|
|||
assert isinstance(chunk.metadata["token_count"], int)
|
||||
assert chunk.metadata["token_count"] > 0
|
||||
assert chunk.metadata["metadata_token_count"] == len_metadata_tokens
|
||||
|
||||
def test_raise_overlapped_chunks_metadata_serialization_error(self):
    """Check that unserializable metadata surfaces as a chained ValueError.

    The metadata dict holds a value whose ``__repr__`` raises ``TypeError``,
    so converting the dict to a string fails. The call is expected to raise
    ``ValueError`` with a fixed message and to keep the original
    ``TypeError`` as its ``__cause__``.
    """

    class BadMetadata:
        # str() on the enclosing dict calls repr() on each value,
        # which triggers this failure.
        def __repr__(self):
            raise TypeError("Cannot convert to string")

    with pytest.raises(ValueError) as excinfo:
        make_overlapped_chunks(
            "test_doc_ex",
            "Some text",
            5,
            2,
            {"bad_metadata_example": BadMetadata()},
        )

    raised = excinfo.value
    root_cause = raised.__cause__
    assert str(raised) == "Failed to serialize metadata to string"
    assert isinstance(root_cause, TypeError)
    assert str(root_cause) == "Cannot convert to string"
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue