fix!: remove chunk_id property from Chunk class (#3954)

# What does this PR do?

The `chunk_id` property in the `Chunk` class executes actual logic to compute a chunk ID.
This sort of logic should not live in the API spec.

Instead, the providers should be in charge of calling `generate_chunk_id`
and passing the result to `Chunk`.

This removes the incorrect dependency between the provider implementation and the API implementation.

Signed-off-by: Charlie Doern <cdoern@redhat.com>
This commit is contained in:
Charlie Doern 2025-10-29 21:59:59 -04:00 committed by GitHub
parent 0ef9166c7e
commit e8ecc99524
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
38 changed files with 40679 additions and 135 deletions

View file

@ -41,6 +41,7 @@ class TestRagQuery:
interleaved_content = MagicMock()
chunk = Chunk(
content=interleaved_content,
chunk_id="chunk1",
metadata={
"key1": "value1",
"token_count": 10,
@ -48,7 +49,6 @@ class TestRagQuery:
# Note this is inserted into `metadata` during MemoryToolRuntimeImpl().insert()
"document_id": "doc1",
},
stored_chunk_id="chunk1",
chunk_metadata=chunk_metadata,
)
@ -101,8 +101,8 @@ class TestRagQuery:
)
chunk1 = Chunk(
content="chunk from db1",
chunk_id="c1",
metadata={"vector_store_id": "db1", "document_id": "doc1"},
stored_chunk_id="c1",
chunk_metadata=chunk_metadata1,
)
@ -114,8 +114,8 @@ class TestRagQuery:
)
chunk2 = Chunk(
content="chunk from db2",
chunk_id="c2",
metadata={"vector_store_id": "db2", "document_id": "doc2"},
stored_chunk_id="c2",
chunk_metadata=chunk_metadata2,
)