fix!: remove chunk_id property from Chunk class (#3954)

# What does this PR do? The `chunk_id` property in the `Chunk` class executes actual logic to compute a chunk ID. This sort of logic should not live in the API spec. Instead, the providers should be in charge of calling `generate_chunk_id` and passing the result to `Chunk`. This removes the incorrect dependency between the Provider impl and the API impl. Signed-off-by: Charlie Doern <cdoern@redhat.com>
2025-12-03 09:53:45 +00:00 · 2025-10-29 21:59:59 -04:00 · 2025-10-29 21:59:59 -04:00 · e8ecc99524
commit e8ecc99524
parent 0ef9166c7e
38 changed files with 40679 additions and 135 deletions
--- a/tests/unit/rag/test_rag_query.py
+++ b/tests/unit/rag/test_rag_query.py
@ -41,6 +41,7 @@ class TestRagQuery:
        interleaved_content = MagicMock()
        chunk = Chunk(
            content=interleaved_content,
+            chunk_id="chunk1",
            metadata={
                "key1": "value1",
                "token_count": 10,
@ -48,7 +49,6 @@ class TestRagQuery:
                # Note this is inserted into `metadata` during MemoryToolRuntimeImpl().insert()
                "document_id": "doc1",
            },
-            stored_chunk_id="chunk1",
            chunk_metadata=chunk_metadata,
        )

@ -101,8 +101,8 @@ class TestRagQuery:
        )
        chunk1 = Chunk(
            content="chunk from db1",
+            chunk_id="c1",
            metadata={"vector_store_id": "db1", "document_id": "doc1"},
-            stored_chunk_id="c1",
            chunk_metadata=chunk_metadata1,
        )

@ -114,8 +114,8 @@ class TestRagQuery:
        )
        chunk2 = Chunk(
            content="chunk from db2",
+            chunk_id="c2",
            metadata={"vector_store_id": "db2", "document_id": "doc2"},
-            stored_chunk_id="c2",
            chunk_metadata=chunk_metadata2,
        )