fix!: remove chunk_id property from Chunk class (#3954)

# What does this PR do? The `chunk_id` property in the `Chunk` class executes actual logic to compute a chunk ID. This sort of logic should not live in the API spec. Instead, providers should be in charge of calling `generate_chunk_id` and passing the result to `Chunk`. This removes the incorrect dependency between the provider implementation and the API implementation. Signed-off-by: Charlie Doern <cdoern@redhat.com>
2025-12-03 09:53:45 +00:00 · 2025-10-29 21:59:59 -04:00 · 2025-10-29 21:59:59 -04:00 · e8ecc99524
commit e8ecc99524
parent 0ef9166c7e
38 changed files with 40679 additions and 135 deletions
--- a/tests/unit/providers/vector_io/conftest.py
+++ b/tests/unit/providers/vector_io/conftest.py
@ -43,9 +43,15 @@ def embedding_dimension() -> int:
@pytest.fixture(scope="session")
 def sample_chunks():
    """Generates chunks that force multiple batches for a single document to expose ID conflicts."""
+    from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id
+
    n, k = 10, 3
    sample = [
-        Chunk(content=f"Sentence {i} from document {j}", metadata={"document_id": f"document-{j}"})
+        Chunk(
+            content=f"Sentence {i} from document {j}",
+            chunk_id=generate_chunk_id(f"document-{j}", f"Sentence {i} from document {j}"),
+            metadata={"document_id": f"document-{j}"},
+        )
        for j in range(k)
        for i in range(n)
    ]
@ -53,6 +59,7 @@ def sample_chunks():
        [
            Chunk(
                content=f"Sentence {i} from document {j + k}",
+                chunk_id=f"document-{j}-chunk-{i}",
                chunk_metadata=ChunkMetadata(
                    document_id=f"document-{j + k}",
                    chunk_id=f"document-{j}-chunk-{i}",
@ -73,6 +80,7 @@ def sample_chunks_with_metadata():
    sample = [
        Chunk(
            content=f"Sentence {i} from document {j}",
+            chunk_id=f"document-{j}-chunk-{i}",
            metadata={"document_id": f"document-{j}"},
            chunk_metadata=ChunkMetadata(
                document_id=f"document-{j}",