mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-12-03 09:53:45 +00:00
fix!: remove chunk_id property from Chunk class (#3954)
# What does this PR do? The `chunk_id` property in the `Chunk` class executes actual logic to compute a chunk ID. This sort of logic should not live in the API spec. Instead, the providers should be in charge of calling `generate_chunk_id` and passing the result to `Chunk`. This removes the incorrect dependency between the provider implementation and the API implementation. Signed-off-by: Charlie Doern <cdoern@redhat.com>
This commit is contained in:
parent
0ef9166c7e
commit
e8ecc99524
38 changed files with 40679 additions and 135 deletions
|
|
@ -43,9 +43,15 @@ def embedding_dimension() -> int:
|
|||
@pytest.fixture(scope="session")
|
||||
def sample_chunks():
|
||||
"""Generates chunks that force multiple batches for a single document to expose ID conflicts."""
|
||||
from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id
|
||||
|
||||
n, k = 10, 3
|
||||
sample = [
|
||||
Chunk(content=f"Sentence {i} from document {j}", metadata={"document_id": f"document-{j}"})
|
||||
Chunk(
|
||||
content=f"Sentence {i} from document {j}",
|
||||
chunk_id=generate_chunk_id(f"document-{j}", f"Sentence {i} from document {j}"),
|
||||
metadata={"document_id": f"document-{j}"},
|
||||
)
|
||||
for j in range(k)
|
||||
for i in range(n)
|
||||
]
|
||||
|
|
@ -53,6 +59,7 @@ def sample_chunks():
|
|||
[
|
||||
Chunk(
|
||||
content=f"Sentence {i} from document {j + k}",
|
||||
chunk_id=f"document-{j}-chunk-{i}",
|
||||
chunk_metadata=ChunkMetadata(
|
||||
document_id=f"document-{j + k}",
|
||||
chunk_id=f"document-{j}-chunk-{i}",
|
||||
|
|
@ -73,6 +80,7 @@ def sample_chunks_with_metadata():
|
|||
sample = [
|
||||
Chunk(
|
||||
content=f"Sentence {i} from document {j}",
|
||||
chunk_id=f"document-{j}-chunk-{i}",
|
||||
metadata={"document_id": f"document-{j}"},
|
||||
chunk_metadata=ChunkMetadata(
|
||||
document_id=f"document-{j}",
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue