mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-07-05 05:35:22 +00:00
using a property for Chunk.chunk_id
Signed-off-by: Francisco Javier Arceo <farceo@redhat.com>
This commit is contained in:
parent
f90fce218e
commit
fa36b672f1
10 changed files with 163 additions and 86 deletions
|
@@ -81,6 +81,7 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRunti
|
|||
chunks = []
|
||||
for doc in documents:
|
||||
content = await content_from_doc(doc)
|
||||
# TODO: we should add enrichment here as URLs won't be added to the metadata by default
|
||||
chunks.extend(
|
||||
make_overlapped_chunks(
|
||||
doc.document_id,
|
||||
|
@@ -161,18 +162,19 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRunti
|
|||
break
|
||||
|
||||
metadata_fields_to_exclude_from_context = [
|
||||
"chunk_tokenizer",
|
||||
"chunk_window",
|
||||
"token_count",
|
||||
"metadata_token_count",
|
||||
"chunk_tokenizer",
|
||||
"chunk_embedding_model",
|
||||
"created_timestamp",
|
||||
"updated_timestamp",
|
||||
"chunk_window",
|
||||
"chunk_tokenizer",
|
||||
"chunk_embedding_model",
|
||||
"chunk_embedding_dimension",
|
||||
"token_count",
|
||||
"content_token_count",
|
||||
"metadata_token_count",
|
||||
]
|
||||
metadata_subset = {k: v for k, v in metadata.items() if k not in metadata_fields_to_exclude_from_context}
|
||||
metadata_subset = {
|
||||
k: v for k, v in metadata.items() if k not in metadata_fields_to_exclude_from_context and v
|
||||
}
|
||||
text_content = query_config.chunk_template.format(index=i + 1, chunk=chunk, metadata=metadata_subset)
|
||||
picked.append(TextContentItem(text=text_content))
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue