mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-12-28 02:01:59 +00:00
feat: Adding support for metadata in RAG insertion and querying
Signed-off-by: Francisco Javier Arceo <farceo@redhat.com>
This commit is contained in:
parent
473a07f624
commit
e50a546bc0
8 changed files with 149 additions and 25 deletions
|
|
@@ -87,6 +87,7 @@ class MemoryToolRuntimeImpl(ToolsProtocolPrivate, ToolRuntime, RAGToolRuntime):
|
|||
content,
|
||||
chunk_size_in_tokens,
|
||||
chunk_size_in_tokens // 4,
|
||||
doc.metadata,
|
||||
)
|
||||
)
|
||||
|
||||
|
|
@@ -140,19 +141,29 @@ class MemoryToolRuntimeImpl(ToolsProtocolPrivate, ToolRuntime, RAGToolRuntime):
|
|||
text=f"knowledge_search tool found {len(chunks)} chunks:\nBEGIN of knowledge_search tool results.\n"
|
||||
)
|
||||
]
|
||||
for i, c in enumerate(chunks):
|
||||
metadata = c.metadata
|
||||
for i, chunk in enumerate(chunks):
|
||||
metadata = chunk.metadata
|
||||
tokens += metadata["token_count"]
|
||||
if query_config.include_metadata_in_content:
|
||||
tokens += metadata["metadata_token_count"]
|
||||
|
||||
if tokens > query_config.max_tokens_in_context:
|
||||
log.error(
|
||||
f"Using {len(picked)} chunks; reached max tokens in context: {tokens}",
|
||||
)
|
||||
break
|
||||
picked.append(
|
||||
TextContentItem(
|
||||
text=f"Result {i + 1}:\nDocument_id:{metadata['document_id'][:5]}\nContent: {c.content}\n",
|
||||
)
|
||||
)
|
||||
|
||||
text_content = f"Result {i + 1}:\n"
|
||||
if query_config.include_metadata_in_content:
|
||||
metadata_subset = {
|
||||
k: v for k, v in metadata.items() if k not in ["token_count", "metadata_token_count"]
|
||||
}
|
||||
text_content += f"\nMetadata: {metadata_subset}"
|
||||
else:
|
||||
text_content += f"Document_id:{metadata['document_id'][:5]}"
|
||||
text_content += f"\nContent: {chunk.content}\n"
|
||||
picked.append(TextContentItem(text=text_content))
|
||||
|
||||
picked.append(TextContentItem(text="END of knowledge_search tool results.\n"))
|
||||
picked.append(
|
||||
TextContentItem(
|
||||
|
|
|
|||
|
|
@@ -139,22 +139,27 @@ async def content_from_doc(doc: RAGDocument) -> str:
|
|||
return interleaved_content_as_str(doc.content)
|
||||
|
||||
|
||||
def make_overlapped_chunks(document_id: str, text: str, window_len: int, overlap_len: int) -> list[Chunk]:
|
||||
def make_overlapped_chunks(
|
||||
document_id: str, text: str, window_len: int, overlap_len: int, metadata: dict[str, Any]
|
||||
) -> list[Chunk]:
|
||||
tokenizer = Tokenizer.get_instance()
|
||||
tokens = tokenizer.encode(text, bos=False, eos=False)
|
||||
metadata_tokens = tokenizer.encode(str(metadata), bos=False, eos=False)
|
||||
|
||||
chunks = []
|
||||
for i in range(0, len(tokens), window_len - overlap_len):
|
||||
toks = tokens[i : i + window_len]
|
||||
chunk = tokenizer.decode(toks)
|
||||
chunk_metadata = metadata.copy()
|
||||
chunk_metadata["document_id"] = document_id
|
||||
chunk_metadata["token_count"] = len(toks)
|
||||
chunk_metadata["metadata_token_count"] = len(metadata_tokens)
|
||||
|
||||
# chunk is a string
|
||||
chunks.append(
|
||||
Chunk(
|
||||
content=chunk,
|
||||
metadata={
|
||||
"token_count": len(toks),
|
||||
"document_id": document_id,
|
||||
},
|
||||
metadata=chunk_metadata,
|
||||
)
|
||||
)
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue