Switch to configuring the entire chunk template

Signed-off-by: Francisco Javier Arceo <farceo@redhat.com>
This commit is contained in:
Francisco Javier Arceo 2025-05-13 22:47:35 -04:00
parent 2e70782e63
commit 66f7b42795
7 changed files with 58 additions and 28 deletions

View file

@ -7,7 +7,7 @@
from enum import Enum
from typing import Annotated, Any, Literal
from pydantic import BaseModel, Field
from pydantic import BaseModel, Field, field_validator
from typing_extensions import Protocol, runtime_checkable
from llama_stack.apis.common.content_types import URL, InterleavedContent
@ -72,7 +72,19 @@ class RAGQueryConfig(BaseModel):
query_generator_config: RAGQueryGeneratorConfig = Field(default=DefaultRAGQueryGeneratorConfig())
max_tokens_in_context: int = 4096
max_chunks: int = 5
include_metadata_in_content: bool = False
# Optional template for formatting each retrieved chunk in the context.
# Available placeholders: {index} (1-based chunk ordinal, required), {chunk.content} (chunk content string, required), {metadata} (chunk metadata dict, optional).
chunk_template: str = "Result {index}\nContent: {chunk.content}\nMetadata: {metadata}\n"
@field_validator("chunk_template")
def validate_chunk_template(cls, v: str) -> str:
    """Validate the ``chunk_template`` value.

    The template must be non-empty and must contain both the
    ``{chunk.content}`` and ``{index}`` placeholders.

    :param v: the candidate template string.
    :returns: ``v`` unchanged when it passes every check.
    :raises ValueError: if the template is empty or is missing a required
        placeholder.
    """
    # Check emptiness first: an empty string contains no placeholders, so
    # running the placeholder checks first would make this branch
    # unreachable and report a misleading "must contain" error instead.
    if not v:
        raise ValueError("chunk_template must not be empty")
    if "{chunk.content}" not in v:
        raise ValueError("chunk_template must contain {chunk.content}")
    if "{index}" not in v:
        raise ValueError("chunk_template must contain {index}")
    return v
@runtime_checkable

View file

@ -146,8 +146,7 @@ class MemoryToolRuntimeImpl(ToolsProtocolPrivate, ToolRuntime, RAGToolRuntime):
for i, chunk in enumerate(chunks):
metadata = chunk.metadata
tokens += metadata["token_count"]
if query_config.include_metadata_in_content:
tokens += metadata["metadata_token_count"]
tokens += metadata["metadata_token_count"]
if tokens > query_config.max_tokens_in_context:
log.error(
@ -155,15 +154,9 @@ class MemoryToolRuntimeImpl(ToolsProtocolPrivate, ToolRuntime, RAGToolRuntime):
)
break
text_content = f"Result {i + 1}:\n"
if query_config.include_metadata_in_content:
metadata_subset = {
k: v for k, v in metadata.items() if k not in ["token_count", "metadata_token_count"]
}
text_content += f"\nMetadata: {metadata_subset}"
else:
text_content += f"Document_id:{metadata['document_id'][:5]}"
text_content += f"\nContent: {chunk.content}\n"
# text_content = f"Result {i + 1}:\n"
metadata_subset = {k: v for k, v in metadata.items() if k not in ["token_count", "metadata_token_count"]}
text_content = query_config.chunk_template.format(index=i + 1, chunk=chunk, metadata=metadata_subset)
picked.append(TextContentItem(text=text_content))
picked.append(TextContentItem(text="END of knowledge_search tool results.\n"))