diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html
index 0c82fc670..5df6db20c 100644
--- a/docs/_static/llama-stack-spec.html
+++ b/docs/_static/llama-stack-spec.html
@@ -11310,7 +11310,7 @@
"chunk_template": {
"type": "string",
"default": "Result {index}\nContent: {chunk.content}\nMetadata: {metadata}\n",
- "description": "Template for formatting each retrieved chunk in the context."
+ "description": "Template for formatting each retrieved chunk in the context. Available placeholders: {index} (1-based chunk ordinal), {chunk.content} (chunk content string), {metadata} (chunk metadata dict). Default: \"Result {index}\\nContent: {chunk.content}\\nMetadata: {metadata}\\n\""
}
},
"additionalProperties": false,
diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml
index c9de5316c..fb2dbf241 100644
--- a/docs/_static/llama-stack-spec.yaml
+++ b/docs/_static/llama-stack-spec.yaml
@@ -7812,7 +7812,10 @@ components:
Metadata: {metadata}
description: >-
- Template for formatting each retrieved chunk in the context.
+ Template for formatting each retrieved chunk in the context. Available
+ placeholders: {index} (1-based chunk ordinal), {chunk.content} (chunk
+ content string), {metadata} (chunk metadata dict). Default: "Result {index}\nContent:
+ {chunk.content}\nMetadata: {metadata}\n"
additionalProperties: false
required:
- query_generator_config
diff --git a/llama_stack/apis/tools/rag_tool.py b/llama_stack/apis/tools/rag_tool.py
index f20a882e7..de3e4c62c 100644
--- a/llama_stack/apis/tools/rag_tool.py
+++ b/llama_stack/apis/tools/rag_tool.py
@@ -74,6 +74,8 @@ class RAGQueryConfig(BaseModel):
:param max_tokens_in_context: Maximum number of tokens in the context.
:param max_chunks: Maximum number of chunks to retrieve.
:param chunk_template: Template for formatting each retrieved chunk in the context.
+ Available placeholders: {index} (1-based chunk ordinal), {chunk.content} (chunk content string), {metadata} (chunk metadata dict).
+ Default: "Result {index}\\nContent: {chunk.content}\\nMetadata: {metadata}\\n"
"""
# This config defines how a query is generated using the messages
@@ -81,8 +83,6 @@ class RAGQueryConfig(BaseModel):
query_generator_config: RAGQueryGeneratorConfig = Field(default=DefaultRAGQueryGeneratorConfig())
max_tokens_in_context: int = 4096
max_chunks: int = 5
- # Optional template for formatting each retrieved chunk in the context.
- # Available placeholders: {index} (1-based chunk ordinal), {metadata} (chunk metadata dict), {chunk.content} (chunk content string).
chunk_template: str = "Result {index}\nContent: {chunk.content}\nMetadata: {metadata}\n"
@field_validator("chunk_template")
diff --git a/tests/unit/rag/test_vector_store.py b/tests/unit/rag/test_vector_store.py
index 611ede9d4..f97808a6d 100644
--- a/tests/unit/rag/test_vector_store.py
+++ b/tests/unit/rag/test_vector_store.py
@@ -93,7 +93,6 @@ class TestVectorStore:
chunks = make_overlapped_chunks(document_id, text, window_len, overlap_len, original_metadata)
assert len(chunks) == expected_chunks
- print(len(chunks), expected_chunks)
# Check that each chunk has the right metadata
for chunk in chunks: