diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html index 0c82fc670..5df6db20c 100644 --- a/docs/_static/llama-stack-spec.html +++ b/docs/_static/llama-stack-spec.html @@ -11310,7 +11310,7 @@ "chunk_template": { "type": "string", "default": "Result {index}\nContent: {chunk.content}\nMetadata: {metadata}\n", - "description": "Template for formatting each retrieved chunk in the context." + "description": "Template for formatting each retrieved chunk in the context. Available placeholders: {index} (1-based chunk ordinal), {chunk.content} (chunk content string), {metadata} (chunk metadata dict). Default: \"Result {index}\\nContent: {chunk.content}\\nMetadata: {metadata}\\n\"" } }, "additionalProperties": false, diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml index c9de5316c..fb2dbf241 100644 --- a/docs/_static/llama-stack-spec.yaml +++ b/docs/_static/llama-stack-spec.yaml @@ -7812,7 +7812,10 @@ components: Metadata: {metadata} description: >- - Template for formatting each retrieved chunk in the context. + Template for formatting each retrieved chunk in the context. Available + placeholders: {index} (1-based chunk ordinal), {chunk.content} (chunk + content string), {metadata} (chunk metadata dict). Default: "Result {index}\nContent: + {chunk.content}\nMetadata: {metadata}\n" additionalProperties: false required: - query_generator_config diff --git a/llama_stack/apis/tools/rag_tool.py b/llama_stack/apis/tools/rag_tool.py index f20a882e7..de3e4c62c 100644 --- a/llama_stack/apis/tools/rag_tool.py +++ b/llama_stack/apis/tools/rag_tool.py @@ -74,6 +74,8 @@ class RAGQueryConfig(BaseModel): :param max_tokens_in_context: Maximum number of tokens in the context. :param max_chunks: Maximum number of chunks to retrieve. :param chunk_template: Template for formatting each retrieved chunk in the context. + Available placeholders: {index} (1-based chunk ordinal), {chunk.content} (chunk content string), {metadata} (chunk metadata dict). + Default: "Result {index}\\nContent: {chunk.content}\\nMetadata: {metadata}\\n" """ # This config defines how a query is generated using the messages @@ -81,8 +83,6 @@ class RAGQueryConfig(BaseModel): query_generator_config: RAGQueryGeneratorConfig = Field(default=DefaultRAGQueryGeneratorConfig()) max_tokens_in_context: int = 4096 max_chunks: int = 5 - # Optional template for formatting each retrieved chunk in the context. - # Available placeholders: {index} (1-based chunk ordinal), {metadata} (chunk metadata dict), {chunk.content} (chunk content string). chunk_template: str = "Result {index}\nContent: {chunk.content}\nMetadata: {metadata}\n" @field_validator("chunk_template") diff --git a/tests/unit/rag/test_vector_store.py b/tests/unit/rag/test_vector_store.py index 611ede9d4..f97808a6d 100644 --- a/tests/unit/rag/test_vector_store.py +++ b/tests/unit/rag/test_vector_store.py @@ -93,7 +93,6 @@ class TestVectorStore: chunks = make_overlapped_chunks(document_id, text, window_len, overlap_len, original_metadata) assert len(chunks) == expected_chunks - print(len(chunks), expected_chunks) # Check that each chunk has the right metadata for chunk in chunks: