feat: Making static prompt values in Rag/File Search configurable in Vector Store Config (#4368)

# What does this PR do?

- Enables users to configure prompts used throughout the File Search /
Vector Retrieval
- Configuration is defined in the Vector Stores Config so they can be
modified at runtime
- Backwards compatible, which means the fields are optional and default
to the previously used values

This is the summary of the new options in the `run.yaml`
```yaml
vector_stores:
  file_search_params:
    header_template: 'knowledge_search tool found {num_chunks} chunks:\nBEGIN of knowledge_search tool results.\n'
    footer_template: 'END of knowledge_search tool results.\n'
  context_prompt_params:
    chunk_annotation_template: 'Result {index}\nContent: {chunk.content}\nMetadata: {metadata}\n'
    context_template: 'The above results were retrieved to help answer the user\'s query: "{query}". Use them as supporting information only in answering this query.{annotation_instruction}\n'
  annotation_prompt_params:
    enable_annotations: true
    annotation_instruction_template: 'Cite sources immediately at the end of sentences before punctuation, using `<|file-id|>` format like \'This is a fact <|file-Cn3MSNn72ENTiiq11Qda4A|>.\'. Do not add extra punctuation. Use only the file IDs provided, do not invent new ones.'
    chunk_annotation_template: '[{index}] {metadata_text} cite as <|{file_id}|>\n{chunk_text}\n'
```

<!-- If resolving an issue, uncomment and update the line below -->
<!-- Closes #[issue-number] -->

## Test Plan
Added tests.

---------

Signed-off-by: Francisco Javier Arceo <farceo@redhat.com>
This commit is contained in:
Francisco Javier Arceo 2025-12-15 11:39:01 -05:00 committed by GitHub
parent 4043dedeea
commit 62005dc1a9
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
47 changed files with 42640 additions and 40 deletions

View file

@ -8,21 +8,26 @@ from unittest.mock import AsyncMock, MagicMock
import pytest
from llama_stack.providers.inline.tool_runtime.rag.config import RagToolRuntimeConfig
from llama_stack.providers.inline.tool_runtime.rag.memory import MemoryToolRuntimeImpl
from llama_stack_api import Chunk, ChunkMetadata, QueryChunksResponse, RAGQueryConfig
class TestRagQuery:
async def test_query_raises_on_empty_vector_store_ids(self):
    """query() must reject an empty vector_store_ids list with ValueError.

    Uses a real RagToolRuntimeConfig (not a MagicMock) so the new
    configurable-template defaults are exercised during construction.
    """
    # NOTE: the flattened diff carried both the old line (config=MagicMock())
    # and the new line (config=config); only the new one is kept here —
    # keeping both would be a duplicate-keyword SyntaxError.
    config = RagToolRuntimeConfig()
    rag_tool = MemoryToolRuntimeImpl(
        config=config, vector_io_api=MagicMock(), inference_api=MagicMock(), files_api=MagicMock()
    )
    with pytest.raises(ValueError):
        await rag_tool.query(content=MagicMock(), vector_store_ids=[])
async def test_query_chunk_metadata_handling(self):
# Create config with default templates
config = RagToolRuntimeConfig()
rag_tool = MemoryToolRuntimeImpl(
config=MagicMock(), vector_io_api=MagicMock(), inference_api=MagicMock(), files_api=MagicMock()
config=config, vector_io_api=MagicMock(), inference_api=MagicMock(), files_api=MagicMock()
)
content = "test query content"
vector_store_ids = ["db1"]
@ -33,9 +38,8 @@ class TestRagQuery:
source="test_source",
metadata_token_count=5,
)
interleaved_content = MagicMock()
chunk = Chunk(
content=interleaved_content,
content="This is test chunk content from document 1",
chunk_id="chunk1",
metadata={
"key1": "value1",
@ -78,8 +82,11 @@ class TestRagQuery:
RAGQueryConfig(mode="wrong_mode")
async def test_query_adds_vector_store_id_to_chunk_metadata(self):
# Create config with default templates
config = RagToolRuntimeConfig()
rag_tool = MemoryToolRuntimeImpl(
config=MagicMock(),
config=config,
vector_io_api=MagicMock(),
inference_api=MagicMock(),
files_api=MagicMock(),