feat: Making static prompt values in Rag/File Search configurable in Vector Store Config (#4368)

# What does this PR do?

- Enables users to configure prompts used throughout the File Search /
Vector Retrieval
- Configuration is defined in the Vector Stores Config so they can be
modified at runtime
- Backwards compatible, which means the fields are optional and default
to the previously used values

This is the summary of the new options in the `run.yaml`
```yaml
vector_stores:
  file_search_params:
    header_template: 'knowledge_search tool found {num_chunks} chunks:\nBEGIN of knowledge_search tool results.\n'
    footer_template: 'END of knowledge_search tool results.\n'
  context_prompt_params:
    chunk_annotation_template: 'Result {index}\nContent: {chunk.content}\nMetadata: {metadata}\n'
    context_template: 'The above results were retrieved to help answer the user\'s query: "{query}". Use them as supporting information only in answering this query.{annotation_instruction}\n'
  annotation_prompt_params:
    enable_annotations: true
    annotation_instruction_template: 'Cite sources immediately at the end of sentences before punctuation, using `<|file-id|>` format like \'This is a fact <|file-Cn3MSNn72ENTiiq11Qda4A|>.\'. Do not add extra punctuation. Use only the file IDs provided, do not invent new ones.'
    chunk_annotation_template: '[{index}] {metadata_text} cite as <|{file_id}|>\n{chunk_text}\n'
```

<!-- If resolving an issue, uncomment and update the line below -->
<!-- Closes #[issue-number] -->

## Test Plan
Added tests.

---------

Signed-off-by: Francisco Javier Arceo <farceo@redhat.com>
This commit is contained in:
Francisco Javier Arceo 2025-12-15 11:39:01 -05:00 committed by GitHub
parent 4043dedeea
commit 62005dc1a9
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
47 changed files with 42640 additions and 40 deletions

View file

@ -8,21 +8,26 @@ from unittest.mock import AsyncMock, MagicMock
import pytest
from llama_stack.providers.inline.tool_runtime.rag.config import RagToolRuntimeConfig
from llama_stack.providers.inline.tool_runtime.rag.memory import MemoryToolRuntimeImpl
from llama_stack_api import Chunk, ChunkMetadata, QueryChunksResponse, RAGQueryConfig
class TestRagQuery:
async def test_query_raises_on_empty_vector_store_ids(self):
    """query() must reject an empty vector_store_ids list with ValueError.

    Uses a real RagToolRuntimeConfig (not a MagicMock) so the new
    configurable-template defaults are exercised during construction.
    """
    # NOTE: the flattened diff carried both the old line (config=MagicMock())
    # and the new line (config=config); only the new one is kept here —
    # keeping both would be a duplicate-keyword SyntaxError.
    config = RagToolRuntimeConfig()
    rag_tool = MemoryToolRuntimeImpl(
        config=config, vector_io_api=MagicMock(), inference_api=MagicMock(), files_api=MagicMock()
    )
    with pytest.raises(ValueError):
        await rag_tool.query(content=MagicMock(), vector_store_ids=[])
async def test_query_chunk_metadata_handling(self):
# Create config with default templates
config = RagToolRuntimeConfig()
rag_tool = MemoryToolRuntimeImpl(
config=MagicMock(), vector_io_api=MagicMock(), inference_api=MagicMock(), files_api=MagicMock()
config=config, vector_io_api=MagicMock(), inference_api=MagicMock(), files_api=MagicMock()
)
content = "test query content"
vector_store_ids = ["db1"]
@ -33,9 +38,8 @@ class TestRagQuery:
source="test_source",
metadata_token_count=5,
)
interleaved_content = MagicMock()
chunk = Chunk(
content=interleaved_content,
content="This is test chunk content from document 1",
chunk_id="chunk1",
metadata={
"key1": "value1",
@ -78,8 +82,11 @@ class TestRagQuery:
RAGQueryConfig(mode="wrong_mode")
async def test_query_adds_vector_store_id_to_chunk_metadata(self):
# Create config with default templates
config = RagToolRuntimeConfig()
rag_tool = MemoryToolRuntimeImpl(
config=MagicMock(),
config=config,
vector_io_api=MagicMock(),
inference_api=MagicMock(),
files_api=MagicMock(),