feat: Make static prompt values in RAG/File Search configurable in Vector Store Config (#4368)
# What does this PR do?
- Enables users to configure the prompts used throughout File Search / Vector Retrieval
- Defines the configuration in the Vector Stores config, so prompts can be modified at runtime
- Backwards compatible: the new fields are optional and default to the previously used values
Here is a summary of the new options in `run.yaml`:
```yaml
vector_stores:
  file_search_params:
    header_template: 'knowledge_search tool found {num_chunks} chunks:\nBEGIN of knowledge_search tool results.\n'
    footer_template: 'END of knowledge_search tool results.\n'
  context_prompt_params:
    chunk_annotation_template: 'Result {index}\nContent: {chunk.content}\nMetadata: {metadata}\n'
    context_template: 'The above results were retrieved to help answer the user''s query: "{query}". Use them as supporting information only in answering this query.{annotation_instruction}\n'
  annotation_prompt_params:
    enable_annotations: true
    annotation_instruction_template: 'Cite sources immediately at the end of sentences before punctuation, using `<|file-id|>` format like ''This is a fact <|file-Cn3MSNn72ENTiiq11Qda4A|>.''. Do not add extra punctuation. Use only the file IDs provided, do not invent new ones.'
    chunk_annotation_template: '[{index}] {metadata_text} cite as <|{file_id}|>\n{chunk_text}\n'
```
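For orientation, here is a minimal sketch of how these templates plausibly compose into the final retrieval context at query time. The `render_file_search_context` helper is illustrative only, not the provider's actual code, and it assumes the placeholders are filled with Python's `str.format`:

```python
from types import SimpleNamespace

# Defaults copied from the run.yaml above; every value is overridable at runtime.
HEADER = "knowledge_search tool found {num_chunks} chunks:\nBEGIN of knowledge_search tool results.\n"
FOOTER = "END of knowledge_search tool results.\n"
CHUNK_TEMPLATE = "Result {index}\nContent: {chunk.content}\nMetadata: {metadata}\n"
CONTEXT_TEMPLATE = (
    'The above results were retrieved to help answer the user\'s query: "{query}". '
    "Use them as supporting information only in answering this query.{annotation_instruction}\n"
)


def render_file_search_context(chunks, query, annotation_instruction=""):
    # Header first, then one rendered block per retrieved chunk, then footer and context.
    parts = [HEADER.format(num_chunks=len(chunks))]
    for index, chunk in enumerate(chunks, start=1):
        # "{chunk.content}" works because str.format supports attribute access.
        parts.append(CHUNK_TEMPLATE.format(index=index, chunk=chunk, metadata=chunk.metadata))
    parts.append(FOOTER)
    parts.append(CONTEXT_TEMPLATE.format(query=query, annotation_instruction=annotation_instruction))
    return "".join(parts)


chunks = [SimpleNamespace(content="Llamas are camelids.", metadata={"document_id": "doc-1"})]
print(render_file_search_context(chunks, "What are llamas?"))
```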
## Test Plan
Added tests.
---------
Signed-off-by: Francisco Javier Arceo <farceo@redhat.com>
parent 4043dedeea
commit 62005dc1a9
47 changed files with 42640 additions and 40 deletions
Excerpt from the updated RAG tool unit tests:

```diff
@@ -8,21 +8,26 @@ from unittest.mock import AsyncMock, MagicMock
 
 import pytest
 
+from llama_stack.providers.inline.tool_runtime.rag.config import RagToolRuntimeConfig
 from llama_stack.providers.inline.tool_runtime.rag.memory import MemoryToolRuntimeImpl
 from llama_stack_api import Chunk, ChunkMetadata, QueryChunksResponse, RAGQueryConfig
 
 
 class TestRagQuery:
     async def test_query_raises_on_empty_vector_store_ids(self):
+        config = RagToolRuntimeConfig()
         rag_tool = MemoryToolRuntimeImpl(
-            config=MagicMock(), vector_io_api=MagicMock(), inference_api=MagicMock(), files_api=MagicMock()
+            config=config, vector_io_api=MagicMock(), inference_api=MagicMock(), files_api=MagicMock()
         )
         with pytest.raises(ValueError):
             await rag_tool.query(content=MagicMock(), vector_store_ids=[])
 
     async def test_query_chunk_metadata_handling(self):
+        # Create config with default templates
+        config = RagToolRuntimeConfig()
+
         rag_tool = MemoryToolRuntimeImpl(
-            config=MagicMock(), vector_io_api=MagicMock(), inference_api=MagicMock(), files_api=MagicMock()
+            config=config, vector_io_api=MagicMock(), inference_api=MagicMock(), files_api=MagicMock()
         )
         content = "test query content"
         vector_store_ids = ["db1"]
@@ -33,9 +38,8 @@ class TestRagQuery:
             source="test_source",
             metadata_token_count=5,
         )
-        interleaved_content = MagicMock()
         chunk = Chunk(
-            content=interleaved_content,
+            content="This is test chunk content from document 1",
             chunk_id="chunk1",
             metadata={
                 "key1": "value1",
@@ -78,8 +82,11 @@ class TestRagQuery:
             RAGQueryConfig(mode="wrong_mode")
 
     async def test_query_adds_vector_store_id_to_chunk_metadata(self):
+        # Create config with default templates
+        config = RagToolRuntimeConfig()
+
         rag_tool = MemoryToolRuntimeImpl(
-            config=MagicMock(),
+            config=config,
             vector_io_api=MagicMock(),
             inference_api=MagicMock(),
             files_api=MagicMock(),
```
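The recurring pattern in the diff — constructing a real `RagToolRuntimeConfig` instead of a `MagicMock`, so the default prompt templates actually resolve — generalizes to new tests. A minimal sketch: the import paths come from the diff above, while the `pytest.mark.asyncio` marker and the specific test name are assumptions about the suite's setup:

```python
from unittest.mock import MagicMock

import pytest

from llama_stack.providers.inline.tool_runtime.rag.config import RagToolRuntimeConfig
from llama_stack.providers.inline.tool_runtime.rag.memory import MemoryToolRuntimeImpl


@pytest.mark.asyncio  # assumption: async tests run via pytest-asyncio
async def test_query_with_default_templates_rejects_empty_stores():
    # A real config object, so template defaults resolve instead of MagicMock attributes.
    rag_tool = MemoryToolRuntimeImpl(
        config=RagToolRuntimeConfig(),
        vector_io_api=MagicMock(),
        inference_api=MagicMock(),
        files_api=MagicMock(),
    )
    with pytest.raises(ValueError):
        await rag_tool.query(content="any query", vector_store_ids=[])
```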