feat: Making static prompt values in Rag/File Search configurable in Vector Store Config (#4368)
# What does this PR do?
- Enables users to configure the prompts used throughout File Search / Vector Retrieval
- Configuration is defined in the Vector Stores Config, so it can be modified at runtime
- Backwards compatible: the new fields are optional and default to the previously used values

Below is a summary of the new options in `run.yaml`:
```yaml
vector_stores:
  file_search_params:
    header_template: 'knowledge_search tool found {num_chunks} chunks:\nBEGIN of knowledge_search tool results.\n'
    footer_template: 'END of knowledge_search tool results.\n'
  context_prompt_params:
    chunk_annotation_template: 'Result {index}\nContent: {chunk.content}\nMetadata: {metadata}\n'
    context_template: 'The above results were retrieved to help answer the user''s query: "{query}". Use them as supporting information only in answering this query.{annotation_instruction}\n'
  annotation_prompt_params:
    enable_annotations: true
    annotation_instruction_template: 'Cite sources immediately at the end of sentences before punctuation, using `<|file-id|>` format like ''This is a fact <|file-Cn3MSNn72ENTiiq11Qda4A|>.''. Do not add extra punctuation. Use only the file IDs provided, do not invent new ones.'
    chunk_annotation_template: '[{index}] {metadata_text} cite as <|{file_id}|>\n{chunk_text}\n'
```
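For intuition, here is a minimal sketch of how such templates are interpolated with `str.format` at retrieval time. The chunk data and the `{content}` placeholder are simplified stand-ins (the shipped chunk template uses `{chunk.content}` attribute access); this is not the actual provider code.

```python
# Minimal sketch of template interpolation; hypothetical chunk data.
header_template = "knowledge_search tool found {num_chunks} chunks:\nBEGIN of knowledge_search tool results.\n"
footer_template = "END of knowledge_search tool results.\n"
chunk_template = "Result {index}\nContent: {content}\nMetadata: {metadata}\n"

chunks = [
    {"content": "Llamas are camelids.", "metadata": {"source": "llamas.md"}},
    {"content": "Alpacas are close relatives.", "metadata": {"source": "alpacas.md"}},
]

parts = [header_template.format(num_chunks=len(chunks))]
for i, chunk in enumerate(chunks):
    # {index} is 1-based, matching the placeholder documentation in the diff below
    parts.append(chunk_template.format(index=i + 1, content=chunk["content"], metadata=chunk["metadata"]))
parts.append(footer_template)
print("".join(parts))
```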
## Test Plan
Added tests.
---------
Signed-off-by: Francisco Javier Arceo <farceo@redhat.com>
This commit is contained in: parent 4043dedeea, commit 62005dc1a9
47 changed files with 42640 additions and 40 deletions
```diff
@@ -18,7 +18,15 @@ from llama_stack.core.storage.datatypes import (
     StorageConfig,
 )
 from llama_stack.log import LoggingConfig
-from llama_stack.providers.utils.memory.constants import DEFAULT_QUERY_REWRITE_PROMPT
+from llama_stack.providers.utils.memory.constants import (
+    DEFAULT_ANNOTATION_INSTRUCTION_TEMPLATE,
+    DEFAULT_CHUNK_ANNOTATION_TEMPLATE,
+    DEFAULT_CHUNK_WITH_SOURCES_TEMPLATE,
+    DEFAULT_CONTEXT_TEMPLATE,
+    DEFAULT_FILE_SEARCH_FOOTER_TEMPLATE,
+    DEFAULT_FILE_SEARCH_HEADER_TEMPLATE,
+    DEFAULT_QUERY_REWRITE_PROMPT,
+)
 from llama_stack_api import (
     Api,
     Benchmark,
```
```diff
@@ -371,6 +379,125 @@ class RewriteQueryParams(BaseModel):
         description="Temperature for query expansion model (0.0 = deterministic, 1.0 = creative).",
     )
 
+    @field_validator("prompt")
+    @classmethod
+    def validate_prompt(cls, v: str) -> str:
+        if "{query}" not in v:
+            raise ValueError("prompt must contain {query} placeholder")
+        return v
+
+    @field_validator("max_tokens")
+    @classmethod
+    def validate_max_tokens(cls, v: int) -> int:
+        if v <= 0:
+            raise ValueError("max_tokens must be positive")
+        if v > 4096:
+            raise ValueError("max_tokens should not exceed 4096")
+        return v
+
+    @field_validator("temperature")
+    @classmethod
+    def validate_temperature(cls, v: float) -> float:
+        if v < 0.0 or v > 2.0:
+            raise ValueError("temperature must be between 0.0 and 2.0")
+        return v
+
+
+class FileSearchParams(BaseModel):
+    """Configuration for file search tool output formatting."""
+
+    header_template: str = Field(
+        default=DEFAULT_FILE_SEARCH_HEADER_TEMPLATE,
+        description="Template for the header text shown before search results. Available placeholders: {num_chunks} number of chunks found.",
+    )
+    footer_template: str = Field(
+        default=DEFAULT_FILE_SEARCH_FOOTER_TEMPLATE,
+        description="Template for the footer text shown after search results.",
+    )
+
+    @field_validator("header_template")
+    @classmethod
+    def validate_header_template(cls, v: str) -> str:
+        if len(v) == 0:
+            raise ValueError("header_template must not be empty")
+        if "{num_chunks}" not in v:
+            raise ValueError("header_template must contain {num_chunks} placeholder")
+        if "knowledge_search" not in v.lower():
+            raise ValueError(
+                "header_template must contain 'knowledge_search' keyword to ensure proper tool identification"
+            )
+        return v
+
+
+class ContextPromptParams(BaseModel):
+    """Configuration for LLM prompt content and chunk formatting."""
+
+    chunk_annotation_template: str = Field(
+        default=DEFAULT_CHUNK_ANNOTATION_TEMPLATE,
+        description="Template for formatting individual chunks in search results. Available placeholders: {index} 1-based chunk index, {chunk.content} chunk content, {metadata} chunk metadata dict.",
+    )
+    context_template: str = Field(
+        default=DEFAULT_CONTEXT_TEMPLATE,
+        description="Template for explaining the search results to the model. Available placeholders: {query} user's query, {num_chunks} number of chunks.",
+    )
+
+    @field_validator("chunk_annotation_template")
+    @classmethod
+    def validate_chunk_annotation_template(cls, v: str) -> str:
+        if len(v) == 0:
+            raise ValueError("chunk_annotation_template must not be empty")
+        if "{chunk.content}" not in v:
+            raise ValueError("chunk_annotation_template must contain {chunk.content} placeholder")
+        if "{index}" not in v:
+            raise ValueError("chunk_annotation_template must contain {index} placeholder")
+        return v
+
+    @field_validator("context_template")
+    @classmethod
+    def validate_context_template(cls, v: str) -> str:
+        if len(v) == 0:
+            raise ValueError("context_template must not be empty")
+        if "{query}" not in v:
+            raise ValueError("context_template must contain {query} placeholder")
+        return v
+
+
+class AnnotationPromptParams(BaseModel):
+    """Configuration for source annotation and attribution features."""
+
+    enable_annotations: bool = Field(
+        default=True,
+        description="Whether to include annotation information in results.",
+    )
+    annotation_instruction_template: str = Field(
+        default=DEFAULT_ANNOTATION_INSTRUCTION_TEMPLATE,
+        description="Instructions for how the model should cite sources. Used when enable_annotations is True.",
+    )
+    chunk_annotation_template: str = Field(
+        default=DEFAULT_CHUNK_WITH_SOURCES_TEMPLATE,
+        description="Template for chunks with annotation information. Available placeholders: {index} 1-based chunk index, {metadata_text} formatted metadata, {file_id} document identifier, {chunk_text} chunk content.",
+    )
+
+    @field_validator("chunk_annotation_template")
+    @classmethod
+    def validate_chunk_annotation_template(cls, v: str) -> str:
+        if len(v) == 0:
+            raise ValueError("chunk_annotation_template must not be empty")
+        if "{index}" not in v:
+            raise ValueError("chunk_annotation_template must contain {index} placeholder")
+        if "{chunk_text}" not in v:
+            raise ValueError("chunk_annotation_template must contain {chunk_text} placeholder")
+        if "{file_id}" not in v:
+            raise ValueError("chunk_annotation_template must contain {file_id} placeholder")
+        return v
+
+    @field_validator("annotation_instruction_template")
+    @classmethod
+    def validate_annotation_instruction_template(cls, v: str) -> str:
+        if len(v) == 0:
+            raise ValueError("annotation_instruction_template must not be empty")
+        return v
+
+
 class VectorStoresConfig(BaseModel):
     """Configuration for vector stores in the stack."""
 
```
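For illustration, this is roughly how the new validators behave at config-load time (assumes pydantic v2 and that the `FileSearchParams` class defined above is importable; hypothetical usage, not a test from the PR):

```python
from pydantic import ValidationError

# Rejected: the template has {num_chunks} but lacks the 'knowledge_search' keyword.
try:
    FileSearchParams(header_template="Found {num_chunks} chunks.\n")
except ValidationError as err:
    print(err)

# Accepted: contains both {num_chunks} and 'knowledge_search'.
ok = FileSearchParams(header_template="knowledge_search found {num_chunks} chunks.\n")
```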
```diff
@@ -387,6 +514,18 @@ class VectorStoresConfig(BaseModel):
         default=None,
         description="Parameters for query rewriting/expansion. None disables query rewriting.",
     )
+    file_search_params: FileSearchParams = Field(
+        default_factory=FileSearchParams,
+        description="Configuration for file search tool output formatting.",
+    )
+    context_prompt_params: ContextPromptParams = Field(
+        default_factory=ContextPromptParams,
+        description="Configuration for LLM prompt content and chunk formatting.",
+    )
+    annotation_prompt_params: AnnotationPromptParams = Field(
+        default_factory=AnnotationPromptParams,
+        description="Configuration for source annotation and attribution features.",
+    )
 
 
 class SafetyConfig(BaseModel):
```
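Because each new field uses `default_factory`, constructing the config with no overrides reproduces the legacy prompts, which is what keeps the change backwards compatible. A sketch, assuming `VectorStoresConfig` as defined above with its remaining fields optional:

```python
# Hypothetical usage of the config defined above: no overrides supplied.
cfg = VectorStoresConfig()
assert "knowledge_search" in cfg.file_search_params.header_template
assert cfg.annotation_prompt_params.enable_annotations is True
```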
```diff
@@ -3,6 +3,7 @@
 #
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
+import importlib
 import importlib.metadata
 import inspect
```
```diff
@@ -406,13 +407,17 @@ async def instantiate_provider(
         args = [provider_spec.api, inner_impls, deps, dist_registry, policy]
     else:
         method = "get_provider_impl"
+        provider_config = provider.config.copy()
 
+        # Inject vector_stores_config for providers that need it (introspection-based)
         config_type = instantiate_class_type(provider_spec.config_class)
-        config = config_type(**provider.config)
+        if hasattr(config_type, "__fields__") and "vector_stores_config" in config_type.__fields__:
+            provider_config["vector_stores_config"] = run_config.vector_stores
+
+        config = config_type(**provider_config)
         args = [config, deps]
         if "policy" in inspect.signature(getattr(module, method)).parameters:
             args.append(policy)
 
     fn = getattr(module, method)
     impl = await fn(*args)
     impl.__provider_id__ = provider.provider_id
```
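The injection above is purely introspection-based: any provider whose config class declares a `vector_stores_config` field receives the run-level value, with no per-provider wiring. A standalone sketch of the pattern, using a hypothetical `ExampleProviderConfig`:

```python
from pydantic import BaseModel


class ExampleProviderConfig(BaseModel):
    url: str
    vector_stores_config: dict | None = None  # declaring this field opts the provider in


provider_config = {"url": "http://localhost:8000"}
run_level_value = {"file_search_params": {}}  # stand-in for run_config.vector_stores

# Mirrors the __fields__ membership check in the diff above.
if "vector_stores_config" in ExampleProviderConfig.__fields__:
    provider_config["vector_stores_config"] = run_level_value

config = ExampleProviderConfig(**provider_config)
```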
```diff
@@ -229,8 +229,6 @@ async def validate_vector_stores_config(vector_stores_config: VectorStoresConfig
     if vector_stores_config.rewrite_query_params:
         if vector_stores_config.rewrite_query_params.model:
             await _validate_rewrite_query_model(vector_stores_config.rewrite_query_params.model, impls)
-        if "{query}" not in vector_stores_config.rewrite_query_params.prompt:
-            raise ValueError("'{query}' placeholder is required in the prompt template")
 
 
 async def _validate_embedding_model(embedding_model: QualifiedModel, impls: dict[Api, Any]) -> None:
```
```diff
@@ -277,5 +277,38 @@ vector_stores:
   default_embedding_model:
     provider_id: sentence-transformers
     model_id: nomic-ai/nomic-embed-text-v1.5
+  file_search_params:
+    header_template: 'knowledge_search tool found {num_chunks} chunks:
+
+      BEGIN of knowledge_search tool results.
+
+      '
+    footer_template: 'END of knowledge_search tool results.
+
+      '
+  context_prompt_params:
+    chunk_annotation_template: 'Result {index}
+
+      Content: {chunk.content}
+
+      Metadata: {metadata}
+
+      '
+    context_template: 'The above results were retrieved to help answer the user''s
+      query: "{query}". Use them as supporting information only in answering this
+      query.{annotation_instruction}
+
+      '
+  annotation_prompt_params:
+    enable_annotations: true
+    annotation_instruction_template: ' Cite sources immediately at the end of sentences
+      before punctuation, using `<|file-id|>` format like ''This is a fact <|file-Cn3MSNn72ENTiiq11Qda4A|>.''.
+      Do not add extra punctuation. Use only the file IDs provided, do not invent
+      new ones.'
+    chunk_annotation_template: '[{index}] {metadata_text} cite as <|{file_id}|>
+
+      {chunk_text}
+
+      '
 safety:
   default_shield_id: llama-guard
```
```diff
@@ -286,5 +286,38 @@ vector_stores:
   default_embedding_model:
     provider_id: sentence-transformers
     model_id: nomic-ai/nomic-embed-text-v1.5
+  file_search_params:
+    header_template: 'knowledge_search tool found {num_chunks} chunks:
+
+      BEGIN of knowledge_search tool results.
+
+      '
+    footer_template: 'END of knowledge_search tool results.
+
+      '
+  context_prompt_params:
+    chunk_annotation_template: 'Result {index}
+
+      Content: {chunk.content}
+
+      Metadata: {metadata}
+
+      '
+    context_template: 'The above results were retrieved to help answer the user''s
+      query: "{query}". Use them as supporting information only in answering this
+      query.{annotation_instruction}
+
+      '
+  annotation_prompt_params:
+    enable_annotations: true
+    annotation_instruction_template: ' Cite sources immediately at the end of sentences
+      before punctuation, using `<|file-id|>` format like ''This is a fact <|file-Cn3MSNn72ENTiiq11Qda4A|>.''.
+      Do not add extra punctuation. Use only the file IDs provided, do not invent
+      new ones.'
+    chunk_annotation_template: '[{index}] {metadata_text} cite as <|{file_id}|>
+
+      {chunk_text}
+
+      '
 safety:
   default_shield_id: llama-guard
```
```diff
@@ -280,5 +280,38 @@ vector_stores:
   default_embedding_model:
     provider_id: sentence-transformers
    model_id: nomic-ai/nomic-embed-text-v1.5
+  file_search_params:
+    header_template: 'knowledge_search tool found {num_chunks} chunks:
+
+      BEGIN of knowledge_search tool results.
+
+      '
+    footer_template: 'END of knowledge_search tool results.
+
+      '
+  context_prompt_params:
+    chunk_annotation_template: 'Result {index}
+
+      Content: {chunk.content}
+
+      Metadata: {metadata}
+
+      '
+    context_template: 'The above results were retrieved to help answer the user''s
+      query: "{query}". Use them as supporting information only in answering this
+      query.{annotation_instruction}
+
+      '
+  annotation_prompt_params:
+    enable_annotations: true
+    annotation_instruction_template: ' Cite sources immediately at the end of sentences
+      before punctuation, using `<|file-id|>` format like ''This is a fact <|file-Cn3MSNn72ENTiiq11Qda4A|>.''.
+      Do not add extra punctuation. Use only the file IDs provided, do not invent
+      new ones.'
+    chunk_annotation_template: '[{index}] {metadata_text} cite as <|{file_id}|>
+
+      {chunk_text}
+
+      '
 safety:
   default_shield_id: llama-guard
```
```diff
@@ -289,5 +289,38 @@ vector_stores:
   default_embedding_model:
     provider_id: sentence-transformers
     model_id: nomic-ai/nomic-embed-text-v1.5
+  file_search_params:
+    header_template: 'knowledge_search tool found {num_chunks} chunks:
+
+      BEGIN of knowledge_search tool results.
+
+      '
+    footer_template: 'END of knowledge_search tool results.
+
+      '
+  context_prompt_params:
+    chunk_annotation_template: 'Result {index}
+
+      Content: {chunk.content}
+
+      Metadata: {metadata}
+
+      '
+    context_template: 'The above results were retrieved to help answer the user''s
+      query: "{query}". Use them as supporting information only in answering this
+      query.{annotation_instruction}
+
+      '
+  annotation_prompt_params:
+    enable_annotations: true
+    annotation_instruction_template: ' Cite sources immediately at the end of sentences
+      before punctuation, using `<|file-id|>` format like ''This is a fact <|file-Cn3MSNn72ENTiiq11Qda4A|>.''.
+      Do not add extra punctuation. Use only the file IDs provided, do not invent
+      new ones.'
+    chunk_annotation_template: '[{index}] {metadata_text} cite as <|{file_id}|>
+
+      {chunk_text}
+
+      '
 safety:
   default_shield_id: llama-guard
```
```diff
@@ -277,5 +277,38 @@ vector_stores:
   default_embedding_model:
     provider_id: sentence-transformers
     model_id: nomic-ai/nomic-embed-text-v1.5
+  file_search_params:
+    header_template: 'knowledge_search tool found {num_chunks} chunks:
+
+      BEGIN of knowledge_search tool results.
+
+      '
+    footer_template: 'END of knowledge_search tool results.
+
+      '
+  context_prompt_params:
+    chunk_annotation_template: 'Result {index}
+
+      Content: {chunk.content}
+
+      Metadata: {metadata}
+
+      '
+    context_template: 'The above results were retrieved to help answer the user''s
+      query: "{query}". Use them as supporting information only in answering this
+      query.{annotation_instruction}
+
+      '
+  annotation_prompt_params:
+    enable_annotations: true
+    annotation_instruction_template: ' Cite sources immediately at the end of sentences
+      before punctuation, using `<|file-id|>` format like ''This is a fact <|file-Cn3MSNn72ENTiiq11Qda4A|>.''.
+      Do not add extra punctuation. Use only the file IDs provided, do not invent
+      new ones.'
+    chunk_annotation_template: '[{index}] {metadata_text} cite as <|{file_id}|>
+
+      {chunk_text}
+
+      '
 safety:
   default_shield_id: llama-guard
```
```diff
@@ -286,5 +286,38 @@ vector_stores:
   default_embedding_model:
     provider_id: sentence-transformers
     model_id: nomic-ai/nomic-embed-text-v1.5
+  file_search_params:
+    header_template: 'knowledge_search tool found {num_chunks} chunks:
+
+      BEGIN of knowledge_search tool results.
+
+      '
+    footer_template: 'END of knowledge_search tool results.
+
+      '
+  context_prompt_params:
+    chunk_annotation_template: 'Result {index}
+
+      Content: {chunk.content}
+
+      Metadata: {metadata}
+
+      '
+    context_template: 'The above results were retrieved to help answer the user''s
+      query: "{query}". Use them as supporting information only in answering this
+      query.{annotation_instruction}
+
+      '
+  annotation_prompt_params:
+    enable_annotations: true
+    annotation_instruction_template: ' Cite sources immediately at the end of sentences
+      before punctuation, using `<|file-id|>` format like ''This is a fact <|file-Cn3MSNn72ENTiiq11Qda4A|>.''.
+      Do not add extra punctuation. Use only the file IDs provided, do not invent
+      new ones.'
+    chunk_annotation_template: '[{index}] {metadata_text} cite as <|{file_id}|>
+
+      {chunk_text}
+
+      '
 safety:
   default_shield_id: llama-guard
```
```diff
@@ -78,6 +78,7 @@ class MetaReferenceAgentsImpl(Agents):
             conversations_api=self.conversations_api,
             prompts_api=self.prompts_api,
             files_api=self.files_api,
+            vector_stores_config=self.config.vector_stores_config,
         )
 
     async def shutdown(self) -> None:
```
```diff
@@ -6,8 +6,9 @@
 
 from typing import Any
 
-from pydantic import BaseModel
+from pydantic import BaseModel, Field
 
+from llama_stack.core.datatypes import VectorStoresConfig
 from llama_stack.core.storage.datatypes import KVStoreReference, ResponsesStoreReference
 
 
```
```diff
@@ -20,6 +21,10 @@ class AgentPersistenceConfig(BaseModel):
 
 class MetaReferenceAgentsImplConfig(BaseModel):
     persistence: AgentPersistenceConfig
+    vector_stores_config: VectorStoresConfig | None = Field(
+        default=None,
+        description="Configuration for vector store prompt templates and behavior",
+    )
 
     @classmethod
     def sample_run_config(cls, __distro_dir__: str) -> dict[str, Any]:
```
```diff
@@ -80,6 +80,7 @@ class OpenAIResponsesImpl:
         conversations_api: Conversations,
         prompts_api: Prompts,
         files_api: Files,
+        vector_stores_config=None,
     ):
         self.inference_api = inference_api
         self.tool_groups_api = tool_groups_api
```
```diff
@@ -92,6 +93,7 @@ class OpenAIResponsesImpl:
             tool_groups_api=tool_groups_api,
             tool_runtime_api=tool_runtime_api,
             vector_io_api=vector_io_api,
+            vector_stores_config=vector_stores_config,
         )
         self.prompts_api = prompts_api
         self.files_api = files_api
```
```diff
@@ -12,6 +12,10 @@ from typing import Any
 from opentelemetry import trace
 
 from llama_stack.log import get_logger
+from llama_stack.providers.utils.memory.constants import (
+    DEFAULT_ANNOTATION_INSTRUCTION_TEMPLATE,
+    DEFAULT_CHUNK_WITH_SOURCES_TEMPLATE,
+)
 from llama_stack_api import (
     ImageContentItem,
     OpenAIChatCompletionContentPartImageParam,
```
```diff
@@ -52,10 +56,12 @@ class ToolExecutor:
         tool_groups_api: ToolGroups,
         tool_runtime_api: ToolRuntime,
         vector_io_api: VectorIO,
+        vector_stores_config=None,
     ):
         self.tool_groups_api = tool_groups_api
         self.tool_runtime_api = tool_runtime_api
         self.vector_io_api = vector_io_api
+        self.vector_stores_config = vector_stores_config
 
     async def execute_tool_call(
         self,
```
```diff
@@ -148,13 +154,33 @@ class ToolExecutor:
         for results in all_results:
             search_results.extend(results)
 
-        content_items = []
-        content_items.append(
-            TextContentItem(
-                text=f"knowledge_search tool found {len(search_results)} chunks:\nBEGIN of knowledge_search tool results.\n"
-            )
-        )
+        # Get templates from vector stores config, fallback to constants
+
+        # Check if annotations are enabled
+        enable_annotations = (
+            self.vector_stores_config
+            and self.vector_stores_config.annotation_prompt_params
+            and self.vector_stores_config.annotation_prompt_params.enable_annotations
+        )
+
+        # Get templates
+        header_template = self.vector_stores_config.file_search_params.header_template
+        footer_template = self.vector_stores_config.file_search_params.footer_template
+        context_template = self.vector_stores_config.context_prompt_params.context_template
+
+        # Get annotation templates (use defaults if annotations disabled)
+        if enable_annotations:
+            chunk_annotation_template = self.vector_stores_config.annotation_prompt_params.chunk_annotation_template
+            annotation_instruction_template = (
+                self.vector_stores_config.annotation_prompt_params.annotation_instruction_template
+            )
+        else:
+            chunk_annotation_template = DEFAULT_CHUNK_WITH_SOURCES_TEMPLATE
+            annotation_instruction_template = DEFAULT_ANNOTATION_INSTRUCTION_TEMPLATE
+
+        content_items = []
+        content_items.append(TextContentItem(text=header_template.format(num_chunks=len(search_results))))
 
         unique_files = set()
         for i, result_item in enumerate(search_results):
            chunk_text = result_item.content[0].text if result_item.content else ""
```
```diff
@@ -166,22 +192,23 @@ class ToolExecutor:
             if result_item.attributes:
                 metadata_text += f", attributes: {result_item.attributes}"
 
-            text_content = f"[{i + 1}] {metadata_text} (cite as <|{file_id}|>)\n{chunk_text}\n"
+            text_content = chunk_annotation_template.format(
+                index=i + 1, metadata_text=metadata_text, file_id=file_id, chunk_text=chunk_text
+            )
             content_items.append(TextContentItem(text=text_content))
             unique_files.add(file_id)
 
-        content_items.append(TextContentItem(text="END of knowledge_search tool results.\n"))
+        content_items.append(TextContentItem(text=footer_template))
 
-        citation_instruction = ""
+        annotation_instruction = ""
         if unique_files:
-            citation_instruction = (
-                " Cite sources immediately at the end of sentences before punctuation, using `<|file-id|>` format (e.g., 'This is a fact <|file-Cn3MSNn72ENTiiq11Qda4A|>.'). "
-                "Do not add extra punctuation. Use only the file IDs provided (do not invent new ones)."
-            )
+            annotation_instruction = annotation_instruction_template
 
         content_items.append(
             TextContentItem(
-                text=f'The above results were retrieved to help answer the user\'s query: "{query}". Use them as supporting information only in answering this query.{citation_instruction}\n',
+                text=context_template.format(
+                    query=query, num_chunks=len(search_results), annotation_instruction=annotation_instruction
+                )
             )
         )
 
```
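One subtlety in the flow above: the annotation instruction is appended to the context template only when at least one file ID was actually collected while formatting chunks. A simplified sketch with hypothetical template values:

```python
# Hypothetical templates; the real defaults live in memory/constants.py below.
annotation_instruction_template = " Cite sources using `<|file-id|>` format."
context_template = 'The above results were retrieved for the query: "{query}".{annotation_instruction}\n'

unique_files = {"file-abc123"}  # file IDs collected while formatting chunks
annotation_instruction = annotation_instruction_template if unique_files else ""
print(context_template.format(query="tell me about llamas", annotation_instruction=annotation_instruction))
```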
```diff
@@ -6,10 +6,17 @@
 
 from typing import Any
 
-from pydantic import BaseModel
+from pydantic import BaseModel, Field
+
+from llama_stack.core.datatypes import VectorStoresConfig
 
 
 class RagToolRuntimeConfig(BaseModel):
+    vector_stores_config: VectorStoresConfig = Field(
+        default_factory=VectorStoresConfig,
+        description="Configuration for vector store prompt templates and behavior",
+    )
+
     @classmethod
     def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> dict[str, Any]:
         return {}
```
```diff
@@ -221,11 +221,15 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime):
             chunks = chunks[: query_config.max_chunks]
 
         tokens = 0
-        picked: list[InterleavedContentItem] = [
-            TextContentItem(
-                text=f"knowledge_search tool found {len(chunks)} chunks:\nBEGIN of knowledge_search tool results.\n"
-            )
-        ]
+
+        # Get templates from vector stores config
+        vector_stores_config = self.config.vector_stores_config
+        header_template = vector_stores_config.file_search_params.header_template
+        footer_template = vector_stores_config.file_search_params.footer_template
+        chunk_template = vector_stores_config.context_prompt_params.chunk_annotation_template
+        context_template = vector_stores_config.context_prompt_params.context_template
+
+        picked: list[InterleavedContentItem] = [TextContentItem(text=header_template.format(num_chunks=len(chunks)))]
         for i, chunk in enumerate(chunks):
             metadata = chunk.metadata
             tokens += metadata.get("token_count", 0)
```
```diff
@@ -255,13 +259,13 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime):
                 if k not in metadata_keys_to_exclude_from_context:
                     metadata_for_context[k] = metadata[k]
 
-            text_content = query_config.chunk_template.format(index=i + 1, chunk=chunk, metadata=metadata_for_context)
+            text_content = chunk_template.format(index=i + 1, chunk=chunk, metadata=metadata_for_context)
             picked.append(TextContentItem(text=text_content))
 
-        picked.append(TextContentItem(text="END of knowledge_search tool results.\n"))
+        picked.append(TextContentItem(text=footer_template))
         picked.append(
             TextContentItem(
-                text=f'The above results were retrieved to help answer the user\'s query: "{interleaved_content_as_str(content)}". Use them as supporting information only in answering this query.\n',
+                text=context_template.format(query=interleaved_content_as_str(content), annotation_instruction="")
             )
         )
 
```
```diff
@@ -6,3 +6,17 @@
 
 # Default prompt template for query rewriting in vector search
 DEFAULT_QUERY_REWRITE_PROMPT = "Expand this query with relevant synonyms and related terms. Return only the improved query, no explanations:\n\n{query}\n\nImproved query:"
+
+# Default templates for file search tool output formatting
+DEFAULT_FILE_SEARCH_HEADER_TEMPLATE = (
+    "knowledge_search tool found {num_chunks} chunks:\nBEGIN of knowledge_search tool results.\n"
+)
+DEFAULT_FILE_SEARCH_FOOTER_TEMPLATE = "END of knowledge_search tool results.\n"
+
+# Default templates for LLM prompt content and chunk formatting
+DEFAULT_CHUNK_ANNOTATION_TEMPLATE = "Result {index}\nContent: {chunk.content}\nMetadata: {metadata}\n"
+DEFAULT_CONTEXT_TEMPLATE = 'The above results were retrieved to help answer the user\'s query: "{query}". Use them as supporting information only in answering this query.{annotation_instruction}\n'
+
+# Default templates for source annotation and attribution features
+DEFAULT_ANNOTATION_INSTRUCTION_TEMPLATE = " Cite sources immediately at the end of sentences before punctuation, using `<|file-id|>` format like 'This is a fact <|file-Cn3MSNn72ENTiiq11Qda4A|>.'. Do not add extra punctuation. Use only the file IDs provided, do not invent new ones."
+DEFAULT_CHUNK_WITH_SOURCES_TEMPLATE = "[{index}] {metadata_text} cite as <|{file_id}|>\n{chunk_text}\n"
```
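Since these constants are only defaults, a deployment can override a single template and inherit the rest. A sketch, assuming `VectorStoresConfig` and the constants above are importable:

```python
# Hypothetical partial override: only the footer changes; everything else
# keeps the legacy value defined by the constants above.
cfg = VectorStoresConfig(
    file_search_params={"footer_template": "END of knowledge_search tool results. Answer concisely.\n"}
)
assert cfg.context_prompt_params.context_template == DEFAULT_CONTEXT_TEMPLATE
```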