Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-12-21 05:32:26 +00:00)
feat: Enhance Vector Stores config with full configurations (#4397)
# What does this PR do?
Enhances the Vector Stores config with the full set of appropriate configurations:

- Add FileIngestionParams, ChunkRetrievalParams, and FileBatchParams subconfigs
- Update RAG memory, the OpenAI vector store mixin, and vector store utils to use the configuration
- Fix import organization across vector store components
- Add comprehensive vector stores configuration documentation
- Update docs navigation to include the vector store configuration guide
- Delete `memory/constants.py` and move constant values directly into the Pydantic models

## Test Plan
Tests updated + CI

---------

Signed-off-by: Francisco Javier Arceo <farceo@redhat.com>
This commit is contained in:
parent a7d509aaf9
commit 2d149e3d2d
22 changed files with 3249 additions and 110 deletions
@@ -18,15 +18,6 @@ from llama_stack.core.storage.datatypes import (
     StorageConfig,
 )
 from llama_stack.log import LoggingConfig
-from llama_stack.providers.utils.memory.constants import (
-    DEFAULT_ANNOTATION_INSTRUCTION_TEMPLATE,
-    DEFAULT_CHUNK_ANNOTATION_TEMPLATE,
-    DEFAULT_CHUNK_WITH_SOURCES_TEMPLATE,
-    DEFAULT_CONTEXT_TEMPLATE,
-    DEFAULT_FILE_SEARCH_FOOTER_TEMPLATE,
-    DEFAULT_FILE_SEARCH_HEADER_TEMPLATE,
-    DEFAULT_QUERY_REWRITE_PROMPT,
-)
 from llama_stack_api import (
     Api,
     Benchmark,
@@ -367,7 +358,7 @@ class RewriteQueryParams(BaseModel):
         description="LLM model for query rewriting/expansion in vector search.",
     )
     prompt: str = Field(
-        default=DEFAULT_QUERY_REWRITE_PROMPT,
+        default="Expand this query with relevant synonyms and related terms. Return only the improved query, no explanations:\n\n{query}\n\nImproved query:",
         description="Prompt template for query rewriting. Use {query} as placeholder for the original query.",
     )
     max_tokens: int = Field(
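The inlined default is a plain `str.format` template whose only placeholder is `{query}`. A minimal sketch of how such a template would be expanded before being sent to the rewriting model (the helper name below is illustrative, not the actual call site):

```python
# Sketch: expanding the query-rewrite prompt template (illustrative only).
DEFAULT_PROMPT = (
    "Expand this query with relevant synonyms and related terms. "
    "Return only the improved query, no explanations:\n\n{query}\n\nImproved query:"
)

def build_rewrite_prompt(query: str, template: str = DEFAULT_PROMPT) -> str:
    # {query} is the only placeholder the template defines.
    return template.format(query=query)

print(build_rewrite_prompt("vector store chunk overlap"))
```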
@@ -407,11 +398,11 @@ class FileSearchParams(BaseModel):
     """Configuration for file search tool output formatting."""
 
     header_template: str = Field(
-        default=DEFAULT_FILE_SEARCH_HEADER_TEMPLATE,
+        default="knowledge_search tool found {num_chunks} chunks:\nBEGIN of knowledge_search tool results.\n",
         description="Template for the header text shown before search results. Available placeholders: {num_chunks} number of chunks found.",
     )
     footer_template: str = Field(
-        default=DEFAULT_FILE_SEARCH_FOOTER_TEMPLATE,
+        default="END of knowledge_search tool results.\n",
         description="Template for the footer text shown after search results.",
     )
 
@@ -433,11 +424,11 @@ class ContextPromptParams(BaseModel):
     """Configuration for LLM prompt content and chunk formatting."""
 
     chunk_annotation_template: str = Field(
-        default=DEFAULT_CHUNK_ANNOTATION_TEMPLATE,
+        default="Result {index}\nContent: {chunk.content}\nMetadata: {metadata}\n",
         description="Template for formatting individual chunks in search results. Available placeholders: {index} 1-based chunk index, {chunk.content} chunk content, {metadata} chunk metadata dict.",
     )
     context_template: str = Field(
-        default=DEFAULT_CONTEXT_TEMPLATE,
+        default='The above results were retrieved to help answer the user\'s query: "{query}". Use them as supporting information only in answering this query. {annotation_instruction}\n',
         description="Template for explaining the search results to the model. Available placeholders: {query} user's query, {num_chunks} number of chunks.",
     )
 
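Both defaults are likewise ordinary format strings, including attribute access via `{chunk.content}`. A rough standalone sketch of rendering one chunk and the context line with these defaults (the chunk object is a stand-in for the real chunk type):

```python
# Sketch: rendering one chunk and the context line (stand-in chunk object).
from types import SimpleNamespace

CHUNK_TEMPLATE = "Result {index}\nContent: {chunk.content}\nMetadata: {metadata}\n"
CONTEXT_TEMPLATE = (
    'The above results were retrieved to help answer the user\'s query: "{query}". '
    "Use them as supporting information only in answering this query. {annotation_instruction}\n"
)

chunk = SimpleNamespace(content="Chunks default to 512 tokens.", metadata={"source": "docs.md"})
print(CHUNK_TEMPLATE.format(index=1, chunk=chunk, metadata=chunk.metadata))
print(CONTEXT_TEMPLATE.format(query="chunk size", annotation_instruction=""))
```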
@@ -470,11 +461,11 @@ class AnnotationPromptParams(BaseModel):
         description="Whether to include annotation information in results.",
     )
     annotation_instruction_template: str = Field(
-        default=DEFAULT_ANNOTATION_INSTRUCTION_TEMPLATE,
+        default="Cite sources immediately at the end of sentences before punctuation, using `<|file-id|>` format like 'This is a fact <|file-Cn3MSNn72ENTiiq11Qda4A|>.'. Do not add extra punctuation. Use only the file IDs provided, do not invent new ones.",
         description="Instructions for how the model should cite sources. Used when enable_annotations is True.",
     )
     chunk_annotation_template: str = Field(
-        default=DEFAULT_CHUNK_WITH_SOURCES_TEMPLATE,
+        default="[{index}] {metadata_text} cite as <|{file_id}|>\n{chunk_text}\n",
         description="Template for chunks with annotation information. Available placeholders: {index} 1-based chunk index, {metadata_text} formatted metadata, {file_id} document identifier, {chunk_text} chunk content.",
     )
 
@@ -499,6 +490,61 @@ class AnnotationPromptParams(BaseModel):
         return v
 
 
+class FileIngestionParams(BaseModel):
+    """Configuration for file processing during ingestion."""
+
+    default_chunk_size_tokens: int = Field(
+        default=512,
+        description="Default chunk size for RAG tool operations when not specified",
+    )
+    default_chunk_overlap_tokens: int = Field(
+        default=128,
+        description="Default overlap in tokens between chunks (original default: 512 // 4 = 128)",
+    )
+
+
+class ChunkRetrievalParams(BaseModel):
+    """Configuration for chunk retrieval and ranking during search."""
+
+    chunk_multiplier: int = Field(
+        default=5,
+        description="Multiplier for OpenAI API over-retrieval (affects all providers)",
+    )
+    max_tokens_in_context: int = Field(
+        default=4000,
+        description="Maximum tokens allowed in RAG context before truncation",
+    )
+    default_reranker_strategy: str = Field(
+        default="rrf",
+        description="Default reranker when not specified: 'rrf', 'weighted', or 'normalized'",
+    )
+    rrf_impact_factor: float = Field(
+        default=60.0,
+        description="Impact factor for RRF (Reciprocal Rank Fusion) reranking",
+    )
+    weighted_search_alpha: float = Field(
+        default=0.5,
+        description="Alpha weight for weighted search reranking (0.0-1.0)",
+    )
+
+
+class FileBatchParams(BaseModel):
+    """Configuration for file batch processing."""
+
+    max_concurrent_files_per_batch: int = Field(
+        default=3,
+        description="Maximum files processed concurrently in file batches",
+    )
+    file_batch_chunk_size: int = Field(
+        default=10,
+        description="Number of files to process in each batch chunk",
+    )
+    cleanup_interval_seconds: int = Field(
+        default=86400,  # 24 hours
+        description="Interval for cleaning up expired file batches (seconds)",
+    )
+
+
 class VectorStoresConfig(BaseModel):
     """Configuration for vector stores in the stack."""
 
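Each new subconfig carries complete defaults in its `Field` definitions, so it can be constructed with no arguments. A quick sketch, redeclaring one class in trimmed form so the snippet runs standalone:

```python
# Sketch: defaults come straight from the Field definitions.
from pydantic import BaseModel, Field

class FileBatchParams(BaseModel):  # trimmed copy of the class in the diff
    max_concurrent_files_per_batch: int = Field(default=3)
    file_batch_chunk_size: int = Field(default=10)
    cleanup_interval_seconds: int = Field(default=86400)  # 24 hours

params = FileBatchParams()  # no arguments needed
assert params.max_concurrent_files_per_batch == 3
assert FileBatchParams(file_batch_chunk_size=25).file_batch_chunk_size == 25  # per-field override
```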
@@ -527,6 +573,19 @@ class VectorStoresConfig(BaseModel):
         description="Configuration for source annotation and attribution features.",
     )
+
+    file_ingestion_params: FileIngestionParams = Field(
+        default_factory=FileIngestionParams,
+        description="Configuration for file processing during ingestion.",
+    )
+    chunk_retrieval_params: ChunkRetrievalParams = Field(
+        default_factory=ChunkRetrievalParams,
+        description="Configuration for chunk retrieval and ranking during search.",
+    )
+    file_batch_params: FileBatchParams = Field(
+        default_factory=FileBatchParams,
+        description="Configuration for file batch processing.",
+    )
 
 
 class SafetyConfig(BaseModel):
     """Configuration for default moderations model."""
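Because the three fields use `default_factory`, an empty `VectorStoresConfig` comes fully populated, and overrides can arrive as nested dicts (e.g. parsed from a run-config YAML). A minimal standalone sketch using trimmed copies of the classes above:

```python
# Sketch: default_factory populates nested subconfigs automatically.
from pydantic import BaseModel, Field

class ChunkRetrievalParams(BaseModel):  # trimmed copy of the class in the diff
    chunk_multiplier: int = Field(default=5)
    max_tokens_in_context: int = Field(default=4000)

class VectorStoresConfig(BaseModel):  # only one of the new fields, for illustration
    chunk_retrieval_params: ChunkRetrievalParams = Field(default_factory=ChunkRetrievalParams)

cfg = VectorStoresConfig()  # fully populated defaults, nothing required
assert cfg.chunk_retrieval_params.chunk_multiplier == 5

# Overrides can be given as nested dicts, e.g. parsed from a run-config YAML.
cfg = VectorStoresConfig(chunk_retrieval_params={"max_tokens_in_context": 8000})
assert cfg.chunk_retrieval_params.max_tokens_in_context == 8000
```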
@@ -11,6 +11,9 @@ def redact_sensitive_fields(data: dict[str, Any]) -> dict[str, Any]:
     """Redact sensitive information from config before printing."""
     sensitive_patterns = ["api_key", "api_token", "password", "secret", "token"]
 
+    # Specific configuration field names that should NOT be redacted despite containing "token"
+    safe_token_fields = ["chunk_size_tokens", "max_tokens", "default_chunk_overlap_tokens"]
+
     def _redact_value(v: Any) -> Any:
         if isinstance(v, dict):
             return _redact_dict(v)
@@ -21,7 +24,10 @@ def redact_sensitive_fields(data: dict[str, Any]) -> dict[str, Any]:
     def _redact_dict(d: dict[str, Any]) -> dict[str, Any]:
         result = {}
         for k, v in d.items():
-            if any(pattern in k.lower() for pattern in sensitive_patterns):
+            # Don't redact if it's a safe field
+            if any(safe_field in k.lower() for safe_field in safe_token_fields):
+                result[k] = _redact_value(v)
+            elif any(pattern in k.lower() for pattern in sensitive_patterns):
                 result[k] = "********"
             else:
                 result[k] = _redact_value(v)
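Taken together, the two hunks make redaction skip token-counting config fields while still masking real secrets. A flattened, non-recursive sketch of the same check and the output it should produce:

```python
# Sketch: safe token-counting fields survive redaction; real secrets do not.
sensitive_patterns = ["api_key", "api_token", "password", "secret", "token"]
safe_token_fields = ["chunk_size_tokens", "max_tokens", "default_chunk_overlap_tokens"]

def redact(d: dict) -> dict:
    result = {}
    for k, v in d.items():
        if any(safe in k.lower() for safe in safe_token_fields):
            result[k] = v  # e.g. "default_chunk_size_tokens" contains "token" but stays visible
        elif any(pat in k.lower() for pat in sensitive_patterns):
            result[k] = "********"
        else:
            result[k] = v
    return result

print(redact({"api_key": "sk-123", "default_chunk_size_tokens": 512}))
# -> {'api_key': '********', 'default_chunk_size_tokens': 512}
```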