feat: Enhance Vector Stores config with full configurations (#4397)

# What does this PR do?

Enhances the Vector Stores config with a full set of appropriate configurations:

- Add FileIngestionParams, ChunkRetrievalParams, and FileBatchParams subconfigs
- Update RAG memory, OpenAI vector store mixin, and vector store utils to use configuration
- Fix import organization across vector store components
- Add comprehensive vector stores configuration documentation
- Update docs navigation to include vector store configuration guide
- Delete `memory/constants.py` and move constant values directly into Pydantic models

## Test Plan

Tests updated + CI

---------

Signed-off-by: Francisco Javier Arceo <farceo@redhat.com>
2025-12-19 20:48:41 +00:00 · 2025-12-17 16:56:46 -05:00 · 2025-12-17 16:56:46 -05:00 · 2d149e3d2d
commit 2d149e3d2d
parent a7d509aaf9
22 changed files with 3249 additions and 110 deletions
--- a/src/llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml
+++ b/src/llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml
@@ -308,19 +308,32 @@ vector_stores:
      '
    context_template: 'The above results were retrieved to help answer the user''s
      query: "{query}". Use them as supporting information only in answering this
-      query.{annotation_instruction}
+      query. {annotation_instruction}

      '
  annotation_prompt_params:
    enable_annotations: true
-    annotation_instruction_template: ' Cite sources immediately at the end of sentences
-      before punctuation, using `<|file-id|>` format like ''This is a fact <|file-Cn3MSNn72ENTiiq11Qda4A|>.''.
+    annotation_instruction_template: Cite sources immediately at the end of sentences
+      before punctuation, using `<|file-id|>` format like 'This is a fact <|file-Cn3MSNn72ENTiiq11Qda4A|>.'.
      Do not add extra punctuation. Use only the file IDs provided, do not invent
-      new ones.'
+      new ones.
    chunk_annotation_template: '[{index}] {metadata_text} cite as <|{file_id}|>

      {chunk_text}

      '
+  file_ingestion_params:
+    default_chunk_size_tokens: 512
+    default_chunk_overlap_tokens: 128
+  chunk_retrieval_params:
+    chunk_multiplier: 5
+    max_tokens_in_context: 4000
+    default_reranker_strategy: rrf
+    rrf_impact_factor: 60.0
+    weighted_search_alpha: 0.5
+  file_batch_params:
+    max_concurrent_files_per_batch: 3
+    file_batch_chunk_size: 10
+    cleanup_interval_seconds: 86400
 safety:
  default_shield_id: llama-guard