Mirror of https://github.com/meta-llama/llama-stack.git, synced 2025-12-20 22:32:27 +00:00
feat: Enhance Vector Stores config with full configurations (#4397)
# What does this PR do?

Enhances the Vector Stores config with a full set of appropriate configurations:

- Add `FileIngestionParams`, `ChunkRetrievalParams`, and `FileBatchParams` subconfigs
- Update RAG memory, the OpenAI vector store mixin, and vector store utils to use the configuration
- Fix import organization across vector store components
- Add comprehensive vector stores configuration documentation
- Update docs navigation to include the vector store configuration guide
- Delete `memory/constants.py` and move constant values directly into the Pydantic models

## Test Plan

Tests updated + CI

---------

Signed-off-by: Francisco Javier Arceo <farceo@redhat.com>
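The commit message names three new subconfigs, and the test diffs below also exercise `file_search_params`, `context_prompt_params`, `annotation_prompt_params`, and `file_batch_params`. As a rough sketch of the pattern this PR describes, not the actual models in `llama_stack.core.datatypes` (field shapes and the concurrency default are assumptions inferred from the tests):

```python
# Hypothetical sketch: defaults live directly on Pydantic models instead of
# memory/constants.py. Names mirror the test diffs; everything else is assumed.
from pydantic import BaseModel, Field


class FileBatchParams(BaseModel):
    # Bound on concurrent file ingestion within one batch (default illustrative).
    max_concurrent_files_per_batch: int = 5


class FileSearchParams(BaseModel):
    # Header prepended to knowledge_search results; {num_chunks} filled at query time.
    header_template: str = (
        "knowledge_search tool found {num_chunks} chunks:\n"
        "BEGIN of knowledge_search tool results.\n"
    )


class ContextPromptParams(BaseModel):
    context_template: str = (
        "The above results were retrieved to help answer the user's query: "
        '"{query}". Use them as supporting information only in answering this '
        "query. {annotation_instruction}\n"
    )


class VectorStoresConfig(BaseModel):
    file_batch_params: FileBatchParams = Field(default_factory=FileBatchParams)
    file_search_params: FileSearchParams = Field(default_factory=FileSearchParams)
    context_prompt_params: ContextPromptParams = Field(default_factory=ContextPromptParams)
```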
parent a7d509aaf9
commit 2d149e3d2d
22 changed files with 3249 additions and 110 deletions
tests/integration/responses/recordings/0995df80c05acd7a1c386b09d5b4520ffff5233bf1fdd222607ec879cb5bcdb1.json (generated, normal file, 1569 lines; diff suppressed because it is too large)
tests/integration/responses/recordings/b6ea82498b4cd08dbbfec50c2bf7e20bf3f40ed0acbe79695f18c787ad0e3ed7.json (generated, normal file, 1164 lines; diff suppressed because it is too large)
```diff
@@ -156,7 +156,6 @@ async def test_query_rewrite_functionality():
     from unittest.mock import MagicMock
 
     from llama_stack.core.datatypes import QualifiedModel, RewriteQueryParams, VectorStoresConfig
-    from llama_stack.providers.utils.memory.constants import DEFAULT_QUERY_REWRITE_PROMPT
     from llama_stack_api import VectorStoreSearchResponsePage
 
     mock_routing_table = Mock()
@@ -197,7 +196,7 @@ async def test_query_rewrite_functionality():
 
     # Verify default prompt is used
     prompt_text = chat_call_args.messages[0].content
-    expected_prompt = DEFAULT_QUERY_REWRITE_PROMPT.format(query="test query")
+    expected_prompt = "Expand this query with relevant synonyms and related terms. Return only the improved query, no explanations:\n\ntest query\n\nImproved query:"
     assert prompt_text == expected_prompt
 
     # Verify routing table was called with rewritten query and rewrite_query=False
```
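The deleted `DEFAULT_QUERY_REWRITE_PROMPT` constant presumably now lives as a field default on `RewriteQueryParams`. A minimal sketch of that pattern (the field name `prompt_template` and the model shape are assumptions; only the prompt string is taken from the test above):

```python
# Sketch only: not the actual RewriteQueryParams from llama_stack.core.datatypes.
from pydantic import BaseModel


class RewriteQueryParams(BaseModel):
    # Default moved here from memory/constants.py (DEFAULT_QUERY_REWRITE_PROMPT).
    prompt_template: str = (
        "Expand this query with relevant synonyms and related terms. "
        "Return only the improved query, no explanations:\n\n{query}\n\nImproved query:"
    )


# Usage mirroring the test's expectation:
params = RewriteQueryParams()
prompt = params.prompt_template.format(query="test query")
assert prompt.endswith("test query\n\nImproved query:")
```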
```diff
@@ -110,22 +110,23 @@ class TestOptionalArchitecture:
         assert config.annotation_prompt_params is not None
         assert "{num_chunks}" in config.file_search_params.header_template
 
-    def test_guaranteed_defaults_match_constants(self):
-        """Test that guaranteed defaults match expected constant values."""
-        from llama_stack.providers.utils.memory.constants import (
-            DEFAULT_CONTEXT_TEMPLATE,
-            DEFAULT_FILE_SEARCH_HEADER_TEMPLATE,
-        )
-
+    def test_guaranteed_defaults_have_expected_values(self):
+        """Test that guaranteed defaults have expected hardcoded values."""
         # Create config with guaranteed defaults
         config = VectorStoresConfig()
 
-        # Verify defaults match constants
+        # Verify defaults have expected values
        header_template = config.file_search_params.header_template
        context_template = config.context_prompt_params.context_template
 
-        assert header_template == DEFAULT_FILE_SEARCH_HEADER_TEMPLATE
-        assert context_template == DEFAULT_CONTEXT_TEMPLATE
+        assert (
+            header_template
+            == "knowledge_search tool found {num_chunks} chunks:\nBEGIN of knowledge_search tool results.\n"
+        )
+        assert (
+            context_template
+            == 'The above results were retrieved to help answer the user\'s query: "{query}". Use them as supporting information only in answering this query. {annotation_instruction}\n'
+        )
 
         # Verify templates can be formatted successfully
         formatted_header = header_template.format(num_chunks=3)
```
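Since the templates are now field defaults rather than module-level constants, a deployment can override them through the config instead of monkeypatching `memory/constants.py`. A hedged usage sketch, reusing the hypothetical models from the first code block (the override string is made up):

```python
# Reuses the hypothetical VectorStoresConfig / FileSearchParams sketched earlier.
custom = VectorStoresConfig(
    file_search_params=FileSearchParams(
        header_template="Found {num_chunks} relevant chunks:\n"
    )
)

# Templates still format the same way the test exercises them.
assert custom.file_search_params.header_template.format(num_chunks=3) == "Found 3 relevant chunks:\n"
```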
```diff
@@ -1091,13 +1091,11 @@ async def test_max_concurrent_files_per_batch(vector_io_adapter):
     # Give time for the semaphore logic to start processing files
     await asyncio.sleep(0.2)
 
-    # Verify that only MAX_CONCURRENT_FILES_PER_BATCH files are processing concurrently
+    # Verify that only max_concurrent_files_per_batch files are processing concurrently
     # The semaphore in _process_files_with_concurrency should limit this
-    from llama_stack.providers.utils.memory.openai_vector_store_mixin import MAX_CONCURRENT_FILES_PER_BATCH
+    max_concurrent_files = vector_io_adapter.vector_stores_config.file_batch_params.max_concurrent_files_per_batch
 
-    assert active_files == MAX_CONCURRENT_FILES_PER_BATCH, (
-        f"Expected {MAX_CONCURRENT_FILES_PER_BATCH} active files, got {active_files}"
-    )
+    assert active_files == max_concurrent_files, f"Expected {max_concurrent_files} active files, got {active_files}"
 
     # Verify batch is in progress
     assert batch.status == "in_progress"
```
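The limit this test checks comes from a semaphore around per-file processing. A self-contained sketch of that pattern (the function name follows the test's comment about `_process_files_with_concurrency`; the body is illustrative, not the mixin's actual code):

```python
import asyncio


async def process_files_with_concurrency(file_ids: list[str], max_concurrent: int) -> None:
    # One semaphore bounds how many files are ingested at once; max_concurrent
    # would come from vector_stores_config.file_batch_params.max_concurrent_files_per_batch.
    semaphore = asyncio.Semaphore(max_concurrent)

    async def process_one(file_id: str) -> None:
        async with semaphore:
            # Placeholder for chunking/embedding a single file.
            await asyncio.sleep(0.1)

    await asyncio.gather(*(process_one(f) for f in file_ids))


# Usage: at most 5 of the 20 files are processed concurrently.
asyncio.run(process_files_with_concurrency([f"file-{i}" for i in range(20)], max_concurrent=5))
```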