Mirror of https://github.com/meta-llama/llama-stack.git, synced 2025-12-20 22:32:27 +00:00
feat: Enhance Vector Stores config with full configurations (#4397)
# What does this PR do?

Enhances the Vector Stores config with a full set of appropriate configurations:

- Add `FileIngestionParams`, `ChunkRetrievalParams`, and `FileBatchParams` subconfigs
- Update RAG memory, the OpenAI vector store mixin, and vector store utils to use the configuration
- Fix import organization across vector store components
- Add comprehensive vector stores configuration documentation
- Update docs navigation to include the vector store configuration guide
- Delete `memory/constants.py` and move constant values directly into the Pydantic models

## Test Plan

Tests updated + CI

---------

Signed-off-by: Francisco Javier Arceo <farceo@redhat.com>
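The commit message names three new subconfigs, and the test diffs below also exercise `file_search_params`, `context_prompt_params`, `annotation_prompt_params`, and `file_batch_params`. As a rough sketch of the pattern this PR describes, not the actual models in `llama_stack.core.datatypes` (field shapes and the concurrency default are assumptions inferred from the tests):

```python
# Hypothetical sketch: defaults live directly on Pydantic models instead of
# memory/constants.py. Names mirror the test diffs; everything else is assumed.
from pydantic import BaseModel, Field


class FileBatchParams(BaseModel):
    # Bound on concurrent file ingestion within one batch (default illustrative).
    max_concurrent_files_per_batch: int = 5


class FileSearchParams(BaseModel):
    # Header prepended to knowledge_search results; {num_chunks} filled at query time.
    header_template: str = (
        "knowledge_search tool found {num_chunks} chunks:\n"
        "BEGIN of knowledge_search tool results.\n"
    )


class ContextPromptParams(BaseModel):
    context_template: str = (
        "The above results were retrieved to help answer the user's query: "
        '"{query}". Use them as supporting information only in answering this '
        "query. {annotation_instruction}\n"
    )


class VectorStoresConfig(BaseModel):
    file_batch_params: FileBatchParams = Field(default_factory=FileBatchParams)
    file_search_params: FileSearchParams = Field(default_factory=FileSearchParams)
    context_prompt_params: ContextPromptParams = Field(default_factory=ContextPromptParams)
```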
parent a7d509aaf9
commit 2d149e3d2d
22 changed files with 3249 additions and 110 deletions
tests/integration/responses/recordings/0995df80c05acd7a1c386b09d5b4520ffff5233bf1fdd222607ec879cb5bcdb1.json (generated, normal file, 1569 lines; diff suppressed because it is too large)
tests/integration/responses/recordings/b6ea82498b4cd08dbbfec50c2bf7e20bf3f40ed0acbe79695f18c787ad0e3ed7.json (generated, normal file, 1164 lines; diff suppressed because it is too large)
```diff
@@ -156,7 +156,6 @@ async def test_query_rewrite_functionality():
     from unittest.mock import MagicMock
 
     from llama_stack.core.datatypes import QualifiedModel, RewriteQueryParams, VectorStoresConfig
-    from llama_stack.providers.utils.memory.constants import DEFAULT_QUERY_REWRITE_PROMPT
     from llama_stack_api import VectorStoreSearchResponsePage
 
     mock_routing_table = Mock()
@@ -197,7 +196,7 @@ async def test_query_rewrite_functionality():
 
     # Verify default prompt is used
     prompt_text = chat_call_args.messages[0].content
-    expected_prompt = DEFAULT_QUERY_REWRITE_PROMPT.format(query="test query")
+    expected_prompt = "Expand this query with relevant synonyms and related terms. Return only the improved query, no explanations:\n\ntest query\n\nImproved query:"
     assert prompt_text == expected_prompt
 
     # Verify routing table was called with rewritten query and rewrite_query=False
```
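The deleted `DEFAULT_QUERY_REWRITE_PROMPT` constant presumably now lives as a field default on `RewriteQueryParams`. A minimal sketch of that pattern (the field name `prompt_template` and the model shape are assumptions; only the prompt string is taken from the test above):

```python
# Sketch only: not the actual RewriteQueryParams from llama_stack.core.datatypes.
from pydantic import BaseModel


class RewriteQueryParams(BaseModel):
    # Default moved here from memory/constants.py (DEFAULT_QUERY_REWRITE_PROMPT).
    prompt_template: str = (
        "Expand this query with relevant synonyms and related terms. "
        "Return only the improved query, no explanations:\n\n{query}\n\nImproved query:"
    )


# Usage mirroring the test's expectation:
params = RewriteQueryParams()
prompt = params.prompt_template.format(query="test query")
assert prompt.endswith("test query\n\nImproved query:")
```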
```diff
@@ -110,22 +110,23 @@ class TestOptionalArchitecture:
         assert config.annotation_prompt_params is not None
         assert "{num_chunks}" in config.file_search_params.header_template
 
-    def test_guaranteed_defaults_match_constants(self):
-        """Test that guaranteed defaults match expected constant values."""
-        from llama_stack.providers.utils.memory.constants import (
-            DEFAULT_CONTEXT_TEMPLATE,
-            DEFAULT_FILE_SEARCH_HEADER_TEMPLATE,
-        )
-
+    def test_guaranteed_defaults_have_expected_values(self):
+        """Test that guaranteed defaults have expected hardcoded values."""
         # Create config with guaranteed defaults
         config = VectorStoresConfig()
 
-        # Verify defaults match constants
+        # Verify defaults have expected values
        header_template = config.file_search_params.header_template
        context_template = config.context_prompt_params.context_template
 
-        assert header_template == DEFAULT_FILE_SEARCH_HEADER_TEMPLATE
-        assert context_template == DEFAULT_CONTEXT_TEMPLATE
+        assert (
+            header_template
+            == "knowledge_search tool found {num_chunks} chunks:\nBEGIN of knowledge_search tool results.\n"
+        )
+        assert (
+            context_template
+            == 'The above results were retrieved to help answer the user\'s query: "{query}". Use them as supporting information only in answering this query. {annotation_instruction}\n'
+        )
 
         # Verify templates can be formatted successfully
         formatted_header = header_template.format(num_chunks=3)
```
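Since the templates are now field defaults rather than module-level constants, a deployment can override them through the config instead of monkeypatching `memory/constants.py`. A hedged usage sketch, reusing the hypothetical models from the first code block (the override string is made up):

```python
# Reuses the hypothetical VectorStoresConfig / FileSearchParams sketched earlier.
custom = VectorStoresConfig(
    file_search_params=FileSearchParams(
        header_template="Found {num_chunks} relevant chunks:\n"
    )
)

# Templates still format the same way the test exercises them.
assert custom.file_search_params.header_template.format(num_chunks=3) == "Found 3 relevant chunks:\n"
```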
```diff
@@ -1091,13 +1091,11 @@ async def test_max_concurrent_files_per_batch(vector_io_adapter):
     # Give time for the semaphore logic to start processing files
     await asyncio.sleep(0.2)
 
-    # Verify that only MAX_CONCURRENT_FILES_PER_BATCH files are processing concurrently
+    # Verify that only max_concurrent_files_per_batch files are processing concurrently
     # The semaphore in _process_files_with_concurrency should limit this
-    from llama_stack.providers.utils.memory.openai_vector_store_mixin import MAX_CONCURRENT_FILES_PER_BATCH
+    max_concurrent_files = vector_io_adapter.vector_stores_config.file_batch_params.max_concurrent_files_per_batch
 
-    assert active_files == MAX_CONCURRENT_FILES_PER_BATCH, (
-        f"Expected {MAX_CONCURRENT_FILES_PER_BATCH} active files, got {active_files}"
-    )
+    assert active_files == max_concurrent_files, f"Expected {max_concurrent_files} active files, got {active_files}"
 
     # Verify batch is in progress
     assert batch.status == "in_progress"
```
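The limit this test checks comes from a semaphore around per-file processing. A self-contained sketch of that pattern (the function name follows the test's comment about `_process_files_with_concurrency`; the body is illustrative, not the mixin's actual code):

```python
import asyncio


async def process_files_with_concurrency(file_ids: list[str], max_concurrent: int) -> None:
    # One semaphore bounds how many files are ingested at once; max_concurrent
    # would come from vector_stores_config.file_batch_params.max_concurrent_files_per_batch.
    semaphore = asyncio.Semaphore(max_concurrent)

    async def process_one(file_id: str) -> None:
        async with semaphore:
            # Placeholder for chunking/embedding a single file.
            await asyncio.sleep(0.1)

    await asyncio.gather(*(process_one(f) for f in file_ids))


# Usage: at most 5 of the 20 files are processed concurrently.
asyncio.run(process_files_with_concurrency([f"file-{i}" for i in range(20)], max_concurrent=5))
```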