From ce8ea2f50557c8cd59c497e55aaa00d55bff10ec Mon Sep 17 00:00:00 2001 From: slekkala1 Date: Wed, 15 Oct 2025 06:53:36 -0700 Subject: [PATCH] chore: Support embedding params from metadata for Vector Store (#3811) # What does this PR do? Support reading embedding model and dimensions from metadata for vector store ## Test Plan Unit Tests --- .../utils/memory/openai_vector_store_mixin.py | 42 ++++- .../vector_io/test_openai_vector_stores.py | 49 +++++ .../test_vector_io_openai_vector_stores.py | 171 ++++++++++++++++++ 3 files changed, 256 insertions(+), 6 deletions(-) diff --git a/llama_stack/providers/utils/memory/openai_vector_store_mixin.py b/llama_stack/providers/utils/memory/openai_vector_store_mixin.py index 37fd94db0..d9f8ba550 100644 --- a/llama_stack/providers/utils/memory/openai_vector_store_mixin.py +++ b/llama_stack/providers/utils/memory/openai_vector_store_mixin.py @@ -53,6 +53,8 @@ from llama_stack.providers.utils.memory.vector_store import ( make_overlapped_chunks, ) +EMBEDDING_DIMENSION = 768 + logger = get_logger(name=__name__, category="providers::utils") # Constants for OpenAI vector stores @@ -352,12 +354,41 @@ class OpenAIVectorStoreMixin(ABC): """Creates a vector store.""" created_at = int(time.time()) - extra = params.model_extra or {} - provider_vector_db_id = extra.get("provider_vector_db_id") - embedding_model = extra.get("embedding_model") - embedding_dimension = extra.get("embedding_dimension") + # Extract llama-stack-specific parameters from extra_body + extra_body = params.model_extra or {} + metadata = params.metadata or {} + + provider_vector_db_id = extra_body.get("provider_vector_db_id") + + # Use embedding info from metadata if available, otherwise from extra_body + if metadata.get("embedding_model"): + # If either is in metadata, use metadata as source + embedding_model = metadata.get("embedding_model") + embedding_dimension = ( + int(metadata["embedding_dimension"]) if metadata.get("embedding_dimension") else EMBEDDING_DIMENSION 
+ ) + logger.debug( + f"Using embedding config from metadata (takes precedence over extra_body): model='{embedding_model}', dimension={embedding_dimension}" + ) + + # Check for conflicts with extra_body + if extra_body.get("embedding_model") and extra_body["embedding_model"] != embedding_model: + raise ValueError( + f"Embedding model inconsistent between metadata ('{embedding_model}') and extra_body ('{extra_body['embedding_model']}')" + ) + if extra_body.get("embedding_dimension") and extra_body["embedding_dimension"] != embedding_dimension: + raise ValueError( + f"Embedding dimension inconsistent between metadata ({embedding_dimension}) and extra_body ({extra_body['embedding_dimension']})" + ) + else: + embedding_model = extra_body.get("embedding_model") + embedding_dimension = extra_body.get("embedding_dimension", EMBEDDING_DIMENSION) + logger.debug( + f"Using embedding config from extra_body: model='{embedding_model}', dimension={embedding_dimension}" + ) + # use provider_id set by router; fallback to provider's own ID when used directly via --stack-config - provider_id = extra.get("provider_id") or getattr(self, "__provider_id__", None) + provider_id = extra_body.get("provider_id") or getattr(self, "__provider_id__", None) # Derive the canonical vector_db_id (allow override, else generate) vector_db_id = provider_vector_db_id or generate_object_id("vector_store", lambda: f"vs_{uuid.uuid4()}") @@ -422,7 +453,6 @@ class OpenAIVectorStoreMixin(ABC): } # Add provider information to metadata if provided - metadata = params.metadata or {} if provider_id: metadata["provider_id"] = provider_id if provider_vector_db_id: diff --git a/tests/integration/vector_io/test_openai_vector_stores.py b/tests/integration/vector_io/test_openai_vector_stores.py index e23a08361..e21b233bc 100644 --- a/tests/integration/vector_io/test_openai_vector_stores.py +++ b/tests/integration/vector_io/test_openai_vector_stores.py @@ -1454,3 +1454,52 @@ def 
def test_openai_vector_store_embedding_config_from_metadata(
    compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension
):
    """Test that embedding configuration works from metadata source."""
    skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
    client = compat_client_with_empty_stores

    def embedding_metadata(source: str) -> dict:
        # Embedding settings travel through store metadata as strings.
        return {
            "embedding_model": embedding_model_id,
            "embedding_dimension": str(embedding_dimension),
            "test_source": source,
        }

    # Case 1: embedding config supplied via metadata only.
    store_from_metadata = client.vector_stores.create(
        name="metadata_config_store",
        metadata=embedding_metadata("metadata"),
    )
    assert store_from_metadata is not None
    assert store_from_metadata.name == "metadata_config_store"
    assert store_from_metadata.status in ["completed", "in_progress"]
    assert store_from_metadata.metadata["test_source"] == "metadata"

    # Case 2: identical embedding config supplied via BOTH metadata and
    # extra_body — must be accepted since the values agree.
    store_consistent = client.vector_stores.create(
        name="consistent_config_store",
        metadata=embedding_metadata("consistent"),
        extra_body={
            "embedding_model": embedding_model_id,
            "embedding_dimension": int(embedding_dimension),  # same value, native int type
        },
    )
    assert store_consistent is not None
    assert store_consistent.name == "consistent_config_store"
    assert store_consistent.status in ["completed", "in_progress"]
    assert store_consistent.metadata["test_source"] == "consistent"

    # Both stores should show up in a listing.
    listed_names = [store.name for store in client.vector_stores.list().data]
    assert "metadata_config_store" in listed_names
    assert "consistent_config_store" in listed_names
async def test_embedding_config_from_metadata(vector_io_adapter):
    """Test that embedding configuration is correctly extracted from metadata.

    The dimension arrives in metadata as a string and must be coerced to int
    before registration.
    """
    # Mock register_vector_db to avoid actual registration
    vector_io_adapter.register_vector_db = AsyncMock()
    # Set provider_id attribute for the adapter
    vector_io_adapter.__provider_id__ = "test_provider"

    # Embedding config in metadata only.
    # NOTE: pydantic's `model_extra` is a read-only property, not a settable
    # field — the previous `model_extra={}` kwarg merely injected a stray extra
    # key named "model_extra" into the request's extras, so it is dropped here.
    params = OpenAICreateVectorStoreRequestWithExtraBody(
        name="test_store",
        metadata={
            "embedding_model": "test-embedding-model",
            "embedding_dimension": "512",
        },
    )

    await vector_io_adapter.openai_create_vector_store(params)

    # Verify VectorDB was registered with correct embedding config from metadata
    vector_io_adapter.register_vector_db.assert_called_once()
    call_args = vector_io_adapter.register_vector_db.call_args[0][0]
    assert call_args.embedding_model == "test-embedding-model"
    assert call_args.embedding_dimension == 512


async def test_embedding_config_from_extra_body(vector_io_adapter):
    """Test that embedding configuration is correctly extracted from extra_body when metadata is empty."""
    # Mock register_vector_db to avoid actual registration
    vector_io_adapter.register_vector_db = AsyncMock()
    # Set provider_id attribute for the adapter
    vector_io_adapter.__provider_id__ = "test_provider"

    # Embedding config in extra_body only (metadata has no embedding_model).
    params = OpenAICreateVectorStoreRequestWithExtraBody(
        name="test_store",
        metadata={},  # Empty metadata to ensure extra_body is used
        **{
            "embedding_model": "extra-body-model",
            "embedding_dimension": 1024,
        },
    )

    await vector_io_adapter.openai_create_vector_store(params)

    # Verify VectorDB was registered with correct embedding config from extra_body
    vector_io_adapter.register_vector_db.assert_called_once()
    call_args = vector_io_adapter.register_vector_db.call_args[0][0]
    assert call_args.embedding_model == "extra-body-model"
    assert call_args.embedding_dimension == 1024


async def test_embedding_config_consistency_check_passes(vector_io_adapter):
    """Test that consistent embedding config in both metadata and extra_body passes validation."""
    # Mock register_vector_db to avoid actual registration
    vector_io_adapter.register_vector_db = AsyncMock()
    # Set provider_id attribute for the adapter
    vector_io_adapter.__provider_id__ = "test_provider"

    # Same model/dimension in both metadata (string form) and extra_body (int).
    params = OpenAICreateVectorStoreRequestWithExtraBody(
        name="test_store",
        metadata={
            "embedding_model": "consistent-model",
            "embedding_dimension": "768",
        },
        **{
            "embedding_model": "consistent-model",
            "embedding_dimension": 768,
        },
    )

    await vector_io_adapter.openai_create_vector_store(params)

    # Should not raise any error and use metadata config
    vector_io_adapter.register_vector_db.assert_called_once()
    call_args = vector_io_adapter.register_vector_db.call_args[0][0]
    assert call_args.embedding_model == "consistent-model"
    assert call_args.embedding_dimension == 768


async def test_embedding_config_inconsistency_errors(vector_io_adapter):
    """Test that inconsistent embedding config between metadata and extra_body raises errors."""
    # Mock register_vector_db to avoid actual registration
    vector_io_adapter.register_vector_db = AsyncMock()
    # Set provider_id attribute for the adapter
    vector_io_adapter.__provider_id__ = "test_provider"

    # Inconsistent embedding model: metadata wins and the mismatch must raise.
    params = OpenAICreateVectorStoreRequestWithExtraBody(
        name="test_store",
        metadata={
            "embedding_model": "metadata-model",
            "embedding_dimension": "768",
        },
        **{
            "embedding_model": "extra-body-model",
            "embedding_dimension": 768,
        },
    )

    with pytest.raises(ValueError, match="Embedding model inconsistent between metadata"):
        await vector_io_adapter.openai_create_vector_store(params)

    # Reset mock for second test
    vector_io_adapter.register_vector_db.reset_mock()

    # Inconsistent embedding dimension with matching model.
    params = OpenAICreateVectorStoreRequestWithExtraBody(
        name="test_store",
        metadata={
            "embedding_model": "same-model",
            "embedding_dimension": "512",
        },
        **{
            "embedding_model": "same-model",
            "embedding_dimension": 1024,
        },
    )

    with pytest.raises(ValueError, match="Embedding dimension inconsistent between metadata"):
        await vector_io_adapter.openai_create_vector_store(params)


async def test_embedding_config_defaults_when_missing(vector_io_adapter):
    """Test that embedding dimension defaults to 768 when not provided."""
    # Mock register_vector_db to avoid actual registration
    vector_io_adapter.register_vector_db = AsyncMock()
    # Set provider_id attribute for the adapter
    vector_io_adapter.__provider_id__ = "test_provider"

    # Only the embedding model, no dimension (metadata empty to use extra_body).
    params = OpenAICreateVectorStoreRequestWithExtraBody(
        name="test_store",
        metadata={},  # Empty metadata to ensure extra_body is used
        **{
            "embedding_model": "model-without-dimension",
        },
    )

    await vector_io_adapter.openai_create_vector_store(params)

    # Should default to 768 dimensions
    vector_io_adapter.register_vector_db.assert_called_once()
    call_args = vector_io_adapter.register_vector_db.call_args[0][0]
    assert call_args.embedding_model == "model-without-dimension"
    assert call_args.embedding_dimension == 768


async def test_embedding_config_required_model_missing(vector_io_adapter):
    """Test that missing embedding model raises error."""
    # Mock register_vector_db to avoid actual registration
    vector_io_adapter.register_vector_db = AsyncMock()
    # Set provider_id attribute for the adapter
    vector_io_adapter.__provider_id__ = "test_provider"
    # Mock the default model lookup to return None (no default model available)
    vector_io_adapter._get_default_embedding_model_and_dimension = AsyncMock(return_value=None)

    # No embedding model provided anywhere — creation must fail.
    params = OpenAICreateVectorStoreRequestWithExtraBody(name="test_store", metadata={})

    with pytest.raises(ValueError, match="embedding_model is required in extra_body when creating a vector store"):
        await vector_io_adapter.openai_create_vector_store(params)