Mirror of https://github.com/meta-llama/llama-stack.git, synced 2025-10-22 16:23:08 +00:00
chore: Updating how default embedding model is set in stack (#3818)
# What does this PR do?
Refactor setting the default vector store provider and embedding model to use an optional `vector_stores` config in the `StackRunConfig`, and clean up the code to do so (had to add back in some pieces of VectorDB). Also added remote Qdrant and Weaviate to the starter distro (based on the other PR where inference providers were added for UX).

The new config is simply (the default for the Starter distro):

```yaml
vector_stores:
  default_provider_id: faiss
  default_embedding_model:
    provider_id: sentence-transformers
    model_id: nomic-ai/nomic-embed-text-v1.5
```

## Test Plan
CI and unit tests.

---------

Signed-off-by: Francisco Javier Arceo <farceo@redhat.com>
Co-authored-by: Ashwin Bharambe <ashwin.bharambe@gmail.com>
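For reference, the unit-test hunks near the end of this diff construct the same configuration programmatically. A minimal sketch of the equivalent Python (field and class names are taken from this diff; the surrounding values are just the starter-distro defaults shown above):

```python
from llama_stack.core.datatypes import QualifiedModel, VectorStoresConfig

# Mirrors the starter-distro YAML above: faiss as the default vector_io
# provider, and nomic-embed-text served by sentence-transformers as the
# default embedding model.
vector_stores = VectorStoresConfig(
    default_provider_id="faiss",
    default_embedding_model=QualifiedModel(
        provider_id="sentence-transformers",
        model_id="nomic-ai/nomic-embed-text-v1.5",
    ),
)
```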
parent 2c43285e22
commit 48581bf651

48 changed files with 973 additions and 818 deletions
@@ -317,3 +317,72 @@ def pytest_ignore_collect(path: str, config: pytest.Config) -> bool:
         if p.is_relative_to(rp):
             return False
     return True
+
+
+def get_vector_io_provider_ids(client):
+    """Get all available vector_io provider IDs."""
+    providers = [p for p in client.providers.list() if p.api == "vector_io"]
+    return [p.provider_id for p in providers]
+
+
+def vector_provider_wrapper(func):
+    """Decorator to run a test against all available vector_io providers."""
+    import functools
+    import os
+
+    @functools.wraps(func)
+    def wrapper(*args, **kwargs):
+        # Get the vector_io_provider_id from the test arguments
+        import inspect
+
+        sig = inspect.signature(func)
+        bound_args = sig.bind(*args, **kwargs)
+        bound_args.apply_defaults()
+
+        vector_io_provider_id = bound_args.arguments.get("vector_io_provider_id")
+        if not vector_io_provider_id:
+            pytest.skip("No vector_io_provider_id provided")
+
+        # Get client_with_models to check available providers
+        client_with_models = bound_args.arguments.get("client_with_models")
+        if client_with_models:
+            available_providers = get_vector_io_provider_ids(client_with_models)
+            if vector_io_provider_id not in available_providers:
+                pytest.skip(f"Provider '{vector_io_provider_id}' not available. Available: {available_providers}")
+
+        return func(*args, **kwargs)
+
+    # For replay tests, only use providers that are available in ci-tests environment
+    if os.environ.get("LLAMA_STACK_TEST_INFERENCE_MODE") == "replay":
+        all_providers = ["faiss", "sqlite-vec"]
+    else:
+        # For live tests, try all providers (they'll skip if not available)
+        all_providers = [
+            "faiss",
+            "sqlite-vec",
+            "milvus",
+            "chromadb",
+            "pgvector",
+            "weaviate",
+            "qdrant",
+        ]
+
+    return pytest.mark.parametrize("vector_io_provider_id", all_providers)(wrapper)
+
+
+@pytest.fixture
+def vector_io_provider_id(request, client_with_models):
+    """Fixture that provides a specific vector_io provider ID, skipping if not available."""
+    if hasattr(request, "param"):
+        requested_provider = request.param
+        available_providers = get_vector_io_provider_ids(client_with_models)
+
+        if requested_provider not in available_providers:
+            pytest.skip(f"Provider '{requested_provider}' not available. Available: {available_providers}")
+
+        return requested_provider
+    else:
+        provider_ids = get_vector_io_provider_ids(client_with_models)
+        if not provider_ids:
+            pytest.skip("No vector_io providers available")
+        return provider_ids[0]
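Taken together, the decorator parametrizes each test over provider IDs and skips any provider the running stack does not expose, while the fixture resolves a single usable provider. A minimal sketch of how a test opts in (the test name and body here are illustrative, not from this diff; the `extra_body` pattern matches the hunks below):

```python
# Hypothetical test: @vector_provider_wrapper injects the parametrized
# vector_io_provider_id argument, and the wrapper skips providers that are
# not available on the connected stack.
@vector_provider_wrapper
def test_create_store_on_each_provider(client_with_models, vector_io_provider_id):
    store = client_with_models.vector_stores.create(
        name="smoke_test",
        extra_body={"provider_id": vector_io_provider_id},
    )
    assert store.id
```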
@@ -21,6 +21,7 @@ from llama_stack_client import LlamaStackClient
 from openai import OpenAI
 
 from llama_stack import LlamaStackAsLibraryClient
+from llama_stack.core.datatypes import VectorStoresConfig
 from llama_stack.core.stack import run_config_from_adhoc_config_spec
 from llama_stack.env import get_env_or_fail
 
 
@@ -236,6 +237,13 @@ def instantiate_llama_stack_client(session):
 
     if "=" in config:
         run_config = run_config_from_adhoc_config_spec(config)
+
+        # --stack-config bypasses template so need this to set default embedding model
+        if "vector_io" in config and "inference" in config:
+            run_config.vector_stores = VectorStoresConfig(
+                embedding_model_id="inline::sentence-transformers/nomic-ai/nomic-embed-text-v1.5"
+            )
+
         run_config_file = tempfile.NamedTemporaryFile(delete=False, suffix=".yaml")
         with open(run_config_file.name, "w") as f:
             yaml.dump(run_config.model_dump(mode="json"), f)
@@ -8,14 +8,15 @@ import time
 from io import BytesIO
 
 import pytest
-from llama_stack_client import BadRequestError, NotFoundError
+from llama_stack_client import BadRequestError
 from openai import BadRequestError as OpenAIBadRequestError
-from openai import NotFoundError as OpenAINotFoundError
 
 from llama_stack.apis.vector_io import Chunk
 from llama_stack.core.library_client import LlamaStackAsLibraryClient
 from llama_stack.log import get_logger
 
+from ..conftest import vector_provider_wrapper
+
 logger = get_logger(name=__name__, category="vector_io")
 
 
@@ -133,8 +134,9 @@ def compat_client_with_empty_stores(compat_client):
     clear_files()
 
 
+@vector_provider_wrapper
 def test_openai_create_vector_store(
-    compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension
+    compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension, vector_io_provider_id
 ):
     """Test creating a vector store using OpenAI API."""
     skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
@@ -146,6 +148,7 @@ def test_openai_create_vector_store(
         metadata={"purpose": "testing", "environment": "integration"},
         extra_body={
             "embedding_model": embedding_model_id,
+            "provider_id": vector_io_provider_id,
         },
     )
 
@@ -159,14 +162,18 @@ def test_openai_create_vector_store(
     assert hasattr(vector_store, "created_at")
 
 
-def test_openai_create_vector_store_default(compat_client_with_empty_stores, client_with_models):
+@vector_provider_wrapper
+def test_openai_create_vector_store_default(compat_client_with_empty_stores, client_with_models, vector_io_provider_id):
     skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
-    vector_store = compat_client_with_empty_stores.vector_stores.create()
+    vector_store = compat_client_with_empty_stores.vector_stores.create(
+        extra_body={"provider_id": vector_io_provider_id}
+    )
     assert vector_store.id
 
 
+@vector_provider_wrapper
 def test_openai_list_vector_stores(
-    compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension
+    compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension, vector_io_provider_id
 ):
     """Test listing vector stores using OpenAI API."""
     skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
@@ -179,6 +186,7 @@ def test_openai_list_vector_stores(
         metadata={"type": "test"},
         extra_body={
             "embedding_model": embedding_model_id,
+            "provider_id": vector_io_provider_id,
         },
     )
     store2 = client.vector_stores.create(
@@ -186,6 +194,7 @@ def test_openai_list_vector_stores(
         metadata={"type": "test"},
         extra_body={
             "embedding_model": embedding_model_id,
+            "provider_id": vector_io_provider_id,
         },
     )
 
@@ -206,8 +215,9 @@ def test_openai_list_vector_stores(
     assert len(limited_response.data) == 1
 
 
+@vector_provider_wrapper
 def test_openai_retrieve_vector_store(
-    compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension
+    compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension, vector_io_provider_id
 ):
     """Test retrieving a specific vector store using OpenAI API."""
     skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
@@ -220,6 +230,7 @@ def test_openai_retrieve_vector_store(
         metadata={"purpose": "retrieval_test"},
         extra_body={
             "embedding_model": embedding_model_id,
+            "provider_id": vector_io_provider_id,
         },
     )
 
@@ -233,8 +244,9 @@ def test_openai_retrieve_vector_store(
     assert retrieved_store.object == "vector_store"
 
 
+@vector_provider_wrapper
 def test_openai_update_vector_store(
-    compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension
+    compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension, vector_io_provider_id
 ):
     """Test modifying a vector store using OpenAI API."""
     skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
@@ -247,6 +259,7 @@ def test_openai_update_vector_store(
         metadata={"version": "1.0"},
         extra_body={
             "embedding_model": embedding_model_id,
+            "provider_id": vector_io_provider_id,
         },
     )
     time.sleep(1)
@@ -264,8 +277,9 @@ def test_openai_update_vector_store(
     assert modified_store.last_active_at > created_store.last_active_at
 
 
+@vector_provider_wrapper
 def test_openai_delete_vector_store(
-    compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension
+    compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension, vector_io_provider_id
 ):
     """Test deleting a vector store using OpenAI API."""
     skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
@@ -278,6 +292,7 @@ def test_openai_delete_vector_store(
         metadata={"purpose": "deletion_test"},
         extra_body={
             "embedding_model": embedding_model_id,
+            "provider_id": vector_io_provider_id,
         },
     )
 
@@ -294,8 +309,9 @@ def test_openai_delete_vector_store(
         client.vector_stores.retrieve(vector_store_id=created_store.id)
 
 
+@vector_provider_wrapper
 def test_openai_vector_store_search_empty(
-    compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension
+    compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension, vector_io_provider_id
 ):
     """Test searching an empty vector store using OpenAI API."""
     skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
@@ -308,6 +324,7 @@ def test_openai_vector_store_search_empty(
         metadata={"purpose": "search_testing"},
         extra_body={
             "embedding_model": embedding_model_id,
+            "provider_id": vector_io_provider_id,
         },
     )
 
@@ -323,8 +340,14 @@ def test_openai_vector_store_search_empty(
     assert search_response.has_more is False
 
 
+@vector_provider_wrapper
 def test_openai_vector_store_with_chunks(
-    compat_client_with_empty_stores, client_with_models, sample_chunks, embedding_model_id, embedding_dimension
+    compat_client_with_empty_stores,
+    client_with_models,
+    sample_chunks,
+    embedding_model_id,
+    embedding_dimension,
+    vector_io_provider_id,
 ):
     """Test vector store functionality with actual chunks using both OpenAI and native APIs."""
     skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
@@ -338,6 +361,7 @@ def test_openai_vector_store_with_chunks(
         metadata={"purpose": "chunks_testing"},
         extra_body={
             "embedding_model": embedding_model_id,
+            "provider_id": vector_io_provider_id,
         },
     )
 
@@ -380,6 +404,7 @@ def test_openai_vector_store_with_chunks(
         ("What inspires neural networks?", "doc4", "ai"),
     ],
 )
+@vector_provider_wrapper
 def test_openai_vector_store_search_relevance(
     compat_client_with_empty_stores,
     client_with_models,
@@ -387,6 +412,7 @@ def test_openai_vector_store_search_relevance(
     test_case,
     embedding_model_id,
     embedding_dimension,
+    vector_io_provider_id,
 ):
     """Test that OpenAI vector store search returns relevant results for different queries."""
     skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
@@ -402,6 +428,7 @@ def test_openai_vector_store_search_relevance(
         metadata={"purpose": "relevance_testing"},
         extra_body={
             "embedding_model": embedding_model_id,
+            "provider_id": vector_io_provider_id,
         },
     )
 
@@ -430,8 +457,14 @@ def test_openai_vector_store_search_relevance(
     assert top_result.score > 0
 
 
+@vector_provider_wrapper
 def test_openai_vector_store_search_with_ranking_options(
-    compat_client_with_empty_stores, client_with_models, sample_chunks, embedding_model_id, embedding_dimension
+    compat_client_with_empty_stores,
+    client_with_models,
+    sample_chunks,
+    embedding_model_id,
+    embedding_dimension,
+    vector_io_provider_id,
 ):
     """Test OpenAI vector store search with ranking options."""
     skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
@@ -445,6 +478,7 @@ def test_openai_vector_store_search_with_ranking_options(
         metadata={"purpose": "ranking_testing"},
         extra_body={
             "embedding_model": embedding_model_id,
+            "provider_id": vector_io_provider_id,
         },
     )
 
@@ -483,8 +517,14 @@ def test_openai_vector_store_search_with_ranking_options(
         assert result.score >= threshold
 
 
+@vector_provider_wrapper
 def test_openai_vector_store_search_with_high_score_filter(
-    compat_client_with_empty_stores, client_with_models, sample_chunks, embedding_model_id, embedding_dimension
+    compat_client_with_empty_stores,
+    client_with_models,
+    sample_chunks,
+    embedding_model_id,
+    embedding_dimension,
+    vector_io_provider_id,
 ):
     """Test that searching with text very similar to a document and high score threshold returns only that document."""
     skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
@@ -498,6 +538,7 @@ def test_openai_vector_store_search_with_high_score_filter(
         metadata={"purpose": "high_score_filtering"},
         extra_body={
             "embedding_model": embedding_model_id,
+            "provider_id": vector_io_provider_id,
         },
     )
 
@@ -542,8 +583,14 @@ def test_openai_vector_store_search_with_high_score_filter(
     assert "python" in top_content.lower() or "programming" in top_content.lower()
 
 
+@vector_provider_wrapper
 def test_openai_vector_store_search_with_max_num_results(
-    compat_client_with_empty_stores, client_with_models, sample_chunks, embedding_model_id, embedding_dimension
+    compat_client_with_empty_stores,
+    client_with_models,
+    sample_chunks,
+    embedding_model_id,
+    embedding_dimension,
+    vector_io_provider_id,
 ):
     """Test OpenAI vector store search with max_num_results."""
     skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
@@ -557,6 +604,7 @@ def test_openai_vector_store_search_with_max_num_results(
         metadata={"purpose": "max_num_results_testing"},
         extra_body={
             "embedding_model": embedding_model_id,
+            "provider_id": vector_io_provider_id,
         },
     )
 
@@ -577,8 +625,9 @@ def test_openai_vector_store_search_with_max_num_results(
     assert len(search_response.data) == 2
 
 
+@vector_provider_wrapper
 def test_openai_vector_store_attach_file(
-    compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension
+    compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension, vector_io_provider_id
 ):
     """Test OpenAI vector store attach file."""
     skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
@@ -591,6 +640,7 @@ def test_openai_vector_store_attach_file(
         name="test_store",
         extra_body={
             "embedding_model": embedding_model_id,
+            "provider_id": vector_io_provider_id,
         },
     )
 
@@ -637,8 +687,9 @@ def test_openai_vector_store_attach_file(
     assert "foobazbar" in top_content.lower()
 
 
+@vector_provider_wrapper
 def test_openai_vector_store_attach_files_on_creation(
-    compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension
+    compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension, vector_io_provider_id
 ):
     """Test OpenAI vector store attach files on creation."""
     skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
@@ -668,6 +719,7 @@ def test_openai_vector_store_attach_files_on_creation(
         file_ids=file_ids,
         extra_body={
             "embedding_model": embedding_model_id,
+            "provider_id": vector_io_provider_id,
         },
     )
 
@@ -700,8 +752,9 @@ def test_openai_vector_store_attach_files_on_creation(
     assert updated_vector_store.file_counts.failed == 0
 
 
+@vector_provider_wrapper
 def test_openai_vector_store_list_files(
-    compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension
+    compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension, vector_io_provider_id
 ):
     """Test OpenAI vector store list files."""
     skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
@@ -714,6 +767,7 @@ def test_openai_vector_store_list_files(
         name="test_store",
         extra_body={
             "embedding_model": embedding_model_id,
+            "provider_id": vector_io_provider_id,
         },
     )
 
@@ -773,8 +827,9 @@ def test_openai_vector_store_list_files(
     assert updated_vector_store.file_counts.in_progress == 0
 
 
+@vector_provider_wrapper
 def test_openai_vector_store_list_files_invalid_vector_store(
-    compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension
+    compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension, vector_io_provider_id
 ):
     """Test OpenAI vector store list files with invalid vector store ID."""
     skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
@@ -783,14 +838,15 @@ def test_openai_vector_store_list_files_invalid_vector_store(
     if isinstance(compat_client, LlamaStackAsLibraryClient):
         errors = ValueError
     else:
-        errors = (NotFoundError, OpenAINotFoundError)
+        errors = (BadRequestError, OpenAIBadRequestError)
 
     with pytest.raises(errors):
         compat_client.vector_stores.files.list(vector_store_id="abc123")
 
 
+@vector_provider_wrapper
 def test_openai_vector_store_retrieve_file_contents(
-    compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension
+    compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension, vector_io_provider_id
 ):
     """Test OpenAI vector store retrieve file contents."""
     skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
@@ -803,6 +859,7 @@ def test_openai_vector_store_retrieve_file_contents(
         name="test_store",
         extra_body={
             "embedding_model": embedding_model_id,
+            "provider_id": vector_io_provider_id,
         },
     )
 
@@ -848,8 +905,9 @@ def test_openai_vector_store_retrieve_file_contents(
     assert file_contents.attributes == attributes
 
 
+@vector_provider_wrapper
 def test_openai_vector_store_delete_file(
-    compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension
+    compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension, vector_io_provider_id
 ):
     """Test OpenAI vector store delete file."""
     skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
@@ -862,6 +920,7 @@ def test_openai_vector_store_delete_file(
         name="test_store",
         extra_body={
             "embedding_model": embedding_model_id,
+            "provider_id": vector_io_provider_id,
         },
     )
 
@@ -912,8 +971,9 @@ def test_openai_vector_store_delete_file(
     assert updated_vector_store.file_counts.in_progress == 0
 
 
+@vector_provider_wrapper
 def test_openai_vector_store_delete_file_removes_from_vector_store(
-    compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension
+    compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension, vector_io_provider_id
 ):
     """Test OpenAI vector store delete file removes from vector store."""
     skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
@@ -926,6 +986,7 @@ def test_openai_vector_store_delete_file_removes_from_vector_store(
         name="test_store",
         extra_body={
             "embedding_model": embedding_model_id,
+            "provider_id": vector_io_provider_id,
         },
     )
 
@@ -962,8 +1023,9 @@ def test_openai_vector_store_delete_file_removes_from_vector_store(
     assert not search_response.data
 
 
+@vector_provider_wrapper
 def test_openai_vector_store_update_file(
-    compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension
+    compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension, vector_io_provider_id
 ):
     """Test OpenAI vector store update file."""
     skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
@@ -976,6 +1038,7 @@ def test_openai_vector_store_update_file(
         name="test_store",
         extra_body={
             "embedding_model": embedding_model_id,
+            "provider_id": vector_io_provider_id,
         },
     )
 
@@ -1017,8 +1080,9 @@ def test_openai_vector_store_update_file(
     assert retrieved_file.attributes["foo"] == "baz"
 
 
+@vector_provider_wrapper
 def test_create_vector_store_files_duplicate_vector_store_name(
-    compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension
+    compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension, vector_io_provider_id
 ):
     """
     This test confirms that client.vector_stores.create() creates a unique ID
@@ -1044,6 +1108,7 @@ def test_create_vector_store_files_duplicate_vector_store_name(
         name="test_store_with_files",
         extra_body={
             "embedding_model": embedding_model_id,
+            "provider_id": vector_io_provider_id,
         },
     )
     assert vector_store.file_counts.completed == 0
@@ -1056,6 +1121,7 @@ def test_create_vector_store_files_duplicate_vector_store_name(
         name="test_store_with_files",
         extra_body={
             "embedding_model": embedding_model_id,
+            "provider_id": vector_io_provider_id,
         },
     )
 
@@ -1086,8 +1152,15 @@ def test_create_vector_store_files_duplicate_vector_store_name(
 
 
 @pytest.mark.parametrize("search_mode", ["vector", "keyword", "hybrid"])
+@vector_provider_wrapper
 def test_openai_vector_store_search_modes(
-    llama_stack_client, client_with_models, sample_chunks, search_mode, embedding_model_id, embedding_dimension
+    llama_stack_client,
+    client_with_models,
+    sample_chunks,
+    search_mode,
+    embedding_model_id,
+    embedding_dimension,
+    vector_io_provider_id,
 ):
     skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
     skip_if_provider_doesnt_support_openai_vector_stores_search(client_with_models, search_mode)
@@ -1097,6 +1170,7 @@ def test_openai_vector_store_search_modes(
         metadata={"purpose": "search_mode_testing"},
         extra_body={
             "embedding_model": embedding_model_id,
+            "provider_id": vector_io_provider_id,
         },
     )
 
@@ -1115,8 +1189,9 @@ def test_openai_vector_store_search_modes(
     assert search_response is not None
 
 
+@vector_provider_wrapper
 def test_openai_vector_store_file_batch_create_and_retrieve(
-    compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension
+    compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension, vector_io_provider_id
 ):
     """Test creating and retrieving a vector store file batch."""
     skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
@@ -1128,6 +1203,7 @@ def test_openai_vector_store_file_batch_create_and_retrieve(
         name="batch_test_store",
         extra_body={
             "embedding_model": embedding_model_id,
+            "provider_id": vector_io_provider_id,
         },
     )
 
@@ -1178,8 +1254,9 @@ def test_openai_vector_store_file_batch_create_and_retrieve(
     assert retrieved_batch.status == "completed"  # Should be completed after processing
 
 
+@vector_provider_wrapper
 def test_openai_vector_store_file_batch_list_files(
-    compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension
+    compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension, vector_io_provider_id
 ):
     """Test listing files in a vector store file batch."""
     skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
@@ -1191,6 +1268,7 @@ def test_openai_vector_store_file_batch_list_files(
         name="batch_list_test_store",
         extra_body={
             "embedding_model": embedding_model_id,
+            "provider_id": vector_io_provider_id,
         },
     )
 
@@ -1271,8 +1349,9 @@ def test_openai_vector_store_file_batch_list_files(
     assert first_page_ids.isdisjoint(second_page_ids)
 
 
+@vector_provider_wrapper
 def test_openai_vector_store_file_batch_cancel(
-    compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension
+    compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension, vector_io_provider_id
 ):
     """Test cancelling a vector store file batch."""
     skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
@@ -1284,6 +1363,7 @@ def test_openai_vector_store_file_batch_cancel(
         name="batch_cancel_test_store",
         extra_body={
             "embedding_model": embedding_model_id,
+            "provider_id": vector_io_provider_id,
         },
     )
 
@@ -1326,8 +1406,9 @@ def test_openai_vector_store_file_batch_cancel(
     assert final_batch.status in ["completed", "cancelled"]
 
 
+@vector_provider_wrapper
 def test_openai_vector_store_file_batch_retrieve_contents(
-    compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension
+    compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension, vector_io_provider_id
 ):
     """Test retrieving file contents after file batch processing."""
     skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
@@ -1339,6 +1420,7 @@ def test_openai_vector_store_file_batch_retrieve_contents(
         name="batch_contents_test_store",
         extra_body={
             "embedding_model": embedding_model_id,
+            "provider_id": vector_io_provider_id,
        },
     )
 
@@ -1399,8 +1481,9 @@ def test_openai_vector_store_file_batch_retrieve_contents(
         assert file_data[i][1].decode("utf-8") in content_text
 
 
+@vector_provider_wrapper
 def test_openai_vector_store_file_batch_error_handling(
-    compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension
+    compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension, vector_io_provider_id
 ):
     """Test error handling for file batch operations."""
     skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
@@ -1412,6 +1495,7 @@ def test_openai_vector_store_file_batch_error_handling(
         name="batch_error_test_store",
         extra_body={
             "embedding_model": embedding_model_id,
+            "provider_id": vector_io_provider_id,
         },
     )
 
@@ -1443,11 +1527,11 @@ def test_openai_vector_store_file_batch_error_handling(
             batch_id="non_existent_batch_id",
         )
 
-    # Test operations on non-existent vector store (returns NotFoundError)
+    # Test operations on non-existent vector store (returns BadRequestError)
     if isinstance(compat_client, LlamaStackAsLibraryClient):
         vector_store_errors = ValueError
     else:
-        vector_store_errors = (NotFoundError, OpenAINotFoundError)
+        vector_store_errors = (BadRequestError, OpenAIBadRequestError)
 
     with pytest.raises(vector_store_errors):  # Should raise an error for non-existent vector store
         compat_client.vector_stores.file_batches.create(
@@ -1456,8 +1540,9 @@
         )
 
 
+@vector_provider_wrapper
 def test_openai_vector_store_embedding_config_from_metadata(
-    compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension
+    compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension, vector_io_provider_id
 ):
     """Test that embedding configuration works from metadata source."""
     skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
@@ -1471,6 +1556,9 @@ def test_openai_vector_store_embedding_config_from_metadata(
             "embedding_dimension": str(embedding_dimension),
             "test_source": "metadata",
         },
+        extra_body={
+            "provider_id": vector_io_provider_id,
+        },
     )
 
     assert vector_store_metadata is not None
@@ -1489,6 +1577,7 @@ def test_openai_vector_store_embedding_config_from_metadata(
         extra_body={
             "embedding_model": embedding_model_id,
            "embedding_dimension": int(embedding_dimension),  # Ensure same type/value
+            "provider_id": vector_io_provider_id,
         },
     )
 
@@ -8,6 +8,8 @@ import pytest
 
 from llama_stack.apis.vector_io import Chunk
 
+from ..conftest import vector_provider_wrapper
+
 
 @pytest.fixture(scope="session")
 def sample_chunks():
@@ -46,12 +48,13 @@ def client_with_empty_registry(client_with_models):
     clear_registry()
 
 
-def test_vector_db_retrieve(client_with_empty_registry, embedding_model_id, embedding_dimension):
+@vector_provider_wrapper
+def test_vector_db_retrieve(client_with_empty_registry, embedding_model_id, embedding_dimension, vector_io_provider_id):
     vector_db_name = "test_vector_db"
     create_response = client_with_empty_registry.vector_stores.create(
         name=vector_db_name,
         extra_body={
             "embedding_model": embedding_model_id,
+            "provider_id": vector_io_provider_id,
         },
     )
 
@@ -65,12 +68,13 @@ def test_vector_db_retrieve(client_with_empty_registry, embedding_model_id, embedding_dimension):
     assert response.id.startswith("vs_")
 
 
-def test_vector_db_register(client_with_empty_registry, embedding_model_id, embedding_dimension):
+@vector_provider_wrapper
+def test_vector_db_register(client_with_empty_registry, embedding_model_id, embedding_dimension, vector_io_provider_id):
     vector_db_name = "test_vector_db"
     response = client_with_empty_registry.vector_stores.create(
         name=vector_db_name,
         extra_body={
             "embedding_model": embedding_model_id,
+            "provider_id": vector_io_provider_id,
         },
     )
 
@@ -100,12 +104,15 @@ def test_vector_db_register(client_with_empty_registry, embedding_model_id, embedding_dimension):
         ("How does machine learning improve over time?", "doc2"),
     ],
 )
-def test_insert_chunks(client_with_empty_registry, embedding_model_id, embedding_dimension, sample_chunks, test_case):
+@vector_provider_wrapper
+def test_insert_chunks(
+    client_with_empty_registry, embedding_model_id, embedding_dimension, sample_chunks, test_case, vector_io_provider_id
+):
     vector_db_name = "test_vector_db"
     create_response = client_with_empty_registry.vector_stores.create(
         name=vector_db_name,
         extra_body={
             "embedding_model": embedding_model_id,
+            "provider_id": vector_io_provider_id,
         },
     )
 
@@ -135,7 +142,10 @@ def test_insert_chunks(client_with_empty_registry, embedding_model_id, embedding_dimension, sample_chunks, test_case):
     assert top_match.metadata["document_id"] == expected_doc_id, f"Query '{query}' should match {expected_doc_id}"
 
 
-def test_insert_chunks_with_precomputed_embeddings(client_with_empty_registry, embedding_model_id, embedding_dimension):
+@vector_provider_wrapper
+def test_insert_chunks_with_precomputed_embeddings(
+    client_with_empty_registry, embedding_model_id, embedding_dimension, vector_io_provider_id
+):
     vector_io_provider_params_dict = {
         "inline::milvus": {"score_threshold": -1.0},
         "inline::qdrant": {"score_threshold": -1.0},
@@ -145,7 +155,7 @@ def test_insert_chunks_with_precomputed_embeddings(client_with_empty_registry, embedding_model_id, embedding_dimension):
     register_response = client_with_empty_registry.vector_stores.create(
         name=vector_db_name,
         extra_body={
-            "embedding_model": embedding_model_id,
+            "provider_id": vector_io_provider_id,
         },
     )
 
@@ -181,8 +191,9 @@ def test_insert_chunks_with_precomputed_embeddings(client_with_empty_registry, embedding_model_id, embedding_dimension):
 
 
 # expect this test to fail
+@vector_provider_wrapper
 def test_query_returns_valid_object_when_identical_to_embedding_in_vdb(
-    client_with_empty_registry, embedding_model_id, embedding_dimension
+    client_with_empty_registry, embedding_model_id, embedding_dimension, vector_io_provider_id
 ):
     vector_io_provider_params_dict = {
         "inline::milvus": {"score_threshold": 0.0},
@@ -194,6 +205,7 @@ def test_query_returns_valid_object_when_identical_to_embedding_in_vdb(
         name=vector_db_name,
         extra_body={
             "embedding_model": embedding_model_id,
+            "provider_id": vector_io_provider_id,
         },
     )
 
@@ -226,33 +238,44 @@ def test_query_returns_valid_object_when_identical_to_embedding_in_vdb(
     assert response.chunks[0].metadata["source"] == "precomputed"
 
 
-def test_auto_extract_embedding_dimension(client_with_empty_registry, embedding_model_id):
+@vector_provider_wrapper
+def test_auto_extract_embedding_dimension(
+    client_with_empty_registry, embedding_model_id, embedding_dimension, vector_io_provider_id
+):
+    # This test specifically tests embedding model override, so we keep embedding_model
     vs = client_with_empty_registry.vector_stores.create(
-        name="test_auto_extract", extra_body={"embedding_model": embedding_model_id}
+        name="test_auto_extract",
+        extra_body={"embedding_model": embedding_model_id, "provider_id": vector_io_provider_id},
     )
     assert vs.id is not None
 
 
-def test_provider_auto_selection_single_provider(client_with_empty_registry, embedding_model_id):
+@vector_provider_wrapper
+def test_provider_auto_selection_single_provider(
+    client_with_empty_registry, embedding_model_id, embedding_dimension, vector_io_provider_id
+):
     providers = [p for p in client_with_empty_registry.providers.list() if p.api == "vector_io"]
     if len(providers) != 1:
         pytest.skip(f"Test requires exactly one vector_io provider, found {len(providers)}")
 
-    vs = client_with_empty_registry.vector_stores.create(
-        name="test_auto_provider", extra_body={"embedding_model": embedding_model_id}
-    )
+    # Test that when only one provider is available, it's auto-selected (no provider_id needed)
+    vs = client_with_empty_registry.vector_stores.create(name="test_auto_provider")
     assert vs.id is not None
 
 
-def test_provider_id_override(client_with_empty_registry, embedding_model_id):
+@vector_provider_wrapper
+def test_provider_id_override(
+    client_with_empty_registry, embedding_model_id, embedding_dimension, vector_io_provider_id
+):
     providers = [p for p in client_with_empty_registry.providers.list() if p.api == "vector_io"]
     if len(providers) != 1:
         pytest.skip(f"Test requires exactly one vector_io provider, found {len(providers)}")
 
     provider_id = providers[0].provider_id
 
+    # Test explicit provider_id specification (using default embedding model)
     vs = client_with_empty_registry.vector_stores.create(
-        name="test_provider_override", extra_body={"embedding_model": embedding_model_id, "provider_id": provider_id}
+        name="test_provider_override", extra_body={"provider_id": provider_id}
     )
     assert vs.id is not None
     assert vs.metadata.get("provider_id") == provider_id
@@ -4,90 +4,64 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-"""
-Unit tests for Stack validation functions.
-"""
+"""Unit tests for Stack validation functions."""
 
 from unittest.mock import AsyncMock
 
 import pytest
 
-from llama_stack.apis.models import Model, ModelType
-from llama_stack.core.stack import validate_default_embedding_model
+from llama_stack.apis.models import ListModelsResponse, Model, ModelType
+from llama_stack.core.datatypes import QualifiedModel, StackRunConfig, StorageConfig, VectorStoresConfig
+from llama_stack.core.stack import validate_vector_stores_config
 from llama_stack.providers.datatypes import Api
 
 
-class TestStackValidation:
-    """Test Stack validation functions."""
+class TestVectorStoresValidation:
+    async def test_validate_missing_model(self):
+        """Test validation fails when model not found."""
+        run_config = StackRunConfig(
+            image_name="test",
+            providers={},
+            storage=StorageConfig(backends={}, stores={}),
+            vector_stores=VectorStoresConfig(
+                default_provider_id="faiss",
+                default_embedding_model=QualifiedModel(
+                    provider_id="p",
+                    model_id="missing",
+                ),
+            ),
+        )
+        mock_models = AsyncMock()
+        mock_models.list_models.return_value = ListModelsResponse(data=[])
 
-    @pytest.mark.parametrize(
-        "models,should_raise",
-        [
-            ([], False),  # No models
-            (
-                [
-                    Model(
-                        identifier="emb1",
-                        model_type=ModelType.embedding,
-                        metadata={"default_configured": True},
-                        provider_id="p",
-                        provider_resource_id="emb1",
-                    )
-                ],
-                False,
-            ),  # Single default
-            (
-                [
-                    Model(
-                        identifier="emb1",
-                        model_type=ModelType.embedding,
-                        metadata={"default_configured": True},
-                        provider_id="p",
-                        provider_resource_id="emb1",
-                    ),
-                    Model(
-                        identifier="emb2",
-                        model_type=ModelType.embedding,
-                        metadata={"default_configured": True},
-                        provider_id="p",
-                        provider_resource_id="emb2",
-                    ),
-                ],
-                True,
-            ),  # Multiple defaults
-            (
-                [
-                    Model(
-                        identifier="emb1",
-                        model_type=ModelType.embedding,
-                        metadata={"default_configured": True},
-                        provider_id="p",
-                        provider_resource_id="emb1",
-                    ),
-                    Model(
-                        identifier="llm1",
-                        model_type=ModelType.llm,
-                        metadata={"default_configured": True},
-                        provider_id="p",
-                        provider_resource_id="llm1",
-                    ),
-                ],
-                False,
-            ),  # Ignores non-embedding
-        ],
-    )
-    async def test_validate_default_embedding_model(self, models, should_raise):
-        """Test validation with various model configurations."""
-        mock_models_impl = AsyncMock()
-        mock_models_impl.list_models.return_value = models
-        impls = {Api.models: mock_models_impl}
+        with pytest.raises(ValueError, match="not found"):
+            await validate_vector_stores_config(run_config.vector_stores, {Api.models: mock_models})
 
-        if should_raise:
-            with pytest.raises(ValueError, match="Multiple embedding models marked as default_configured=True"):
-                await validate_default_embedding_model(impls)
-        else:
-            await validate_default_embedding_model(impls)
+    async def test_validate_success(self):
+        """Test validation passes with valid model."""
+        run_config = StackRunConfig(
+            image_name="test",
+            providers={},
+            storage=StorageConfig(backends={}, stores={}),
+            vector_stores=VectorStoresConfig(
+                default_provider_id="faiss",
+                default_embedding_model=QualifiedModel(
+                    provider_id="p",
+                    model_id="valid",
+                ),
+            ),
+        )
+        mock_models = AsyncMock()
+        mock_models.list_models.return_value = ListModelsResponse(
+            data=[
+                Model(
+                    identifier="p/valid",  # Must match provider_id/model_id format
+                    model_type=ModelType.embedding,
+                    metadata={"embedding_dimension": 768},
+                    provider_id="p",
+                    provider_resource_id="valid",
+                )
+            ]
+        )
 
-    async def test_validate_default_embedding_model_no_models_api(self):
-        """Test validation when models API is not available."""
-        await validate_default_embedding_model({})
+        await validate_vector_stores_config(run_config.vector_stores, {Api.models: mock_models})
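The two tests above pin down the contract of `validate_vector_stores_config`: the configured default embedding model must be registered under the qualified `provider_id/model_id` identifier, otherwise validation raises. A hedged sketch of that check (logic inferred from the tests, not copied from the implementation):

```python
# Inferred from the tests above, not from the implementation: the default
# embedding model must appear in the models registry under the qualified
# "provider_id/model_id" identifier, or a ValueError("... not found") is raised.
async def validate_vector_stores_config_sketch(vector_stores_config, impls):
    if vector_stores_config is None or vector_stores_config.default_embedding_model is None:
        return
    m = vector_stores_config.default_embedding_model
    qualified_id = f"{m.provider_id}/{m.model_id}"
    models = await impls[Api.models].list_models()
    if qualified_id not in {model.identifier for model in models.data}:
        raise ValueError(f"Default embedding model '{qualified_id}' not found")
```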
@@ -146,7 +146,6 @@ async def sqlite_vec_adapter(sqlite_vec_db_path, unique_kvstore_config, mock_inference_api
         config=config,
         inference_api=mock_inference_api,
         files_api=None,
-        models_api=None,
     )
     collection_id = f"sqlite_test_collection_{np.random.randint(1e6)}"
     await adapter.initialize()
@@ -185,7 +184,6 @@ async def faiss_vec_adapter(unique_kvstore_config, mock_inference_api, embedding_dimension
         config=config,
         inference_api=mock_inference_api,
         files_api=None,
-        models_api=None,
     )
     await adapter.initialize()
     await adapter.register_vector_db(
@@ -11,7 +11,6 @@ import numpy as np
 import pytest
 
 from llama_stack.apis.files import Files
-from llama_stack.apis.models import Models
 from llama_stack.apis.vector_dbs import VectorDB
 from llama_stack.apis.vector_io import Chunk, QueryChunksResponse
 from llama_stack.providers.datatypes import HealthStatus
@@ -76,12 +75,6 @@ def mock_files_api():
     return mock_api
 
 
-@pytest.fixture
-def mock_models_api():
-    mock_api = MagicMock(spec=Models)
-    return mock_api
-
-
 @pytest.fixture
 def faiss_config():
     config = MagicMock(spec=FaissVectorIOConfig)
@@ -117,7 +110,7 @@ async def test_faiss_query_vector_returns_infinity_when_query_and_embedding_are_identical
     assert response.chunks[1] == sample_chunks[1]
 
 
-async def test_health_success(mock_models_api):
+async def test_health_success():
     """Test that the health check returns OK status when faiss is working correctly."""
     # Create a fresh instance of FaissVectorIOAdapter for testing
     config = MagicMock()
@@ -126,9 +119,7 @@ async def test_health_success(mock_models_api):
 
     with patch("llama_stack.providers.inline.vector_io.faiss.faiss.faiss.IndexFlatL2") as mock_index_flat:
         mock_index_flat.return_value = MagicMock()
-        adapter = FaissVectorIOAdapter(
-            config=config, inference_api=inference_api, models_api=mock_models_api, files_api=files_api
-        )
+        adapter = FaissVectorIOAdapter(config=config, inference_api=inference_api, files_api=files_api)
 
         # Calling the health method directly
         response = await adapter.health()
@@ -142,7 +133,7 @@ async def test_health_success(mock_models_api):
         mock_index_flat.assert_called_once_with(128)  # VECTOR_DIMENSION is 128
 
 
-async def test_health_failure(mock_models_api):
+async def test_health_failure():
     """Test that the health check returns ERROR status when faiss encounters an error."""
     # Create a fresh instance of FaissVectorIOAdapter for testing
     config = MagicMock()
@@ -152,9 +143,7 @@ async def test_health_failure(mock_models_api):
     with patch("llama_stack.providers.inline.vector_io.faiss.faiss.faiss.IndexFlatL2") as mock_index_flat:
         mock_index_flat.side_effect = Exception("Test error")
 
-        adapter = FaissVectorIOAdapter(
-            config=config, inference_api=inference_api, models_api=mock_models_api, files_api=files_api
-        )
+        adapter = FaissVectorIOAdapter(config=config, inference_api=inference_api, files_api=files_api)
 
         # Calling the health method directly
         response = await adapter.health()
@@ -6,13 +6,12 @@
 
 import json
 import time
-from unittest.mock import AsyncMock, Mock, patch
+from unittest.mock import AsyncMock, patch
 
 import numpy as np
 import pytest
 
 from llama_stack.apis.common.errors import VectorStoreNotFoundError
-from llama_stack.apis.models import Model, ModelType
 from llama_stack.apis.vector_dbs import VectorDB
 from llama_stack.apis.vector_io import (
     Chunk,
@@ -996,96 +995,6 @@ async def test_max_concurrent_files_per_batch(vector_io_adapter):
     assert batch.file_counts.in_progress == 8
 
 
-async def test_get_default_embedding_model_success(vector_io_adapter):
-    """Test successful default embedding model detection."""
-    # Mock models API with a default model
-    mock_models_api = Mock()
-    mock_models_api.list_models = AsyncMock(
-        return_value=Mock(
-            data=[
-                Model(
-                    identifier="nomic-embed-text-v1.5",
-                    model_type=ModelType.embedding,
-                    provider_id="test-provider",
-                    metadata={
-                        "embedding_dimension": 768,
-                        "default_configured": True,
-                    },
-                )
-            ]
-        )
-    )
-
-    vector_io_adapter.models_api = mock_models_api
-    result = await vector_io_adapter._get_default_embedding_model_and_dimension()
-
-    assert result is not None
-    model_id, dimension = result
-    assert model_id == "nomic-embed-text-v1.5"
-    assert dimension == 768
-
-
-async def test_get_default_embedding_model_multiple_defaults_error(vector_io_adapter):
-    """Test error when multiple models are marked as default."""
-    mock_models_api = Mock()
-    mock_models_api.list_models = AsyncMock(
-        return_value=Mock(
-            data=[
-                Model(
-                    identifier="model1",
-                    model_type=ModelType.embedding,
-                    provider_id="test-provider",
-                    metadata={"embedding_dimension": 768, "default_configured": True},
-                ),
-                Model(
-                    identifier="model2",
-                    model_type=ModelType.embedding,
-                    provider_id="test-provider",
-                    metadata={"embedding_dimension": 512, "default_configured": True},
-                ),
-            ]
-        )
-    )
-
-    vector_io_adapter.models_api = mock_models_api
-
-    with pytest.raises(ValueError, match="Multiple embedding models marked as default_configured=True"):
-        await vector_io_adapter._get_default_embedding_model_and_dimension()
-
-
-async def test_openai_create_vector_store_uses_default_model(vector_io_adapter):
-    """Test that vector store creation uses default embedding model when none specified."""
-    # Mock models API and dependencies
-    mock_models_api = Mock()
-    mock_models_api.list_models = AsyncMock(
-        return_value=Mock(
-            data=[
-                Model(
-                    identifier="default-model",
-                    model_type=ModelType.embedding,
-                    provider_id="test-provider",
-                    metadata={"embedding_dimension": 512, "default_configured": True},
-                )
-            ]
-        )
-    )
-
-    vector_io_adapter.models_api = mock_models_api
-    vector_io_adapter.register_vector_db = AsyncMock()
-    vector_io_adapter.__provider_id__ = "test-provider"
-
-    # Create vector store without specifying embedding model
-    params = OpenAICreateVectorStoreRequestWithExtraBody(name="test-store")
-    result = await vector_io_adapter.openai_create_vector_store(params)
-
-    # Verify the vector store was created with default model
-    assert result.name == "test-store"
-    vector_io_adapter.register_vector_db.assert_called_once()
-    call_args = vector_io_adapter.register_vector_db.call_args[0][0]
-    assert call_args.embedding_model == "default-model"
-    assert call_args.embedding_dimension == 512
-
-
 async def test_embedding_config_from_metadata(vector_io_adapter):
     """Test that embedding configuration is correctly extracted from metadata."""
 
@@ -1253,5 +1162,5 @@ async def test_embedding_config_required_model_missing(vector_io_adapter):
     # Test with no embedding model provided
     params = OpenAICreateVectorStoreRequestWithExtraBody(name="test_store", metadata={})
 
-    with pytest.raises(ValueError, match="embedding_model is required in extra_body when creating a vector store"):
+    with pytest.raises(ValueError, match="embedding_model is required"):
         await vector_io_adapter.openai_create_vector_store(params)