diff --git a/docs/source/distributions/configuration.md b/docs/source/distributions/configuration.md
index 335fa3a68..62f11cc30 100644
--- a/docs/source/distributions/configuration.md
+++ b/docs/source/distributions/configuration.md
@@ -687,3 +687,51 @@ shields:
   provider_shield_id: null
 ...
 ```
+
+## Global Vector Store Defaults
+
+You can provide a stack-level default embedding model that is used whenever a new vector store is created and the caller does not specify an `embedding_model` parameter.
+
+Add a top-level `vector_store_config` block at the root of your build/run YAML, alongside other root-level keys such as `models`, `shields`, `server`, and `metadata_store`:
+
+```yaml
+# ... other configuration sections ...
+metadata_store:
+  namespace: null
+  type: sqlite
+  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/registry.db
+models:
+- metadata: {}
+  model_id: ${env.INFERENCE_MODEL}
+  provider_id: ollama
+  provider_model_id: null
+shields: []
+server:
+  port: 8321
+vector_store_config:
+  default_embedding_model: ${env.LLAMA_STACK_DEFAULT_EMBEDDING_MODEL:=all-MiniLM-L6-v2}
+  # optional - if omitted, defaults to 384
+  default_embedding_dimension: ${env.LLAMA_STACK_DEFAULT_EMBEDDING_DIMENSION:=384}
+```
+
+Precedence rules at runtime:
+
+1. If `embedding_model` is explicitly passed in an API call, that value is used.
+2. Otherwise, the value in `vector_store_config.default_embedding_model` is used.
+3. If neither is available, the server falls back to the built-in default (`all-MiniLM-L6-v2`).
+
+### Environment variables
+
+| Variable | Purpose | Example |
+|----------|---------|---------|
+| `LLAMA_STACK_DEFAULT_EMBEDDING_MODEL` | Global default embedding model ID | `all-MiniLM-L6-v2` |
+| `LLAMA_STACK_DEFAULT_EMBEDDING_DIMENSION` | Embedding dimension (optional, defaults to 384) | `384` |
+
+If you use the `${env.…}` placeholders in `vector_store_config`, deployments can override the defaults without editing YAML:
+
+```bash
+export LLAMA_STACK_DEFAULT_EMBEDDING_MODEL="sentence-transformers/all-MiniLM-L6-v2"
+llama stack run --config run.yaml
+```
+
+> Tip: If you omit `vector_store_config` entirely and don't set `LLAMA_STACK_DEFAULT_EMBEDDING_MODEL`, vector store creation falls back to the default `all-MiniLM-L6-v2` model with 384 dimensions.
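Editor's note: to make the precedence rules concrete, here is a minimal, self-contained sketch of the resolution order documented above. The helper name `resolve_embedding_model` is illustrative and not part of the Llama Stack API; the actual logic lives in `VectorIORouter._resolve_embedding_model` later in this patch.

```python
import os


def resolve_embedding_model(explicit_model: str | None) -> tuple[str, int | None]:
    # Rule 1: an explicit `embedding_model` API parameter always wins;
    # its dimension is looked up from the model registry at runtime.
    if explicit_model is not None:
        return explicit_model, None
    # Rule 2: the stack-level default from vector_store_config / env vars.
    env_model = os.getenv("LLAMA_STACK_DEFAULT_EMBEDDING_MODEL")
    if env_model:
        return env_model, int(os.getenv("LLAMA_STACK_DEFAULT_EMBEDDING_DIMENSION", "384"))
    # Rule 3: the built-in fallback.
    return "all-MiniLM-L6-v2", 384
```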
+""" + +import os + +from pydantic import BaseModel, ConfigDict, Field + +__all__ = ["VectorStoreConfig"] + + +class VectorStoreConfig(BaseModel): + """Default embedding model config that gets picked up from env vars.""" + + default_embedding_model: str | None = Field( + default_factory=lambda: os.getenv("LLAMA_STACK_DEFAULT_EMBEDDING_MODEL") + ) + # dimension from env - fallback to None if not set or invalid + default_embedding_dimension: int | None = Field( + default_factory=lambda: int(os.getenv("LLAMA_STACK_DEFAULT_EMBEDDING_DIMENSION", 0)) or None, ge=1 + ) + + model_config = ConfigDict(frozen=True) diff --git a/llama_stack/core/datatypes.py b/llama_stack/core/datatypes.py index a1b6ad32b..34e4b331d 100644 --- a/llama_stack/core/datatypes.py +++ b/llama_stack/core/datatypes.py @@ -11,6 +11,7 @@ from typing import Annotated, Any, Literal, Self from pydantic import BaseModel, Field, field_validator, model_validator from llama_stack.apis.benchmarks import Benchmark, BenchmarkInput +from llama_stack.apis.common.vector_store_config import VectorStoreConfig from llama_stack.apis.datasetio import DatasetIO from llama_stack.apis.datasets import Dataset, DatasetInput from llama_stack.apis.eval import Eval @@ -391,6 +392,12 @@ Configuration for the persistence store used by the inference API. If not specif a default SQLite store will be used.""", ) + # Global vector-store defaults (embedding model etc.) + vector_store_config: VectorStoreConfig = Field( + default_factory=VectorStoreConfig, + description="Global defaults for vector-store creation (embedding model, dimension, …)", + ) + # registry of "resources" in the distribution models: list[ModelInput] = Field(default_factory=list) shields: list[ShieldInput] = Field(default_factory=list) diff --git a/llama_stack/core/routers/vector_io.py b/llama_stack/core/routers/vector_io.py index 3d0996c49..e48c14e0e 100644 --- a/llama_stack/core/routers/vector_io.py +++ b/llama_stack/core/routers/vector_io.py @@ -11,6 +11,7 @@ from typing import Any from llama_stack.apis.common.content_types import ( InterleavedContent, ) +from llama_stack.apis.common.vector_store_config import VectorStoreConfig from llama_stack.apis.models import ModelType from llama_stack.apis.vector_io import ( Chunk, @@ -76,6 +77,30 @@ class VectorIORouter(VectorIO): logger.error(f"Error getting embedding models: {e}") return None + async def _resolve_embedding_model(self, explicit_model: str | None = None) -> tuple[str, int]: + """Figure out which embedding model to use and what dimension it has.""" + + # if they passed a model explicitly, use that + if explicit_model is not None: + # try to look up dimension from our routing table + models = await self.routing_table.get_all_with_type("model") + for model in models: + if getattr(model, "identifier", None) == explicit_model: + dim = model.metadata.get("embedding_dimension") + if dim is None: + raise ValueError(f"Model {explicit_model} found but no embedding dimension in metadata") + return explicit_model, dim + # model not in our registry, let caller deal with dimension + return explicit_model, None # type: ignore + + # check if we have global defaults set via env vars + config = VectorStoreConfig() + if config.default_embedding_model is not None: + return config.default_embedding_model, config.default_embedding_dimension or 384 + + # fallback to existing default model for compatibility + return "all-MiniLM-L6-v2", 384 + async def register_vector_db( self, vector_db_id: str, @@ -102,7 +127,7 @@ class VectorIORouter(VectorIO): 
diff --git a/llama_stack/core/routers/vector_io.py b/llama_stack/core/routers/vector_io.py
index 3d0996c49..e48c14e0e 100644
--- a/llama_stack/core/routers/vector_io.py
+++ b/llama_stack/core/routers/vector_io.py
@@ -11,6 +11,7 @@ from typing import Any
 from llama_stack.apis.common.content_types import (
     InterleavedContent,
 )
+from llama_stack.apis.common.vector_store_config import VectorStoreConfig
 from llama_stack.apis.models import ModelType
 from llama_stack.apis.vector_io import (
     Chunk,
@@ -76,6 +77,30 @@ class VectorIORouter(VectorIO):
             logger.error(f"Error getting embedding models: {e}")
             return None
 
+    async def _resolve_embedding_model(self, explicit_model: str | None = None) -> tuple[str, int | None]:
+        """Resolve which embedding model to use and, when known, its dimension."""
+
+        # An explicitly passed model always wins.
+        if explicit_model is not None:
+            # Try to look up its dimension in the routing table.
+            models = await self.routing_table.get_all_with_type("model")
+            for model in models:
+                if getattr(model, "identifier", None) == explicit_model:
+                    dim = model.metadata.get("embedding_dimension")
+                    if dim is None:
+                        raise ValueError(f"Model {explicit_model} found but no embedding dimension in metadata")
+                    return explicit_model, dim
+            # Model not in the registry; the caller must determine the dimension.
+            return explicit_model, None
+
+        # Global defaults set via environment variables.
+        config = VectorStoreConfig()
+        if config.default_embedding_model is not None:
+            return config.default_embedding_model, config.default_embedding_dimension or 384
+
+        # Fall back to the existing default model for backward compatibility.
+        return "all-MiniLM-L6-v2", 384
+
     async def register_vector_db(
         self,
         vector_db_id: str,
@@ -102,7 +127,7 @@ class VectorIORouter(VectorIO):
         ttl_seconds: int | None = None,
     ) -> None:
         logger.debug(
-            f"VectorIORouter.insert_chunks: {vector_db_id}, {len(chunks)} chunks, ttl_seconds={ttl_seconds}, chunk_ids={[chunk.metadata['document_id'] for chunk in chunks[:3]]}{' and more...' if len(chunks) > 3 else ''}",
+            f"VectorIORouter.insert_chunks: {vector_db_id}, {len(chunks)} chunks, ttl_seconds={ttl_seconds}, chunk_ids={[chunk.chunk_id for chunk in chunks[:3]]}{' and more...' if len(chunks) > 3 else ''}",
         )
         provider = await self.routing_table.get_provider_impl(vector_db_id)
         return await provider.insert_chunks(vector_db_id, chunks, ttl_seconds)
@@ -131,13 +156,8 @@ class VectorIORouter(VectorIO):
     ) -> VectorStoreObject:
         logger.debug(f"VectorIORouter.openai_create_vector_store: name={name}, provider_id={provider_id}")
 
-        # If no embedding model is provided, use the first available one
-        if embedding_model is None:
-            embedding_model_info = await self._get_first_embedding_model()
-            if embedding_model_info is None:
-                raise ValueError("No embedding model provided and no embedding models available in the system")
-            embedding_model, embedding_dimension = embedding_model_info
-            logger.info(f"No embedding model specified, using first available: {embedding_model}")
+        # Determine which embedding model to use based on the new precedence rules.
+        embedding_model, embedding_dimension = await self._resolve_embedding_model(embedding_model)
 
         vector_db_id = f"vs_{uuid.uuid4()}"
         registered_vector_db = await self.routing_table.register_vector_db(
diff --git a/llama_stack/distributions/watsonx/build.yaml b/llama_stack/distributions/watsonx/build.yaml
index bf4be7eaf..3db9a1f38 100644
--- a/llama_stack/distributions/watsonx/build.yaml
+++ b/llama_stack/distributions/watsonx/build.yaml
@@ -39,6 +39,9 @@ distribution_spec:
     - provider_type: remote::tavily-search
     - provider_type: inline::rag-runtime
     - provider_type: remote::model-context-protocol
+vector_store_config:
+  default_embedding_model: ${env.LLAMA_STACK_DEFAULT_EMBEDDING_MODEL:=all-MiniLM-L6-v2}
+  default_embedding_dimension: ${env.LLAMA_STACK_DEFAULT_EMBEDDING_DIMENSION:=384}
 image_type: venv
 additional_pip_packages:
 - sqlalchemy[asyncio]
diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py
index 234d762ce..2bc3373a5 100644
--- a/tests/integration/conftest.py
+++ b/tests/integration/conftest.py
@@ -63,6 +63,19 @@ def pytest_configure(config):
         os.environ["DISABLE_CODE_SANDBOX"] = "1"
         logger.info("Setting DISABLE_CODE_SANDBOX=1 for macOS")
 
+    # After processing CLI --env overrides, ensure a global default embedding model is set for vector-store operations.
+    embedding_model_opt = config.getoption("--embedding-model") or "sentence-transformers/all-MiniLM-L6-v2"
+    if not os.getenv("LLAMA_STACK_DEFAULT_EMBEDDING_MODEL"):
+        # Use the first value in a comma-separated list (if any).
+        default_model = embedding_model_opt.split(",")[0].strip()
+        os.environ["LLAMA_STACK_DEFAULT_EMBEDDING_MODEL"] = default_model
+        logger.info(f"Setting LLAMA_STACK_DEFAULT_EMBEDDING_MODEL={default_model}")
+
+    embedding_dim_opt = config.getoption("--embedding-dimension") or 384
+    if not os.getenv("LLAMA_STACK_DEFAULT_EMBEDDING_DIMENSION"):
+        os.environ["LLAMA_STACK_DEFAULT_EMBEDDING_DIMENSION"] = str(embedding_dim_opt)
+        logger.info(f"Setting LLAMA_STACK_DEFAULT_EMBEDDING_DIMENSION={embedding_dim_opt}")
+
 
 def pytest_addoption(parser):
     parser.addoption(
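Editor's note: for illustration, the hook's fallback logic can be distilled into a standalone helper built on `os.environ.setdefault`; the function name is invented for this sketch and does not exist in the repository.

```python
import os


def apply_embedding_defaults(embedding_model_opt: str | None, embedding_dim_opt: int | None) -> None:
    # First entry of a comma-separated --embedding-model list, or the hard-coded fallback.
    model = (embedding_model_opt or "sentence-transformers/all-MiniLM-L6-v2").split(",")[0].strip()
    # setdefault writes only when the variable is not already set, mirroring
    # the "existing env vars win" behavior of the pytest_configure hook above.
    os.environ.setdefault("LLAMA_STACK_DEFAULT_EMBEDDING_MODEL", model)
    os.environ.setdefault("LLAMA_STACK_DEFAULT_EMBEDDING_DIMENSION", str(embedding_dim_opt or 384))
```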
diff --git a/tests/unit/common/test_vector_store_config.py b/tests/unit/common/test_vector_store_config.py
new file mode 100644
index 000000000..2b45fa5b9
--- /dev/null
+++ b/tests/unit/common/test_vector_store_config.py
@@ -0,0 +1,29 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from llama_stack.apis.common.vector_store_config import VectorStoreConfig
+
+
+def test_defaults(monkeypatch):
+    # Ensure the env is clean to avoid flaky defaults.
+    monkeypatch.delenv("LLAMA_STACK_DEFAULT_EMBEDDING_MODEL", raising=False)
+    monkeypatch.delenv("LLAMA_STACK_DEFAULT_EMBEDDING_DIMENSION", raising=False)
+    config = VectorStoreConfig()
+    assert config.default_embedding_model is None
+    assert config.default_embedding_dimension is None
+
+
+def test_env_loading(monkeypatch):
+    monkeypatch.setenv("LLAMA_STACK_DEFAULT_EMBEDDING_MODEL", "test-model")
+    monkeypatch.setenv("LLAMA_STACK_DEFAULT_EMBEDDING_DIMENSION", "123")
+
+    config = VectorStoreConfig()
+    assert config.default_embedding_model == "test-model"
+    assert config.default_embedding_dimension == 123
+
+    # monkeypatch reverts these automatically at teardown; explicit cleanup kept for clarity
+    monkeypatch.delenv("LLAMA_STACK_DEFAULT_EMBEDDING_MODEL", raising=False)
+    monkeypatch.delenv("LLAMA_STACK_DEFAULT_EMBEDDING_DIMENSION", raising=False)
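Editor's note: a suggested extra case, sketched here rather than added to the patch. With the current default factory, a non-integer dimension value fails fast at construction; `pytest.raises(ValueError)` should hold whether the error propagates raw from `int()` or is wrapped by pydantic (whose `ValidationError` subclasses `ValueError`), though that propagation behavior is an assumption worth verifying.

```python
import pytest

from llama_stack.apis.common.vector_store_config import VectorStoreConfig


def test_invalid_dimension_raises(monkeypatch):
    # "not-a-number" cannot pass through int(), so construction fails fast.
    monkeypatch.setenv("LLAMA_STACK_DEFAULT_EMBEDDING_DIMENSION", "not-a-number")
    with pytest.raises(ValueError):
        VectorStoreConfig()
```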
assert model == "first-model" + assert dim == 123 + + monkeypatch.delenv("LLAMA_STACK_DEFAULT_EMBEDDING_MODEL", raising=False) + + +async def test_fallback_to_default(): + """Should fallback to all-MiniLM-L6-v2 when no defaults set.""" + + router = VectorIORouter(routing_table=_DummyRoutingTable()) + + model, dim = await router._resolve_embedding_model(None) + assert model == "all-MiniLM-L6-v2" + assert dim == 384