From 32930868de21f5f2ef9608d03d3923df8fd9acd5 Mon Sep 17 00:00:00 2001
From: skamenan7
Date: Thu, 18 Sep 2025 10:46:53 -0400
Subject: [PATCH] tightened vector store embedding model validation

includes:
- require models to exist in registry before use
- make default_embedding_dimension mandatory when setting default model
- use first available model fallback instead of hardcoded all-MiniLM-L6-v2
- add tests for error cases and update docs
---
 docs/source/distributions/configuration.md    | 12 +++---
 llama_stack/core/routers/vector_io.py         | 21 ++++++++---
 .../unit/router/test_embedding_precedence.py  | 36 +++++++++++++++++--
 3 files changed, 55 insertions(+), 14 deletions(-)

diff --git a/docs/source/distributions/configuration.md b/docs/source/distributions/configuration.md
index 29015ae53..ac030980c 100644
--- a/docs/source/distributions/configuration.md
+++ b/docs/source/distributions/configuration.md
@@ -823,22 +823,22 @@ server:
   port: 8321
 vector_store_config:
   default_embedding_model: ${env.LLAMA_STACK_DEFAULT_EMBEDDING_MODEL:=all-MiniLM-L6-v2}
-  # optional - if omitted, defaults to 384
+  # required when default_embedding_model is set
   default_embedding_dimension: ${env.LLAMA_STACK_DEFAULT_EMBEDDING_DIMENSION:=384}
 ```
 
 Precedence rules at runtime:
 
-1. If `embedding_model` is explicitly passed in an API call, that value is used.
-2. Otherwise the value in `vector_store_config.default_embedding_model` is used.
-3. If neither is available the server will fall back to the system default (all-MiniLM-L6-v2).
+1. If `embedding_model` is explicitly passed in an API call, that value is used (model must be registered in the stack).
+2. Otherwise the value in `vector_store_config.default_embedding_model` is used (requires `default_embedding_dimension` to be set).
+3. If neither is available, the server will fall back to the first available embedding model in the registry.
 
 #### Environment variables
 
 | Variable | Purpose | Example |
 |----------|---------|---------|
 | `LLAMA_STACK_DEFAULT_EMBEDDING_MODEL` | Global default embedding model id | `all-MiniLM-L6-v2` |
-| `LLAMA_STACK_DEFAULT_EMBEDDING_DIMENSION` | Dimension for embeddings (optional, defaults to 384) | `384` |
+| `LLAMA_STACK_DEFAULT_EMBEDDING_DIMENSION` | Dimension for embeddings (required when model is set) | `384` |
 
 If you include the `${env.…}` placeholder in `vector_store_config`, deployments can override the default without editing YAML:
 
@@ -847,4 +847,4 @@ export LLAMA_STACK_DEFAULT_EMBEDDING_MODEL="sentence-transformers/all-MiniLM-L6-
 llama stack run --config run.yaml
 ```
 
-> Tip: If you omit `vector_store_config` entirely and don't set `LLAMA_STACK_DEFAULT_EMBEDDING_MODEL`, the system will fall back to the default `all-MiniLM-L6-v2` model with 384 dimensions for vector store creation.
+> Tip: If you omit `vector_store_config` entirely and don't set `LLAMA_STACK_DEFAULT_EMBEDDING_MODEL`, the system will fall back to using the first available embedding model in the registry for vector store creation.
diff --git a/llama_stack/core/routers/vector_io.py b/llama_stack/core/routers/vector_io.py
index 0ff814527..064490833 100644
--- a/llama_stack/core/routers/vector_io.py
+++ b/llama_stack/core/routers/vector_io.py
@@ -90,16 +90,27 @@
             if dim is None:
                 raise ValueError(f"Model {explicit_model} found but no embedding dimension in metadata")
             return explicit_model, dim
-            # model not in our registry, let caller deal with dimension
-            return explicit_model, None  # type: ignore
+            # model not found in registry - this is an error
+            raise ValueError(f"Embedding model '{explicit_model}' not found in model registry")
 
         # check if we have global defaults set via env vars
         config = VectorStoreConfig()
         if config.default_embedding_model is not None:
-            return config.default_embedding_model, config.default_embedding_dimension or 384
+            if config.default_embedding_dimension is None:
+                raise ValueError(
+                    f"default_embedding_model '{config.default_embedding_model}' is set but default_embedding_dimension is missing"
+                )
+            return config.default_embedding_model, config.default_embedding_dimension
 
-        # fallback to existing default model for compatibility
-        return "all-MiniLM-L6-v2", 384
+        # fallback to first available embedding model for compatibility
+        fallback = await self._get_first_embedding_model()
+        if fallback is not None:
+            return fallback
+
+        # if no models available, raise error
+        raise ValueError(
+            "No embedding model specified and no default configured. Either provide an embedding_model parameter or set vector_store_config.default_embedding_model"
+        )
 
     async def register_vector_db(
         self,
diff --git a/tests/unit/router/test_embedding_precedence.py b/tests/unit/router/test_embedding_precedence.py
index 5f8d81e05..70c7c8a47 100644
--- a/tests/unit/router/test_embedding_precedence.py
+++ b/tests/unit/router/test_embedding_precedence.py
@@ -73,10 +73,40 @@ async def test_explicit_override(monkeypatch):
 
 
 async def test_fallback_to_default():
-    """Should fallback to all-MiniLM-L6-v2 when no defaults set."""
+    """Should fallback to first available embedding model when no defaults set."""
     router = VectorIORouter(routing_table=_DummyRoutingTable())
 
     model, dim = await router._resolve_embedding_model(None)
-    assert model == "all-MiniLM-L6-v2"
-    assert dim == 384
+    assert model == "first-model"
+    assert dim == 123
+
+
+async def test_missing_dimension_requirement(monkeypatch):
+    monkeypatch.setenv("LLAMA_STACK_DEFAULT_EMBEDDING_MODEL", "some-model")
+
+    router = VectorIORouter(routing_table=_DummyRoutingTable())
+
+    with pytest.raises(ValueError, match="default_embedding_model.*is set but default_embedding_dimension is missing"):
+        await router._resolve_embedding_model(None)
+
+    monkeypatch.delenv("LLAMA_STACK_DEFAULT_EMBEDDING_MODEL", raising=False)
+
+
+async def test_unregistered_model_error():
+    router = VectorIORouter(routing_table=_DummyRoutingTable())
+
+    with pytest.raises(ValueError, match="Embedding model 'unknown-model' not found in model registry"):
+        await router._resolve_embedding_model("unknown-model")
+
+
+class _EmptyRoutingTable:
+    async def get_all_with_type(self, _type: str):
+        return []
+
+
+async def test_no_models_available_error():
+    router = VectorIORouter(routing_table=_EmptyRoutingTable())
+
+    with pytest.raises(ValueError, match="No embedding model specified and no default configured"):
+        await router._resolve_embedding_model(None)