From 32930868de21f5f2ef9608d03d3923df8fd9acd5 Mon Sep 17 00:00:00 2001
From: skamenan7
Date: Thu, 18 Sep 2025 10:46:53 -0400
Subject: [PATCH] tightened vector store embedding model validation

includes:
- require models to exist in registry before use
- make default_embedding_dimension mandatory when setting default model
- use first available model fallback instead of hardcoded all-MiniLM-L6-v2
- add tests for error cases and update docs
---
 docs/source/distributions/configuration.md    | 12 +++---
 llama_stack/core/routers/vector_io.py         | 21 ++++++++---
 .../unit/router/test_embedding_precedence.py  | 36 +++++++++++++++++--
 3 files changed, 55 insertions(+), 14 deletions(-)

diff --git a/docs/source/distributions/configuration.md b/docs/source/distributions/configuration.md
index 29015ae53..ac030980c 100644
--- a/docs/source/distributions/configuration.md
+++ b/docs/source/distributions/configuration.md
@@ -823,22 +823,22 @@ server:
   port: 8321
 vector_store_config:
   default_embedding_model: ${env.LLAMA_STACK_DEFAULT_EMBEDDING_MODEL:=all-MiniLM-L6-v2}
-  # optional - if omitted, defaults to 384
+  # required when default_embedding_model is set
   default_embedding_dimension: ${env.LLAMA_STACK_DEFAULT_EMBEDDING_DIMENSION:=384}
 ```
 
 Precedence rules at runtime:
 
-1. If `embedding_model` is explicitly passed in an API call, that value is used.
-2. Otherwise the value in `vector_store_config.default_embedding_model` is used.
-3. If neither is available the server will fall back to the system default (all-MiniLM-L6-v2).
+1. If `embedding_model` is explicitly passed in an API call, that value is used (model must be registered in the stack).
+2. Otherwise the value in `vector_store_config.default_embedding_model` is used (requires `default_embedding_dimension` to be set).
+3. If neither is available, the server will fall back to the first available embedding model in the registry.
 
 #### Environment variables
 
 | Variable | Purpose | Example |
 |----------|---------|---------|
 | `LLAMA_STACK_DEFAULT_EMBEDDING_MODEL` | Global default embedding model id | `all-MiniLM-L6-v2` |
-| `LLAMA_STACK_DEFAULT_EMBEDDING_DIMENSION` | Dimension for embeddings (optional, defaults to 384) | `384` |
+| `LLAMA_STACK_DEFAULT_EMBEDDING_DIMENSION` | Dimension for embeddings (required when model is set) | `384` |
 
 If you include the `${env.…}` placeholder in `vector_store_config`, deployments can override the default without editing YAML:
 
@@ -847,4 +847,4 @@ export LLAMA_STACK_DEFAULT_EMBEDDING_MODEL="sentence-transformers/all-MiniLM-L6-
 llama stack run --config run.yaml
 ```
 
-> Tip: If you omit `vector_store_config` entirely and don't set `LLAMA_STACK_DEFAULT_EMBEDDING_MODEL`, the system will fall back to the default `all-MiniLM-L6-v2` model with 384 dimensions for vector store creation.
+> Tip: If you omit `vector_store_config` entirely and don't set `LLAMA_STACK_DEFAULT_EMBEDDING_MODEL`, the system will fall back to using the first available embedding model in the registry for vector store creation.
diff --git a/llama_stack/core/routers/vector_io.py b/llama_stack/core/routers/vector_io.py
index 0ff814527..064490833 100644
--- a/llama_stack/core/routers/vector_io.py
+++ b/llama_stack/core/routers/vector_io.py
@@ -90,16 +90,27 @@
             if dim is None:
                 raise ValueError(f"Model {explicit_model} found but no embedding dimension in metadata")
             return explicit_model, dim
-            # model not in our registry, let caller deal with dimension
-            return explicit_model, None  # type: ignore
+            # model not found in registry - this is an error
+            raise ValueError(f"Embedding model '{explicit_model}' not found in model registry")
 
         # check if we have global defaults set via env vars
         config = VectorStoreConfig()
         if config.default_embedding_model is not None:
-            return config.default_embedding_model, config.default_embedding_dimension or 384
+            if config.default_embedding_dimension is None:
+                raise ValueError(
+                    f"default_embedding_model '{config.default_embedding_model}' is set but default_embedding_dimension is missing"
+                )
+            return config.default_embedding_model, config.default_embedding_dimension
 
-        # fallback to existing default model for compatibility
-        return "all-MiniLM-L6-v2", 384
+        # fallback to first available embedding model for compatibility
+        fallback = await self._get_first_embedding_model()
+        if fallback is not None:
+            return fallback
+
+        # if no models available, raise error
+        raise ValueError(
+            "No embedding model specified and no default configured. Either provide an embedding_model parameter or set vector_store_config.default_embedding_model"
+        )
 
     async def register_vector_db(
         self,
diff --git a/tests/unit/router/test_embedding_precedence.py b/tests/unit/router/test_embedding_precedence.py
index 5f8d81e05..70c7c8a47 100644
--- a/tests/unit/router/test_embedding_precedence.py
+++ b/tests/unit/router/test_embedding_precedence.py
@@ -73,10 +73,40 @@ async def test_explicit_override(monkeypatch):
 
 
 async def test_fallback_to_default():
-    """Should fallback to all-MiniLM-L6-v2 when no defaults set."""
+    """Should fallback to first available embedding model when no defaults set."""
     router = VectorIORouter(routing_table=_DummyRoutingTable())
 
     model, dim = await router._resolve_embedding_model(None)
-    assert model == "all-MiniLM-L6-v2"
-    assert dim == 384
+    assert model == "first-model"
+    assert dim == 123
+
+
+async def test_missing_dimension_requirement(monkeypatch):
+    monkeypatch.setenv("LLAMA_STACK_DEFAULT_EMBEDDING_MODEL", "some-model")
+
+    router = VectorIORouter(routing_table=_DummyRoutingTable())
+
+    with pytest.raises(ValueError, match="default_embedding_model.*is set but default_embedding_dimension is missing"):
+        await router._resolve_embedding_model(None)
+
+    monkeypatch.delenv("LLAMA_STACK_DEFAULT_EMBEDDING_MODEL", raising=False)
+
+
+async def test_unregistered_model_error():
+    router = VectorIORouter(routing_table=_DummyRoutingTable())
+
+    with pytest.raises(ValueError, match="Embedding model 'unknown-model' not found in model registry"):
+        await router._resolve_embedding_model("unknown-model")
+
+
+class _EmptyRoutingTable:
+    async def get_all_with_type(self, _type: str):
+        return []
+
+
+async def test_no_models_available_error():
+    router = VectorIORouter(routing_table=_EmptyRoutingTable())
+
+    with pytest.raises(ValueError, match="No embedding model specified and no default configured"):
+        await router._resolve_embedding_model(None)