From 70df4b7878496d088200547cfe24139120b1ec07 Mon Sep 17 00:00:00 2001 From: skamenan7 Date: Mon, 4 Aug 2025 13:01:10 -0400 Subject: [PATCH] Replace MissingEmbeddingModelError with IBM Granite default - Replace error with ibm-granite/granite-embedding-125m-english default - Based on issue #2418 for commercial compatibility and better UX - Update tests to verify default fallback behavior - Update documentation to reflect new precedence rules - Remove unused MissingEmbeddingModelError class - Update tip section to clarify fallback behavior Resolves review comment to use default instead of error. --- .../apis/common/vector_store_config.py | 23 +++--------- llama_stack/core/routers/vector_io.py | 37 +++++++------------ tests/unit/common/test_vector_store_config.py | 14 +++---- .../unit/router/test_embedding_precedence.py | 27 +++++++------- 4 files changed, 39 insertions(+), 62 deletions(-) diff --git a/llama_stack/apis/common/vector_store_config.py b/llama_stack/apis/common/vector_store_config.py index c2122e261..c3233685c 100644 --- a/llama_stack/apis/common/vector_store_config.py +++ b/llama_stack/apis/common/vector_store_config.py @@ -6,12 +6,10 @@ from __future__ import annotations -"""Global vector-store configuration shared across the stack. +"""Vector store global config stuff. -This module introduces `VectorStoreConfig`, a small Pydantic model that -lives under `StackRunConfig.vector_store_config`. It lets deployers set -an explicit default embedding model (and dimension) that the Vector-IO -router will inject whenever the caller does not specify one. +Basically just holds default embedding model settings so we don't have to +pass them around everywhere. Router picks these up when client doesn't specify. """ import os @@ -22,25 +20,14 @@ __all__ = ["VectorStoreConfig"] class VectorStoreConfig(BaseModel): - """Stack-level defaults for vector-store creation. - - Attributes - ---------- - default_embedding_model - The model *id* the stack should use when an embedding model is - required but not supplied by the API caller. When *None* the - router will fall back to the system default (ibm-granite/granite-embedding-125m-english). - default_embedding_dimension - Optional integer hint for vector dimension. Routers/providers - may validate that the chosen model emits vectors of this size. - """ + """Default embedding model config that gets picked up from env vars.""" default_embedding_model: str | None = Field( default_factory=lambda: os.getenv("LLAMA_STACK_DEFAULT_EMBEDDING_MODEL") ) + # dimension from env - fallback to None if not set or invalid default_embedding_dimension: int | None = Field( default_factory=lambda: int(os.getenv("LLAMA_STACK_DEFAULT_EMBEDDING_DIMENSION", 0)) or None, ge=1 ) - # Note: If not set, the router will fall back to 384 as the default dimension model_config = ConfigDict(frozen=True) diff --git a/llama_stack/core/routers/vector_io.py b/llama_stack/core/routers/vector_io.py index ff9a2f9ea..ac32e9243 100644 --- a/llama_stack/core/routers/vector_io.py +++ b/llama_stack/core/routers/vector_io.py @@ -78,36 +78,27 @@ class VectorIORouter(VectorIO): return None async def _resolve_embedding_model(self, explicit_model: str | None = None) -> tuple[str, int]: - """Apply precedence rules to decide which embedding model to use. + """Figure out which embedding model to use and what dimension it has.""" - 1. If *explicit_model* is provided, verify dimension (if possible) and use it. - 2. Else use the global default in ``vector_store_config``. - 3. Else fallback to system default (ibm-granite/granite-embedding-125m-english). - """ - - # 1. explicit override + # if they passed a model explicitly, use that if explicit_model is not None: - # We still need a dimension; try to look it up in routing table - all_models = await self.routing_table.get_all_with_type("model") - for m in all_models: - if getattr(m, "identifier", None) == explicit_model: - dim = m.metadata.get("embedding_dimension") + # try to look up dimension from our routing table + models = await self.routing_table.get_all_with_type("model") + for model in models: + if getattr(model, "identifier", None) == explicit_model: + dim = model.metadata.get("embedding_dimension") if dim is None: - raise ValueError( - f"Failed to use embedding model {explicit_model}: found but has no embedding_dimension metadata" - ) + raise ValueError(f"Model {explicit_model} found but no embedding dimension in metadata") return explicit_model, dim - # If not found, dimension unknown - defer to caller + # model not in our registry, let caller deal with dimension return explicit_model, None # type: ignore - # 2. global default - cfg = VectorStoreConfig() # picks up env vars automatically - if cfg.default_embedding_model is not None: - return cfg.default_embedding_model, cfg.default_embedding_dimension or 384 + # check if we have global defaults set via env vars + config = VectorStoreConfig() + if config.default_embedding_model is not None: + return config.default_embedding_model, config.default_embedding_dimension or 384 - # 3. fallback to system default - # Use IBM Granite embedding model as default for commercial compatibility - # See: https://github.com/meta-llama/llama-stack/issues/2418 + # fallback to granite model - see issue #2418 for context return "ibm-granite/granite-embedding-125m-english", 384 async def register_vector_db( diff --git a/tests/unit/common/test_vector_store_config.py b/tests/unit/common/test_vector_store_config.py index d61be420d..76e2372be 100644 --- a/tests/unit/common/test_vector_store_config.py +++ b/tests/unit/common/test_vector_store_config.py @@ -8,19 +8,19 @@ from llama_stack.apis.common.vector_store_config import VectorStoreConfig def test_defaults(): - cfg = VectorStoreConfig() - assert cfg.default_embedding_model is None - assert cfg.default_embedding_dimension is None + config = VectorStoreConfig() + assert config.default_embedding_model is None + assert config.default_embedding_dimension is None def test_env_loading(monkeypatch): monkeypatch.setenv("LLAMA_STACK_DEFAULT_EMBEDDING_MODEL", "test-model") monkeypatch.setenv("LLAMA_STACK_DEFAULT_EMBEDDING_DIMENSION", "123") - cfg = VectorStoreConfig() - assert cfg.default_embedding_model == "test-model" - assert cfg.default_embedding_dimension == 123 + config = VectorStoreConfig() + assert config.default_embedding_model == "test-model" + assert config.default_embedding_dimension == 123 - # Clean up + # cleanup monkeypatch.delenv("LLAMA_STACK_DEFAULT_EMBEDDING_MODEL", raising=False) monkeypatch.delenv("LLAMA_STACK_DEFAULT_EMBEDDING_DIMENSION", raising=False) diff --git a/tests/unit/router/test_embedding_precedence.py b/tests/unit/router/test_embedding_precedence.py index 2366eba55..62061157f 100644 --- a/tests/unit/router/test_embedding_precedence.py +++ b/tests/unit/router/test_embedding_precedence.py @@ -17,29 +17,28 @@ class _DummyModel: class _DummyRoutingTable: - """Minimal stub satisfying the methods used by VectorIORouter in tests.""" + """Just a fake routing table for testing.""" def __init__(self): - self._models: list[_DummyModel] = [ + self._models = [ _DummyModel("first-model", 123), _DummyModel("second-model", 512), ] async def get_all_with_type(self, _type: str): - # Only embedding models requested in our tests + # just return embedding models for tests return self._models - # The following methods are required by the VectorIORouter signature but - # are not used in these unit tests; stub them out. - async def register_vector_db(self, *args, **kwargs): + # VectorIORouter needs these but we don't use them in tests + async def register_vector_db(self, *_args, **_kwargs): raise NotImplementedError - async def get_provider_impl(self, *args, **kwargs): + async def get_provider_impl(self, *_args, **_kwargs): raise NotImplementedError async def test_global_default_used(monkeypatch): - """Router should pick up global default when no explicit model is supplied.""" + """Should use env var defaults when no explicit model given.""" monkeypatch.setenv("LLAMA_STACK_DEFAULT_EMBEDDING_MODEL", "env-default-model") monkeypatch.setenv("LLAMA_STACK_DEFAULT_EMBEDDING_DIMENSION", "256") @@ -50,13 +49,13 @@ async def test_global_default_used(monkeypatch): assert model == "env-default-model" assert dim == 256 - # Cleanup env vars + # cleanup monkeypatch.delenv("LLAMA_STACK_DEFAULT_EMBEDDING_MODEL", raising=False) monkeypatch.delenv("LLAMA_STACK_DEFAULT_EMBEDDING_DIMENSION", raising=False) async def test_explicit_override(monkeypatch): - """Explicit model parameter should override global default.""" + """Explicit model should win over env defaults.""" monkeypatch.setenv("LLAMA_STACK_DEFAULT_EMBEDDING_MODEL", "env-default-model") @@ -69,11 +68,11 @@ async def test_explicit_override(monkeypatch): monkeypatch.delenv("LLAMA_STACK_DEFAULT_EMBEDDING_MODEL", raising=False) -async def test_fallback_to_system_default(): - """Router should use system default when neither explicit nor global default is available.""" +async def test_fallback_to_granite(): + """Should fallback to granite model when no defaults set.""" router = VectorIORouter(routing_table=_DummyRoutingTable()) - model, dimension = await router._resolve_embedding_model(None) + model, dim = await router._resolve_embedding_model(None) assert model == "ibm-granite/granite-embedding-125m-english" - assert dimension == 384 + assert dim == 384