Replace MissingEmbeddingModelError with IBM Granite default

- Replace error with ibm-granite/granite-embedding-125m-english default
- Based on issue #2418 for commercial compatibility and better UX
- Update tests to verify default fallback behavior
- Update documentation to reflect new precedence rules
- Remove unused MissingEmbeddingModelError class
- Update tip section to clarify fallback behavior

Resolves review comment to use default instead of error.
2025-12-08 11:07:22 +00:00 · 2025-08-04 13:01:10 -04:00 · 2025-08-04 13:01:10 -04:00 · e411099cbf
commit e411099cbf
parent 8e2675f50c
4 changed files with 39 additions and 62 deletions
--- a/llama_stack/apis/common/vector_store_config.py
+++ b/llama_stack/apis/common/vector_store_config.py
@@ -6,12 +6,10 @@

 from __future__ import annotations

-"""Global vector-store configuration shared across the stack.
+"""Vector store global config stuff.

-This module introduces `VectorStoreConfig`, a small Pydantic model that
-lives under `StackRunConfig.vector_store_config`.  It lets deployers set
-an explicit default embedding model (and dimension) that the Vector-IO
-router will inject whenever the caller does not specify one.
+Basically just holds default embedding model settings so we don't have to
+pass them around everywhere. Router picks these up when client doesn't specify.
 """

 import os
@@ -22,25 +20,14 @@ __all__ = ["VectorStoreConfig"]


 class VectorStoreConfig(BaseModel):
-    """Stack-level defaults for vector-store creation.
-
-    Attributes
-    ----------
-    default_embedding_model
-        The model *id* the stack should use when an embedding model is
-        required but not supplied by the API caller.  When *None* the
-        router will fall back to the system default (ibm-granite/granite-embedding-125m-english).
-    default_embedding_dimension
-        Optional integer hint for vector dimension.  Routers/providers
-        may validate that the chosen model emits vectors of this size.
-    """
+    """Default embedding model config that gets picked up from env vars."""

    default_embedding_model: str | None = Field(
        default_factory=lambda: os.getenv("LLAMA_STACK_DEFAULT_EMBEDDING_MODEL")
    )
+    # dimension from env - None if unset or zero; a non-integer value makes int() raise instead of falling back
    default_embedding_dimension: int | None = Field(
        default_factory=lambda: int(os.getenv("LLAMA_STACK_DEFAULT_EMBEDDING_DIMENSION", 0)) or None, ge=1
    )
-    # Note: If not set, the router will fall back to 384 as the default dimension

    model_config = ConfigDict(frozen=True)
--- a/llama_stack/core/routers/vector_io.py
+++ b/llama_stack/core/routers/vector_io.py
@@ -78,36 +78,27 @@ class VectorIORouter(VectorIO):
            return None

    async def _resolve_embedding_model(self, explicit_model: str | None = None) -> tuple[str, int]:
-        """Apply precedence rules to decide which embedding model to use.
+        """Figure out which embedding model to use and what dimension it has."""

-        1. If *explicit_model* is provided, verify dimension (if possible) and use it.
-        2. Else use the global default in ``vector_store_config``.
-        3. Else fallback to system default (ibm-granite/granite-embedding-125m-english).
-        """
-
-        # 1. explicit override
+        # if they passed a model explicitly, use that
        if explicit_model is not None:
-            # We still need a dimension; try to look it up in routing table
-            all_models = await self.routing_table.get_all_with_type("model")
-            for m in all_models:
-                if getattr(m, "identifier", None) == explicit_model:
-                    dim = m.metadata.get("embedding_dimension")
+            # try to look up dimension from our routing table
+            models = await self.routing_table.get_all_with_type("model")
+            for model in models:
+                if getattr(model, "identifier", None) == explicit_model:
+                    dim = model.metadata.get("embedding_dimension")
                    if dim is None:
-                        raise ValueError(
-                            f"Failed to use embedding model {explicit_model}: found but has no embedding_dimension metadata"
-                        )
+                        raise ValueError(f"Model {explicit_model} found but no embedding dimension in metadata")
                    return explicit_model, dim
-            # If not found, dimension unknown - defer to caller
+            # model not in our registry, let caller deal with dimension
            return explicit_model, None  # type: ignore

-        # 2. global default
-        cfg = VectorStoreConfig()  # picks up env vars automatically
-        if cfg.default_embedding_model is not None:
-            return cfg.default_embedding_model, cfg.default_embedding_dimension or 384
+        # check if we have global defaults set via env vars
+        config = VectorStoreConfig()
+        if config.default_embedding_model is not None:
+            return config.default_embedding_model, config.default_embedding_dimension or 384

-        # 3. fallback to system default
-        # Use IBM Granite embedding model as default for commercial compatibility
-        # See: https://github.com/meta-llama/llama-stack/issues/2418
+        # fallback to granite model - see issue #2418 for context
        return "ibm-granite/granite-embedding-125m-english", 384

    async def register_vector_db(