Mirror of https://github.com/meta-llama/llama-stack.git, synced 2025-10-09 13:14:39 +00:00
Replace MissingEmbeddingModelError with IBM Granite default
- Replace error with ibm-granite/granite-embedding-125m-english default
- Based on issue #2418 for commercial compatibility and better UX
- Update tests to verify default fallback behavior
- Update documentation to reflect new precedence rules
- Remove unused MissingEmbeddingModelError class
- Update tip section to clarify fallback behavior

Resolves review comment to use default instead of error.
This commit is contained in:
parent 8e2675f50c
commit e411099cbf

4 changed files with 39 additions and 62 deletions
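The resolution order the commit message describes can be illustrated in isolation. Below is a minimal sketch of the three precedence tiers; `VectorStoreConfig` here is a stand-in dataclass rather than the real llama-stack class, and the harness is hypothetical, but the fallback behavior matches the diff that follows.

```python
from dataclasses import dataclass


@dataclass
class VectorStoreConfig:
    # Stand-in for the real llama-stack config; fields mirror the diff below.
    default_embedding_model: str | None = None
    default_embedding_dimension: int | None = None


def resolve_embedding_model(
    explicit_model: str | None, config: VectorStoreConfig
) -> tuple[str, int | None]:
    # 1. An explicitly requested model always wins (dimension resolved later).
    if explicit_model is not None:
        return explicit_model, None
    # 2. Otherwise use the globally configured default, assuming 384 dims
    #    when no dimension is configured.
    if config.default_embedding_model is not None:
        return config.default_embedding_model, config.default_embedding_dimension or 384
    # 3. Finally fall back to the IBM Granite default (issue #2418).
    return "ibm-granite/granite-embedding-125m-english", 384


# With nothing configured, the Granite default is returned:
assert resolve_embedding_model(None, VectorStoreConfig()) == (
    "ibm-granite/granite-embedding-125m-english",
    384,
)
```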
@@ -78,36 +78,27 @@ class VectorIORouter(VectorIO):
         return None

     async def _resolve_embedding_model(self, explicit_model: str | None = None) -> tuple[str, int]:
-        """Apply precedence rules to decide which embedding model to use.
-
-        1. If *explicit_model* is provided, verify dimension (if possible) and use it.
-        2. Else use the global default in ``vector_store_config``.
-        3. Else fallback to system default (ibm-granite/granite-embedding-125m-english).
-        """
-
-        # 1. explicit override
+        """Figure out which embedding model to use and what dimension it has."""
+
+        # if they passed a model explicitly, use that
         if explicit_model is not None:
-            # We still need a dimension; try to look it up in routing table
-            all_models = await self.routing_table.get_all_with_type("model")
-            for m in all_models:
-                if getattr(m, "identifier", None) == explicit_model:
-                    dim = m.metadata.get("embedding_dimension")
+            # try to look up dimension from our routing table
+            models = await self.routing_table.get_all_with_type("model")
+            for model in models:
+                if getattr(model, "identifier", None) == explicit_model:
+                    dim = model.metadata.get("embedding_dimension")
                     if dim is None:
-                        raise ValueError(
-                            f"Failed to use embedding model {explicit_model}: found but has no embedding_dimension metadata"
-                        )
+                        raise ValueError(f"Model {explicit_model} found but no embedding dimension in metadata")
                     return explicit_model, dim
-            # If not found, dimension unknown - defer to caller
+            # model not in our registry, let caller deal with dimension
             return explicit_model, None  # type: ignore

-        # 2. global default
-        cfg = VectorStoreConfig()  # picks up env vars automatically
-        if cfg.default_embedding_model is not None:
-            return cfg.default_embedding_model, cfg.default_embedding_dimension or 384
+        # check if we have global defaults set via env vars
+        config = VectorStoreConfig()
+        if config.default_embedding_model is not None:
+            return config.default_embedding_model, config.default_embedding_dimension or 384

-        # 3. fallback to system default
-        # Use IBM Granite embedding model as default for commercial compatibility
-        # See: https://github.com/meta-llama/llama-stack/issues/2418
+        # fallback to granite model - see issue #2418 for context
         return "ibm-granite/granite-embedding-125m-english", 384

     async def register_vector_db(
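One wrinkle the diff preserves: the method is annotated `-> tuple[str, int]`, yet the explicit-model branch can return a `None` dimension when the model is not in the routing table (hence the `# type: ignore`). A caller therefore has to guard for that case; a hypothetical wrapper (names here are illustrative, not code from the repo) might look like:

```python
async def resolve_with_dimension(router, requested: str | None) -> tuple[str, int]:
    # Hypothetical caller-side guard, not code from llama-stack.
    model_id, dim = await router._resolve_embedding_model(requested)
    if dim is None:
        # The model was passed explicitly but is unknown to the routing
        # table, so the router deferred the dimension to us; assume the
        # same 384 default used elsewhere in the diff.
        dim = 384
    return model_id, dim
```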