Replace MissingEmbeddingModelError with IBM Granite default
- Replace error with ibm-granite/granite-embedding-125m-english default
- Based on issue #2418 for commercial compatibility and better UX
- Update tests to verify default fallback behavior
- Update documentation to reflect new precedence rules
- Remove unused MissingEmbeddingModelError class
- Update tip section to clarify fallback behavior

Resolves review comment to use default instead of error.
Parent: 380bd1bb7a
Commit: 8e2675f50c
4 changed files with 13 additions and 16 deletions
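With this change, deployments that want a different default can still pin one globally via vector_store_config; the hard-coded Granite model is only the last resort. A minimal sketch of setting that global default, assuming VectorStoreConfig is a keyword-constructible model exposing the two fields this diff touches (the example model name and dimension are placeholders, not from the repo):

```python
# Hedged sketch: pin a global default so the Granite fallback is never reached.
# Field names come from the diff below; the keyword-argument construction and
# the example values are assumptions.
from llama_stack.apis.common.vector_store_config import VectorStoreConfig

cfg = VectorStoreConfig(
    default_embedding_model="sentence-transformers/all-MiniLM-L6-v2",  # placeholder model
    default_embedding_dimension=384,  # must match the chosen model's output size
)
```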
@@ -11,7 +11,6 @@ from typing import Any
 from llama_stack.apis.common.content_types import (
     InterleavedContent,
 )
-from llama_stack.apis.common.errors import MissingEmbeddingModelError
 from llama_stack.apis.common.vector_store_config import VectorStoreConfig
 from llama_stack.apis.models import ModelType
 from llama_stack.apis.vector_io import (
@@ -83,7 +82,7 @@ class VectorIORouter(VectorIO):
         1. If *explicit_model* is provided, verify dimension (if possible) and use it.
         2. Else use the global default in ``vector_store_config``.
-        3. Else raise ``MissingEmbeddingModelError``.
+        3. Else fallback to system default (ibm-granite/granite-embedding-125m-english).
         """

         # 1. explicit override
@@ -106,10 +105,10 @@ class VectorIORouter(VectorIO):
         if cfg.default_embedding_model is not None:
             return cfg.default_embedding_model, cfg.default_embedding_dimension or 384

-        # 3. error - no default
-        raise MissingEmbeddingModelError(
-            "Failed to create vector store: No embedding model provided. Set vector_store_config.default_embedding_model or supply one in the API call."
-        )
+        # 3. fallback to system default
+        # Use IBM Granite embedding model as default for commercial compatibility
+        # See: https://github.com/meta-llama/llama-stack/issues/2418
+        return "ibm-granite/granite-embedding-125m-english", 384

     async def register_vector_db(
         self,
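Taken together, the hunks reduce embedding-model resolution to a pure three-step fallback that can no longer raise. A self-contained sketch of the post-patch behavior (the real logic lives inside VectorIORouter and is async; the standalone function and config stub here are illustrative simplifications, not the upstream code):

```python
from dataclasses import dataclass


@dataclass
class _ConfigStub:
    """Stand-in for vector_store_config (the real class is VectorStoreConfig)."""
    default_embedding_model: str | None = None
    default_embedding_dimension: int | None = None


# System default introduced by this commit (see issue #2418).
GRANITE_DEFAULT = ("ibm-granite/granite-embedding-125m-english", 384)


def resolve_embedding_model(explicit_model: str | None, cfg: _ConfigStub) -> tuple[str, int]:
    # 1. explicit override wins (dimension verification is elided in this sketch)
    if explicit_model is not None:
        return explicit_model, cfg.default_embedding_dimension or 384
    # 2. global default from vector_store_config
    if cfg.default_embedding_model is not None:
        return cfg.default_embedding_model, cfg.default_embedding_dimension or 384
    # 3. system default: the Granite model replaces the old MissingEmbeddingModelError
    return GRANITE_DEFAULT


# The fallback behavior the commit's updated tests are said to verify:
assert resolve_embedding_model(None, _ConfigStub()) == GRANITE_DEFAULT
```

Note that 384 is hard-coded as the dimension in both the config path (`or 384`) and the Granite fallback; configurations whose model emits a different vector size should set default_embedding_dimension explicitly.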