diff --git a/docs/source/distributions/configuration.md b/docs/source/distributions/configuration.md
index 2fe9d7c53..760d4eccf 100644
--- a/docs/source/distributions/configuration.md
+++ b/docs/source/distributions/configuration.md
@@ -705,7 +705,7 @@ Precedence rules at runtime:
 
 1. If `embedding_model` is explicitly passed in an API call, that value is used.
 2. Otherwise the value in `vector_store_config.default_embedding_model` is used.
-3. If neither is available the server will raise `MissingEmbeddingModelError` at store-creation time so mis-configuration is caught early.
+3. If neither is available, the server falls back to the system default (`ibm-granite/granite-embedding-125m-english`).
 
 #### Environment variables
 
@@ -721,4 +721,4 @@
 export LLAMA_STACK_DEFAULT_EMBEDDING_MODEL="sentence-transformers/all-MiniLM-L6-v2"
 llama stack run --config run.yaml
 ```
-> Tip: If you omit `vector_store_config` entirely you **must** either pass `embedding_model=` on every `create_vector_store` call or set `LLAMA_STACK_DEFAULT_EMBEDDING_MODEL` in the environment, otherwise the server will refuse to create a vector store.
+> Tip: If you omit `vector_store_config` entirely and don't set `LLAMA_STACK_DEFAULT_EMBEDDING_MODEL`, the system falls back to the default `ibm-granite/granite-embedding-125m-english` model (384 dimensions) when creating a vector store.
diff --git a/llama_stack/apis/common/vector_store_config.py b/llama_stack/apis/common/vector_store_config.py
index d0508048d..c2122e261 100644
--- a/llama_stack/apis/common/vector_store_config.py
+++ b/llama_stack/apis/common/vector_store_config.py
@@ -29,7 +29,7 @@ class VectorStoreConfig(BaseModel):
     default_embedding_model
         The model *id* the stack should use when an embedding model is
         required but not supplied by the API caller. When *None* the
-        router will raise a :class:`~llama_stack.apis.common.errors.MissingEmbeddingModelError`.
+        router falls back to the system default (``ibm-granite/granite-embedding-125m-english``).
     default_embedding_dimension
         Optional integer hint for vector dimension. Routers/providers may
         validate that the chosen model emits vectors of this size.
diff --git a/llama_stack/core/routers/vector_io.py b/llama_stack/core/routers/vector_io.py
index bde200c34..ff9a2f9ea 100644
--- a/llama_stack/core/routers/vector_io.py
+++ b/llama_stack/core/routers/vector_io.py
@@ -11,7 +11,6 @@ from typing import Any
 from llama_stack.apis.common.content_types import (
     InterleavedContent,
 )
-from llama_stack.apis.common.errors import MissingEmbeddingModelError
 from llama_stack.apis.common.vector_store_config import VectorStoreConfig
 from llama_stack.apis.models import ModelType
 from llama_stack.apis.vector_io import (
@@ -83,7 +82,7 @@
 
         1. If *explicit_model* is provided, verify dimension (if possible) and use it.
         2. Else use the global default in ``vector_store_config``.
-        3. Else raise ``MissingEmbeddingModelError``.
+        3. Else fall back to the system default (``ibm-granite/granite-embedding-125m-english``).
         """
 
         # 1. explicit override
@@ -106,10 +105,10 @@
         if cfg.default_embedding_model is not None:
             return cfg.default_embedding_model, cfg.default_embedding_dimension or 384
 
-        # 3. error - no default
-        raise MissingEmbeddingModelError(
-            "Failed to create vector store: No embedding model provided. Set vector_store_config.default_embedding_model or supply one in the API call."
-        )
+        # 3. fall back to the system default
+        # Use the IBM Granite embedding model as the default for commercial compatibility
+        # See: https://github.com/meta-llama/llama-stack/issues/2418
+        return "ibm-granite/granite-embedding-125m-english", 384
 
     async def register_vector_db(
         self,
diff --git a/tests/unit/router/test_embedding_precedence.py b/tests/unit/router/test_embedding_precedence.py
index fa255420a..2366eba55 100644
--- a/tests/unit/router/test_embedding_precedence.py
+++ b/tests/unit/router/test_embedding_precedence.py
@@ -5,9 +5,6 @@
 # the root directory of this source tree.
 
-import pytest
-
-from llama_stack.apis.common.errors import MissingEmbeddingModelError
 from llama_stack.apis.models import ModelType
 from llama_stack.core.routers.vector_io import VectorIORouter
 
@@ -72,10 +69,11 @@ async def test_explicit_override(monkeypatch):
     monkeypatch.delenv("LLAMA_STACK_DEFAULT_EMBEDDING_MODEL", raising=False)
 
 
-async def test_error_when_no_default():
-    """Router should raise when neither explicit nor global default is available."""
+async def test_fallback_to_system_default():
+    """Router should use the system default when neither an explicit nor a global default is available."""
     router = VectorIORouter(routing_table=_DummyRoutingTable())
 
-    with pytest.raises(MissingEmbeddingModelError):
-        await router._resolve_embedding_model(None)
+    model, dimension = await router._resolve_embedding_model(None)
+    assert model == "ibm-granite/granite-embedding-125m-english"
+    assert dimension == 384
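
For quick reference, below is a minimal, synchronous sketch of the precedence rules this patch implements. It is illustrative only: `_Config` is a hypothetical stand-in for `VectorStoreConfig`, the hardcoded dimension in rule 1 glosses over the real lookup, and the actual method is the async `VectorIORouter._resolve_embedding_model`, which also validates models against the routing table.

```python
from dataclasses import dataclass


@dataclass
class _Config:
    # Hypothetical stand-in for VectorStoreConfig; only the two fields
    # relevant to resolution are modeled here.
    default_embedding_model: str | None = None
    default_embedding_dimension: int | None = None


# System-wide fallback introduced by this patch.
_SYSTEM_DEFAULT = ("ibm-granite/granite-embedding-125m-english", 384)


def resolve_embedding_model(explicit_model: str | None, cfg: _Config | None) -> tuple[str, int]:
    """Sketch of the three precedence rules."""
    # 1. An explicit model passed in the API call always wins.
    if explicit_model is not None:
        return explicit_model, 384  # the real router verifies/looks up the dimension
    # 2. Otherwise the global default from vector_store_config is used.
    if cfg is not None and cfg.default_embedding_model is not None:
        return cfg.default_embedding_model, cfg.default_embedding_dimension or 384
    # 3. Otherwise fall back to the system default -- no error is raised.
    return _SYSTEM_DEFAULT


assert resolve_embedding_model(None, None) == _SYSTEM_DEFAULT
assert resolve_embedding_model(None, _Config("my-model", 768)) == ("my-model", 768)
assert resolve_embedding_model("override", None)[0] == "override"
```

Note how rule 3 now returns a value instead of raising, which is exactly what `test_fallback_to_system_default` asserts.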