Replace MissingEmbeddingModelError with IBM Granite default

- Replace error with ibm-granite/granite-embedding-125m-english default
- Based on issue #2418 for commercial compatibility and better UX
- Update tests to verify default fallback behavior
- Update documentation to reflect new precedence rules
- Remove unused MissingEmbeddingModelError class
- Update tip section to clarify fallback behavior

Resolves review comment to use default instead of error.
2025-10-05 12:21:52 +00:00 · 2025-08-04 13:01:10 -04:00 · 2025-08-04 13:01:10 -04:00 · e411099cbf
commit e411099cbf
parent 8e2675f50c
4 changed files with 39 additions and 62 deletions
--- a/tests/unit/router/test_embedding_precedence.py
+++ b/tests/unit/router/test_embedding_precedence.py
@ -17,29 +17,28 @@ class _DummyModel:


 class _DummyRoutingTable:
-    """Minimal stub satisfying the methods used by VectorIORouter in tests."""
+    """Just a fake routing table for testing."""

    def __init__(self):
-        self._models: list[_DummyModel] = [
+        self._models = [
            _DummyModel("first-model", 123),
            _DummyModel("second-model", 512),
        ]

    async def get_all_with_type(self, _type: str):
-        # Only embedding models requested in our tests
+        # just return embedding models for tests
        return self._models

-    # The following methods are required by the VectorIORouter signature but
-    # are not used in these unit tests; stub them out.
-    async def register_vector_db(self, *args, **kwargs):
+    # VectorIORouter needs these but we don't use them in tests
+    async def register_vector_db(self, *_args, **_kwargs):
        raise NotImplementedError

-    async def get_provider_impl(self, *args, **kwargs):
+    async def get_provider_impl(self, *_args, **_kwargs):
        raise NotImplementedError


 async def test_global_default_used(monkeypatch):
-    """Router should pick up global default when no explicit model is supplied."""
+    """Should use env var defaults when no explicit model given."""

    monkeypatch.setenv("LLAMA_STACK_DEFAULT_EMBEDDING_MODEL", "env-default-model")
    monkeypatch.setenv("LLAMA_STACK_DEFAULT_EMBEDDING_DIMENSION", "256")
@ -50,13 +49,13 @@ async def test_global_default_used(monkeypatch):
    assert model == "env-default-model"
    assert dim == 256

-    # Cleanup env vars
+    # cleanup
    monkeypatch.delenv("LLAMA_STACK_DEFAULT_EMBEDDING_MODEL", raising=False)
    monkeypatch.delenv("LLAMA_STACK_DEFAULT_EMBEDDING_DIMENSION", raising=False)


 async def test_explicit_override(monkeypatch):
-    """Explicit model parameter should override global default."""
+    """Explicit model should win over env defaults."""

    monkeypatch.setenv("LLAMA_STACK_DEFAULT_EMBEDDING_MODEL", "env-default-model")

@ -69,11 +68,11 @@ async def test_explicit_override(monkeypatch):
    monkeypatch.delenv("LLAMA_STACK_DEFAULT_EMBEDDING_MODEL", raising=False)


-async def test_fallback_to_system_default():
-    """Router should use system default when neither explicit nor global default is available."""
+async def test_fallback_to_granite():
+    """Should fallback to granite model when no defaults set."""

    router = VectorIORouter(routing_table=_DummyRoutingTable())

-    model, dimension = await router._resolve_embedding_model(None)
+    model, dim = await router._resolve_embedding_model(None)
    assert model == "ibm-granite/granite-embedding-125m-english"
-    assert dimension == 384
+    assert dim == 384