Replace MissingEmbeddingModelError with IBM Granite default

- Replace error with ibm-granite/granite-embedding-125m-english default
- Based on issue #2418 for commercial compatibility and better UX
- Update tests to verify default fallback behavior
- Update documentation to reflect new precedence rules
- Remove unused MissingEmbeddingModelError class
- Update tip section to clarify fallback behavior

Resolves review comment to use default instead of error.
This commit is contained in:
skamenan7 2025-08-04 13:01:10 -04:00
parent 8e2675f50c
commit e411099cbf
4 changed files with 39 additions and 62 deletions

View file

@ -78,36 +78,27 @@ class VectorIORouter(VectorIO):
return None
async def _resolve_embedding_model(self, explicit_model: str | None = None) -> tuple[str, int]:
"""Apply precedence rules to decide which embedding model to use.
"""Figure out which embedding model to use and what dimension it has."""
1. If *explicit_model* is provided, verify dimension (if possible) and use it.
2. Else use the global default in ``vector_store_config``.
3. Else fallback to system default (ibm-granite/granite-embedding-125m-english).
"""
# 1. explicit override
# if they passed a model explicitly, use that
if explicit_model is not None:
# We still need a dimension; try to look it up in routing table
all_models = await self.routing_table.get_all_with_type("model")
for m in all_models:
if getattr(m, "identifier", None) == explicit_model:
dim = m.metadata.get("embedding_dimension")
# try to look up dimension from our routing table
models = await self.routing_table.get_all_with_type("model")
for model in models:
if getattr(model, "identifier", None) == explicit_model:
dim = model.metadata.get("embedding_dimension")
if dim is None:
raise ValueError(
f"Failed to use embedding model {explicit_model}: found but has no embedding_dimension metadata"
)
raise ValueError(f"Model {explicit_model} found but no embedding dimension in metadata")
return explicit_model, dim
# If not found, dimension unknown - defer to caller
# model not in our registry, let caller deal with dimension
return explicit_model, None # type: ignore
# 2. global default
cfg = VectorStoreConfig() # picks up env vars automatically
if cfg.default_embedding_model is not None:
return cfg.default_embedding_model, cfg.default_embedding_dimension or 384
# check if we have global defaults set via env vars
config = VectorStoreConfig()
if config.default_embedding_model is not None:
return config.default_embedding_model, config.default_embedding_dimension or 384
# 3. fallback to system default
# Use IBM Granite embedding model as default for commercial compatibility
# See: https://github.com/meta-llama/llama-stack/issues/2418
# fallback to granite model - see issue #2418 for context
return "ibm-granite/granite-embedding-125m-english", 384
async def register_vector_db(