Replace MissingEmbeddingModelError with IBM Granite default

- Replace error with ibm-granite/granite-embedding-125m-english default
- Based on issue #2418 for commercial compatibility and better UX
- Update tests to verify default fallback behavior
- Update documentation to reflect new precedence rules
- Remove unused MissingEmbeddingModelError class
- Update tip section to clarify fallback behavior

Resolves review comment to use default instead of error.
This commit is contained in:
skamenan7 2025-08-04 13:01:10 -04:00
parent 8e2675f50c
commit e411099cbf
4 changed files with 39 additions and 62 deletions

View file

@ -6,12 +6,10 @@
from __future__ import annotations
"""Global vector-store configuration shared across the stack.
"""Vector store global config stuff.
This module introduces `VectorStoreConfig`, a small Pydantic model that
lives under `StackRunConfig.vector_store_config`. It lets deployers set
an explicit default embedding model (and dimension) that the Vector-IO
router will inject whenever the caller does not specify one.
Holds the default embedding model settings so they do not have to be passed
around explicitly. The router uses them when the client does not specify a model.
"""
import os
@ -22,25 +20,14 @@ __all__ = ["VectorStoreConfig"]
def _env_embedding_dimension() -> int | None:
    """Return LLAMA_STACK_DEFAULT_EMBEDDING_DIMENSION as a positive int, or None.

    None is returned when the variable is unset, empty, not an integer, or
    less than 1, so a malformed value never prevents the config from loading.
    """
    raw = os.getenv("LLAMA_STACK_DEFAULT_EMBEDDING_DIMENSION")
    if raw is None:
        return None
    try:
        dimension = int(raw)
    except ValueError:
        # Invalid values fall back to None instead of crashing at construction.
        return None
    return dimension if dimension >= 1 else None


class VectorStoreConfig(BaseModel):
    """Stack-level embedding defaults, read from environment variables.

    Both fields are populated at instantiation time via ``default_factory``,
    so each new instance reflects the environment as it is at that moment.
    Instances are frozen (immutable) once created.
    """

    # Model id to use when the API caller does not supply one. Read from
    # LLAMA_STACK_DEFAULT_EMBEDDING_MODEL; None when the variable is unset.
    default_embedding_model: str | None = Field(
        default_factory=lambda: os.getenv("LLAMA_STACK_DEFAULT_EMBEDDING_MODEL")
    )

    # Vector dimension read from LLAMA_STACK_DEFAULT_EMBEDDING_DIMENSION; None if
    # not set or invalid. The helper enforces positivity itself because pydantic
    # does not apply ``ge=1`` to default values unless validate_default is enabled.
    default_embedding_dimension: int | None = Field(default_factory=_env_embedding_dimension, ge=1)
    # Note: If not set, the router will fall back to 384 as the default dimension

    model_config = ConfigDict(frozen=True)

View file

@ -78,36 +78,27 @@ class VectorIORouter(VectorIO):
return None
async def _resolve_embedding_model(self, explicit_model: str | None = None) -> tuple[str, int]:
    """Choose the embedding model and its vector dimension.

    Precedence:
      1. ``explicit_model`` when given. Its dimension is read from the
         ``embedding_dimension`` metadata of the matching routing-table entry;
         if no entry matches, the dimension is returned as ``None`` and the
         caller has to supply it.
      2. The default model from ``VectorStoreConfig``, with its configured
         dimension or 384 when none is configured.
      3. ``ibm-granite/granite-embedding-125m-english`` with dimension 384
         (see issue #2418 for context).

    Raises:
        ValueError: ``explicit_model`` is registered but its metadata has no
            ``embedding_dimension``.
    """
    if explicit_model is None:
        # No explicit choice: consult the global defaults, then the built-in fallback.
        defaults = VectorStoreConfig()
        if defaults.default_embedding_model is None:
            return "ibm-granite/granite-embedding-125m-english", 384
        return defaults.default_embedding_model, defaults.default_embedding_dimension or 384

    # Explicit model: find the first registered entry with a matching identifier.
    registered = await self.routing_table.get_all_with_type("model")
    entry = next(
        (candidate for candidate in registered if getattr(candidate, "identifier", None) == explicit_model),
        None,
    )
    if entry is None:
        # Not in the registry; the dimension is unknown and left to the caller.
        return explicit_model, None  # type: ignore

    dimension = entry.metadata.get("embedding_dimension")
    if dimension is None:
        raise ValueError(f"Model {explicit_model} found but no embedding dimension in metadata")
    return explicit_model, dimension
async def register_vector_db(