Replace MissingEmbeddingModelError with IBM Granite default

- Replace the error with the ibm-granite/granite-embedding-125m-english default
- Based on issue #2418, for commercial compatibility and better UX
- Update tests to verify the default fallback behavior
- Update documentation to reflect the new precedence rules
- Remove the unused MissingEmbeddingModelError class
- Update the tip section to clarify the fallback behavior

Resolves the review comment asking to use a default instead of raising an error.
2025-10-06 20:44:58 +00:00 · 2025-08-04 13:01:10 -04:00 · 2025-08-04 13:01:10 -04:00 · e411099cbf
commit e411099cbf
parent 8e2675f50c
4 changed files with 39 additions and 62 deletions
--- a/llama_stack/apis/common/vector_store_config.py
+++ b/llama_stack/apis/common/vector_store_config.py
@@ -6,12 +6,10 @@

 from __future__ import annotations

-"""Global vector-store configuration shared across the stack.
+"""Stack-wide default embedding settings for vector stores.

-This module introduces `VectorStoreConfig`, a small Pydantic model that
-lives under `StackRunConfig.vector_store_config`.  It lets deployers set
-an explicit default embedding model (and dimension) that the Vector-IO
-router will inject whenever the caller does not specify one.
+Holds the default embedding model settings so they do not have to be passed
+around everywhere. The router picks these up when the client does not specify them.
 """

 import os
@@ -22,25 +20,14 @@ __all__ = ["VectorStoreConfig"]


 class VectorStoreConfig(BaseModel):
-    """Stack-level defaults for vector-store creation.
-
-    Attributes
-    ----------
-    default_embedding_model
-        The model *id* the stack should use when an embedding model is
-        required but not supplied by the API caller.  When *None* the
-        router will fall back to the system default (ibm-granite/granite-embedding-125m-english).
-    default_embedding_dimension
-        Optional integer hint for vector dimension.  Routers/providers
-        may validate that the chosen model emits vectors of this size.
-    """
+    """Default embedding model config that gets picked up from env vars."""

    default_embedding_model: str | None = Field(
        default_factory=lambda: os.getenv("LLAMA_STACK_DEFAULT_EMBEDDING_MODEL")
    )
+    # dimension from env - None if unset or 0; a non-integer value raises ValueError from int()
    default_embedding_dimension: int | None = Field(
        default_factory=lambda: int(os.getenv("LLAMA_STACK_DEFAULT_EMBEDDING_DIMENSION", 0)) or None, ge=1
    )
-    # Note: If not set, the router will fall back to 384 as the default dimension

    model_config = ConfigDict(frozen=True)