mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-08-15 06:00:48 +00:00
Replace MissingEmbeddingModelError with IBM Granite default
- Replace error with ibm-granite/granite-embedding-125m-english default
- Based on issue #2418 for commercial compatibility and better UX
- Update tests to verify default fallback behavior
- Update documentation to reflect new precedence rules
- Remove unused MissingEmbeddingModelError class
- Update tip section to clarify fallback behavior

Resolves review comment to use a default instead of an error.
This commit is contained in:
parent
e47c0da1fb
commit
f8946d8b9d
4 changed files with 13 additions and 16 deletions
|
@ -705,7 +705,7 @@ Precedence rules at runtime:
|
|||
|
||||
1. If `embedding_model` is explicitly passed in an API call, that value is used.
|
||||
2. Otherwise the value in `vector_store_config.default_embedding_model` is used.
|
||||
3. If neither is available the server will raise `MissingEmbeddingModelError` at store-creation time so mis-configuration is caught early.
|
||||
3. If neither is available the server will fall back to the system default (`ibm-granite/granite-embedding-125m-english`).
|
||||
|
||||
#### Environment variables
|
||||
|
||||
|
@ -721,4 +721,4 @@ export LLAMA_STACK_DEFAULT_EMBEDDING_MODEL="sentence-transformers/all-MiniLM-L6-
|
|||
llama stack run --config run.yaml
|
||||
```
|
||||
|
||||
> Tip: If you omit `vector_store_config` entirely you **must** either pass `embedding_model=` on every `create_vector_store` call or set `LLAMA_STACK_DEFAULT_EMBEDDING_MODEL` in the environment, otherwise the server will refuse to create a vector store.
|
||||
> Tip: If you omit `vector_store_config` entirely and don't set `LLAMA_STACK_DEFAULT_EMBEDDING_MODEL`, the system will fall back to the default `ibm-granite/granite-embedding-125m-english` model with 384 dimensions for vector store creation.
|
||||
|
|
|
@ -29,7 +29,7 @@ class VectorStoreConfig(BaseModel):
|
|||
default_embedding_model
|
||||
The model *id* the stack should use when an embedding model is
|
||||
required but not supplied by the API caller. When *None* the
|
||||
router will raise a :class:`~llama_stack.apis.common.errors.MissingEmbeddingModelError`.
|
||||
router will fall back to the system default (ibm-granite/granite-embedding-125m-english).
|
||||
default_embedding_dimension
|
||||
Optional integer hint for vector dimension. Routers/providers
|
||||
may validate that the chosen model emits vectors of this size.
|
||||
|
|
|
@ -11,7 +11,6 @@ from typing import Any
|
|||
from llama_stack.apis.common.content_types import (
|
||||
InterleavedContent,
|
||||
)
|
||||
from llama_stack.apis.common.errors import MissingEmbeddingModelError
|
||||
from llama_stack.apis.common.vector_store_config import VectorStoreConfig
|
||||
from llama_stack.apis.models import ModelType
|
||||
from llama_stack.apis.vector_io import (
|
||||
|
@ -83,7 +82,7 @@ class VectorIORouter(VectorIO):
|
|||
|
||||
1. If *explicit_model* is provided, verify dimension (if possible) and use it.
|
||||
2. Else use the global default in ``vector_store_config``.
|
||||
3. Else raise ``MissingEmbeddingModelError``.
|
||||
3. Else fall back to the system default (ibm-granite/granite-embedding-125m-english).
|
||||
"""
|
||||
|
||||
# 1. explicit override
|
||||
|
@ -106,10 +105,10 @@ class VectorIORouter(VectorIO):
|
|||
if cfg.default_embedding_model is not None:
|
||||
return cfg.default_embedding_model, cfg.default_embedding_dimension or 384
|
||||
|
||||
# 3. error - no default
|
||||
raise MissingEmbeddingModelError(
|
||||
"Failed to create vector store: No embedding model provided. Set vector_store_config.default_embedding_model or supply one in the API call."
|
||||
)
|
||||
# 3. fallback to system default
|
||||
# Use IBM Granite embedding model as default for commercial compatibility
|
||||
# See: https://github.com/meta-llama/llama-stack/issues/2418
|
||||
return "ibm-granite/granite-embedding-125m-english", 384
|
||||
|
||||
async def register_vector_db(
|
||||
self,
|
||||
|
|
|
@ -5,9 +5,6 @@
|
|||
# the root directory of this source tree.
|
||||
|
||||
|
||||
import pytest
|
||||
|
||||
from llama_stack.apis.common.errors import MissingEmbeddingModelError
|
||||
from llama_stack.apis.models import ModelType
|
||||
from llama_stack.core.routers.vector_io import VectorIORouter
|
||||
|
||||
|
@ -72,10 +69,11 @@ async def test_explicit_override(monkeypatch):
|
|||
monkeypatch.delenv("LLAMA_STACK_DEFAULT_EMBEDDING_MODEL", raising=False)
|
||||
|
||||
|
||||
async def test_error_when_no_default():
|
||||
"""Router should raise when neither explicit nor global default is available."""
|
||||
async def test_fallback_to_system_default():
|
||||
"""Router should use system default when neither explicit nor global default is available."""
|
||||
|
||||
router = VectorIORouter(routing_table=_DummyRoutingTable())
|
||||
|
||||
with pytest.raises(MissingEmbeddingModelError):
|
||||
await router._resolve_embedding_model(None)
|
||||
model, dimension = await router._resolve_embedding_model(None)
|
||||
assert model == "ibm-granite/granite-embedding-125m-english"
|
||||
assert dimension == 384
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue