diff --git a/docs/source/distributions/configuration.md b/docs/source/distributions/configuration.md index b7d910869..2fe9d7c53 100644 --- a/docs/source/distributions/configuration.md +++ b/docs/source/distributions/configuration.md @@ -705,7 +705,7 @@ Precedence rules at runtime: 1. If `embedding_model` is explicitly passed in an API call, that value is used. 2. Otherwise the value in `vector_store_config.default_embedding_model` is used. -3. If neither is available the server will raise **MissingEmbeddingModelError** at store-creation time so mis-configuration is caught early. +3. If neither is available the server will raise `MissingEmbeddingModelError` at store-creation time so mis-configuration is caught early. #### Environment variables diff --git a/llama_stack/apis/common/vector_store_config.py b/llama_stack/apis/common/vector_store_config.py index 2c396077a..d0508048d 100644 --- a/llama_stack/apis/common/vector_store_config.py +++ b/llama_stack/apis/common/vector_store_config.py @@ -29,7 +29,7 @@ class VectorStoreConfig(BaseModel): default_embedding_model The model *id* the stack should use when an embedding model is required but not supplied by the API caller. When *None* the - router will raise a :class:`~llama_stack.errors.MissingEmbeddingModelError`. + router will raise a :class:`~llama_stack.apis.common.errors.MissingEmbeddingModelError`. default_embedding_dimension Optional integer hint for vector dimension. Routers/providers may validate that the chosen model emits vectors of this size. diff --git a/llama_stack/core/routers/vector_io.py b/llama_stack/core/routers/vector_io.py index a2f74ba36..bde200c34 100644 --- a/llama_stack/core/routers/vector_io.py +++ b/llama_stack/core/routers/vector_io.py @@ -11,6 +11,7 @@ from typing import Any from llama_stack.apis.common.content_types import ( InterleavedContent, ) +from llama_stack.apis.common.errors import MissingEmbeddingModelError from llama_stack.apis.common.vector_store_config import VectorStoreConfig from llama_stack.apis.models import ModelType from llama_stack.apis.vector_io import ( @@ -106,9 +107,6 @@ class VectorIORouter(VectorIO): return cfg.default_embedding_model, cfg.default_embedding_dimension or 384 # 3. error - no default - class MissingEmbeddingModelError(RuntimeError): - pass - raise MissingEmbeddingModelError( "Failed to create vector store: No embedding model provided. Set vector_store_config.default_embedding_model or supply one in the API call." ) diff --git a/tests/unit/router/test_embedding_precedence.py b/tests/unit/router/test_embedding_precedence.py index 20d26161c..6610ffcbc 100644 --- a/tests/unit/router/test_embedding_precedence.py +++ b/tests/unit/router/test_embedding_precedence.py @@ -7,6 +7,7 @@ import pytest +from llama_stack.apis.common.errors import MissingEmbeddingModelError from llama_stack.apis.models import ModelType from llama_stack.distribution.routers.vector_io import VectorIORouter @@ -76,5 +77,5 @@ async def test_error_when_no_default(): router = VectorIORouter(routing_table=_DummyRoutingTable()) - with pytest.raises(RuntimeError): + with pytest.raises(MissingEmbeddingModelError): await router._resolve_embedding_model(None)