mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-10-04 04:04:14 +00:00
tightened vector store embedding model validation
Includes: require models to exist in the registry before use; make default_embedding_dimension mandatory when setting a default model; fall back to the first available model instead of the hardcoded all-MiniLM-L6-v2; add tests for error cases and update docs.
This commit is contained in:
parent
534c227058
commit
32930868de
3 changed files with 55 additions and 14 deletions
|
@ -823,22 +823,22 @@ server:
|
||||||
port: 8321
|
port: 8321
|
||||||
vector_store_config:
|
vector_store_config:
|
||||||
default_embedding_model: ${env.LLAMA_STACK_DEFAULT_EMBEDDING_MODEL:=all-MiniLM-L6-v2}
|
default_embedding_model: ${env.LLAMA_STACK_DEFAULT_EMBEDDING_MODEL:=all-MiniLM-L6-v2}
|
||||||
# optional - if omitted, defaults to 384
|
# required when default_embedding_model is set
|
||||||
default_embedding_dimension: ${env.LLAMA_STACK_DEFAULT_EMBEDDING_DIMENSION:=384}
|
default_embedding_dimension: ${env.LLAMA_STACK_DEFAULT_EMBEDDING_DIMENSION:=384}
|
||||||
```
|
```
|
||||||
|
|
||||||
Precedence rules at runtime:
|
Precedence rules at runtime:
|
||||||
|
|
||||||
1. If `embedding_model` is explicitly passed in an API call, that value is used.
|
1. If `embedding_model` is explicitly passed in an API call, that value is used (model must be registered in the stack).
|
||||||
2. Otherwise the value in `vector_store_config.default_embedding_model` is used.
|
2. Otherwise the value in `vector_store_config.default_embedding_model` is used (requires `default_embedding_dimension` to be set).
|
||||||
3. If neither is available, the server will fall back to the system default (all-MiniLM-L6-v2).
|
3. If neither is available, the server will fall back to the first available embedding model in the registry.
|
||||||
|
|
||||||
#### Environment variables
|
#### Environment variables
|
||||||
|
|
||||||
| Variable | Purpose | Example |
|
| Variable | Purpose | Example |
|
||||||
|----------|---------|---------|
|
|----------|---------|---------|
|
||||||
| `LLAMA_STACK_DEFAULT_EMBEDDING_MODEL` | Global default embedding model id | `all-MiniLM-L6-v2` |
|
| `LLAMA_STACK_DEFAULT_EMBEDDING_MODEL` | Global default embedding model id | `all-MiniLM-L6-v2` |
|
||||||
| `LLAMA_STACK_DEFAULT_EMBEDDING_DIMENSION` | Dimension for embeddings (optional, defaults to 384) | `384` |
|
| `LLAMA_STACK_DEFAULT_EMBEDDING_DIMENSION` | Dimension for embeddings (required when model is set) | `384` |
|
||||||
|
|
||||||
If you include the `${env.…}` placeholder in `vector_store_config`, deployments can override the default without editing YAML:
|
If you include the `${env.…}` placeholder in `vector_store_config`, deployments can override the default without editing YAML:
|
||||||
|
|
||||||
|
@ -847,4 +847,4 @@ export LLAMA_STACK_DEFAULT_EMBEDDING_MODEL="sentence-transformers/all-MiniLM-L6-
|
||||||
llama stack run --config run.yaml
|
llama stack run --config run.yaml
|
||||||
```
|
```
|
||||||
|
|
||||||
> Tip: If you omit `vector_store_config` entirely and don't set `LLAMA_STACK_DEFAULT_EMBEDDING_MODEL`, the system will fall back to the default `all-MiniLM-L6-v2` model with 384 dimensions for vector store creation.
|
> Tip: If you omit `vector_store_config` entirely and don't set `LLAMA_STACK_DEFAULT_EMBEDDING_MODEL`, the system will fall back to using the first available embedding model in the registry for vector store creation.
|
||||||
|
|
|
@ -90,16 +90,27 @@ class VectorIORouter(VectorIO):
|
||||||
if dim is None:
|
if dim is None:
|
||||||
raise ValueError(f"Model {explicit_model} found but no embedding dimension in metadata")
|
raise ValueError(f"Model {explicit_model} found but no embedding dimension in metadata")
|
||||||
return explicit_model, dim
|
return explicit_model, dim
|
||||||
# model not in our registry, let caller deal with dimension
|
# model not found in registry - this is an error
|
||||||
return explicit_model, None # type: ignore
|
raise ValueError(f"Embedding model '{explicit_model}' not found in model registry")
|
||||||
|
|
||||||
# check if we have global defaults set via env vars
|
# check if we have global defaults set via env vars
|
||||||
config = VectorStoreConfig()
|
config = VectorStoreConfig()
|
||||||
if config.default_embedding_model is not None:
|
if config.default_embedding_model is not None:
|
||||||
return config.default_embedding_model, config.default_embedding_dimension or 384
|
if config.default_embedding_dimension is None:
|
||||||
|
raise ValueError(
|
||||||
|
f"default_embedding_model '{config.default_embedding_model}' is set but default_embedding_dimension is missing"
|
||||||
|
)
|
||||||
|
return config.default_embedding_model, config.default_embedding_dimension
|
||||||
|
|
||||||
# fallback to existing default model for compatibility
|
# fallback to first available embedding model for compatibility
|
||||||
return "all-MiniLM-L6-v2", 384
|
fallback = await self._get_first_embedding_model()
|
||||||
|
if fallback is not None:
|
||||||
|
return fallback
|
||||||
|
|
||||||
|
# if no models available, raise error
|
||||||
|
raise ValueError(
|
||||||
|
"No embedding model specified and no default configured. Either provide an embedding_model parameter or set vector_store_config.default_embedding_model"
|
||||||
|
)
|
||||||
|
|
||||||
async def register_vector_db(
|
async def register_vector_db(
|
||||||
self,
|
self,
|
||||||
|
|
|
@ -73,10 +73,40 @@ async def test_explicit_override(monkeypatch):
|
||||||
|
|
||||||
|
|
||||||
async def test_fallback_to_default():
|
async def test_fallback_to_default():
|
||||||
"""Should fallback to all-MiniLM-L6-v2 when no defaults set."""
|
"""Should fallback to first available embedding model when no defaults set."""
|
||||||
|
|
||||||
router = VectorIORouter(routing_table=_DummyRoutingTable())
|
router = VectorIORouter(routing_table=_DummyRoutingTable())
|
||||||
|
|
||||||
model, dim = await router._resolve_embedding_model(None)
|
model, dim = await router._resolve_embedding_model(None)
|
||||||
assert model == "all-MiniLM-L6-v2"
|
assert model == "first-model"
|
||||||
assert dim == 384
|
assert dim == 123
|
||||||
|
|
||||||
|
|
||||||
|
async def test_missing_dimension_requirement(monkeypatch):
|
||||||
|
monkeypatch.setenv("LLAMA_STACK_DEFAULT_EMBEDDING_MODEL", "some-model")
|
||||||
|
|
||||||
|
router = VectorIORouter(routing_table=_DummyRoutingTable())
|
||||||
|
|
||||||
|
with pytest.raises(ValueError, match="default_embedding_model.*is set but default_embedding_dimension is missing"):
|
||||||
|
await router._resolve_embedding_model(None)
|
||||||
|
|
||||||
|
monkeypatch.delenv("LLAMA_STACK_DEFAULT_EMBEDDING_MODEL", raising=False)
|
||||||
|
|
||||||
|
|
||||||
|
async def test_unregistered_model_error():
|
||||||
|
router = VectorIORouter(routing_table=_DummyRoutingTable())
|
||||||
|
|
||||||
|
with pytest.raises(ValueError, match="Embedding model 'unknown-model' not found in model registry"):
|
||||||
|
await router._resolve_embedding_model("unknown-model")
|
||||||
|
|
||||||
|
|
||||||
|
class _EmptyRoutingTable:
|
||||||
|
async def get_all_with_type(self, _type: str):
|
||||||
|
return []
|
||||||
|
|
||||||
|
|
||||||
|
async def test_no_models_available_error():
|
||||||
|
router = VectorIORouter(routing_table=_EmptyRoutingTable())
|
||||||
|
|
||||||
|
with pytest.raises(ValueError, match="No embedding model specified and no default configured"):
|
||||||
|
await router._resolve_embedding_model(None)
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue