feat(vector-io): implement global default embedding model configuration (Issue #2729)

- Add VectorStoreConfig with global default_embedding_model and default_embedding_dimension
- Support environment variables LLAMA_STACK_DEFAULT_EMBEDDING_MODEL and LLAMA_STACK_DEFAULT_EMBEDDING_DIMENSION
- Implement precedence: explicit model > global default > clear error (no fallback)
- Update VectorIORouter with _resolve_embedding_model() precedence logic
- Remove non-deterministic 'first model in run.yaml' fallback behavior
- Add vector_store_config to StackRunConfig and all distribution templates
- Include comprehensive unit tests for config loading and router precedence
- Update documentation with configuration examples and usage patterns
- Fix error messages to include 'Failed to' prefix per coding standards

Makes vector store creation deterministic by eliminating unpredictable fallbacks
and providing clear configuration options at the stack level.
This commit is contained in:
skamenan7 2025-07-25 17:06:43 -04:00
parent 8422bd102a
commit 17fbd21c0d
7 changed files with 243 additions and 8 deletions

View file

@ -0,0 +1,26 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from llama_stack.apis.common.vector_store_config import VectorStoreConfig
def test_defaults(monkeypatch):
    """Both defaults are None when no environment overrides are present."""
    # VectorStoreConfig picks its defaults up from the environment when it is
    # instantiated, so clear any ambient values to keep this test hermetic.
    monkeypatch.delenv("LLAMA_STACK_DEFAULT_EMBEDDING_MODEL", raising=False)
    monkeypatch.delenv("LLAMA_STACK_DEFAULT_EMBEDDING_DIMENSION", raising=False)

    cfg = VectorStoreConfig()

    assert cfg.default_embedding_model is None
    assert cfg.default_embedding_dimension is None
def test_env_loading(monkeypatch):
    """Environment variables populate the config, with the dimension parsed as an int."""
    monkeypatch.setenv("LLAMA_STACK_DEFAULT_EMBEDDING_MODEL", "test-model")
    monkeypatch.setenv("LLAMA_STACK_DEFAULT_EMBEDDING_DIMENSION", "123")

    cfg = VectorStoreConfig()

    assert cfg.default_embedding_model == "test-model"
    assert cfg.default_embedding_dimension == 123
    # No manual cleanup: the monkeypatch fixture restores the environment
    # automatically when the test finishes.

View file

@ -0,0 +1,83 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import pytest
from llama_stack.apis.models import ModelType
from llama_stack.distribution.routers.vector_io import VectorIORouter
class _DummyModel:
    """Bare-bones embedding model record used as test data."""

    def __init__(self, identifier: str, dim: int):
        # Every dummy is an embedding model; only its name and dimension vary.
        self.model_type = ModelType.embedding
        self.metadata = {"embedding_dimension": dim}
        self.identifier = identifier
class _DummyRoutingTable:
    """In-memory stand-in for the routing table handed to VectorIORouter in these tests."""

    def __init__(self):
        first = _DummyModel("first-model", 123)
        second = _DummyModel("second-model", 512)
        self._models: list[_DummyModel] = [first, second]

    async def get_all_with_type(self, _type: str):
        # The requested type is ignored: this stub only holds embedding models.
        return self._models

    # Placeholders for methods the router expects to exist but that these
    # unit tests never call.

    async def register_vector_db(self, *args, **kwargs):
        raise NotImplementedError

    async def get_provider_impl(self, *args, **kwargs):
        raise NotImplementedError
@pytest.mark.asyncio
async def test_global_default_used(monkeypatch):
    """Router should pick up global default when no explicit model is supplied."""
    monkeypatch.setenv("LLAMA_STACK_DEFAULT_EMBEDDING_MODEL", "env-default-model")
    monkeypatch.setenv("LLAMA_STACK_DEFAULT_EMBEDDING_DIMENSION", "256")

    router = VectorIORouter(routing_table=_DummyRoutingTable())

    model, dim = await router._resolve_embedding_model(None)
    assert model == "env-default-model"
    assert dim == 256
    # No manual cleanup: the monkeypatch fixture restores the environment
    # automatically when the test finishes.
@pytest.mark.asyncio
async def test_explicit_override(monkeypatch):
    """Explicit model parameter should override global default."""
    monkeypatch.setenv("LLAMA_STACK_DEFAULT_EMBEDDING_MODEL", "env-default-model")

    router = VectorIORouter(routing_table=_DummyRoutingTable())

    # "first-model" is registered in the dummy routing table with dimension 123.
    model, dim = await router._resolve_embedding_model("first-model")
    assert model == "first-model"
    assert dim == 123
    # No manual cleanup: the monkeypatch fixture restores the environment
    # automatically when the test finishes.
@pytest.mark.asyncio
async def test_error_when_no_default(monkeypatch):
    """Router should raise when neither explicit nor global default is available."""
    # Make sure no global default leaks in from the ambient environment;
    # otherwise the router would resolve it and the expected error would
    # never be raised.
    monkeypatch.delenv("LLAMA_STACK_DEFAULT_EMBEDDING_MODEL", raising=False)
    monkeypatch.delenv("LLAMA_STACK_DEFAULT_EMBEDDING_DIMENSION", raising=False)

    router = VectorIORouter(routing_table=_DummyRoutingTable())

    with pytest.raises(RuntimeError):
        await router._resolve_embedding_model(None)