feat(vector-io): implement global default embedding model configuration (Issue #2729)

- Add VectorStoreConfig with global default_embedding_model and default_embedding_dimension
- Support environment variables LLAMA_STACK_DEFAULT_EMBEDDING_MODEL and LLAMA_STACK_DEFAULT_EMBEDDING_DIMENSION
- Implement precedence: explicit model > global default > clear error (no fallback)
- Update VectorIORouter with _resolve_embedding_model() precedence logic
- Remove non-deterministic 'first model in run.yaml' fallback behavior
- Add vector_store_config to StackRunConfig and all distribution templates
- Include comprehensive unit tests for config loading and router precedence
- Update documentation with configuration examples and usage patterns
- Fix error messages to include 'Failed to' prefix per coding standards

Makes vector store creation deterministic by eliminating unpredictable fallbacks and providing clear configuration options at the stack level.
2025-12-08 11:07:22 +00:00 · 2025-07-25 17:06:43 -04:00 · 2025-07-25 17:06:43 -04:00 · 17fbd21c0d
commit 17fbd21c0d
parent 8422bd102a
7 changed files with 243 additions and 8 deletions
--- a/tests/unit/common/test_vector_store_config.py
+++ b/tests/unit/common/test_vector_store_config.py
@ -0,0 +1,26 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from llama_stack.apis.common.vector_store_config import VectorStoreConfig
+
+
+def test_defaults(monkeypatch):
+    """With no environment overrides, both default fields are None."""
+    # VectorStoreConfig picks these variables up from the environment (see
+    # test_env_loading), so clear any ambient values to keep this test
+    # independent of the shell or CI job that launches it.
+    monkeypatch.delenv("LLAMA_STACK_DEFAULT_EMBEDDING_MODEL", raising=False)
+    monkeypatch.delenv("LLAMA_STACK_DEFAULT_EMBEDDING_DIMENSION", raising=False)
+
+    cfg = VectorStoreConfig()
+    assert cfg.default_embedding_model is None
+    assert cfg.default_embedding_dimension is None
+
+
+def test_env_loading(monkeypatch):
+    """Environment variables populate the config; the dimension is parsed to an int."""
+    # monkeypatch restores the original environment at teardown, even when an
+    # assertion below fails, so no manual cleanup is needed afterwards.
+    monkeypatch.setenv("LLAMA_STACK_DEFAULT_EMBEDDING_MODEL", "test-model")
+    monkeypatch.setenv("LLAMA_STACK_DEFAULT_EMBEDDING_DIMENSION", "123")
+
+    cfg = VectorStoreConfig()
+    assert cfg.default_embedding_model == "test-model"
+    assert cfg.default_embedding_dimension == 123
--- a/tests/unit/router/test_embedding_precedence.py
+++ b/tests/unit/router/test_embedding_precedence.py
@ -0,0 +1,83 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+
+import pytest
+
+from llama_stack.apis.models import ModelType
+from llama_stack.distribution.routers.vector_io import VectorIORouter
+
+
+class _DummyModel:
+    """Stand-in for an embedding model carrying only the attributes set below."""
+
+    def __init__(self, identifier: str, dim: int):
+        # The dimension is stored under the "embedding_dimension" metadata key.
+        metadata = {"embedding_dimension": dim}
+        self.metadata = metadata
+        self.model_type = ModelType.embedding
+        self.identifier = identifier
+
+
+class _DummyRoutingTable:
+    """Bare-bones routing-table double exposing only what these router tests touch."""
+
+    def __init__(self):
+        # (identifier, embedding dimension) pairs for the two registered dummies.
+        specs = (("first-model", 123), ("second-model", 512))
+        self._models: list[_DummyModel] = [_DummyModel(name, size) for name, size in specs]
+
+    async def get_all_with_type(self, _type: str):
+        # The requested type is ignored: every registered dummy is an embedding model.
+        return self._models
+
+    # VectorIORouter's interface expects the two methods below, but none of the
+    # tests in this module exercise them, so they simply refuse to run.
+    async def register_vector_db(self, *args, **kwargs):
+        raise NotImplementedError
+
+    async def get_provider_impl(self, *args, **kwargs):
+        raise NotImplementedError
+
+
+@pytest.mark.asyncio
+async def test_global_default_used(monkeypatch):
+    """Router should pick up global default when no explicit model is supplied."""
+
+    # monkeypatch undoes these changes at teardown (even on failure), so the
+    # test does not need to remove the variables itself.
+    monkeypatch.setenv("LLAMA_STACK_DEFAULT_EMBEDDING_MODEL", "env-default-model")
+    monkeypatch.setenv("LLAMA_STACK_DEFAULT_EMBEDDING_DIMENSION", "256")
+
+    router = VectorIORouter(routing_table=_DummyRoutingTable())
+
+    # No explicit model: both values are expected to come from the env defaults.
+    model, dim = await router._resolve_embedding_model(None)
+    assert model == "env-default-model"
+    assert dim == 256
+
+
+@pytest.mark.asyncio
+async def test_explicit_override(monkeypatch):
+    """Explicit model parameter should override global default."""
+
+    # Set a competing global default; monkeypatch restores the environment at
+    # teardown, so no manual cleanup is required.
+    monkeypatch.setenv("LLAMA_STACK_DEFAULT_EMBEDDING_MODEL", "env-default-model")
+
+    router = VectorIORouter(routing_table=_DummyRoutingTable())
+
+    # "first-model" is registered with dimension 123 in _DummyRoutingTable.
+    model, dim = await router._resolve_embedding_model("first-model")
+    assert model == "first-model"
+    assert dim == 123
+
+
+@pytest.mark.asyncio
+async def test_error_when_no_default(monkeypatch):
+    """Router should raise when neither explicit nor global default is available."""
+
+    # Guarantee that no global default leaks in from the ambient environment;
+    # otherwise the router could resolve a model and the expected error would
+    # never be raised.
+    monkeypatch.delenv("LLAMA_STACK_DEFAULT_EMBEDDING_MODEL", raising=False)
+    monkeypatch.delenv("LLAMA_STACK_DEFAULT_EMBEDDING_DIMENSION", raising=False)
+
+    router = VectorIORouter(routing_table=_DummyRoutingTable())
+
+    with pytest.raises(RuntimeError):
+        await router._resolve_embedding_model(None)