llama-stack-mirror/tests/unit/router/test_embedding_precedence.py
skamenan7 17fbd21c0d feat(vector-io): implement global default embedding model configuration (Issue #2729)
- Add VectorStoreConfig with global default_embedding_model and default_embedding_dimension
- Support environment variables LLAMA_STACK_DEFAULT_EMBEDDING_MODEL and LLAMA_STACK_DEFAULT_EMBEDDING_DIMENSION
- Implement precedence: explicit model > global default > clear error (no fallback)
- Update VectorIORouter with _resolve_embedding_model() precedence logic
- Remove non-deterministic 'first model in run.yaml' fallback behavior
- Add vector_store_config to StackRunConfig and all distribution templates
- Include comprehensive unit tests for config loading and router precedence
- Update documentation with configuration examples and usage patterns
- Fix error messages to include 'Failed to' prefix per coding standards

Resolves deterministic vector store creation by eliminating unpredictable fallbacks
and providing clear configuration options at the stack level.
2025-09-18 10:11:44 -04:00

83 lines
2.7 KiB
Python

# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import pytest
from llama_stack.apis.models import ModelType
from llama_stack.distribution.routers.vector_io import VectorIORouter
class _DummyModel:
    """Lightweight stand-in for a registered embedding model.

    Carries only three attributes: ``identifier``, ``model_type`` (always
    ``ModelType.embedding``) and ``metadata`` holding the embedding dimension.
    """

    def __init__(self, identifier: str, dim: int):
        # Every dummy is an embedding model; only the name and size vary.
        self.model_type = ModelType.embedding
        self.identifier = identifier
        self.metadata = dict(embedding_dimension=dim)
class _DummyRoutingTable:
    """Bare-bones routing table double handed to VectorIORouter in these tests."""

    def __init__(self):
        # (identifier, embedding dimension) for each fake embedding model.
        specs = (
            ("first-model", 123),
            ("second-model", 512),
        )
        self._models: list[_DummyModel] = [
            _DummyModel(name, size) for name, size in specs
        ]

    async def get_all_with_type(self, _type: str):
        # The type filter is ignored: the tests only ever ask for embedding
        # models, and every stored dummy is one.
        return self._models

    # Present only so the stub matches what VectorIORouter expects of a
    # routing table; none of the tests in this module reach these methods.
    async def register_vector_db(self, *args, **kwargs):
        raise NotImplementedError

    async def get_provider_impl(self, *args, **kwargs):
        raise NotImplementedError
@pytest.mark.asyncio
async def test_global_default_used(monkeypatch):
    """Router should pick up global default when no explicit model is supplied."""
    # monkeypatch restores the environment automatically at teardown, so no
    # manual cleanup of these variables is needed after the assertions.
    monkeypatch.setenv("LLAMA_STACK_DEFAULT_EMBEDDING_MODEL", "env-default-model")
    monkeypatch.setenv("LLAMA_STACK_DEFAULT_EMBEDDING_DIMENSION", "256")

    router = VectorIORouter(routing_table=_DummyRoutingTable())
    model, dim = await router._resolve_embedding_model(None)

    assert model == "env-default-model"
    # The dimension is supplied as the string "256" and must come back as an int.
    assert dim == 256
@pytest.mark.asyncio
async def test_explicit_override(monkeypatch):
    """Explicit model parameter should override global default."""
    # A global default is configured on purpose so the test proves it is
    # ignored; monkeypatch undoes the change automatically at teardown.
    monkeypatch.setenv("LLAMA_STACK_DEFAULT_EMBEDDING_MODEL", "env-default-model")

    router = VectorIORouter(routing_table=_DummyRoutingTable())
    model, dim = await router._resolve_embedding_model("first-model")

    assert model == "first-model"
    # 123 is the dimension registered for "first-model" in _DummyRoutingTable.
    assert dim == 123
@pytest.mark.asyncio
async def test_error_when_no_default(monkeypatch):
    """Router should raise when neither explicit nor global default is available."""
    # Enforce the "no global default" precondition even if the developer's
    # shell or CI exports these variables; raising=False tolerates them being
    # absent, and monkeypatch restores the original values at teardown.
    monkeypatch.delenv("LLAMA_STACK_DEFAULT_EMBEDDING_MODEL", raising=False)
    monkeypatch.delenv("LLAMA_STACK_DEFAULT_EMBEDDING_DIMENSION", raising=False)

    router = VectorIORouter(routing_table=_DummyRoutingTable())
    with pytest.raises(RuntimeError):
        await router._resolve_embedding_model(None)