feat(vector-io): implement global default embedding model configuration (Issue #2729)

- Add VectorStoreConfig with global default_embedding_model and default_embedding_dimension
- Support environment variables LLAMA_STACK_DEFAULT_EMBEDDING_MODEL and LLAMA_STACK_DEFAULT_EMBEDDING_DIMENSION
- Implement precedence: explicit model > global default > clear error (no fallback)
- Update VectorIORouter with _resolve_embedding_model() precedence logic
- Remove non-deterministic 'first model in run.yaml' fallback behavior
- Add vector_store_config to StackRunConfig and all distribution templates
- Include comprehensive unit tests for config loading and router precedence
- Update documentation with configuration examples and usage patterns
- Fix error messages to include 'Failed to' prefix per coding standards

Resolves deterministic vector store creation by eliminating unpredictable fallbacks
and providing clear configuration options at the stack level.
This commit is contained in:
skamenan7 2025-07-25 17:06:43 -04:00
parent 8422bd102a
commit 17fbd21c0d
7 changed files with 243 additions and 8 deletions

View file

@@ -12,6 +12,7 @@ from urllib.parse import urlparse
from pydantic import BaseModel, Field, field_validator, model_validator
from llama_stack.apis.benchmarks import Benchmark, BenchmarkInput
from llama_stack.apis.common.vector_store_config import VectorStoreConfig
from llama_stack.apis.datasetio import DatasetIO
from llama_stack.apis.datasets import Dataset, DatasetInput
from llama_stack.apis.eval import Eval
@@ -474,6 +475,12 @@ InferenceStoreConfig (with queue tuning parameters) or a SqlStoreConfig (depreca
If not specified, a default SQLite store will be used.""",
)
# Global vector-store defaults (embedding model etc.)
vector_store_config: VectorStoreConfig = Field(
default_factory=VectorStoreConfig,
description="Global defaults for vector-store creation (embedding model, dimension, …)",
)
# registry of "resources" in the distribution
models: list[ModelInput] = Field(default_factory=list)
shields: list[ShieldInput] = Field(default_factory=list)

View file

@@ -11,6 +11,7 @@ from typing import Any
from llama_stack.apis.common.content_types import (
InterleavedContent,
)
from llama_stack.apis.common.vector_store_config import VectorStoreConfig
from llama_stack.apis.models import ModelType
from llama_stack.apis.vector_io import (
Chunk,
@@ -76,6 +77,42 @@ class VectorIORouter(VectorIO):
logger.error(f"Error getting embedding models: {e}")
return None
async def _resolve_embedding_model(self, explicit_model: str | None = None) -> tuple[str, int]:
"""Apply precedence rules to decide which embedding model to use.
1. If *explicit_model* is provided, verify dimension (if possible) and use it.
2. Else use the global default in ``vector_store_config``.
3. Else raise ``MissingEmbeddingModelError``.
"""
# 1. explicit override
if explicit_model is not None:
# We still need a dimension; try to look it up in routing table
all_models = await self.routing_table.get_all_with_type("model")
for m in all_models:
if getattr(m, "identifier", None) == explicit_model:
dim = m.metadata.get("embedding_dimension")
if dim is None:
raise ValueError(
f"Failed to use embedding model {explicit_model}: found but has no embedding_dimension metadata"
)
return explicit_model, dim
# If not found, dimension unknown - defer to caller
return explicit_model, None # type: ignore
# 2. global default
cfg = VectorStoreConfig() # picks up env vars automatically
if cfg.default_embedding_model is not None:
return cfg.default_embedding_model, cfg.default_embedding_dimension or 384
# 3. error - no default
class MissingEmbeddingModelError(RuntimeError):
pass
raise MissingEmbeddingModelError(
"Failed to create vector store: No embedding model provided. Set vector_store_config.default_embedding_model or supply one in the API call."
)
async def register_vector_db(
self,
vector_db_id: str,
@@ -102,7 +139,7 @@ class VectorIORouter(VectorIO):
ttl_seconds: int | None = None,
) -> None:
logger.debug(
f"VectorIORouter.insert_chunks: {vector_db_id}, {len(chunks)} chunks, ttl_seconds={ttl_seconds}, chunk_ids={[chunk.metadata['document_id'] for chunk in chunks[:3]]}{' and more...' if len(chunks) > 3 else ''}",
f"VectorIORouter.insert_chunks: {vector_db_id}, {len(chunks)} chunks, ttl_seconds={ttl_seconds}, chunk_ids={[chunk.chunk_id for chunk in chunks[:3]]}{' and more...' if len(chunks) > 3 else ''}",
)
provider = await self.routing_table.get_provider_impl(vector_db_id)
return await provider.insert_chunks(vector_db_id, chunks, ttl_seconds)
@@ -131,13 +168,12 @@ class VectorIORouter(VectorIO):
) -> VectorStoreObject:
logger.debug(f"VectorIORouter.openai_create_vector_store: name={name}, provider_id={provider_id}")
# If no embedding model is provided, use the first available one
if embedding_model is None:
embedding_model_info = await self._get_first_embedding_model()
if embedding_model_info is None:
raise ValueError("No embedding model provided and no embedding models available in the system")
embedding_model, embedding_dimension = embedding_model_info
logger.info(f"No embedding model specified, using first available: {embedding_model}")
# Determine which embedding model to use based on new precedence
embedding_model, embedding_dimension = await self._resolve_embedding_model(embedding_model)
if embedding_dimension is None:
# try to fetch dimension from model metadata as fallback
embedding_model_info = await self._get_first_embedding_model() # may still help
embedding_dimension = embedding_model_info[1] if embedding_model_info else 384
vector_db_id = f"vs_{uuid.uuid4()}"
registered_vector_db = await self.routing_table.register_vector_db(