chore: Updating how default embedding model is set in stack

Signed-off-by: Francisco Javier Arceo <farceo@redhat.com>

# Conflicts:
#	.github/workflows/integration-vector-io-tests.yml
#	llama_stack/distributions/ci-tests/run.yaml
#	llama_stack/distributions/starter-gpu/run.yaml
#	llama_stack/distributions/starter/run.yaml
#	llama_stack/distributions/template.py
#	llama_stack/providers/utils/memory/openai_vector_store_mixin.py
Author: Francisco Javier Arceo
Date:   2025-10-15 17:15:43 -04:00
Parent: cd152f4240
Commit: 24a1430c8b

32 changed files with 276 additions and 265 deletions

View file

@@ -169,9 +169,7 @@ jobs:
         run: |
           uv run --no-sync \
             pytest -sv --stack-config="files=inline::localfs,inference=inline::sentence-transformers,vector_io=${{ matrix.vector-io-provider }}" \
-            tests/integration/vector_io \
-            --embedding-model inline::sentence-transformers/nomic-ai/nomic-embed-text-v1.5 \
-            --embedding-dimension 768
+            tests/integration/vector_io
       - name: Check Storage and Memory Available After Tests
         if: ${{ always() }}

View file

@@ -92,13 +92,15 @@ models:
     provider_id: inline::sentence-transformers
     metadata:
       embedding_dimension: 768
-      default_configured: true
+
+vector_stores:
+  default_embedding_model_id: nomic-ai/nomic-embed-text-v1.5
 ```
 
 With this configuration:
 - `client.vector_stores.create()` works without requiring embedding model parameters
 - The system automatically uses the default model and its embedding dimension for any newly created vector store
-- Only one model can be marked as `default_configured: true`
+- The `vector_stores` section explicitly configures which embedding model to use as default
 
 ## Vector Store Operations
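For reference, a minimal client-side sketch of the documented behavior (the base URL and store name are illustrative assumptions; the stack must be running with the configuration above):

```python
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")  # assumed local deployment

# No embedding model or dimension arguments are passed; the server falls back to
# vector_stores.default_embedding_model_id from run.yaml.
vector_store = client.vector_stores.create(name="my_docs")
print(vector_store.id)
```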

View file

@@ -351,6 +351,15 @@ class AuthenticationRequiredError(Exception):
     pass
 
 
+class VectorStoresConfig(BaseModel):
+    """Configuration for vector stores in the stack."""
+
+    default_embedding_model_id: str = Field(
+        ...,
+        description="ID of the embedding model to use as default for vector stores when none is specified. Must reference a model defined in the 'models' section.",
+    )
+
+
 class QuotaPeriod(StrEnum):
     DAY = "day"
@@ -526,6 +535,11 @@ If not specified, a default SQLite store will be used.""",
         description="Path to directory containing external API implementations. The APIs code and dependencies must be installed on the system.",
     )
+
+    vector_stores: VectorStoresConfig | None = Field(
+        default=None,
+        description="Configuration for vector stores, including default embedding model",
+    )
 
     @field_validator("external_providers_dir")
     @classmethod
     def validate_external_providers_dir(cls, v):

View file

@@ -409,6 +409,10 @@ async def instantiate_provider(
     if "telemetry_enabled" in inspect.signature(getattr(module, method)).parameters and run_config.telemetry:
         args.append(run_config.telemetry.enabled)
 
+    # vector_io providers need access to run_config.vector_stores
+    if provider_spec.api == Api.vector_io and "run_config" in inspect.signature(getattr(module, method)).parameters:
+        args.append(run_config)
+
     fn = getattr(module, method)
     impl = await fn(*args)
     impl.__provider_id__ = provider.provider_id

View file

@@ -98,30 +98,6 @@ REGISTRY_REFRESH_TASK = None
 TEST_RECORDING_CONTEXT = None
 
 
-async def validate_default_embedding_model(impls: dict[Api, Any]):
-    """Validate that at most one embedding model is marked as default."""
-    if Api.models not in impls:
-        return
-
-    models_impl = impls[Api.models]
-    response = await models_impl.list_models()
-    models_list = response.data if hasattr(response, "data") else response
-
-    default_embedding_models = []
-    for model in models_list:
-        if model.model_type == "embedding" and model.metadata.get("default_configured") is True:
-            default_embedding_models.append(model.identifier)
-
-    if len(default_embedding_models) > 1:
-        raise ValueError(
-            f"Multiple embedding models marked as default_configured=True: {default_embedding_models}. "
-            "Only one embedding model can be marked as default."
-        )
-
-    if default_embedding_models:
-        logger.info(f"Default embedding model configured: {default_embedding_models[0]}")
-
-
 async def register_resources(run_config: StackRunConfig, impls: dict[Api, Any]):
     for rsrc, api, register_method, list_method in RESOURCES:
         objects = getattr(run_config, rsrc)
@@ -152,7 +128,48 @@ async def register_resources(run_config: StackRunConfig, impls: dict[Api, Any]):
                 f"{rsrc.capitalize()}: {obj.identifier} served by {obj.provider_id}",
             )
 
-    await validate_default_embedding_model(impls)
+
+async def validate_vector_stores_config(run_config: StackRunConfig, impls: dict[Api, Any]):
+    """Validate vector stores configuration."""
+    if not run_config.vector_stores:
+        return
+
+    vector_stores_config = run_config.vector_stores
+    default_model_id = vector_stores_config.default_embedding_model_id
+
+    if Api.models not in impls:
+        raise ValueError(f"Models API is not available but vector_stores config requires model '{default_model_id}'")
+
+    models_impl = impls[Api.models]
+    response = await models_impl.list_models()
+    models_list = response.data if hasattr(response, "data") else response
+
+    # find default embedding model
+    default_model = None
+    for model in models_list:
+        if model.identifier == default_model_id:
+            default_model = model
+            break
+
+    if not default_model:
+        available_models = [m.identifier for m in models_list if m.model_type == "embedding"]
+        raise ValueError(
+            f"Embedding model '{default_model_id}' not found. Available embedding models: {available_models}"
+        )
+
+    if default_model.model_type != "embedding":
+        raise ValueError(f"Model '{default_model_id}' is type '{default_model.model_type}', not 'embedding'")
+
+    embedding_dimension = default_model.metadata.get("embedding_dimension")
+    if embedding_dimension is None:
+        raise ValueError(f"Embedding model '{default_model_id}' is missing 'embedding_dimension' in metadata")
+
+    try:
+        int(embedding_dimension)
+    except ValueError as err:
+        raise ValueError(f"Embedding dimension '{embedding_dimension}' cannot be converted to an integer") from err
+
+    logger.debug(f"Validated default embedding model: {default_model_id} (dimension: {embedding_dimension})")
 
 
 class EnvVarError(Exception):
@@ -367,8 +384,8 @@ class Stack:
         await impls[Api.conversations].initialize()
         await register_resources(self.run_config, impls)
 
         await refresh_registry_once(impls)
+        await validate_vector_stores_config(self.run_config, impls)
 
         self.impls = impls
 
     def create_registry_refresh_task(self):

View file

@@ -239,3 +239,5 @@ server:
   port: 8321
 telemetry:
   enabled: true
+vector_stores:
+  default_embedding_model_id: sentence-transformers/nomic-ai/nomic-embed-text-v1.5

View file

@@ -240,5 +240,7 @@ tool_groups:
   provider_id: rag-runtime
 server:
   port: 8321
+vector_stores:
+  default_embedding_model_id: sentence-transformers/nomic-ai/nomic-embed-text-v1.5
 telemetry:
   enabled: true

View file

@@ -239,3 +239,5 @@ server:
   port: 8321
 telemetry:
   enabled: true
+vector_stores:
+  default_embedding_model_id: sentence-transformers/nomic-ai/nomic-embed-text-v1.5

View file

@@ -13,6 +13,7 @@ from llama_stack.core.datatypes import (
     ProviderSpec,
     ShieldInput,
     ToolGroupInput,
+    VectorStoresConfig,
 )
 from llama_stack.core.utils.dynamic import instantiate_class_type
 from llama_stack.distributions.template import DistributionTemplate, RunConfigSettings
@@ -227,6 +228,9 @@ def get_distribution_template(name: str = "starter") -> DistributionTemplate:
             default_models=[],
             default_tool_groups=default_tool_groups,
             default_shields=default_shields,
+            vector_stores_config=VectorStoresConfig(
+                default_embedding_model_id="sentence-transformers/nomic-ai/nomic-embed-text-v1.5"
+            ),
         ),
     },
     run_config_env_vars={

View file

@@ -27,6 +27,7 @@ from llama_stack.core.datatypes import (
     ShieldInput,
     TelemetryConfig,
     ToolGroupInput,
+    VectorStoresConfig,
 )
 from llama_stack.core.distribution import get_provider_registry
 from llama_stack.core.utils.dynamic import instantiate_class_type
@@ -183,6 +184,7 @@ class RunConfigSettings(BaseModel):
     metadata_store: dict | None = None
     inference_store: dict | None = None
     conversations_store: dict | None = None
+    vector_stores_config: VectorStoresConfig | None = None
     telemetry: TelemetryConfig = Field(default_factory=lambda: TelemetryConfig(enabled=True))
 
     def run_config(
@@ -227,7 +229,7 @@ class RunConfigSettings(BaseModel):
         apis = sorted(providers.keys())
 
         # Return a dict that matches StackRunConfig structure
-        return {
+        config = {
             "version": LLAMA_STACK_RUN_CONFIG_VERSION,
             "image_name": name,
             "container_image": container_image,
@@ -261,6 +263,11 @@ class RunConfigSettings(BaseModel):
             "telemetry": self.telemetry.model_dump(exclude_none=True) if self.telemetry else None,
         }
 
+        if self.vector_stores_config:
+            config["vector_stores"] = self.vector_stores_config.model_dump(exclude_none=True)
+
+        return config
+
 
 class DistributionTemplate(BaseModel):
     """

View file

@@ -59,7 +59,6 @@ class SentenceTransformersInferenceImpl(
                 provider_id=self.__provider_id__,
                 metadata={
                     "embedding_dimension": 768,
-                    "default_configured": True,
                 },
                 model_type=ModelType.embedding,
             ),

View file

@@ -6,21 +6,29 @@
 from typing import Any
 
+from llama_stack.core.datatypes import StackRunConfig
 from llama_stack.providers.datatypes import Api
 
 from .config import ChromaVectorIOConfig
 
 
-async def get_provider_impl(config: ChromaVectorIOConfig, deps: dict[Api, Any]):
+async def get_provider_impl(
+    config: ChromaVectorIOConfig, deps: dict[Api, Any], run_config: StackRunConfig | None = None
+):
     from llama_stack.providers.remote.vector_io.chroma.chroma import (
         ChromaVectorIOAdapter,
     )
 
+    vector_stores_config = None
+    if run_config and run_config.vector_stores:
+        vector_stores_config = run_config.vector_stores
+
     impl = ChromaVectorIOAdapter(
         config,
         deps[Api.inference],
         deps[Api.models],
         deps.get(Api.files),
+        vector_stores_config,
     )
     await impl.initialize()
     return impl

View file

@@ -6,21 +6,29 @@
 from typing import Any
 
+from llama_stack.core.datatypes import StackRunConfig
 from llama_stack.providers.datatypes import Api
 
 from .config import FaissVectorIOConfig
 
 
-async def get_provider_impl(config: FaissVectorIOConfig, deps: dict[Api, Any]):
+async def get_provider_impl(
+    config: FaissVectorIOConfig, deps: dict[Api, Any], run_config: StackRunConfig | None = None
+):
     from .faiss import FaissVectorIOAdapter
 
     assert isinstance(config, FaissVectorIOConfig), f"Unexpected config type: {type(config)}"
 
+    vector_stores_config = None
+    if run_config and run_config.vector_stores:
+        vector_stores_config = run_config.vector_stores
+
     impl = FaissVectorIOAdapter(
         config,
         deps[Api.inference],
         deps[Api.models],
         deps.get(Api.files),
+        vector_stores_config,
     )
     await impl.initialize()
     return impl

View file

@@ -24,6 +24,7 @@ from llama_stack.apis.vector_io import (
     QueryChunksResponse,
     VectorIO,
 )
+from llama_stack.core.datatypes import VectorStoresConfig
 from llama_stack.log import get_logger
 from llama_stack.providers.datatypes import (
     HealthResponse,
@@ -206,11 +207,13 @@ class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPr
         inference_api: Inference,
         models_api: Models,
         files_api: Files | None,
+        vector_stores_config: VectorStoresConfig | None = None,
     ) -> None:
         super().__init__(files_api=files_api, kvstore=None)
         self.config = config
         self.inference_api = inference_api
         self.models_api = models_api
+        self.vector_stores_config = vector_stores_config
         self.cache: dict[str, VectorDBWithIndex] = {}
 
     async def initialize(self) -> None:

View file

@@ -6,19 +6,27 @@
 from typing import Any
 
+from llama_stack.core.datatypes import StackRunConfig
 from llama_stack.providers.datatypes import Api
 
 from .config import MilvusVectorIOConfig
 
 
-async def get_provider_impl(config: MilvusVectorIOConfig, deps: dict[Api, Any]):
+async def get_provider_impl(
+    config: MilvusVectorIOConfig, deps: dict[Api, Any], run_config: StackRunConfig | None = None
+):
     from llama_stack.providers.remote.vector_io.milvus.milvus import MilvusVectorIOAdapter
 
+    vector_stores_config = None
+    if run_config and run_config.vector_stores:
+        vector_stores_config = run_config.vector_stores
+
     impl = MilvusVectorIOAdapter(
         config,
         deps[Api.inference],
-        deps[Api.models],
+        deps.get(Api.models),
         deps.get(Api.files),
+        vector_stores_config,
     )
     await impl.initialize()
     return impl

View file

@@ -6,20 +6,28 @@
 from typing import Any
 
+from llama_stack.core.datatypes import StackRunConfig
 from llama_stack.providers.datatypes import Api
 
 from .config import QdrantVectorIOConfig
 
 
-async def get_provider_impl(config: QdrantVectorIOConfig, deps: dict[Api, Any]):
+async def get_provider_impl(
+    config: QdrantVectorIOConfig, deps: dict[Api, Any], run_config: StackRunConfig | None = None
+):
     from llama_stack.providers.remote.vector_io.qdrant.qdrant import QdrantVectorIOAdapter
 
+    vector_stores_config = None
+    if run_config and run_config.vector_stores:
+        vector_stores_config = run_config.vector_stores
+
     assert isinstance(config, QdrantVectorIOConfig), f"Unexpected config type: {type(config)}"
     impl = QdrantVectorIOAdapter(
         config,
         deps[Api.inference],
         deps[Api.models],
         deps.get(Api.files),
+        vector_stores_config,
     )
     await impl.initialize()
     return impl

View file

@@ -6,20 +6,28 @@
 from typing import Any
 
+from llama_stack.core.datatypes import StackRunConfig
 from llama_stack.providers.datatypes import Api
 
 from .config import SQLiteVectorIOConfig
 
 
-async def get_provider_impl(config: SQLiteVectorIOConfig, deps: dict[Api, Any]):
+async def get_provider_impl(
+    config: SQLiteVectorIOConfig, deps: dict[Api, Any], run_config: StackRunConfig | None = None
+):
     from .sqlite_vec import SQLiteVecVectorIOAdapter
 
+    vector_stores_config = None
+    if run_config and run_config.vector_stores:
+        vector_stores_config = run_config.vector_stores
+
     assert isinstance(config, SQLiteVectorIOConfig), f"Unexpected config type: {type(config)}"
     impl = SQLiteVecVectorIOAdapter(
         config,
         deps[Api.inference],
         deps[Api.models],
         deps.get(Api.files),
+        vector_stores_config,
     )
     await impl.initialize()
     return impl

View file

@@ -24,6 +24,7 @@ from llama_stack.apis.vector_io import (
     QueryChunksResponse,
     VectorIO,
 )
+from llama_stack.core.datatypes import VectorStoresConfig
 from llama_stack.log import get_logger
 from llama_stack.providers.datatypes import VectorDBsProtocolPrivate
 from llama_stack.providers.utils.kvstore import kvstore_impl
@@ -416,11 +417,13 @@ class SQLiteVecVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtoc
         inference_api: Inference,
         models_api: Models,
         files_api: Files | None,
+        vector_stores_config: VectorStoresConfig | None = None,
     ) -> None:
         super().__init__(files_api=files_api, kvstore=None)
         self.config = config
         self.inference_api = inference_api
         self.models_api = models_api
+        self.vector_stores_config = vector_stores_config
         self.cache: dict[str, VectorDBWithIndex] = {}
         self.vector_db_store = None

View file

@@ -4,19 +4,27 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
+from llama_stack.core.datatypes import StackRunConfig
 from llama_stack.providers.datatypes import Api, ProviderSpec
 
 from .config import ChromaVectorIOConfig
 
 
-async def get_adapter_impl(config: ChromaVectorIOConfig, deps: dict[Api, ProviderSpec]):
+async def get_adapter_impl(
+    config: ChromaVectorIOConfig, deps: dict[Api, ProviderSpec], run_config: StackRunConfig | None = None
+):
     from .chroma import ChromaVectorIOAdapter
 
+    vector_stores_config = None
+    if run_config and run_config.vector_stores:
+        vector_stores_config = run_config.vector_stores
+
     impl = ChromaVectorIOAdapter(
         config,
         deps[Api.inference],
         deps[Api.models],
         deps.get(Api.files),
+        vector_stores_config,
     )
     await impl.initialize()
     return impl

View file

@@ -12,15 +12,17 @@ import chromadb
 from numpy.typing import NDArray
 
 from llama_stack.apis.files import Files
-from llama_stack.apis.inference import InterleavedContent
+from llama_stack.apis.inference import Inference, InterleavedContent
+from llama_stack.apis.models import Models
 from llama_stack.apis.vector_dbs import VectorDB
 from llama_stack.apis.vector_io import (
     Chunk,
     QueryChunksResponse,
     VectorIO,
 )
+from llama_stack.core.datatypes import VectorStoresConfig
 from llama_stack.log import get_logger
-from llama_stack.providers.datatypes import Api, VectorDBsProtocolPrivate
+from llama_stack.providers.datatypes import VectorDBsProtocolPrivate
 from llama_stack.providers.inline.vector_io.chroma import ChromaVectorIOConfig as InlineChromaVectorIOConfig
 from llama_stack.providers.utils.kvstore import kvstore_impl
 from llama_stack.providers.utils.kvstore.api import KVStore
@@ -137,15 +139,17 @@ class ChromaVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP
     def __init__(
         self,
         config: RemoteChromaVectorIOConfig | InlineChromaVectorIOConfig,
-        inference_api: Api.inference,
-        models_apis: Api.models,
+        inference_api: Inference,
+        models_apis: Models,
         files_api: Files | None,
+        vector_stores_config: VectorStoresConfig | None = None,
     ) -> None:
         super().__init__(files_api=files_api, kvstore=None)
         log.info(f"Initializing ChromaVectorIOAdapter with url: {config}")
         self.config = config
         self.inference_api = inference_api
         self.models_api = models_apis
+        self.vector_stores_config = vector_stores_config
         self.client = None
         self.cache = {}
         self.vector_db_store = None

View file

@@ -4,21 +4,28 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
+from llama_stack.core.datatypes import StackRunConfig
 from llama_stack.providers.datatypes import Api, ProviderSpec
 
 from .config import MilvusVectorIOConfig
 
 
-async def get_adapter_impl(config: MilvusVectorIOConfig, deps: dict[Api, ProviderSpec]):
+async def get_adapter_impl(
+    config: MilvusVectorIOConfig, deps: dict[Api, ProviderSpec], run_config: StackRunConfig | None = None
+):
     from .milvus import MilvusVectorIOAdapter
 
-    assert isinstance(config, MilvusVectorIOConfig), f"Unexpected config type: {type(config)}"
+    vector_stores_config = None
+    if run_config and run_config.vector_stores:
+        vector_stores_config = run_config.vector_stores
 
+    assert isinstance(config, MilvusVectorIOConfig), f"Unexpected config type: {type(config)}"
     impl = MilvusVectorIOAdapter(
         config,
         deps[Api.inference],
         deps[Api.models],
         deps.get(Api.files),
+        vector_stores_config,
     )
     await impl.initialize()
     return impl

View file

@@ -21,6 +21,7 @@ from llama_stack.apis.vector_io import (
     QueryChunksResponse,
     VectorIO,
 )
+from llama_stack.core.datatypes import VectorStoresConfig
 from llama_stack.log import get_logger
 from llama_stack.providers.datatypes import VectorDBsProtocolPrivate
 from llama_stack.providers.inline.vector_io.milvus import MilvusVectorIOConfig as InlineMilvusVectorIOConfig
@@ -308,8 +309,9 @@ class MilvusVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP
         self,
         config: RemoteMilvusVectorIOConfig | InlineMilvusVectorIOConfig,
         inference_api: Inference,
-        models_api: Models,
+        models_api: Models | None,
         files_api: Files | None,
+        vector_stores_config: VectorStoresConfig | None = None,
     ) -> None:
         super().__init__(files_api=files_api, kvstore=None)
         self.config = config
@@ -317,6 +319,7 @@ class MilvusVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP
         self.client = None
         self.inference_api = inference_api
         self.models_api = models_api
+        self.vector_stores_config = vector_stores_config
         self.vector_db_store = None
         self.metadata_collection_name = "openai_vector_stores_metadata"

View file

@@ -4,14 +4,26 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
+from llama_stack.core.datatypes import StackRunConfig
 from llama_stack.providers.datatypes import Api, ProviderSpec
 
 from .config import PGVectorVectorIOConfig
 
 
-async def get_adapter_impl(config: PGVectorVectorIOConfig, deps: dict[Api, ProviderSpec]):
+async def get_adapter_impl(
+    config: PGVectorVectorIOConfig, deps: dict[Api, ProviderSpec], run_config: StackRunConfig | None = None
+):
     from .pgvector import PGVectorVectorIOAdapter
 
-    impl = PGVectorVectorIOAdapter(config, deps[Api.inference], deps[Api.models], deps.get(Api.files, None))
+    vector_stores_config = None
+    if run_config and run_config.vector_stores:
+        vector_stores_config = run_config.vector_stores
+
+    impl = PGVectorVectorIOAdapter(
+        config,
+        deps[Api.inference],
+        deps[Api.models],
+        deps.get(Api.files),
+        vector_stores_config,
+    )
     await impl.initialize()
     return impl

View file

@@ -23,6 +23,7 @@ from llama_stack.apis.vector_io import (
     QueryChunksResponse,
     VectorIO,
 )
+from llama_stack.core.datatypes import VectorStoresConfig
 from llama_stack.log import get_logger
 from llama_stack.providers.datatypes import VectorDBsProtocolPrivate
 from llama_stack.providers.utils.inference.prompt_adapter import (
@@ -346,11 +347,13 @@ class PGVectorVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtoco
         inference_api: Inference,
         models_api: Models,
         files_api: Files | None = None,
+        vector_stores_config: VectorStoresConfig | None = None,
     ) -> None:
         super().__init__(files_api=files_api, kvstore=None)
         self.config = config
         self.inference_api = inference_api
         self.models_api = models_api
+        self.vector_stores_config = vector_stores_config
         self.conn = None
         self.cache = {}
         self.vector_db_store = None

View file

@@ -4,19 +4,27 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
+from llama_stack.core.datatypes import StackRunConfig
 from llama_stack.providers.datatypes import Api, ProviderSpec
 
 from .config import QdrantVectorIOConfig
 
 
-async def get_adapter_impl(config: QdrantVectorIOConfig, deps: dict[Api, ProviderSpec]):
+async def get_adapter_impl(
+    config: QdrantVectorIOConfig, deps: dict[Api, ProviderSpec], run_config: StackRunConfig | None = None
+):
     from .qdrant import QdrantVectorIOAdapter
 
+    vector_stores_config = None
+    if run_config and run_config.vector_stores:
+        vector_stores_config = run_config.vector_stores
+
     impl = QdrantVectorIOAdapter(
         config,
         deps[Api.inference],
         deps[Api.models],
         deps.get(Api.files),
+        vector_stores_config,
     )
     await impl.initialize()
     return impl

View file

@@ -25,6 +25,7 @@ from llama_stack.apis.vector_io import (
     VectorStoreChunkingStrategy,
     VectorStoreFileObject,
 )
+from llama_stack.core.datatypes import VectorStoresConfig
 from llama_stack.log import get_logger
 from llama_stack.providers.datatypes import VectorDBsProtocolPrivate
 from llama_stack.providers.inline.vector_io.qdrant import QdrantVectorIOConfig as InlineQdrantVectorIOConfig
@@ -163,6 +164,7 @@ class QdrantVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP
         inference_api: Inference,
         models_api: Models,
         files_api: Files | None = None,
+        vector_stores_config: VectorStoresConfig | None = None,
     ) -> None:
         super().__init__(files_api=files_api, kvstore=None)
         self.config = config
@@ -170,6 +172,7 @@ class QdrantVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP
         self.cache = {}
         self.inference_api = inference_api
         self.models_api = models_api
+        self.vector_stores_config = vector_stores_config
         self.vector_db_store = None
         self._qdrant_lock = asyncio.Lock()

View file

@@ -4,19 +4,27 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
+from llama_stack.core.datatypes import StackRunConfig
 from llama_stack.providers.datatypes import Api, ProviderSpec
 
 from .config import WeaviateVectorIOConfig
 
 
-async def get_adapter_impl(config: WeaviateVectorIOConfig, deps: dict[Api, ProviderSpec]):
+async def get_adapter_impl(
+    config: WeaviateVectorIOConfig, deps: dict[Api, ProviderSpec], run_config: StackRunConfig | None = None
+):
     from .weaviate import WeaviateVectorIOAdapter
 
+    vector_stores_config = None
+    if run_config and run_config.vector_stores:
+        vector_stores_config = run_config.vector_stores
+
     impl = WeaviateVectorIOAdapter(
         config,
         deps[Api.inference],
         deps[Api.models],
         deps.get(Api.files),
+        vector_stores_config,
     )
     await impl.initialize()
     return impl

View file

@@ -19,6 +19,7 @@ from llama_stack.apis.inference import Inference
 from llama_stack.apis.models import Models
 from llama_stack.apis.vector_dbs import VectorDB
 from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO
+from llama_stack.core.datatypes import VectorStoresConfig
 from llama_stack.core.request_headers import NeedsRequestProviderData
 from llama_stack.log import get_logger
 from llama_stack.providers.datatypes import VectorDBsProtocolPrivate
@@ -286,11 +287,13 @@ class WeaviateVectorIOAdapter(
         inference_api: Inference,
         models_api: Models,
         files_api: Files | None,
+        vector_stores_config: VectorStoresConfig | None = None,
     ) -> None:
         super().__init__(files_api=files_api, kvstore=None)
         self.config = config
         self.inference_api = inference_api
         self.models_api = models_api
+        self.vector_stores_config = vector_stores_config
         self.client_cache = {}
         self.cache = {}
         self.vector_db_store = None

View file

@@ -44,6 +44,7 @@ from llama_stack.apis.vector_io import (
     VectorStoreSearchResponse,
     VectorStoreSearchResponsePage,
 )
+from llama_stack.core.datatypes import VectorStoresConfig
 from llama_stack.core.id_generation import generate_object_id
 from llama_stack.log import get_logger
 from llama_stack.providers.utils.kvstore.api import KVStore
@@ -81,13 +82,17 @@ class OpenAIVectorStoreMixin(ABC):
     # Implementing classes should call super().__init__() in their __init__ method
     # to properly initialize the mixin attributes.
     def __init__(
-        self, files_api: Files | None = None, kvstore: KVStore | None = None, models_api: Models | None = None
+        self,
+        files_api: Files | None = None,
+        kvstore: KVStore | None = None,
     ):
         self.openai_vector_stores: dict[str, dict[str, Any]] = {}
         self.openai_file_batches: dict[str, dict[str, Any]] = {}
         self.files_api = files_api
         self.kvstore = kvstore
-        self.models_api = models_api
+        # These will be set by implementing classes
+        self.models_api: Models | None = None
+        self.vector_stores_config: VectorStoresConfig | None = None
         self._last_file_batch_cleanup_time = 0
         self._file_batch_tasks: dict[str, asyncio.Task[None]] = {}
@@ -474,24 +479,6 @@
         store_info = self.openai_vector_stores[vector_db_id]
         return VectorStoreObject.model_validate(store_info)
 
-    async def _get_embedding_models(self) -> list[Model]:
-        """Get list of embedding models from the models API."""
-        if not self.models_api:
-            return []
-
-        models_response = await self.models_api.list_models()
-        models_list = models_response.data if hasattr(models_response, "data") else models_response
-
-        embedding_models = []
-        for model in models_list:
-            if not isinstance(model, Model):
-                logger.warning(f"Non-Model object found in models list: {type(model)} - {model}")
-                continue
-            if model.model_type == "embedding":
-                embedding_models.append(model)
-
-        return embedding_models
-
     async def _get_embedding_dimension_for_model(self, model_id: str) -> int | None:
         """Get embedding dimension for a specific model by looking it up in the models API.
@@ -501,9 +488,18 @@
         Returns:
             The embedding dimension for the model, or None if not found
         """
-        embedding_models = await self._get_embedding_models()
+        if not self.models_api:
+            return None
+
+        models_response = await self.models_api.list_models()
+        models_list = models_response.data if hasattr(models_response, "data") else models_response
 
-        for model in embedding_models:
+        for model in models_list:
+            if not isinstance(model, Model):
+                continue
+            if model.model_type != "embedding":
+                continue
+
             # Check for exact match first
             if model.identifier == model_id:
                 embedding_dimension = model.metadata.get("embedding_dimension")
@@ -523,35 +519,23 @@
         return None
 
     async def _get_default_embedding_model_and_dimension(self) -> tuple[str, int] | None:
-        """Get default embedding model from the models API.
+        """Get default embedding model from vector stores config.
 
-        Looks for embedding models marked with default_configured=True in metadata.
-        Returns None if no default embedding model is found.
-        Raises ValueError if multiple defaults are found.
+        Returns None if no vector stores config is provided.
         """
-        embedding_models = await self._get_embedding_models()
+        if not self.vector_stores_config:
+            logger.info("No vector stores config provided")
+            return None
 
-        default_models = []
-        for model in embedding_models:
-            if model.metadata.get("default_configured") is True:
-                default_models.append(model.identifier)
-
-        if len(default_models) > 1:
-            raise ValueError(
-                f"Multiple embedding models marked as default_configured=True: {default_models}. "
-                "Only one embedding model can be marked as default."
-            )
-
-        if default_models:
-            model_id = default_models[0]
-            embedding_dimension = await self._get_embedding_dimension_for_model(model_id)
-            if embedding_dimension is None:
-                raise ValueError(f"Embedding model '{model_id}' has no embedding_dimension in metadata")
-            logger.info(f"Using default embedding model: {model_id} with dimension {embedding_dimension}")
-            return model_id, embedding_dimension
-
-        logger.debug("No default embedding models found")
-        return None
+        model_id = self.vector_stores_config.default_embedding_model_id
+        embedding_dimension = await self._get_embedding_dimension_for_model(model_id)
+        if embedding_dimension is None:
+            raise ValueError(f"Embedding model '{model_id}' not found or has no embedding_dimension in metadata")
+
+        logger.debug(
+            f"Using default embedding model from vector stores config: {model_id} with dimension {embedding_dimension}"
+        )
+        return model_id, embedding_dimension
 
     async def openai_list_vector_stores(
         self,

View file

@@ -21,6 +21,7 @@ from llama_stack_client import LlamaStackClient
 from openai import OpenAI
 
 from llama_stack import LlamaStackAsLibraryClient
+from llama_stack.core.datatypes import VectorStoresConfig
 from llama_stack.core.stack import run_config_from_adhoc_config_spec
 from llama_stack.env import get_env_or_fail
 
@@ -236,6 +237,13 @@ def instantiate_llama_stack_client(session):
         if "=" in config:
             run_config = run_config_from_adhoc_config_spec(config)
+
+            # --stack-config bypasses template so need this to set default embedding model
+            if "vector_io" in config and "inference" in config:
+                run_config.vector_stores = VectorStoresConfig(
+                    default_embedding_model_id="inline::sentence-transformers/nomic-ai/nomic-embed-text-v1.5"
+                )
+
             run_config_file = tempfile.NamedTemporaryFile(delete=False, suffix=".yaml")
             with open(run_config_file.name, "w") as f:
                 yaml.dump(run_config.model_dump(), f)

View file

@@ -4,90 +4,44 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-"""
-Unit tests for Stack validation functions.
-"""
+"""Unit tests for Stack validation functions."""
 
 from unittest.mock import AsyncMock
 
 import pytest
 
 from llama_stack.apis.models import Model, ModelType
-from llama_stack.core.stack import validate_default_embedding_model
+from llama_stack.core.datatypes import StackRunConfig, VectorStoresConfig
+from llama_stack.core.stack import validate_vector_stores_config
 from llama_stack.providers.datatypes import Api
 
 
-class TestStackValidation:
-    """Test Stack validation functions."""
-
-    @pytest.mark.parametrize(
-        "models,should_raise",
-        [
-            ([], False),  # No models
-            (
-                [
-                    Model(
-                        identifier="emb1",
-                        model_type=ModelType.embedding,
-                        metadata={"default_configured": True},
-                        provider_id="p",
-                        provider_resource_id="emb1",
-                    )
-                ],
-                False,
-            ),  # Single default
-            (
-                [
-                    Model(
-                        identifier="emb1",
-                        model_type=ModelType.embedding,
-                        metadata={"default_configured": True},
-                        provider_id="p",
-                        provider_resource_id="emb1",
-                    ),
-                    Model(
-                        identifier="emb2",
-                        model_type=ModelType.embedding,
-                        metadata={"default_configured": True},
-                        provider_id="p",
-                        provider_resource_id="emb2",
-                    ),
-                ],
-                True,
-            ),  # Multiple defaults
-            (
-                [
-                    Model(
-                        identifier="emb1",
-                        model_type=ModelType.embedding,
-                        metadata={"default_configured": True},
-                        provider_id="p",
-                        provider_resource_id="emb1",
-                    ),
-                    Model(
-                        identifier="llm1",
-                        model_type=ModelType.llm,
-                        metadata={"default_configured": True},
-                        provider_id="p",
-                        provider_resource_id="llm1",
-                    ),
-                ],
-                False,
-            ),  # Ignores non-embedding
-        ],
-    )
-    async def test_validate_default_embedding_model(self, models, should_raise):
-        """Test validation with various model configurations."""
-        mock_models_impl = AsyncMock()
-        mock_models_impl.list_models.return_value = models
-        impls = {Api.models: mock_models_impl}
-
-        if should_raise:
-            with pytest.raises(ValueError, match="Multiple embedding models marked as default_configured=True"):
-                await validate_default_embedding_model(impls)
-        else:
-            await validate_default_embedding_model(impls)
-
-    async def test_validate_default_embedding_model_no_models_api(self):
-        """Test validation when models API is not available."""
-        await validate_default_embedding_model({})
+class TestVectorStoresValidation:
+    async def test_validate_missing_model(self):
+        """Test validation fails when model not found."""
+        run_config = StackRunConfig(
+            image_name="test", providers={}, vector_stores=VectorStoresConfig(default_embedding_model_id="missing")
+        )
+        mock_models = AsyncMock()
+        mock_models.list_models.return_value = []
+
+        with pytest.raises(ValueError, match="not found"):
+            await validate_vector_stores_config(run_config, {Api.models: mock_models})
+
+    async def test_validate_success(self):
+        """Test validation passes with valid model."""
+        run_config = StackRunConfig(
+            image_name="test", providers={}, vector_stores=VectorStoresConfig(default_embedding_model_id="valid")
+        )
+        mock_models = AsyncMock()
+        mock_models.list_models.return_value = [
+            Model(
+                identifier="valid",
+                model_type=ModelType.embedding,
+                metadata={"embedding_dimension": 768},
+                provider_id="p",
+                provider_resource_id="valid",
+            )
+        ]
+
+        await validate_vector_stores_config(run_config, {Api.models: mock_models})

View file

@@ -6,13 +6,12 @@
 import json
 import time
-from unittest.mock import AsyncMock, Mock, patch
+from unittest.mock import AsyncMock, patch
 
 import numpy as np
 import pytest
 
 from llama_stack.apis.common.errors import VectorStoreNotFoundError
-from llama_stack.apis.models import Model, ModelType
 from llama_stack.apis.vector_dbs import VectorDB
 from llama_stack.apis.vector_io import (
     Chunk,
@@ -996,96 +995,6 @@ async def test_max_concurrent_files_per_batch(vector_io_adapter):
     assert batch.file_counts.in_progress == 8
 
 
-async def test_get_default_embedding_model_success(vector_io_adapter):
-    """Test successful default embedding model detection."""
-    # Mock models API with a default model
-    mock_models_api = Mock()
-    mock_models_api.list_models = AsyncMock(
-        return_value=Mock(
-            data=[
-                Model(
-                    identifier="nomic-embed-text-v1.5",
-                    model_type=ModelType.embedding,
-                    provider_id="test-provider",
-                    metadata={
-                        "embedding_dimension": 768,
-                        "default_configured": True,
-                    },
-                )
-            ]
-        )
-    )
-    vector_io_adapter.models_api = mock_models_api
-
-    result = await vector_io_adapter._get_default_embedding_model_and_dimension()
-    assert result is not None
-    model_id, dimension = result
-    assert model_id == "nomic-embed-text-v1.5"
-    assert dimension == 768
-
-
-async def test_get_default_embedding_model_multiple_defaults_error(vector_io_adapter):
-    """Test error when multiple models are marked as default."""
-    mock_models_api = Mock()
-    mock_models_api.list_models = AsyncMock(
-        return_value=Mock(
-            data=[
-                Model(
-                    identifier="model1",
-                    model_type=ModelType.embedding,
-                    provider_id="test-provider",
-                    metadata={"embedding_dimension": 768, "default_configured": True},
-                ),
-                Model(
-                    identifier="model2",
-                    model_type=ModelType.embedding,
-                    provider_id="test-provider",
-                    metadata={"embedding_dimension": 512, "default_configured": True},
-                ),
-            ]
-        )
-    )
-    vector_io_adapter.models_api = mock_models_api
-
-    with pytest.raises(ValueError, match="Multiple embedding models marked as default_configured=True"):
-        await vector_io_adapter._get_default_embedding_model_and_dimension()
-
-
-async def test_openai_create_vector_store_uses_default_model(vector_io_adapter):
-    """Test that vector store creation uses default embedding model when none specified."""
-    # Mock models API and dependencies
-    mock_models_api = Mock()
-    mock_models_api.list_models = AsyncMock(
-        return_value=Mock(
-            data=[
-                Model(
-                    identifier="default-model",
-                    model_type=ModelType.embedding,
-                    provider_id="test-provider",
-                    metadata={"embedding_dimension": 512, "default_configured": True},
-                )
-            ]
-        )
-    )
-    vector_io_adapter.models_api = mock_models_api
-    vector_io_adapter.register_vector_db = AsyncMock()
-    vector_io_adapter.__provider_id__ = "test-provider"
-
-    # Create vector store without specifying embedding model
-    params = OpenAICreateVectorStoreRequestWithExtraBody(name="test-store")
-    result = await vector_io_adapter.openai_create_vector_store(params)
-
-    # Verify the vector store was created with default model
-    assert result.name == "test-store"
-    vector_io_adapter.register_vector_db.assert_called_once()
-    call_args = vector_io_adapter.register_vector_db.call_args[0][0]
-    assert call_args.embedding_model == "default-model"
-    assert call_args.embedding_dimension == 512
-
-
 async def test_embedding_config_from_metadata(vector_io_adapter):
     """Test that embedding configuration is correctly extracted from metadata."""