From 24a1430c8b110d166c370943524c9edaf6167271 Mon Sep 17 00:00:00 2001 From: Francisco Javier Arceo Date: Wed, 15 Oct 2025 17:15:43 -0400 Subject: [PATCH] chore: Updating how default embedding model is set in stack Signed-off-by: Francisco Javier Arceo # Conflicts: # .github/workflows/integration-vector-io-tests.yml # llama_stack/distributions/ci-tests/run.yaml # llama_stack/distributions/starter-gpu/run.yaml # llama_stack/distributions/starter/run.yaml # llama_stack/distributions/template.py # llama_stack/providers/utils/memory/openai_vector_store_mixin.py --- .../workflows/integration-vector-io-tests.yml | 4 +- docs/docs/building_applications/rag.mdx | 6 +- llama_stack/core/datatypes.py | 14 +++ llama_stack/core/resolver.py | 4 + llama_stack/core/stack.py | 69 +++++++----- llama_stack/distributions/ci-tests/run.yaml | 2 + .../distributions/starter-gpu/run.yaml | 2 + llama_stack/distributions/starter/run.yaml | 2 + llama_stack/distributions/starter/starter.py | 4 + llama_stack/distributions/template.py | 9 +- .../sentence_transformers.py | 1 - .../inline/vector_io/chroma/__init__.py | 10 +- .../inline/vector_io/faiss/__init__.py | 10 +- .../providers/inline/vector_io/faiss/faiss.py | 3 + .../inline/vector_io/milvus/__init__.py | 12 +- .../inline/vector_io/qdrant/__init__.py | 10 +- .../inline/vector_io/sqlite_vec/__init__.py | 10 +- .../inline/vector_io/sqlite_vec/sqlite_vec.py | 3 + .../remote/vector_io/chroma/__init__.py | 10 +- .../remote/vector_io/chroma/chroma.py | 12 +- .../remote/vector_io/milvus/__init__.py | 11 +- .../remote/vector_io/milvus/milvus.py | 5 +- .../remote/vector_io/pgvector/__init__.py | 16 ++- .../remote/vector_io/pgvector/pgvector.py | 3 + .../remote/vector_io/qdrant/__init__.py | 10 +- .../remote/vector_io/qdrant/qdrant.py | 3 + .../remote/vector_io/weaviate/__init__.py | 10 +- .../remote/vector_io/weaviate/weaviate.py | 3 + .../utils/memory/openai_vector_store_mixin.py | 78 ++++++------- tests/integration/fixtures/common.py | 8 ++ tests/unit/core/test_stack_validation.py | 104 +++++------------- .../test_vector_io_openai_vector_stores.py | 93 +--------------- 32 files changed, 276 insertions(+), 265 deletions(-) diff --git a/.github/workflows/integration-vector-io-tests.yml b/.github/workflows/integration-vector-io-tests.yml index fe5785c73..89dc64a45 100644 --- a/.github/workflows/integration-vector-io-tests.yml +++ b/.github/workflows/integration-vector-io-tests.yml @@ -169,9 +169,7 @@ jobs: run: | uv run --no-sync \ pytest -sv --stack-config="files=inline::localfs,inference=inline::sentence-transformers,vector_io=${{ matrix.vector-io-provider }}" \ - tests/integration/vector_io \ - --embedding-model inline::sentence-transformers/nomic-ai/nomic-embed-text-v1.5 \ - --embedding-dimension 768 + tests/integration/vector_io - name: Check Storage and Memory Available After Tests if: ${{ always() }} diff --git a/docs/docs/building_applications/rag.mdx b/docs/docs/building_applications/rag.mdx index edb6644f7..3d758e3e3 100644 --- a/docs/docs/building_applications/rag.mdx +++ b/docs/docs/building_applications/rag.mdx @@ -92,13 +92,15 @@ models: provider_id: inline::sentence-transformers metadata: embedding_dimension: 768 - default_configured: true + +vector_stores: + default_embedding_model_id: nomic-ai/nomic-embed-text-v1.5 ``` With this configuration: - `client.vector_stores.create()` works without requiring embedding model parameters - The system automatically uses the default model and its embedding dimension for any newly created vector store -- Only one model can be marked as `default_configured: true` +- The `vector_stores` section explicitly configures which embedding model to use as default ## Vector Store Operations diff --git a/llama_stack/core/datatypes.py b/llama_stack/core/datatypes.py index 94222d49e..d1e782510 100644 --- a/llama_stack/core/datatypes.py +++ b/llama_stack/core/datatypes.py @@ -351,6 +351,15 @@ class AuthenticationRequiredError(Exception): pass +class VectorStoresConfig(BaseModel): + """Configuration for vector stores in the stack.""" + + default_embedding_model_id: str = Field( + ..., + description="ID of the embedding model to use as default for vector stores when none is specified. Must reference a model defined in the 'models' section.", + ) + + class QuotaPeriod(StrEnum): DAY = "day" @@ -526,6 +535,11 @@ If not specified, a default SQLite store will be used.""", description="Path to directory containing external API implementations. The APIs code and dependencies must be installed on the system.", ) + vector_stores: VectorStoresConfig | None = Field( + default=None, + description="Configuration for vector stores, including default embedding model", + ) + @field_validator("external_providers_dir") @classmethod def validate_external_providers_dir(cls, v): diff --git a/llama_stack/core/resolver.py b/llama_stack/core/resolver.py index 73c047979..1c33010c4 100644 --- a/llama_stack/core/resolver.py +++ b/llama_stack/core/resolver.py @@ -409,6 +409,10 @@ async def instantiate_provider( if "telemetry_enabled" in inspect.signature(getattr(module, method)).parameters and run_config.telemetry: args.append(run_config.telemetry.enabled) + # vector_io providers need access to run_config.vector_stores + if provider_spec.api == Api.vector_io and "run_config" in inspect.signature(getattr(module, method)).parameters: + args.append(run_config) + fn = getattr(module, method) impl = await fn(*args) impl.__provider_id__ = provider.provider_id diff --git a/llama_stack/core/stack.py b/llama_stack/core/stack.py index 733b55262..5050102d8 100644 --- a/llama_stack/core/stack.py +++ b/llama_stack/core/stack.py @@ -98,30 +98,6 @@ REGISTRY_REFRESH_TASK = None TEST_RECORDING_CONTEXT = None -async def validate_default_embedding_model(impls: dict[Api, Any]): - """Validate that at most one embedding model is marked as default.""" - if Api.models not in impls: - return - - models_impl = impls[Api.models] - response = await models_impl.list_models() - models_list = response.data if hasattr(response, "data") else response - - default_embedding_models = [] - for model in models_list: - if model.model_type == "embedding" and model.metadata.get("default_configured") is True: - default_embedding_models.append(model.identifier) - - if len(default_embedding_models) > 1: - raise ValueError( - f"Multiple embedding models marked as default_configured=True: {default_embedding_models}. " - "Only one embedding model can be marked as default." - ) - - if default_embedding_models: - logger.info(f"Default embedding model configured: {default_embedding_models[0]}") - - async def register_resources(run_config: StackRunConfig, impls: dict[Api, Any]): for rsrc, api, register_method, list_method in RESOURCES: objects = getattr(run_config, rsrc) @@ -152,7 +128,48 @@ async def register_resources(run_config: StackRunConfig, impls: dict[Api, Any]): f"{rsrc.capitalize()}: {obj.identifier} served by {obj.provider_id}", ) - await validate_default_embedding_model(impls) + +async def validate_vector_stores_config(run_config: StackRunConfig, impls: dict[Api, Any]): + """Validate vector stores configuration.""" + if not run_config.vector_stores: + return + + vector_stores_config = run_config.vector_stores + default_model_id = vector_stores_config.default_embedding_model_id + + if Api.models not in impls: + raise ValueError(f"Models API is not available but vector_stores config requires model '{default_model_id}'") + + models_impl = impls[Api.models] + response = await models_impl.list_models() + models_list = response.data if hasattr(response, "data") else response + + # find default embedding model + default_model = None + for model in models_list: + if model.identifier == default_model_id: + default_model = model + break + + if not default_model: + available_models = [m.identifier for m in models_list if m.model_type == "embedding"] + raise ValueError( + f"Embedding model '{default_model_id}' not found. Available embedding models: {available_models}" + ) + + if default_model.model_type != "embedding": + raise ValueError(f"Model '{default_model_id}' is type '{default_model.model_type}', not 'embedding'") + + embedding_dimension = default_model.metadata.get("embedding_dimension") + if embedding_dimension is None: + raise ValueError(f"Embedding model '{default_model_id}' is missing 'embedding_dimension' in metadata") + + try: + int(embedding_dimension) + except ValueError as err: + raise ValueError(f"Embedding dimension '{embedding_dimension}' cannot be converted to an integer") from err + + logger.debug(f"Validated default embedding model: {default_model_id} (dimension: {embedding_dimension})") class EnvVarError(Exception): @@ -367,8 +384,8 @@ class Stack: await impls[Api.conversations].initialize() await register_resources(self.run_config, impls) - await refresh_registry_once(impls) + await validate_vector_stores_config(self.run_config, impls) self.impls = impls def create_registry_refresh_task(self): diff --git a/llama_stack/distributions/ci-tests/run.yaml b/llama_stack/distributions/ci-tests/run.yaml index a6a6b7c0d..42741f102 100644 --- a/llama_stack/distributions/ci-tests/run.yaml +++ b/llama_stack/distributions/ci-tests/run.yaml @@ -239,3 +239,5 @@ server: port: 8321 telemetry: enabled: true +vector_stores: + default_embedding_model_id: sentence-transformers/nomic-ai/nomic-embed-text-v1.5 diff --git a/llama_stack/distributions/starter-gpu/run.yaml b/llama_stack/distributions/starter-gpu/run.yaml index 370d4b516..9593b2947 100644 --- a/llama_stack/distributions/starter-gpu/run.yaml +++ b/llama_stack/distributions/starter-gpu/run.yaml @@ -240,5 +240,7 @@ tool_groups: provider_id: rag-runtime server: port: 8321 +vector_stores: + default_embedding_model_id: sentence-transformers/nomic-ai/nomic-embed-text-v1.5 telemetry: enabled: true diff --git a/llama_stack/distributions/starter/run.yaml b/llama_stack/distributions/starter/run.yaml index 2f4e7f350..2e2e5c1b0 100644 --- a/llama_stack/distributions/starter/run.yaml +++ b/llama_stack/distributions/starter/run.yaml @@ -239,3 +239,5 @@ server: port: 8321 telemetry: enabled: true +vector_stores: + default_embedding_model_id: sentence-transformers/nomic-ai/nomic-embed-text-v1.5 diff --git a/llama_stack/distributions/starter/starter.py b/llama_stack/distributions/starter/starter.py index f87ebcc5f..8d637a69a 100644 --- a/llama_stack/distributions/starter/starter.py +++ b/llama_stack/distributions/starter/starter.py @@ -13,6 +13,7 @@ from llama_stack.core.datatypes import ( ProviderSpec, ShieldInput, ToolGroupInput, + VectorStoresConfig, ) from llama_stack.core.utils.dynamic import instantiate_class_type from llama_stack.distributions.template import DistributionTemplate, RunConfigSettings @@ -227,6 +228,9 @@ def get_distribution_template(name: str = "starter") -> DistributionTemplate: default_models=[], default_tool_groups=default_tool_groups, default_shields=default_shields, + vector_stores_config=VectorStoresConfig( + default_embedding_model_id="sentence-transformers/nomic-ai/nomic-embed-text-v1.5" + ), ), }, run_config_env_vars={ diff --git a/llama_stack/distributions/template.py b/llama_stack/distributions/template.py index 807829999..64740d91d 100644 --- a/llama_stack/distributions/template.py +++ b/llama_stack/distributions/template.py @@ -27,6 +27,7 @@ from llama_stack.core.datatypes import ( ShieldInput, TelemetryConfig, ToolGroupInput, + VectorStoresConfig, ) from llama_stack.core.distribution import get_provider_registry from llama_stack.core.utils.dynamic import instantiate_class_type @@ -183,6 +184,7 @@ class RunConfigSettings(BaseModel): metadata_store: dict | None = None inference_store: dict | None = None conversations_store: dict | None = None + vector_stores_config: VectorStoresConfig | None = None telemetry: TelemetryConfig = Field(default_factory=lambda: TelemetryConfig(enabled=True)) def run_config( @@ -227,7 +229,7 @@ class RunConfigSettings(BaseModel): apis = sorted(providers.keys()) # Return a dict that matches StackRunConfig structure - return { + config = { "version": LLAMA_STACK_RUN_CONFIG_VERSION, "image_name": name, "container_image": container_image, @@ -261,6 +263,11 @@ class RunConfigSettings(BaseModel): "telemetry": self.telemetry.model_dump(exclude_none=True) if self.telemetry else None, } + if self.vector_stores_config: + config["vector_stores"] = self.vector_stores_config.model_dump(exclude_none=True) + + return config + class DistributionTemplate(BaseModel): """ diff --git a/llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py b/llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py index 871adcb24..cb72aa13a 100644 --- a/llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +++ b/llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py @@ -59,7 +59,6 @@ class SentenceTransformersInferenceImpl( provider_id=self.__provider_id__, metadata={ "embedding_dimension": 768, - "default_configured": True, }, model_type=ModelType.embedding, ), diff --git a/llama_stack/providers/inline/vector_io/chroma/__init__.py b/llama_stack/providers/inline/vector_io/chroma/__init__.py index 09e869c90..b1b4312eb 100644 --- a/llama_stack/providers/inline/vector_io/chroma/__init__.py +++ b/llama_stack/providers/inline/vector_io/chroma/__init__.py @@ -6,21 +6,29 @@ from typing import Any +from llama_stack.core.datatypes import StackRunConfig from llama_stack.providers.datatypes import Api from .config import ChromaVectorIOConfig -async def get_provider_impl(config: ChromaVectorIOConfig, deps: dict[Api, Any]): +async def get_provider_impl( + config: ChromaVectorIOConfig, deps: dict[Api, Any], run_config: StackRunConfig | None = None +): from llama_stack.providers.remote.vector_io.chroma.chroma import ( ChromaVectorIOAdapter, ) + vector_stores_config = None + if run_config and run_config.vector_stores: + vector_stores_config = run_config.vector_stores + impl = ChromaVectorIOAdapter( config, deps[Api.inference], deps[Api.models], deps.get(Api.files), + vector_stores_config, ) await impl.initialize() return impl diff --git a/llama_stack/providers/inline/vector_io/faiss/__init__.py b/llama_stack/providers/inline/vector_io/faiss/__init__.py index c0f01bc9d..e6aa2a1ef 100644 --- a/llama_stack/providers/inline/vector_io/faiss/__init__.py +++ b/llama_stack/providers/inline/vector_io/faiss/__init__.py @@ -6,21 +6,29 @@ from typing import Any +from llama_stack.core.datatypes import StackRunConfig from llama_stack.providers.datatypes import Api from .config import FaissVectorIOConfig -async def get_provider_impl(config: FaissVectorIOConfig, deps: dict[Api, Any]): +async def get_provider_impl( + config: FaissVectorIOConfig, deps: dict[Api, Any], run_config: StackRunConfig | None = None +): from .faiss import FaissVectorIOAdapter assert isinstance(config, FaissVectorIOConfig), f"Unexpected config type: {type(config)}" + vector_stores_config = None + if run_config and run_config.vector_stores: + vector_stores_config = run_config.vector_stores + impl = FaissVectorIOAdapter( config, deps[Api.inference], deps[Api.models], deps.get(Api.files), + vector_stores_config, ) await impl.initialize() return impl diff --git a/llama_stack/providers/inline/vector_io/faiss/faiss.py b/llama_stack/providers/inline/vector_io/faiss/faiss.py index df0864db8..7ec73bed5 100644 --- a/llama_stack/providers/inline/vector_io/faiss/faiss.py +++ b/llama_stack/providers/inline/vector_io/faiss/faiss.py @@ -24,6 +24,7 @@ from llama_stack.apis.vector_io import ( QueryChunksResponse, VectorIO, ) +from llama_stack.core.datatypes import VectorStoresConfig from llama_stack.log import get_logger from llama_stack.providers.datatypes import ( HealthResponse, @@ -206,11 +207,13 @@ class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPr inference_api: Inference, models_api: Models, files_api: Files | None, + vector_stores_config: VectorStoresConfig | None = None, ) -> None: super().__init__(files_api=files_api, kvstore=None) self.config = config self.inference_api = inference_api self.models_api = models_api + self.vector_stores_config = vector_stores_config self.cache: dict[str, VectorDBWithIndex] = {} async def initialize(self) -> None: diff --git a/llama_stack/providers/inline/vector_io/milvus/__init__.py b/llama_stack/providers/inline/vector_io/milvus/__init__.py index 46a006a91..239abd35d 100644 --- a/llama_stack/providers/inline/vector_io/milvus/__init__.py +++ b/llama_stack/providers/inline/vector_io/milvus/__init__.py @@ -6,19 +6,27 @@ from typing import Any +from llama_stack.core.datatypes import StackRunConfig from llama_stack.providers.datatypes import Api from .config import MilvusVectorIOConfig -async def get_provider_impl(config: MilvusVectorIOConfig, deps: dict[Api, Any]): +async def get_provider_impl( + config: MilvusVectorIOConfig, deps: dict[Api, Any], run_config: StackRunConfig | None = None +): from llama_stack.providers.remote.vector_io.milvus.milvus import MilvusVectorIOAdapter + vector_stores_config = None + if run_config and run_config.vector_stores: + vector_stores_config = run_config.vector_stores + impl = MilvusVectorIOAdapter( config, deps[Api.inference], - deps[Api.models], + deps.get(Api.models), deps.get(Api.files), + vector_stores_config, ) await impl.initialize() return impl diff --git a/llama_stack/providers/inline/vector_io/qdrant/__init__.py b/llama_stack/providers/inline/vector_io/qdrant/__init__.py index 2863f667c..efa4e3dcc 100644 --- a/llama_stack/providers/inline/vector_io/qdrant/__init__.py +++ b/llama_stack/providers/inline/vector_io/qdrant/__init__.py @@ -6,20 +6,28 @@ from typing import Any +from llama_stack.core.datatypes import StackRunConfig from llama_stack.providers.datatypes import Api from .config import QdrantVectorIOConfig -async def get_provider_impl(config: QdrantVectorIOConfig, deps: dict[Api, Any]): +async def get_provider_impl( + config: QdrantVectorIOConfig, deps: dict[Api, Any], run_config: StackRunConfig | None = None +): from llama_stack.providers.remote.vector_io.qdrant.qdrant import QdrantVectorIOAdapter + vector_stores_config = None + if run_config and run_config.vector_stores: + vector_stores_config = run_config.vector_stores + assert isinstance(config, QdrantVectorIOConfig), f"Unexpected config type: {type(config)}" impl = QdrantVectorIOAdapter( config, deps[Api.inference], deps[Api.models], deps.get(Api.files), + vector_stores_config, ) await impl.initialize() return impl diff --git a/llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py b/llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py index 93921fb23..4748a3874 100644 --- a/llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +++ b/llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py @@ -6,20 +6,28 @@ from typing import Any +from llama_stack.core.datatypes import StackRunConfig from llama_stack.providers.datatypes import Api from .config import SQLiteVectorIOConfig -async def get_provider_impl(config: SQLiteVectorIOConfig, deps: dict[Api, Any]): +async def get_provider_impl( + config: SQLiteVectorIOConfig, deps: dict[Api, Any], run_config: StackRunConfig | None = None +): from .sqlite_vec import SQLiteVecVectorIOAdapter + vector_stores_config = None + if run_config and run_config.vector_stores: + vector_stores_config = run_config.vector_stores + assert isinstance(config, SQLiteVectorIOConfig), f"Unexpected config type: {type(config)}" impl = SQLiteVecVectorIOAdapter( config, deps[Api.inference], deps[Api.models], deps.get(Api.files), + vector_stores_config, ) await impl.initialize() return impl diff --git a/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py b/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py index 8bc3b04cb..94d06e830 100644 --- a/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +++ b/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py @@ -24,6 +24,7 @@ from llama_stack.apis.vector_io import ( QueryChunksResponse, VectorIO, ) +from llama_stack.core.datatypes import VectorStoresConfig from llama_stack.log import get_logger from llama_stack.providers.datatypes import VectorDBsProtocolPrivate from llama_stack.providers.utils.kvstore import kvstore_impl @@ -416,11 +417,13 @@ class SQLiteVecVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtoc inference_api: Inference, models_api: Models, files_api: Files | None, + vector_stores_config: VectorStoresConfig | None = None, ) -> None: super().__init__(files_api=files_api, kvstore=None) self.config = config self.inference_api = inference_api self.models_api = models_api + self.vector_stores_config = vector_stores_config self.cache: dict[str, VectorDBWithIndex] = {} self.vector_db_store = None diff --git a/llama_stack/providers/remote/vector_io/chroma/__init__.py b/llama_stack/providers/remote/vector_io/chroma/__init__.py index a6db48c43..e0c9df638 100644 --- a/llama_stack/providers/remote/vector_io/chroma/__init__.py +++ b/llama_stack/providers/remote/vector_io/chroma/__init__.py @@ -4,19 +4,27 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. +from llama_stack.core.datatypes import StackRunConfig from llama_stack.providers.datatypes import Api, ProviderSpec from .config import ChromaVectorIOConfig -async def get_adapter_impl(config: ChromaVectorIOConfig, deps: dict[Api, ProviderSpec]): +async def get_adapter_impl( + config: ChromaVectorIOConfig, deps: dict[Api, ProviderSpec], run_config: StackRunConfig | None = None +): from .chroma import ChromaVectorIOAdapter + vector_stores_config = None + if run_config and run_config.vector_stores: + vector_stores_config = run_config.vector_stores + impl = ChromaVectorIOAdapter( config, deps[Api.inference], deps[Api.models], deps.get(Api.files), + vector_stores_config, ) await impl.initialize() return impl diff --git a/llama_stack/providers/remote/vector_io/chroma/chroma.py b/llama_stack/providers/remote/vector_io/chroma/chroma.py index 5792a83c6..9ed9672cd 100644 --- a/llama_stack/providers/remote/vector_io/chroma/chroma.py +++ b/llama_stack/providers/remote/vector_io/chroma/chroma.py @@ -12,15 +12,17 @@ import chromadb from numpy.typing import NDArray from llama_stack.apis.files import Files -from llama_stack.apis.inference import InterleavedContent +from llama_stack.apis.inference import Inference, InterleavedContent +from llama_stack.apis.models import Models from llama_stack.apis.vector_dbs import VectorDB from llama_stack.apis.vector_io import ( Chunk, QueryChunksResponse, VectorIO, ) +from llama_stack.core.datatypes import VectorStoresConfig from llama_stack.log import get_logger -from llama_stack.providers.datatypes import Api, VectorDBsProtocolPrivate +from llama_stack.providers.datatypes import VectorDBsProtocolPrivate from llama_stack.providers.inline.vector_io.chroma import ChromaVectorIOConfig as InlineChromaVectorIOConfig from llama_stack.providers.utils.kvstore import kvstore_impl from llama_stack.providers.utils.kvstore.api import KVStore @@ -137,15 +139,17 @@ class ChromaVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP def __init__( self, config: RemoteChromaVectorIOConfig | InlineChromaVectorIOConfig, - inference_api: Api.inference, - models_apis: Api.models, + inference_api: Inference, + models_apis: Models, files_api: Files | None, + vector_stores_config: VectorStoresConfig | None = None, ) -> None: super().__init__(files_api=files_api, kvstore=None) log.info(f"Initializing ChromaVectorIOAdapter with url: {config}") self.config = config self.inference_api = inference_api self.models_api = models_apis + self.vector_stores_config = vector_stores_config self.client = None self.cache = {} self.vector_db_store = None diff --git a/llama_stack/providers/remote/vector_io/milvus/__init__.py b/llama_stack/providers/remote/vector_io/milvus/__init__.py index dc5a642d6..0b9721beb 100644 --- a/llama_stack/providers/remote/vector_io/milvus/__init__.py +++ b/llama_stack/providers/remote/vector_io/milvus/__init__.py @@ -4,21 +4,28 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. +from llama_stack.core.datatypes import StackRunConfig from llama_stack.providers.datatypes import Api, ProviderSpec from .config import MilvusVectorIOConfig -async def get_adapter_impl(config: MilvusVectorIOConfig, deps: dict[Api, ProviderSpec]): +async def get_adapter_impl( + config: MilvusVectorIOConfig, deps: dict[Api, ProviderSpec], run_config: StackRunConfig | None = None +): from .milvus import MilvusVectorIOAdapter - assert isinstance(config, MilvusVectorIOConfig), f"Unexpected config type: {type(config)}" + vector_stores_config = None + if run_config and run_config.vector_stores: + vector_stores_config = run_config.vector_stores + assert isinstance(config, MilvusVectorIOConfig), f"Unexpected config type: {type(config)}" impl = MilvusVectorIOAdapter( config, deps[Api.inference], deps[Api.models], deps.get(Api.files), + vector_stores_config, ) await impl.initialize() return impl diff --git a/llama_stack/providers/remote/vector_io/milvus/milvus.py b/llama_stack/providers/remote/vector_io/milvus/milvus.py index d7147a7f0..dd01f6637 100644 --- a/llama_stack/providers/remote/vector_io/milvus/milvus.py +++ b/llama_stack/providers/remote/vector_io/milvus/milvus.py @@ -21,6 +21,7 @@ from llama_stack.apis.vector_io import ( QueryChunksResponse, VectorIO, ) +from llama_stack.core.datatypes import VectorStoresConfig from llama_stack.log import get_logger from llama_stack.providers.datatypes import VectorDBsProtocolPrivate from llama_stack.providers.inline.vector_io.milvus import MilvusVectorIOConfig as InlineMilvusVectorIOConfig @@ -308,8 +309,9 @@ class MilvusVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP self, config: RemoteMilvusVectorIOConfig | InlineMilvusVectorIOConfig, inference_api: Inference, - models_api: Models, + models_api: Models | None, files_api: Files | None, + vector_stores_config: VectorStoresConfig | None = None, ) -> None: super().__init__(files_api=files_api, kvstore=None) self.config = config @@ -317,6 +319,7 @@ class MilvusVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP self.client = None self.inference_api = inference_api self.models_api = models_api + self.vector_stores_config = vector_stores_config self.vector_db_store = None self.metadata_collection_name = "openai_vector_stores_metadata" diff --git a/llama_stack/providers/remote/vector_io/pgvector/__init__.py b/llama_stack/providers/remote/vector_io/pgvector/__init__.py index bb4079ab5..309c8e159 100644 --- a/llama_stack/providers/remote/vector_io/pgvector/__init__.py +++ b/llama_stack/providers/remote/vector_io/pgvector/__init__.py @@ -4,14 +4,26 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. +from llama_stack.core.datatypes import StackRunConfig from llama_stack.providers.datatypes import Api, ProviderSpec from .config import PGVectorVectorIOConfig -async def get_adapter_impl(config: PGVectorVectorIOConfig, deps: dict[Api, ProviderSpec]): +async def get_adapter_impl( + config: PGVectorVectorIOConfig, deps: dict[Api, ProviderSpec], run_config: StackRunConfig | None = None +): from .pgvector import PGVectorVectorIOAdapter - impl = PGVectorVectorIOAdapter(config, deps[Api.inference], deps[Api.models], deps.get(Api.files, None)) + vector_stores_config = None + if run_config and run_config.vector_stores: + vector_stores_config = run_config.vector_stores + impl = PGVectorVectorIOAdapter( + config, + deps[Api.inference], + deps[Api.models], + deps.get(Api.files), + vector_stores_config, + ) await impl.initialize() return impl diff --git a/llama_stack/providers/remote/vector_io/pgvector/pgvector.py b/llama_stack/providers/remote/vector_io/pgvector/pgvector.py index d55c13103..e58317bf0 100644 --- a/llama_stack/providers/remote/vector_io/pgvector/pgvector.py +++ b/llama_stack/providers/remote/vector_io/pgvector/pgvector.py @@ -23,6 +23,7 @@ from llama_stack.apis.vector_io import ( QueryChunksResponse, VectorIO, ) +from llama_stack.core.datatypes import VectorStoresConfig from llama_stack.log import get_logger from llama_stack.providers.datatypes import VectorDBsProtocolPrivate from llama_stack.providers.utils.inference.prompt_adapter import ( @@ -346,11 +347,13 @@ class PGVectorVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtoco inference_api: Inference, models_api: Models, files_api: Files | None = None, + vector_stores_config: VectorStoresConfig | None = None, ) -> None: super().__init__(files_api=files_api, kvstore=None) self.config = config self.inference_api = inference_api self.models_api = models_api + self.vector_stores_config = vector_stores_config self.conn = None self.cache = {} self.vector_db_store = None diff --git a/llama_stack/providers/remote/vector_io/qdrant/__init__.py b/llama_stack/providers/remote/vector_io/qdrant/__init__.py index c4942fbce..abdef9775 100644 --- a/llama_stack/providers/remote/vector_io/qdrant/__init__.py +++ b/llama_stack/providers/remote/vector_io/qdrant/__init__.py @@ -4,19 +4,27 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. +from llama_stack.core.datatypes import StackRunConfig from llama_stack.providers.datatypes import Api, ProviderSpec from .config import QdrantVectorIOConfig -async def get_adapter_impl(config: QdrantVectorIOConfig, deps: dict[Api, ProviderSpec]): +async def get_adapter_impl( + config: QdrantVectorIOConfig, deps: dict[Api, ProviderSpec], run_config: StackRunConfig | None = None +): from .qdrant import QdrantVectorIOAdapter + vector_stores_config = None + if run_config and run_config.vector_stores: + vector_stores_config = run_config.vector_stores + impl = QdrantVectorIOAdapter( config, deps[Api.inference], deps[Api.models], deps.get(Api.files), + vector_stores_config, ) await impl.initialize() return impl diff --git a/llama_stack/providers/remote/vector_io/qdrant/qdrant.py b/llama_stack/providers/remote/vector_io/qdrant/qdrant.py index 8b90935cd..db1e21a6c 100644 --- a/llama_stack/providers/remote/vector_io/qdrant/qdrant.py +++ b/llama_stack/providers/remote/vector_io/qdrant/qdrant.py @@ -25,6 +25,7 @@ from llama_stack.apis.vector_io import ( VectorStoreChunkingStrategy, VectorStoreFileObject, ) +from llama_stack.core.datatypes import VectorStoresConfig from llama_stack.log import get_logger from llama_stack.providers.datatypes import VectorDBsProtocolPrivate from llama_stack.providers.inline.vector_io.qdrant import QdrantVectorIOConfig as InlineQdrantVectorIOConfig @@ -163,6 +164,7 @@ class QdrantVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP inference_api: Inference, models_api: Models, files_api: Files | None = None, + vector_stores_config: VectorStoresConfig | None = None, ) -> None: super().__init__(files_api=files_api, kvstore=None) self.config = config @@ -170,6 +172,7 @@ class QdrantVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP self.cache = {} self.inference_api = inference_api self.models_api = models_api + self.vector_stores_config = vector_stores_config self.vector_db_store = None self._qdrant_lock = asyncio.Lock() diff --git a/llama_stack/providers/remote/vector_io/weaviate/__init__.py b/llama_stack/providers/remote/vector_io/weaviate/__init__.py index 2040dad96..ab0277cc7 100644 --- a/llama_stack/providers/remote/vector_io/weaviate/__init__.py +++ b/llama_stack/providers/remote/vector_io/weaviate/__init__.py @@ -4,19 +4,27 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. +from llama_stack.core.datatypes import StackRunConfig from llama_stack.providers.datatypes import Api, ProviderSpec from .config import WeaviateVectorIOConfig -async def get_adapter_impl(config: WeaviateVectorIOConfig, deps: dict[Api, ProviderSpec]): +async def get_adapter_impl( + config: WeaviateVectorIOConfig, deps: dict[Api, ProviderSpec], run_config: StackRunConfig | None = None +): from .weaviate import WeaviateVectorIOAdapter + vector_stores_config = None + if run_config and run_config.vector_stores: + vector_stores_config = run_config.vector_stores + impl = WeaviateVectorIOAdapter( config, deps[Api.inference], deps[Api.models], deps.get(Api.files), + vector_stores_config, ) await impl.initialize() return impl diff --git a/llama_stack/providers/remote/vector_io/weaviate/weaviate.py b/llama_stack/providers/remote/vector_io/weaviate/weaviate.py index d8b11c441..f8046e127 100644 --- a/llama_stack/providers/remote/vector_io/weaviate/weaviate.py +++ b/llama_stack/providers/remote/vector_io/weaviate/weaviate.py @@ -19,6 +19,7 @@ from llama_stack.apis.inference import Inference from llama_stack.apis.models import Models from llama_stack.apis.vector_dbs import VectorDB from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO +from llama_stack.core.datatypes import VectorStoresConfig from llama_stack.core.request_headers import NeedsRequestProviderData from llama_stack.log import get_logger from llama_stack.providers.datatypes import VectorDBsProtocolPrivate @@ -286,11 +287,13 @@ class WeaviateVectorIOAdapter( inference_api: Inference, models_api: Models, files_api: Files | None, + vector_stores_config: VectorStoresConfig | None = None, ) -> None: super().__init__(files_api=files_api, kvstore=None) self.config = config self.inference_api = inference_api self.models_api = models_api + self.vector_stores_config = vector_stores_config self.client_cache = {} self.cache = {} self.vector_db_store = None diff --git a/llama_stack/providers/utils/memory/openai_vector_store_mixin.py b/llama_stack/providers/utils/memory/openai_vector_store_mixin.py index 0e550434e..fbbe42d28 100644 --- a/llama_stack/providers/utils/memory/openai_vector_store_mixin.py +++ b/llama_stack/providers/utils/memory/openai_vector_store_mixin.py @@ -44,6 +44,7 @@ from llama_stack.apis.vector_io import ( VectorStoreSearchResponse, VectorStoreSearchResponsePage, ) +from llama_stack.core.datatypes import VectorStoresConfig from llama_stack.core.id_generation import generate_object_id from llama_stack.log import get_logger from llama_stack.providers.utils.kvstore.api import KVStore @@ -81,13 +82,17 @@ class OpenAIVectorStoreMixin(ABC): # Implementing classes should call super().__init__() in their __init__ method # to properly initialize the mixin attributes. def __init__( - self, files_api: Files | None = None, kvstore: KVStore | None = None, models_api: Models | None = None + self, + files_api: Files | None = None, + kvstore: KVStore | None = None, ): self.openai_vector_stores: dict[str, dict[str, Any]] = {} self.openai_file_batches: dict[str, dict[str, Any]] = {} self.files_api = files_api self.kvstore = kvstore - self.models_api = models_api + # These will be set by implementing classes + self.models_api: Models | None = None + self.vector_stores_config: VectorStoresConfig | None = None self._last_file_batch_cleanup_time = 0 self._file_batch_tasks: dict[str, asyncio.Task[None]] = {} @@ -474,24 +479,6 @@ class OpenAIVectorStoreMixin(ABC): store_info = self.openai_vector_stores[vector_db_id] return VectorStoreObject.model_validate(store_info) - async def _get_embedding_models(self) -> list[Model]: - """Get list of embedding models from the models API.""" - if not self.models_api: - return [] - - models_response = await self.models_api.list_models() - models_list = models_response.data if hasattr(models_response, "data") else models_response - - embedding_models = [] - for model in models_list: - if not isinstance(model, Model): - logger.warning(f"Non-Model object found in models list: {type(model)} - {model}") - continue - if model.model_type == "embedding": - embedding_models.append(model) - - return embedding_models - async def _get_embedding_dimension_for_model(self, model_id: str) -> int | None: """Get embedding dimension for a specific model by looking it up in the models API. @@ -501,9 +488,18 @@ class OpenAIVectorStoreMixin(ABC): Returns: The embedding dimension for the model, or None if not found """ - embedding_models = await self._get_embedding_models() + if not self.models_api: + return None + + models_response = await self.models_api.list_models() + models_list = models_response.data if hasattr(models_response, "data") else models_response + + for model in models_list: + if not isinstance(model, Model): + continue + if model.model_type != "embedding": + continue - for model in embedding_models: # Check for exact match first if model.identifier == model_id: embedding_dimension = model.metadata.get("embedding_dimension") @@ -523,35 +519,23 @@ class OpenAIVectorStoreMixin(ABC): return None async def _get_default_embedding_model_and_dimension(self) -> tuple[str, int] | None: - """Get default embedding model from the models API. + """Get default embedding model from vector stores config. - Looks for embedding models marked with default_configured=True in metadata. - Returns None if no default embedding model is found. - Raises ValueError if multiple defaults are found. + Returns None if no vector stores config is provided. """ - embedding_models = await self._get_embedding_models() + if not self.vector_stores_config: + logger.info("No vector stores config provided") + return None - default_models = [] - for model in embedding_models: - if model.metadata.get("default_configured") is True: - default_models.append(model.identifier) + model_id = self.vector_stores_config.default_embedding_model_id + embedding_dimension = await self._get_embedding_dimension_for_model(model_id) + if embedding_dimension is None: + raise ValueError(f"Embedding model '{model_id}' not found or has no embedding_dimension in metadata") - if len(default_models) > 1: - raise ValueError( - f"Multiple embedding models marked as default_configured=True: {default_models}. " - "Only one embedding model can be marked as default." - ) - - if default_models: - model_id = default_models[0] - embedding_dimension = await self._get_embedding_dimension_for_model(model_id) - if embedding_dimension is None: - raise ValueError(f"Embedding model '{model_id}' has no embedding_dimension in metadata") - logger.info(f"Using default embedding model: {model_id} with dimension {embedding_dimension}") - return model_id, embedding_dimension - - logger.debug("No default embedding models found") - return None + logger.debug( + f"Using default embedding model from vector stores config: {model_id} with dimension {embedding_dimension}" + ) + return model_id, embedding_dimension async def openai_list_vector_stores( self, diff --git a/tests/integration/fixtures/common.py b/tests/integration/fixtures/common.py index 68a30fc69..cd5f49488 100644 --- a/tests/integration/fixtures/common.py +++ b/tests/integration/fixtures/common.py @@ -21,6 +21,7 @@ from llama_stack_client import LlamaStackClient from openai import OpenAI from llama_stack import LlamaStackAsLibraryClient +from llama_stack.core.datatypes import VectorStoresConfig from llama_stack.core.stack import run_config_from_adhoc_config_spec from llama_stack.env import get_env_or_fail @@ -236,6 +237,13 @@ def instantiate_llama_stack_client(session): if "=" in config: run_config = run_config_from_adhoc_config_spec(config) + + # --stack-config bypasses template so need this to set default embedding model + if "vector_io" in config and "inference" in config: + run_config.vector_stores = VectorStoresConfig( + default_embedding_model_id="inline::sentence-transformers/nomic-ai/nomic-embed-text-v1.5" + ) + run_config_file = tempfile.NamedTemporaryFile(delete=False, suffix=".yaml") with open(run_config_file.name, "w") as f: yaml.dump(run_config.model_dump(), f) diff --git a/tests/unit/core/test_stack_validation.py b/tests/unit/core/test_stack_validation.py index 5fc27e199..b5f6c1b24 100644 --- a/tests/unit/core/test_stack_validation.py +++ b/tests/unit/core/test_stack_validation.py @@ -4,90 +4,44 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -""" -Unit tests for Stack validation functions. -""" +"""Unit tests for Stack validation functions.""" from unittest.mock import AsyncMock import pytest from llama_stack.apis.models import Model, ModelType -from llama_stack.core.stack import validate_default_embedding_model +from llama_stack.core.datatypes import StackRunConfig, VectorStoresConfig +from llama_stack.core.stack import validate_vector_stores_config from llama_stack.providers.datatypes import Api -class TestStackValidation: - """Test Stack validation functions.""" +class TestVectorStoresValidation: + async def test_validate_missing_model(self): + """Test validation fails when model not found.""" + run_config = StackRunConfig( + image_name="test", providers={}, vector_stores=VectorStoresConfig(default_embedding_model_id="missing") + ) + mock_models = AsyncMock() + mock_models.list_models.return_value = [] - @pytest.mark.parametrize( - "models,should_raise", - [ - ([], False), # No models - ( - [ - Model( - identifier="emb1", - model_type=ModelType.embedding, - metadata={"default_configured": True}, - provider_id="p", - provider_resource_id="emb1", - ) - ], - False, - ), # Single default - ( - [ - Model( - identifier="emb1", - model_type=ModelType.embedding, - metadata={"default_configured": True}, - provider_id="p", - provider_resource_id="emb1", - ), - Model( - identifier="emb2", - model_type=ModelType.embedding, - metadata={"default_configured": True}, - provider_id="p", - provider_resource_id="emb2", - ), - ], - True, - ), # Multiple defaults - ( - [ - Model( - identifier="emb1", - model_type=ModelType.embedding, - metadata={"default_configured": True}, - provider_id="p", - provider_resource_id="emb1", - ), - Model( - identifier="llm1", - model_type=ModelType.llm, - metadata={"default_configured": True}, - provider_id="p", - provider_resource_id="llm1", - ), - ], - False, - ), # Ignores non-embedding - ], - ) - async def test_validate_default_embedding_model(self, models, should_raise): - """Test validation with various model configurations.""" - mock_models_impl = AsyncMock() - mock_models_impl.list_models.return_value = models - impls = {Api.models: mock_models_impl} + with pytest.raises(ValueError, match="not found"): + await validate_vector_stores_config(run_config, {Api.models: mock_models}) - if should_raise: - with pytest.raises(ValueError, match="Multiple embedding models marked as default_configured=True"): - await validate_default_embedding_model(impls) - else: - await validate_default_embedding_model(impls) + async def test_validate_success(self): + """Test validation passes with valid model.""" + run_config = StackRunConfig( + image_name="test", providers={}, vector_stores=VectorStoresConfig(default_embedding_model_id="valid") + ) + mock_models = AsyncMock() + mock_models.list_models.return_value = [ + Model( + identifier="valid", + model_type=ModelType.embedding, + metadata={"embedding_dimension": 768}, + provider_id="p", + provider_resource_id="valid", + ) + ] - async def test_validate_default_embedding_model_no_models_api(self): - """Test validation when models API is not available.""" - await validate_default_embedding_model({}) + await validate_vector_stores_config(run_config, {Api.models: mock_models}) diff --git a/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py b/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py index 32d59c91b..a3d2e3173 100644 --- a/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py +++ b/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py @@ -6,13 +6,12 @@ import json import time -from unittest.mock import AsyncMock, Mock, patch +from unittest.mock import AsyncMock, patch import numpy as np import pytest from llama_stack.apis.common.errors import VectorStoreNotFoundError -from llama_stack.apis.models import Model, ModelType from llama_stack.apis.vector_dbs import VectorDB from llama_stack.apis.vector_io import ( Chunk, @@ -996,96 +995,6 @@ async def test_max_concurrent_files_per_batch(vector_io_adapter): assert batch.file_counts.in_progress == 8 -async def test_get_default_embedding_model_success(vector_io_adapter): - """Test successful default embedding model detection.""" - # Mock models API with a default model - mock_models_api = Mock() - mock_models_api.list_models = AsyncMock( - return_value=Mock( - data=[ - Model( - identifier="nomic-embed-text-v1.5", - model_type=ModelType.embedding, - provider_id="test-provider", - metadata={ - "embedding_dimension": 768, - "default_configured": True, - }, - ) - ] - ) - ) - - vector_io_adapter.models_api = mock_models_api - result = await vector_io_adapter._get_default_embedding_model_and_dimension() - - assert result is not None - model_id, dimension = result - assert model_id == "nomic-embed-text-v1.5" - assert dimension == 768 - - -async def test_get_default_embedding_model_multiple_defaults_error(vector_io_adapter): - """Test error when multiple models are marked as default.""" - mock_models_api = Mock() - mock_models_api.list_models = AsyncMock( - return_value=Mock( - data=[ - Model( - identifier="model1", - model_type=ModelType.embedding, - provider_id="test-provider", - metadata={"embedding_dimension": 768, "default_configured": True}, - ), - Model( - identifier="model2", - model_type=ModelType.embedding, - provider_id="test-provider", - metadata={"embedding_dimension": 512, "default_configured": True}, - ), - ] - ) - ) - - vector_io_adapter.models_api = mock_models_api - - with pytest.raises(ValueError, match="Multiple embedding models marked as default_configured=True"): - await vector_io_adapter._get_default_embedding_model_and_dimension() - - -async def test_openai_create_vector_store_uses_default_model(vector_io_adapter): - """Test that vector store creation uses default embedding model when none specified.""" - # Mock models API and dependencies - mock_models_api = Mock() - mock_models_api.list_models = AsyncMock( - return_value=Mock( - data=[ - Model( - identifier="default-model", - model_type=ModelType.embedding, - provider_id="test-provider", - metadata={"embedding_dimension": 512, "default_configured": True}, - ) - ] - ) - ) - - vector_io_adapter.models_api = mock_models_api - vector_io_adapter.register_vector_db = AsyncMock() - vector_io_adapter.__provider_id__ = "test-provider" - - # Create vector store without specifying embedding model - params = OpenAICreateVectorStoreRequestWithExtraBody(name="test-store") - result = await vector_io_adapter.openai_create_vector_store(params) - - # Verify the vector store was created with default model - assert result.name == "test-store" - vector_io_adapter.register_vector_db.assert_called_once() - call_args = vector_io_adapter.register_vector_db.call_args[0][0] - assert call_args.embedding_model == "default-model" - assert call_args.embedding_dimension == 512 - - async def test_embedding_config_from_metadata(vector_io_adapter): """Test that embedding configuration is correctly extracted from metadata."""