chore: Update how the default embedding model is set in the stack

Signed-off-by: Francisco Javier Arceo <farceo@redhat.com>

# Conflicts:
#	.github/workflows/integration-vector-io-tests.yml
#	llama_stack/distributions/ci-tests/run.yaml
#	llama_stack/distributions/starter-gpu/run.yaml
#	llama_stack/distributions/starter/run.yaml
#	llama_stack/distributions/template.py
#	llama_stack/providers/utils/memory/openai_vector_store_mixin.py
Francisco Javier Arceo 2025-10-15 17:15:43 -04:00
parent cd152f4240
commit 24a1430c8b
32 changed files with 276 additions and 265 deletions


@@ -351,6 +351,15 @@ class AuthenticationRequiredError(Exception):
pass
class VectorStoresConfig(BaseModel):
"""Configuration for vector stores in the stack."""
default_embedding_model_id: str = Field(
...,
description="ID of the embedding model to use as default for vector stores when none is specified. Must reference a model defined in the 'models' section.",
)
class QuotaPeriod(StrEnum):
DAY = "day"
@@ -526,6 +535,11 @@ If not specified, a default SQLite store will be used.""",
description="Path to directory containing external API implementations. The APIs code and dependencies must be installed on the system.",
)
vector_stores: VectorStoresConfig | None = Field(
default=None,
description="Configuration for vector stores, including default embedding model",
)
@field_validator("external_providers_dir")
@classmethod
def validate_external_providers_dir(cls, v):
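For orientation, a minimal sketch of the config surface these two hunks add (the model ID is the one the starter distributions pin below; everything else is illustrative):

    from llama_stack.core.datatypes import StackRunConfig, VectorStoresConfig

    # default_embedding_model_id is declared with Field(...), so constructing
    # the config without it raises a pydantic ValidationError
    cfg = VectorStoresConfig(
        default_embedding_model_id="sentence-transformers/nomic-ai/nomic-embed-text-v1.5"
    )

    # StackRunConfig.vector_stores defaults to None, so existing run configs
    # without a vector_stores block keep working unchanged:
    # run_config = StackRunConfig(..., vector_stores=cfg)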


@@ -409,6 +409,10 @@ async def instantiate_provider(
if "telemetry_enabled" in inspect.signature(getattr(module, method)).parameters and run_config.telemetry:
args.append(run_config.telemetry.enabled)
# vector_io providers need access to run_config.vector_stores
if provider_spec.api == Api.vector_io and "run_config" in inspect.signature(getattr(module, method)).parameters:
args.append(run_config)
fn = getattr(module, method)
impl = await fn(*args)
impl.__provider_id__ = provider.provider_id
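The signature probe above targets the entrypoint shape the vector_io providers adopt in the hunks below; a sketch of the expected contract (the config type is a placeholder):

    from typing import Any

    from llama_stack.core.datatypes import StackRunConfig
    from llama_stack.providers.datatypes import Api

    async def get_provider_impl(
        config: Any,  # each provider's own config class in practice
        deps: dict[Api, Any],
        run_config: StackRunConfig | None = None,  # appended only when declared
    ):
        # providers that do not declare run_config are still called with
        # (config, deps) alone, so the change is backward compatible
        ...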


@@ -98,30 +98,6 @@ REGISTRY_REFRESH_TASK = None
TEST_RECORDING_CONTEXT = None
async def validate_default_embedding_model(impls: dict[Api, Any]):
"""Validate that at most one embedding model is marked as default."""
if Api.models not in impls:
return
models_impl = impls[Api.models]
response = await models_impl.list_models()
models_list = response.data if hasattr(response, "data") else response
default_embedding_models = []
for model in models_list:
if model.model_type == "embedding" and model.metadata.get("default_configured") is True:
default_embedding_models.append(model.identifier)
if len(default_embedding_models) > 1:
raise ValueError(
f"Multiple embedding models marked as default_configured=True: {default_embedding_models}. "
"Only one embedding model can be marked as default."
)
if default_embedding_models:
logger.info(f"Default embedding model configured: {default_embedding_models[0]}")
async def register_resources(run_config: StackRunConfig, impls: dict[Api, Any]):
for rsrc, api, register_method, list_method in RESOURCES:
objects = getattr(run_config, rsrc)
@@ -152,7 +128,48 @@ async def register_resources(run_config: StackRunConfig, impls: dict[Api, Any]):
f"{rsrc.capitalize()}: {obj.identifier} served by {obj.provider_id}",
)
await validate_default_embedding_model(impls)
async def validate_vector_stores_config(run_config: StackRunConfig, impls: dict[Api, Any]):
"""Validate vector stores configuration."""
if not run_config.vector_stores:
return
vector_stores_config = run_config.vector_stores
default_model_id = vector_stores_config.default_embedding_model_id
if Api.models not in impls:
raise ValueError(f"Models API is not available but vector_stores config requires model '{default_model_id}'")
models_impl = impls[Api.models]
response = await models_impl.list_models()
models_list = response.data if hasattr(response, "data") else response
# find default embedding model
default_model = None
for model in models_list:
if model.identifier == default_model_id:
default_model = model
break
if not default_model:
available_models = [m.identifier for m in models_list if m.model_type == "embedding"]
raise ValueError(
f"Embedding model '{default_model_id}' not found. Available embedding models: {available_models}"
)
if default_model.model_type != "embedding":
raise ValueError(f"Model '{default_model_id}' is type '{default_model.model_type}', not 'embedding'")
embedding_dimension = default_model.metadata.get("embedding_dimension")
if embedding_dimension is None:
raise ValueError(f"Embedding model '{default_model_id}' is missing 'embedding_dimension' in metadata")
try:
int(embedding_dimension)
except ValueError as err:
raise ValueError(f"Embedding dimension '{embedding_dimension}' cannot be converted to an integer") from err
logger.debug(f"Validated default embedding model: {default_model_id} (dimension: {embedding_dimension})")
class EnvVarError(Exception):
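To illustrate what this validation catches at startup, assume a run config whose default points at a non-embedding model (the model ID here is hypothetical):

    # vector_stores:
    #   default_embedding_model_id: meta-llama/Llama-3.1-8B-Instruct
    #
    # Stack initialization fails fast with:
    #   ValueError: Model 'meta-llama/Llama-3.1-8B-Instruct' is type 'llm', not 'embedding'
    # An unknown ID instead fails with the list of available embedding models.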
@@ -367,8 +384,8 @@ class Stack:
await impls[Api.conversations].initialize()
await register_resources(self.run_config, impls)
await refresh_registry_once(impls)
await validate_vector_stores_config(self.run_config, impls)
self.impls = impls
def create_registry_refresh_task(self):


@@ -239,3 +239,5 @@ server:
port: 8321
telemetry:
enabled: true
vector_stores:
default_embedding_model_id: sentence-transformers/nomic-ai/nomic-embed-text-v1.5


@@ -240,5 +240,7 @@ tool_groups:
provider_id: rag-runtime
server:
port: 8321
vector_stores:
default_embedding_model_id: sentence-transformers/nomic-ai/nomic-embed-text-v1.5
telemetry:
enabled: true


@@ -239,3 +239,5 @@ server:
port: 8321
telemetry:
enabled: true
vector_stores:
default_embedding_model_id: sentence-transformers/nomic-ai/nomic-embed-text-v1.5


@@ -13,6 +13,7 @@ from llama_stack.core.datatypes import (
ProviderSpec,
ShieldInput,
ToolGroupInput,
VectorStoresConfig,
)
from llama_stack.core.utils.dynamic import instantiate_class_type
from llama_stack.distributions.template import DistributionTemplate, RunConfigSettings
@@ -227,6 +228,9 @@ def get_distribution_template(name: str = "starter") -> DistributionTemplate:
default_models=[],
default_tool_groups=default_tool_groups,
default_shields=default_shields,
vector_stores_config=VectorStoresConfig(
default_embedding_model_id="sentence-transformers/nomic-ai/nomic-embed-text-v1.5"
),
),
},
run_config_env_vars={


@@ -27,6 +27,7 @@ from llama_stack.core.datatypes import (
ShieldInput,
TelemetryConfig,
ToolGroupInput,
VectorStoresConfig,
)
from llama_stack.core.distribution import get_provider_registry
from llama_stack.core.utils.dynamic import instantiate_class_type
@@ -183,6 +184,7 @@ class RunConfigSettings(BaseModel):
metadata_store: dict | None = None
inference_store: dict | None = None
conversations_store: dict | None = None
vector_stores_config: VectorStoresConfig | None = None
telemetry: TelemetryConfig = Field(default_factory=lambda: TelemetryConfig(enabled=True))
def run_config(
@@ -227,7 +229,7 @@
apis = sorted(providers.keys())
# Return a dict that matches StackRunConfig structure
return {
config = {
"version": LLAMA_STACK_RUN_CONFIG_VERSION,
"image_name": name,
"container_image": container_image,
@@ -261,6 +263,11 @@
"telemetry": self.telemetry.model_dump(exclude_none=True) if self.telemetry else None,
}
if self.vector_stores_config:
config["vector_stores"] = self.vector_stores_config.model_dump(exclude_none=True)
return config
class DistributionTemplate(BaseModel):
"""


@@ -59,7 +59,6 @@
provider_id=self.__provider_id__,
metadata={
"embedding_dimension": 768,
"default_configured": True,
},
model_type=ModelType.embedding,
),


@@ -6,21 +6,29 @@
from typing import Any
from llama_stack.core.datatypes import StackRunConfig
from llama_stack.providers.datatypes import Api
from .config import ChromaVectorIOConfig
async def get_provider_impl(config: ChromaVectorIOConfig, deps: dict[Api, Any]):
async def get_provider_impl(
config: ChromaVectorIOConfig, deps: dict[Api, Any], run_config: StackRunConfig | None = None
):
from llama_stack.providers.remote.vector_io.chroma.chroma import (
ChromaVectorIOAdapter,
)
vector_stores_config = None
if run_config and run_config.vector_stores:
vector_stores_config = run_config.vector_stores
impl = ChromaVectorIOAdapter(
config,
deps[Api.inference],
deps[Api.models],
deps.get(Api.files),
vector_stores_config,
)
await impl.initialize()
return impl


@@ -6,21 +6,29 @@
from typing import Any
from llama_stack.core.datatypes import StackRunConfig
from llama_stack.providers.datatypes import Api
from .config import FaissVectorIOConfig
async def get_provider_impl(config: FaissVectorIOConfig, deps: dict[Api, Any]):
async def get_provider_impl(
config: FaissVectorIOConfig, deps: dict[Api, Any], run_config: StackRunConfig | None = None
):
from .faiss import FaissVectorIOAdapter
assert isinstance(config, FaissVectorIOConfig), f"Unexpected config type: {type(config)}"
vector_stores_config = None
if run_config and run_config.vector_stores:
vector_stores_config = run_config.vector_stores
impl = FaissVectorIOAdapter(
config,
deps[Api.inference],
deps[Api.models],
deps.get(Api.files),
vector_stores_config,
)
await impl.initialize()
return impl


@@ -24,6 +24,7 @@ from llama_stack.apis.vector_io import (
QueryChunksResponse,
VectorIO,
)
from llama_stack.core.datatypes import VectorStoresConfig
from llama_stack.log import get_logger
from llama_stack.providers.datatypes import (
HealthResponse,
@@ -206,11 +207,13 @@ class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPr
inference_api: Inference,
models_api: Models,
files_api: Files | None,
vector_stores_config: VectorStoresConfig | None = None,
) -> None:
super().__init__(files_api=files_api, kvstore=None)
self.config = config
self.inference_api = inference_api
self.models_api = models_api
self.vector_stores_config = vector_stores_config
self.cache: dict[str, VectorDBWithIndex] = {}
async def initialize(self) -> None:


@@ -6,19 +6,27 @@
from typing import Any
from llama_stack.core.datatypes import StackRunConfig
from llama_stack.providers.datatypes import Api
from .config import MilvusVectorIOConfig
async def get_provider_impl(config: MilvusVectorIOConfig, deps: dict[Api, Any]):
async def get_provider_impl(
config: MilvusVectorIOConfig, deps: dict[Api, Any], run_config: StackRunConfig | None = None
):
from llama_stack.providers.remote.vector_io.milvus.milvus import MilvusVectorIOAdapter
vector_stores_config = None
if run_config and run_config.vector_stores:
vector_stores_config = run_config.vector_stores
impl = MilvusVectorIOAdapter(
config,
deps[Api.inference],
deps[Api.models],
deps.get(Api.models),
deps.get(Api.files),
vector_stores_config,
)
await impl.initialize()
return impl


@@ -6,20 +6,28 @@
from typing import Any
from llama_stack.core.datatypes import StackRunConfig
from llama_stack.providers.datatypes import Api
from .config import QdrantVectorIOConfig
async def get_provider_impl(config: QdrantVectorIOConfig, deps: dict[Api, Any]):
async def get_provider_impl(
config: QdrantVectorIOConfig, deps: dict[Api, Any], run_config: StackRunConfig | None = None
):
from llama_stack.providers.remote.vector_io.qdrant.qdrant import QdrantVectorIOAdapter
vector_stores_config = None
if run_config and run_config.vector_stores:
vector_stores_config = run_config.vector_stores
assert isinstance(config, QdrantVectorIOConfig), f"Unexpected config type: {type(config)}"
impl = QdrantVectorIOAdapter(
config,
deps[Api.inference],
deps[Api.models],
deps.get(Api.files),
vector_stores_config,
)
await impl.initialize()
return impl


@@ -6,20 +6,28 @@
from typing import Any
from llama_stack.core.datatypes import StackRunConfig
from llama_stack.providers.datatypes import Api
from .config import SQLiteVectorIOConfig
async def get_provider_impl(config: SQLiteVectorIOConfig, deps: dict[Api, Any]):
async def get_provider_impl(
config: SQLiteVectorIOConfig, deps: dict[Api, Any], run_config: StackRunConfig | None = None
):
from .sqlite_vec import SQLiteVecVectorIOAdapter
vector_stores_config = None
if run_config and run_config.vector_stores:
vector_stores_config = run_config.vector_stores
assert isinstance(config, SQLiteVectorIOConfig), f"Unexpected config type: {type(config)}"
impl = SQLiteVecVectorIOAdapter(
config,
deps[Api.inference],
deps[Api.models],
deps.get(Api.files),
vector_stores_config,
)
await impl.initialize()
return impl


@@ -24,6 +24,7 @@ from llama_stack.apis.vector_io import (
QueryChunksResponse,
VectorIO,
)
from llama_stack.core.datatypes import VectorStoresConfig
from llama_stack.log import get_logger
from llama_stack.providers.datatypes import VectorDBsProtocolPrivate
from llama_stack.providers.utils.kvstore import kvstore_impl
@@ -416,11 +417,13 @@ class SQLiteVecVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtoc
inference_api: Inference,
models_api: Models,
files_api: Files | None,
vector_stores_config: VectorStoresConfig | None = None,
) -> None:
super().__init__(files_api=files_api, kvstore=None)
self.config = config
self.inference_api = inference_api
self.models_api = models_api
self.vector_stores_config = vector_stores_config
self.cache: dict[str, VectorDBWithIndex] = {}
self.vector_db_store = None


@@ -4,19 +4,27 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from llama_stack.core.datatypes import StackRunConfig
from llama_stack.providers.datatypes import Api, ProviderSpec
from .config import ChromaVectorIOConfig
async def get_adapter_impl(config: ChromaVectorIOConfig, deps: dict[Api, ProviderSpec]):
async def get_adapter_impl(
config: ChromaVectorIOConfig, deps: dict[Api, ProviderSpec], run_config: StackRunConfig | None = None
):
from .chroma import ChromaVectorIOAdapter
vector_stores_config = None
if run_config and run_config.vector_stores:
vector_stores_config = run_config.vector_stores
impl = ChromaVectorIOAdapter(
config,
deps[Api.inference],
deps[Api.models],
deps.get(Api.files),
vector_stores_config,
)
await impl.initialize()
return impl


@@ -12,15 +12,17 @@ import chromadb
from numpy.typing import NDArray
from llama_stack.apis.files import Files
from llama_stack.apis.inference import InterleavedContent
from llama_stack.apis.inference import Inference, InterleavedContent
from llama_stack.apis.models import Models
from llama_stack.apis.vector_dbs import VectorDB
from llama_stack.apis.vector_io import (
Chunk,
QueryChunksResponse,
VectorIO,
)
from llama_stack.core.datatypes import VectorStoresConfig
from llama_stack.log import get_logger
from llama_stack.providers.datatypes import Api, VectorDBsProtocolPrivate
from llama_stack.providers.datatypes import VectorDBsProtocolPrivate
from llama_stack.providers.inline.vector_io.chroma import ChromaVectorIOConfig as InlineChromaVectorIOConfig
from llama_stack.providers.utils.kvstore import kvstore_impl
from llama_stack.providers.utils.kvstore.api import KVStore
@@ -137,15 +139,17 @@ class ChromaVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP
def __init__(
self,
config: RemoteChromaVectorIOConfig | InlineChromaVectorIOConfig,
inference_api: Api.inference,
models_apis: Api.models,
inference_api: Inference,
models_apis: Models,
files_api: Files | None,
vector_stores_config: VectorStoresConfig | None = None,
) -> None:
super().__init__(files_api=files_api, kvstore=None)
log.info(f"Initializing ChromaVectorIOAdapter with url: {config}")
self.config = config
self.inference_api = inference_api
self.models_api = models_apis
self.vector_stores_config = vector_stores_config
self.client = None
self.cache = {}
self.vector_db_store = None


@@ -4,21 +4,28 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from llama_stack.core.datatypes import StackRunConfig
from llama_stack.providers.datatypes import Api, ProviderSpec
from .config import MilvusVectorIOConfig
async def get_adapter_impl(config: MilvusVectorIOConfig, deps: dict[Api, ProviderSpec]):
async def get_adapter_impl(
config: MilvusVectorIOConfig, deps: dict[Api, ProviderSpec], run_config: StackRunConfig | None = None
):
from .milvus import MilvusVectorIOAdapter
assert isinstance(config, MilvusVectorIOConfig), f"Unexpected config type: {type(config)}"
vector_stores_config = None
if run_config and run_config.vector_stores:
vector_stores_config = run_config.vector_stores
assert isinstance(config, MilvusVectorIOConfig), f"Unexpected config type: {type(config)}"
impl = MilvusVectorIOAdapter(
config,
deps[Api.inference],
deps[Api.models],
deps.get(Api.files),
vector_stores_config,
)
await impl.initialize()
return impl


@@ -21,6 +21,7 @@ from llama_stack.apis.vector_io import (
QueryChunksResponse,
VectorIO,
)
from llama_stack.core.datatypes import VectorStoresConfig
from llama_stack.log import get_logger
from llama_stack.providers.datatypes import VectorDBsProtocolPrivate
from llama_stack.providers.inline.vector_io.milvus import MilvusVectorIOConfig as InlineMilvusVectorIOConfig
@@ -308,8 +309,9 @@ class MilvusVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP
self,
config: RemoteMilvusVectorIOConfig | InlineMilvusVectorIOConfig,
inference_api: Inference,
models_api: Models,
models_api: Models | None,
files_api: Files | None,
vector_stores_config: VectorStoresConfig | None = None,
) -> None:
super().__init__(files_api=files_api, kvstore=None)
self.config = config
@@ -317,6 +319,7 @@ class MilvusVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP
self.client = None
self.inference_api = inference_api
self.models_api = models_api
self.vector_stores_config = vector_stores_config
self.vector_db_store = None
self.metadata_collection_name = "openai_vector_stores_metadata"


@@ -4,14 +4,26 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from llama_stack.core.datatypes import StackRunConfig
from llama_stack.providers.datatypes import Api, ProviderSpec
from .config import PGVectorVectorIOConfig
async def get_adapter_impl(config: PGVectorVectorIOConfig, deps: dict[Api, ProviderSpec]):
async def get_adapter_impl(
config: PGVectorVectorIOConfig, deps: dict[Api, ProviderSpec], run_config: StackRunConfig | None = None
):
from .pgvector import PGVectorVectorIOAdapter
impl = PGVectorVectorIOAdapter(config, deps[Api.inference], deps[Api.models], deps.get(Api.files, None))
vector_stores_config = None
if run_config and run_config.vector_stores:
vector_stores_config = run_config.vector_stores
impl = PGVectorVectorIOAdapter(
config,
deps[Api.inference],
deps[Api.models],
deps.get(Api.files),
vector_stores_config,
)
await impl.initialize()
return impl


@@ -23,6 +23,7 @@ from llama_stack.apis.vector_io import (
QueryChunksResponse,
VectorIO,
)
from llama_stack.core.datatypes import VectorStoresConfig
from llama_stack.log import get_logger
from llama_stack.providers.datatypes import VectorDBsProtocolPrivate
from llama_stack.providers.utils.inference.prompt_adapter import (
@@ -346,11 +347,13 @@ class PGVectorVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtoco
inference_api: Inference,
models_api: Models,
files_api: Files | None = None,
vector_stores_config: VectorStoresConfig | None = None,
) -> None:
super().__init__(files_api=files_api, kvstore=None)
self.config = config
self.inference_api = inference_api
self.models_api = models_api
self.vector_stores_config = vector_stores_config
self.conn = None
self.cache = {}
self.vector_db_store = None


@@ -4,19 +4,27 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from llama_stack.core.datatypes import StackRunConfig
from llama_stack.providers.datatypes import Api, ProviderSpec
from .config import QdrantVectorIOConfig
async def get_adapter_impl(config: QdrantVectorIOConfig, deps: dict[Api, ProviderSpec]):
async def get_adapter_impl(
config: QdrantVectorIOConfig, deps: dict[Api, ProviderSpec], run_config: StackRunConfig | None = None
):
from .qdrant import QdrantVectorIOAdapter
vector_stores_config = None
if run_config and run_config.vector_stores:
vector_stores_config = run_config.vector_stores
impl = QdrantVectorIOAdapter(
config,
deps[Api.inference],
deps[Api.models],
deps.get(Api.files),
vector_stores_config,
)
await impl.initialize()
return impl


@@ -25,6 +25,7 @@ from llama_stack.apis.vector_io import (
VectorStoreChunkingStrategy,
VectorStoreFileObject,
)
from llama_stack.core.datatypes import VectorStoresConfig
from llama_stack.log import get_logger
from llama_stack.providers.datatypes import VectorDBsProtocolPrivate
from llama_stack.providers.inline.vector_io.qdrant import QdrantVectorIOConfig as InlineQdrantVectorIOConfig
@@ -163,6 +164,7 @@ class QdrantVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP
inference_api: Inference,
models_api: Models,
files_api: Files | None = None,
vector_stores_config: VectorStoresConfig | None = None,
) -> None:
super().__init__(files_api=files_api, kvstore=None)
self.config = config
@@ -170,6 +172,7 @@ class QdrantVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP
self.cache = {}
self.inference_api = inference_api
self.models_api = models_api
self.vector_stores_config = vector_stores_config
self.vector_db_store = None
self._qdrant_lock = asyncio.Lock()


@@ -4,19 +4,27 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from llama_stack.core.datatypes import StackRunConfig
from llama_stack.providers.datatypes import Api, ProviderSpec
from .config import WeaviateVectorIOConfig
async def get_adapter_impl(config: WeaviateVectorIOConfig, deps: dict[Api, ProviderSpec]):
async def get_adapter_impl(
config: WeaviateVectorIOConfig, deps: dict[Api, ProviderSpec], run_config: StackRunConfig | None = None
):
from .weaviate import WeaviateVectorIOAdapter
vector_stores_config = None
if run_config and run_config.vector_stores:
vector_stores_config = run_config.vector_stores
impl = WeaviateVectorIOAdapter(
config,
deps[Api.inference],
deps[Api.models],
deps.get(Api.files),
vector_stores_config,
)
await impl.initialize()
return impl


@@ -19,6 +19,7 @@ from llama_stack.apis.inference import Inference
from llama_stack.apis.models import Models
from llama_stack.apis.vector_dbs import VectorDB
from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO
from llama_stack.core.datatypes import VectorStoresConfig
from llama_stack.core.request_headers import NeedsRequestProviderData
from llama_stack.log import get_logger
from llama_stack.providers.datatypes import VectorDBsProtocolPrivate
@@ -286,11 +287,13 @@ class WeaviateVectorIOAdapter(
inference_api: Inference,
models_api: Models,
files_api: Files | None,
vector_stores_config: VectorStoresConfig | None = None,
) -> None:
super().__init__(files_api=files_api, kvstore=None)
self.config = config
self.inference_api = inference_api
self.models_api = models_api
self.vector_stores_config = vector_stores_config
self.client_cache = {}
self.cache = {}
self.vector_db_store = None


@@ -44,6 +44,7 @@ from llama_stack.apis.vector_io import (
VectorStoreSearchResponse,
VectorStoreSearchResponsePage,
)
from llama_stack.core.datatypes import VectorStoresConfig
from llama_stack.core.id_generation import generate_object_id
from llama_stack.log import get_logger
from llama_stack.providers.utils.kvstore.api import KVStore
@@ -81,13 +82,17 @@ class OpenAIVectorStoreMixin(ABC):
# Implementing classes should call super().__init__() in their __init__ method
# to properly initialize the mixin attributes.
def __init__(
self, files_api: Files | None = None, kvstore: KVStore | None = None, models_api: Models | None = None
self,
files_api: Files | None = None,
kvstore: KVStore | None = None,
):
self.openai_vector_stores: dict[str, dict[str, Any]] = {}
self.openai_file_batches: dict[str, dict[str, Any]] = {}
self.files_api = files_api
self.kvstore = kvstore
self.models_api = models_api
# These will be set by implementing classes
self.models_api: Models | None = None
self.vector_stores_config: VectorStoresConfig | None = None
self._last_file_batch_cleanup_time = 0
self._file_batch_tasks: dict[str, asyncio.Task[None]] = {}
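The contract in the new comment, as the adapters in this commit satisfy it (FooAdapter and its argument types are placeholders):

    class FooAdapter(OpenAIVectorStoreMixin):
        def __init__(self, config, inference_api, models_api, files_api, vector_stores_config=None):
            super().__init__(files_api=files_api, kvstore=None)
            # the mixin initializes models_api and vector_stores_config to
            # None; implementing classes overwrite them here
            self.models_api = models_api
            self.vector_stores_config = vector_stores_config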
@@ -474,24 +479,6 @@ class OpenAIVectorStoreMixin(ABC):
store_info = self.openai_vector_stores[vector_db_id]
return VectorStoreObject.model_validate(store_info)
async def _get_embedding_models(self) -> list[Model]:
"""Get list of embedding models from the models API."""
if not self.models_api:
return []
models_response = await self.models_api.list_models()
models_list = models_response.data if hasattr(models_response, "data") else models_response
embedding_models = []
for model in models_list:
if not isinstance(model, Model):
logger.warning(f"Non-Model object found in models list: {type(model)} - {model}")
continue
if model.model_type == "embedding":
embedding_models.append(model)
return embedding_models
async def _get_embedding_dimension_for_model(self, model_id: str) -> int | None:
"""Get embedding dimension for a specific model by looking it up in the models API.
@@ -501,9 +488,18 @@
Returns:
The embedding dimension for the model, or None if not found
"""
embedding_models = await self._get_embedding_models()
if not self.models_api:
return None
models_response = await self.models_api.list_models()
models_list = models_response.data if hasattr(models_response, "data") else models_response
for model in models_list:
if not isinstance(model, Model):
continue
if model.model_type != "embedding":
continue
for model in embedding_models:
# Check for exact match first
if model.identifier == model_id:
embedding_dimension = model.metadata.get("embedding_dimension")
@@ -523,35 +519,23 @@
return None
async def _get_default_embedding_model_and_dimension(self) -> tuple[str, int] | None:
"""Get default embedding model from the models API.
"""Get default embedding model from vector stores config.
Looks for embedding models marked with default_configured=True in metadata.
Returns None if no default embedding model is found.
Raises ValueError if multiple defaults are found.
Returns None if no vector stores config is provided.
"""
embedding_models = await self._get_embedding_models()
if not self.vector_stores_config:
logger.info("No vector stores config provided")
return None
default_models = []
for model in embedding_models:
if model.metadata.get("default_configured") is True:
default_models.append(model.identifier)
model_id = self.vector_stores_config.default_embedding_model_id
embedding_dimension = await self._get_embedding_dimension_for_model(model_id)
if embedding_dimension is None:
raise ValueError(f"Embedding model '{model_id}' not found or has no embedding_dimension in metadata")
if len(default_models) > 1:
raise ValueError(
f"Multiple embedding models marked as default_configured=True: {default_models}. "
"Only one embedding model can be marked as default."
)
if default_models:
model_id = default_models[0]
embedding_dimension = await self._get_embedding_dimension_for_model(model_id)
if embedding_dimension is None:
raise ValueError(f"Embedding model '{model_id}' has no embedding_dimension in metadata")
logger.info(f"Using default embedding model: {model_id} with dimension {embedding_dimension}")
return model_id, embedding_dimension
logger.debug("No default embedding models found")
return None
logger.debug(
f"Using default embedding model from vector stores config: {model_id} with dimension {embedding_dimension}"
)
return model_id, embedding_dimension
async def openai_list_vector_stores(
self,
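Taken together, a hedged sketch of the new resolution path when a caller creates a vector store without naming an embedding model:

    # 1. self.vector_stores_config.default_embedding_model_id
    #      -> "sentence-transformers/nomic-ai/nomic-embed-text-v1.5"
    # 2. await self._get_embedding_dimension_for_model(model_id)
    #      -> 768, read from the model's metadata (see the
    #         sentence-transformers diff above)
    # 3. the store is created with that (model, dimension) pair
    # With no vector_stores config at all, the method returns None and the
    # caller must name an embedding model explicitly.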