diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html index f9af10165..b940c97cb 100644 --- a/docs/_static/llama-stack-spec.html +++ b/docs/_static/llama-stack-spec.html @@ -16306,15 +16306,15 @@ }, "embedding_model": { "type": "string", - "description": "The embedding model to use." + "description": "The embedding model to use (optional if provider has defaults)." }, "embedding_dimension": { "type": "integer", - "description": "The dimension of the embedding model." + "description": "The dimension of the embedding model (optional if provider has defaults)." }, "provider_id": { "type": "string", - "description": "The identifier of the provider." + "description": "The identifier of the provider (can provide embedding defaults)." }, "vector_db_name": { "type": "string", @@ -16327,8 +16327,7 @@ }, "additionalProperties": false, "required": [ - "vector_db_id", - "embedding_model" + "vector_db_id" ], "title": "RegisterVectorDbRequest" }, diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml index d2c41b2bf..c23991c63 100644 --- a/docs/_static/llama-stack-spec.yaml +++ b/docs/_static/llama-stack-spec.yaml @@ -12114,13 +12114,16 @@ components: The identifier of the vector database to register. embedding_model: type: string - description: The embedding model to use. + description: >- + The embedding model to use (optional if provider has defaults). embedding_dimension: type: integer - description: The dimension of the embedding model. + description: >- + The dimension of the embedding model (optional if provider has defaults). provider_id: type: string - description: The identifier of the provider. + description: >- + The identifier of the provider (can provide embedding defaults). vector_db_name: type: string description: The name of the vector database. 
@@ -12131,7 +12134,6 @@ components: additionalProperties: false required: - vector_db_id - - embedding_model title: RegisterVectorDbRequest ResumeAgentTurnRequest: type: object diff --git a/docs/source/providers/vector_io/inline_chromadb.md b/docs/source/providers/vector_io/inline_chromadb.md index 679c82830..00c3e7f27 100644 --- a/docs/source/providers/vector_io/inline_chromadb.md +++ b/docs/source/providers/vector_io/inline_chromadb.md @@ -43,6 +43,7 @@ See [Chroma's documentation](https://docs.trychroma.com/docs/overview/introducti |-------|------|----------|---------|-------------| | `db_path` | `` | No | PydanticUndefined | | | `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | Config for KV store backend | +| `embedding` | `utils.vector_io.embedding_config.EmbeddingConfig \| None` | No | | Default embedding configuration for this provider. When specified, vector databases created with this provider will use these embedding settings as defaults. | ## Sample Configuration diff --git a/docs/source/providers/vector_io/inline_faiss.md b/docs/source/providers/vector_io/inline_faiss.md index bcff66f3f..10209b9de 100644 --- a/docs/source/providers/vector_io/inline_faiss.md +++ b/docs/source/providers/vector_io/inline_faiss.md @@ -38,6 +38,7 @@ more details about Faiss in general. | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| | `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | | +| `embedding` | `utils.vector_io.embedding_config.EmbeddingConfig \| None` | No | | Default embedding configuration for this provider. 
When specified, vector databases created with this provider will use these embedding settings as defaults. | ## Sample Configuration diff --git a/docs/source/providers/vector_io/inline_meta-reference.md b/docs/source/providers/vector_io/inline_meta-reference.md index 0aac445bd..86fb48b3a 100644 --- a/docs/source/providers/vector_io/inline_meta-reference.md +++ b/docs/source/providers/vector_io/inline_meta-reference.md @@ -9,6 +9,7 @@ Meta's reference implementation of a vector database. | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| | `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | | +| `embedding` | `utils.vector_io.embedding_config.EmbeddingConfig \| None` | No | | Default embedding configuration for this provider. When specified, vector databases created with this provider will use these embedding settings as defaults. | ## Sample Configuration diff --git a/docs/source/providers/vector_io/inline_milvus.md b/docs/source/providers/vector_io/inline_milvus.md index 3b3aad3fc..d9a4febc2 100644 --- a/docs/source/providers/vector_io/inline_milvus.md +++ b/docs/source/providers/vector_io/inline_milvus.md @@ -13,6 +13,7 @@ Please refer to the remote provider documentation. | `db_path` | `` | No | PydanticUndefined | | | `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | Config for KV store backend (SQLite only for now) | | `consistency_level` | `` | No | Strong | The consistency level of the Milvus server | +| `embedding` | `utils.vector_io.embedding_config.EmbeddingConfig \| None` | No | | Default embedding configuration for this provider. 
When specified, vector databases created with this provider will use these embedding settings as defaults. | ## Sample Configuration diff --git a/docs/source/providers/vector_io/inline_sqlite-vec.md b/docs/source/providers/vector_io/inline_sqlite-vec.md index ae7c45b21..831551d07 100644 --- a/docs/source/providers/vector_io/inline_sqlite-vec.md +++ b/docs/source/providers/vector_io/inline_sqlite-vec.md @@ -207,6 +207,7 @@ See [sqlite-vec's GitHub repo](https://github.com/asg017/sqlite-vec/tree/main) f |-------|------|----------|---------|-------------| | `db_path` | `` | No | PydanticUndefined | Path to the SQLite database file | | `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | Config for KV store backend (SQLite only for now) | +| `embedding` | `utils.vector_io.embedding_config.EmbeddingConfig \| None` | No | | Default embedding configuration for this provider. When specified, vector databases created with this provider will use these embedding settings as defaults. | ## Sample Configuration diff --git a/docs/source/providers/vector_io/inline_sqlite_vec.md b/docs/source/providers/vector_io/inline_sqlite_vec.md index 7e14bb8bd..65d2f5eff 100644 --- a/docs/source/providers/vector_io/inline_sqlite_vec.md +++ b/docs/source/providers/vector_io/inline_sqlite_vec.md @@ -12,6 +12,7 @@ Please refer to the sqlite-vec provider documentation. 
|-------|------|----------|---------|-------------| | `db_path` | `` | No | PydanticUndefined | Path to the SQLite database file | | `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | Config for KV store backend (SQLite only for now) | +| `embedding` | `utils.vector_io.embedding_config.EmbeddingConfig \| None` | No | | Default embedding configuration for this provider. When specified, vector databases created with this provider will use these embedding settings as defaults. | ## Sample Configuration diff --git a/docs/source/providers/vector_io/remote_chromadb.md b/docs/source/providers/vector_io/remote_chromadb.md index 447ea6cd6..daa115999 100644 --- a/docs/source/providers/vector_io/remote_chromadb.md +++ b/docs/source/providers/vector_io/remote_chromadb.md @@ -42,6 +42,7 @@ See [Chroma's documentation](https://docs.trychroma.com/docs/overview/introducti |-------|------|----------|---------|-------------| | `url` | `str \| None` | No | PydanticUndefined | | | `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | Config for KV store backend | +| `embedding` | `utils.vector_io.embedding_config.EmbeddingConfig \| None` | No | | Default embedding configuration for this provider. When specified, vector databases created with this provider will use these embedding settings as defaults. 
| ## Sample Configuration diff --git a/docs/source/providers/vector_io/remote_milvus.md b/docs/source/providers/vector_io/remote_milvus.md index 6734d8315..84ce19da2 100644 --- a/docs/source/providers/vector_io/remote_milvus.md +++ b/docs/source/providers/vector_io/remote_milvus.md @@ -115,6 +115,7 @@ For more details on TLS configuration, refer to the [TLS setup guide](https://mi | `token` | `str \| None` | No | PydanticUndefined | The token of the Milvus server | | `consistency_level` | `` | No | Strong | The consistency level of the Milvus server | | `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | Config for KV store backend | +| `embedding` | `utils.vector_io.embedding_config.EmbeddingConfig \| None` | No | | Default embedding configuration for this provider. When specified, vector databases created with this provider will use these embedding settings as defaults. | | `config` | `dict` | No | {} | This configuration allows additional fields to be passed through to the underlying Milvus client. See the [Milvus](https://milvus.io/docs/install-overview.md) documentation for more details about Milvus in general. | > **Note**: This configuration class accepts additional fields beyond those listed above. You can pass any additional configuration options that will be forwarded to the underlying provider. 
diff --git a/docs/source/providers/vector_io/remote_pgvector.md b/docs/source/providers/vector_io/remote_pgvector.md index 74f588a13..8b186c2d0 100644 --- a/docs/source/providers/vector_io/remote_pgvector.md +++ b/docs/source/providers/vector_io/remote_pgvector.md @@ -41,6 +41,7 @@ See [PGVector's documentation](https://github.com/pgvector/pgvector) for more de | `user` | `str \| None` | No | postgres | | | `password` | `str \| None` | No | mysecretpassword | | | `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig, annotation=NoneType, required=False, default='sqlite', discriminator='type'` | No | | Config for KV store backend (SQLite only for now) | +| `embedding` | `utils.vector_io.embedding_config.EmbeddingConfig \| None` | No | | Default embedding configuration for this provider. When specified, vector databases created with this provider will use these embedding settings as defaults. | ## Sample Configuration diff --git a/docs/source/providers/vector_io/remote_qdrant.md b/docs/source/providers/vector_io/remote_qdrant.md index 043141007..4dbdb2820 100644 --- a/docs/source/providers/vector_io/remote_qdrant.md +++ b/docs/source/providers/vector_io/remote_qdrant.md @@ -21,6 +21,7 @@ Please refer to the inline provider documentation. | `timeout` | `int \| None` | No | | | | `host` | `str \| None` | No | | | | `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | | +| `embedding` | `utils.vector_io.embedding_config.EmbeddingConfig \| None` | No | | Default embedding configuration for this provider. When specified, vector databases created with this provider will use these embedding settings as defaults. 
| ## Sample Configuration diff --git a/docs/source/providers/vector_io/remote_weaviate.md b/docs/source/providers/vector_io/remote_weaviate.md index c59487cf6..76a564ef1 100644 --- a/docs/source/providers/vector_io/remote_weaviate.md +++ b/docs/source/providers/vector_io/remote_weaviate.md @@ -40,6 +40,7 @@ See [Weaviate's documentation](https://weaviate.io/developers/weaviate) for more | `weaviate_api_key` | `str \| None` | No | | The API key for the Weaviate instance | | `weaviate_cluster_url` | `str \| None` | No | localhost:8080 | The URL of the Weaviate cluster | | `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig, annotation=NoneType, required=False, default='sqlite', discriminator='type'` | No | | Config for KV store backend (SQLite only for now) | +| `embedding` | `utils.vector_io.embedding_config.EmbeddingConfig \| None` | No | | Default embedding configuration for this provider. When specified, vector databases created with this provider will use these embedding settings as defaults. | ## Sample Configuration diff --git a/llama_stack/apis/vector_dbs/vector_dbs.py b/llama_stack/apis/vector_dbs/vector_dbs.py index 47820fa0f..707228a23 100644 --- a/llama_stack/apis/vector_dbs/vector_dbs.py +++ b/llama_stack/apis/vector_dbs/vector_dbs.py @@ -41,14 +41,15 @@ class VectorDBInput(BaseModel): """Input parameters for creating or configuring a vector database. 
:param vector_db_id: Unique identifier for the vector database - :param embedding_model: Name of the embedding model to use for vector generation - :param embedding_dimension: Dimension of the embedding vectors + :param embedding_model: Name of the embedding model to use for vector generation (optional if provider has defaults) + :param embedding_dimension: Dimension of the embedding vectors (optional if provider has defaults) + :param provider_id: Provider to use for this vector database (can inherit embedding defaults) :param provider_vector_db_id: (Optional) Provider-specific identifier for the vector database """ vector_db_id: str - embedding_model: str - embedding_dimension: int + embedding_model: str | None = None + embedding_dimension: int | None = None provider_id: str | None = None provider_vector_db_id: str | None = None @@ -89,8 +90,8 @@ class VectorDBs(Protocol): async def register_vector_db( self, vector_db_id: str, - embedding_model: str, - embedding_dimension: int | None = 384, + embedding_model: str | None = None, + embedding_dimension: int | None = None, provider_id: str | None = None, vector_db_name: str | None = None, provider_vector_db_id: str | None = None, @@ -98,9 +99,9 @@ class VectorDBs(Protocol): """Register a vector database. :param vector_db_id: The identifier of the vector database to register. - :param embedding_model: The embedding model to use. - :param embedding_dimension: The dimension of the embedding model. - :param provider_id: The identifier of the provider. + :param embedding_model: The embedding model to use (optional if provider has defaults). + :param embedding_dimension: The dimension of the embedding model (optional if provider has defaults). + :param provider_id: The identifier of the provider (can provide embedding defaults). :param vector_db_name: The name of the vector database. :param provider_vector_db_id: The identifier of the vector database in the provider. :returns: A VectorDB. 
diff --git a/llama_stack/core/routers/vector_io.py b/llama_stack/core/routers/vector_io.py index 3d0996c49..027537001 100644 --- a/llama_stack/core/routers/vector_io.py +++ b/llama_stack/core/routers/vector_io.py @@ -79,13 +79,23 @@ class VectorIORouter(VectorIO): async def register_vector_db( self, vector_db_id: str, - embedding_model: str, - embedding_dimension: int | None = 384, + embedding_model: str | None = None, + embedding_dimension: int | None = None, provider_id: str | None = None, vector_db_name: str | None = None, provider_vector_db_id: str | None = None, ) -> None: - logger.debug(f"VectorIORouter.register_vector_db: {vector_db_id}, {embedding_model}") + logger.debug( + f"VectorIORouter.register_vector_db: {vector_db_id}, embedding_model={embedding_model}, provider_id={provider_id}" + ) + + # Fallback defaults are filled in here, before the request is forwarded to the routing table, + # but only when BOTH embedding_model and embedding_dimension are omitted; a lone omitted value is forwarded as None. + if embedding_model is None and embedding_dimension is None: + # NOTE(review): set before any provider-level embedding defaults could be consulted - confirm intended precedence + embedding_model = embedding_model or "all-MiniLM-L6-v2" + embedding_dimension = embedding_dimension or 384 + await self.routing_table.register_vector_db( vector_db_id, embedding_model, diff --git a/llama_stack/providers/inline/vector_io/chroma/config.py b/llama_stack/providers/inline/vector_io/chroma/config.py index a9566f7ff..f7505d7a9 100644 --- a/llama_stack/providers/inline/vector_io/chroma/config.py +++ b/llama_stack/providers/inline/vector_io/chroma/config.py @@ -9,6 +9,7 @@ from typing import Any from pydantic import BaseModel, Field from llama_stack.providers.utils.kvstore.config import KVStoreConfig, SqliteKVStoreConfig +from llama_stack.providers.utils.vector_io.embedding_config import EmbeddingConfig from llama_stack.schema_utils import json_schema_type @@ -16,6 +17,10 @@ from llama_stack.schema_utils import json_schema_type class ChromaVectorIOConfig(BaseModel): db_path: str
kvstore: KVStoreConfig = Field(description="Config for KV store backend") + embedding: EmbeddingConfig | None = Field( + default=None, + description="Default embedding configuration for this provider. When specified, vector databases created with this provider will use these embedding settings as defaults.", + ) @classmethod def sample_run_config( @@ -27,4 +32,9 @@ class ChromaVectorIOConfig(BaseModel): __distro_dir__=__distro_dir__, db_name="chroma_inline_registry.db", ), + # Optional: Configure default embedding model for this provider + # "embedding": { + # "model": "${env.CHROMA_EMBEDDING_MODEL:=all-MiniLM-L6-v2}", + # "dimensions": 384 + # }, } diff --git a/llama_stack/providers/inline/vector_io/faiss/config.py b/llama_stack/providers/inline/vector_io/faiss/config.py index cbcbb1762..31e717b51 100644 --- a/llama_stack/providers/inline/vector_io/faiss/config.py +++ b/llama_stack/providers/inline/vector_io/faiss/config.py @@ -6,18 +6,23 @@ from typing import Any -from pydantic import BaseModel +from pydantic import BaseModel, Field from llama_stack.providers.utils.kvstore.config import ( KVStoreConfig, SqliteKVStoreConfig, ) +from llama_stack.providers.utils.vector_io.embedding_config import EmbeddingConfig from llama_stack.schema_utils import json_schema_type @json_schema_type class FaissVectorIOConfig(BaseModel): kvstore: KVStoreConfig + embedding: EmbeddingConfig | None = Field( + default=None, + description="Default embedding configuration for this provider. 
When specified, vector databases created with this provider will use these embedding settings as defaults.", + ) @classmethod def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> dict[str, Any]: @@ -25,5 +30,10 @@ class FaissVectorIOConfig(BaseModel): "kvstore": SqliteKVStoreConfig.sample_run_config( __distro_dir__=__distro_dir__, db_name="faiss_store.db", - ) + ), + # Optional: Configure default embedding model for this provider + # "embedding": { + # "model": "${env.FAISS_EMBEDDING_MODEL:=all-MiniLM-L6-v2}", + # "dimensions": 384 + # }, } diff --git a/llama_stack/providers/inline/vector_io/faiss/faiss.py b/llama_stack/providers/inline/vector_io/faiss/faiss.py index c45651033..be613cc86 100644 --- a/llama_stack/providers/inline/vector_io/faiss/faiss.py +++ b/llama_stack/providers/inline/vector_io/faiss/faiss.py @@ -35,6 +35,7 @@ from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIV from llama_stack.providers.utils.memory.vector_store import ( EmbeddingIndex, VectorDBWithIndex, + apply_provider_embedding_defaults, ) from .config import FaissVectorIOConfig @@ -237,6 +238,9 @@ class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPr self, vector_db: VectorDB, ) -> None: + # Apply provider-level embedding defaults if configured + vector_db = apply_provider_embedding_defaults(vector_db, self.config.embedding) + assert self.kvstore is not None key = f"{VECTOR_DBS_PREFIX}{vector_db.identifier}" diff --git a/llama_stack/providers/inline/vector_io/milvus/config.py b/llama_stack/providers/inline/vector_io/milvus/config.py index 8cbd056be..76e1a1454 100644 --- a/llama_stack/providers/inline/vector_io/milvus/config.py +++ b/llama_stack/providers/inline/vector_io/milvus/config.py @@ -12,6 +12,7 @@ from llama_stack.providers.utils.kvstore.config import ( KVStoreConfig, SqliteKVStoreConfig, ) +from llama_stack.providers.utils.vector_io.embedding_config import EmbeddingConfig from llama_stack.schema_utils import 
json_schema_type @@ -20,6 +21,10 @@ class MilvusVectorIOConfig(BaseModel): db_path: str kvstore: KVStoreConfig = Field(description="Config for KV store backend (SQLite only for now)") consistency_level: str = Field(description="The consistency level of the Milvus server", default="Strong") + embedding: EmbeddingConfig | None = Field( + default=None, + description="Default embedding configuration for this provider. When specified, vector databases created with this provider will use these embedding settings as defaults.", + ) @classmethod def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> dict[str, Any]: @@ -29,4 +34,9 @@ class MilvusVectorIOConfig(BaseModel): __distro_dir__=__distro_dir__, db_name="milvus_registry.db", ), + # Optional: Configure default embedding model for this provider + # "embedding": { + # "model": "${env.MILVUS_EMBEDDING_MODEL:=all-MiniLM-L6-v2}", + # "dimensions": 384 + # }, } diff --git a/llama_stack/providers/inline/vector_io/sqlite_vec/config.py b/llama_stack/providers/inline/vector_io/sqlite_vec/config.py index 525ed4b1f..253b7449c 100644 --- a/llama_stack/providers/inline/vector_io/sqlite_vec/config.py +++ b/llama_stack/providers/inline/vector_io/sqlite_vec/config.py @@ -12,11 +12,18 @@ from llama_stack.providers.utils.kvstore.config import ( KVStoreConfig, SqliteKVStoreConfig, ) +from llama_stack.providers.utils.vector_io.embedding_config import EmbeddingConfig +from llama_stack.schema_utils import json_schema_type +@json_schema_type class SQLiteVectorIOConfig(BaseModel): db_path: str = Field(description="Path to the SQLite database file") kvstore: KVStoreConfig = Field(description="Config for KV store backend (SQLite only for now)") + embedding: EmbeddingConfig | None = Field( + default=None, + description="Default embedding configuration for this provider. 
When specified, vector databases created with this provider will use these embedding settings as defaults.", + ) @classmethod def sample_run_config(cls, __distro_dir__: str) -> dict[str, Any]: @@ -26,4 +33,9 @@ class SQLiteVectorIOConfig(BaseModel): __distro_dir__=__distro_dir__, db_name="sqlite_vec_registry.db", ), + # Optional: Configure default embedding model for this provider + # "embedding": { + # "model": "${env.SQLITE_VEC_EMBEDDING_MODEL:=all-MiniLM-L6-v2}", + # "dimensions": 384 + # }, } diff --git a/llama_stack/providers/remote/vector_io/chroma/config.py b/llama_stack/providers/remote/vector_io/chroma/config.py index a1193905a..0ab10a372 100644 --- a/llama_stack/providers/remote/vector_io/chroma/config.py +++ b/llama_stack/providers/remote/vector_io/chroma/config.py @@ -9,6 +9,7 @@ from typing import Any from pydantic import BaseModel, Field from llama_stack.providers.utils.kvstore.config import KVStoreConfig, SqliteKVStoreConfig +from llama_stack.providers.utils.vector_io.embedding_config import EmbeddingConfig from llama_stack.schema_utils import json_schema_type @@ -16,6 +17,10 @@ from llama_stack.schema_utils import json_schema_type class ChromaVectorIOConfig(BaseModel): url: str | None kvstore: KVStoreConfig = Field(description="Config for KV store backend") + embedding: EmbeddingConfig | None = Field( + default=None, + description="Default embedding configuration for this provider. 
When specified, vector databases created with this provider will use these embedding settings as defaults.", + ) @classmethod def sample_run_config(cls, __distro_dir__: str, url: str = "${env.CHROMADB_URL}", **kwargs: Any) -> dict[str, Any]: @@ -25,4 +30,9 @@ class ChromaVectorIOConfig(BaseModel): __distro_dir__=__distro_dir__, db_name="chroma_remote_registry.db", ), + # Optional: Configure default embedding model for this provider + # "embedding": { + # "model": "${env.CHROMA_EMBEDDING_MODEL:=all-MiniLM-L6-v2}", + # "dimensions": 384 + # }, } diff --git a/llama_stack/providers/remote/vector_io/milvus/config.py b/llama_stack/providers/remote/vector_io/milvus/config.py index 899d3678d..54402b647 100644 --- a/llama_stack/providers/remote/vector_io/milvus/config.py +++ b/llama_stack/providers/remote/vector_io/milvus/config.py @@ -9,6 +9,7 @@ from typing import Any from pydantic import BaseModel, ConfigDict, Field from llama_stack.providers.utils.kvstore.config import KVStoreConfig, SqliteKVStoreConfig +from llama_stack.providers.utils.vector_io.embedding_config import EmbeddingConfig from llama_stack.schema_utils import json_schema_type @@ -18,6 +19,10 @@ class MilvusVectorIOConfig(BaseModel): token: str | None = Field(description="The token of the Milvus server") consistency_level: str = Field(description="The consistency level of the Milvus server", default="Strong") kvstore: KVStoreConfig = Field(description="Config for KV store backend") + embedding: EmbeddingConfig | None = Field( + default=None, + description="Default embedding configuration for this provider. When specified, vector databases created with this provider will use these embedding settings as defaults.", + ) # This configuration allows additional fields to be passed through to the underlying Milvus client. # See the [Milvus](https://milvus.io/docs/install-overview.md) documentation for more details about Milvus in general. 
@@ -32,4 +37,9 @@ class MilvusVectorIOConfig(BaseModel): __distro_dir__=__distro_dir__, db_name="milvus_remote_registry.db", ), + # Optional: Configure default embedding model for this provider + # "embedding": { + # "model": "${env.MILVUS_EMBEDDING_MODEL:=all-MiniLM-L6-v2}", + # "dimensions": 384 + # }, } diff --git a/llama_stack/providers/remote/vector_io/milvus/milvus.py b/llama_stack/providers/remote/vector_io/milvus/milvus.py index db58bf6d3..47486e533 100644 --- a/llama_stack/providers/remote/vector_io/milvus/milvus.py +++ b/llama_stack/providers/remote/vector_io/milvus/milvus.py @@ -29,6 +29,7 @@ from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIV from llama_stack.providers.utils.memory.vector_store import ( EmbeddingIndex, VectorDBWithIndex, + apply_provider_embedding_defaults, ) from llama_stack.providers.utils.vector_io.vector_utils import sanitize_collection_name @@ -305,6 +306,9 @@ class MilvusVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP self, vector_db: VectorDB, ) -> None: + # Apply provider-level embedding defaults if configured + vector_db = apply_provider_embedding_defaults(vector_db, self.config.embedding) + if isinstance(self.config, RemoteMilvusVectorIOConfig): consistency_level = self.config.consistency_level else: diff --git a/llama_stack/providers/remote/vector_io/pgvector/config.py b/llama_stack/providers/remote/vector_io/pgvector/config.py index 334cbe5be..3b3fd9fee 100644 --- a/llama_stack/providers/remote/vector_io/pgvector/config.py +++ b/llama_stack/providers/remote/vector_io/pgvector/config.py @@ -12,6 +12,7 @@ from llama_stack.providers.utils.kvstore.config import ( KVStoreConfig, SqliteKVStoreConfig, ) +from llama_stack.providers.utils.vector_io.embedding_config import EmbeddingConfig from llama_stack.schema_utils import json_schema_type @@ -23,6 +24,10 @@ class PGVectorVectorIOConfig(BaseModel): user: str | None = Field(default="postgres") password: str | None = 
Field(default="mysecretpassword") kvstore: KVStoreConfig | None = Field(description="Config for KV store backend (SQLite only for now)", default=None) + embedding: EmbeddingConfig | None = Field( + default=None, + description="Default embedding configuration for this provider. When specified, vector databases created with this provider will use these embedding settings as defaults.", + ) @classmethod def sample_run_config( @@ -45,4 +50,9 @@ class PGVectorVectorIOConfig(BaseModel): __distro_dir__=__distro_dir__, db_name="pgvector_registry.db", ), + # Optional: Configure default embedding model for this provider + # "embedding": { + # "model": "${env.PGVECTOR_EMBEDDING_MODEL:=all-MiniLM-L6-v2}", + # "dimensions": 384 + # }, } diff --git a/llama_stack/providers/remote/vector_io/pgvector/pgvector.py b/llama_stack/providers/remote/vector_io/pgvector/pgvector.py index 28af2b911..fde0de32f 100644 --- a/llama_stack/providers/remote/vector_io/pgvector/pgvector.py +++ b/llama_stack/providers/remote/vector_io/pgvector/pgvector.py @@ -29,6 +29,7 @@ from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIV from llama_stack.providers.utils.memory.vector_store import ( EmbeddingIndex, VectorDBWithIndex, + apply_provider_embedding_defaults, ) from .config import PGVectorVectorIOConfig @@ -222,6 +223,9 @@ class PGVectorVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtoco log.info("Connection to PGVector database server closed") async def register_vector_db(self, vector_db: VectorDB) -> None: + # Apply provider-level embedding defaults if configured + vector_db = apply_provider_embedding_defaults(vector_db, self.config.embedding) + # Persist vector DB metadata in the KV store assert self.kvstore is not None # Upsert model metadata in Postgres diff --git a/llama_stack/providers/remote/vector_io/qdrant/config.py b/llama_stack/providers/remote/vector_io/qdrant/config.py index ff5506236..5cd8ede01 100644 --- 
a/llama_stack/providers/remote/vector_io/qdrant/config.py +++ b/llama_stack/providers/remote/vector_io/qdrant/config.py @@ -6,12 +6,13 @@ from typing import Any -from pydantic import BaseModel +from pydantic import BaseModel, Field from llama_stack.providers.utils.kvstore.config import ( KVStoreConfig, SqliteKVStoreConfig, ) +from llama_stack.providers.utils.vector_io.embedding_config import EmbeddingConfig from llama_stack.schema_utils import json_schema_type @@ -28,6 +29,10 @@ class QdrantVectorIOConfig(BaseModel): timeout: int | None = None host: str | None = None kvstore: KVStoreConfig + embedding: EmbeddingConfig | None = Field( + default=None, + description="Default embedding configuration for this provider. When specified, vector databases created with this provider will use these embedding settings as defaults.", + ) @classmethod def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> dict[str, Any]: @@ -37,4 +42,10 @@ class QdrantVectorIOConfig(BaseModel): __distro_dir__=__distro_dir__, db_name="qdrant_registry.db", ), + "api_key": "${env.QDRANT_API_KEY}", + # Optional: Configure default embedding model for this provider + # "embedding": { + # "model": "${env.QDRANT_EMBEDDING_MODEL:=all-MiniLM-L6-v2}", + # "dimensions": 384 + # }, } diff --git a/llama_stack/providers/remote/vector_io/weaviate/config.py b/llama_stack/providers/remote/vector_io/weaviate/config.py index b693e294e..5d292f3bf 100644 --- a/llama_stack/providers/remote/vector_io/weaviate/config.py +++ b/llama_stack/providers/remote/vector_io/weaviate/config.py @@ -12,6 +12,7 @@ from llama_stack.providers.utils.kvstore.config import ( KVStoreConfig, SqliteKVStoreConfig, ) +from llama_stack.providers.utils.vector_io.embedding_config import EmbeddingConfig from llama_stack.schema_utils import json_schema_type @@ -21,6 +22,13 @@ class WeaviateVectorIOConfig(BaseModel): weaviate_cluster_url: str | None = Field(description="The URL of the Weaviate cluster", default="localhost:8080") kvstore: 
KVStoreConfig | None = Field(description="Config for KV store backend (SQLite only for now)", default=None) + +@json_schema_type +class WeaviateVectorIOConfig(BaseModel): + embedding: EmbeddingConfig | None = Field( + default=None, + description="Default embedding configuration for this provider. When specified, vector databases created with this provider will use these embedding settings as defaults.", + ) @classmethod def sample_run_config( cls, @@ -34,4 +42,9 @@ class WeaviateVectorIOConfig(BaseModel): __distro_dir__=__distro_dir__, db_name="weaviate_registry.db", ), + # Optional: Configure default embedding model for this provider + # "embedding": { + # "model": "${env.WEAVIATE_EMBEDDING_MODEL:=all-MiniLM-L6-v2}", + # "dimensions": 384 + # }, } diff --git a/llama_stack/providers/utils/memory/vector_store.py b/llama_stack/providers/utils/memory/vector_store.py index 484475e9d..d9bef2710 100644 --- a/llama_stack/providers/utils/memory/vector_store.py +++ b/llama_stack/providers/utils/memory/vector_store.py @@ -31,6 +31,8 @@ from llama_stack.providers.utils.inference.prompt_adapter import ( interleaved_content_as_str, ) from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id +from llama_stack.providers.utils.vector_io.chunk_utils import generate_chunk_id +from llama_stack.providers.utils.vector_io.embedding_config import EmbeddingConfig log = logging.getLogger(__name__) @@ -39,6 +41,41 @@ RERANKER_TYPE_RRF = "rrf" RERANKER_TYPE_WEIGHTED = "weighted" +def apply_provider_embedding_defaults( + vector_db: VectorDB, provider_embedding_config: EmbeddingConfig | None +) -> VectorDB: + """Apply provider-level embedding defaults to a VectorDB if not already specified. + + This allows providers to specify default embedding models for use-case specific + vector stores, reducing the need for app developers to know embedding details. 
+ + Args: + vector_db: The VectorDB to potentially modify + provider_embedding_config: The provider's default embedding configuration + + Returns: + The VectorDB with embedding defaults applied if needed + """ + if provider_embedding_config is None: + return vector_db + + # Create a copy to avoid modifying the original + db_dict = vector_db.model_dump() + + # Apply embedding model default if not specified + if not db_dict.get("embedding_model") and provider_embedding_config.model: + db_dict["embedding_model"] = provider_embedding_config.model + + # Apply embedding dimension default if not specified + if not db_dict.get("embedding_dimension") and provider_embedding_config.dimensions: + db_dict["embedding_dimension"] = provider_embedding_config.dimensions + elif not db_dict.get("embedding_dimension"): + # Fallback to default dimension if still not specified + db_dict["embedding_dimension"] = provider_embedding_config.get_dimensions_or_default() + + return VectorDB.model_validate(db_dict) + + def parse_pdf(data: bytes) -> str: # For PDF and DOC/DOCX files, we can't reliably convert to string pdf_bytes = io.BytesIO(data) diff --git a/llama_stack/providers/utils/vector_io/embedding_config.py b/llama_stack/providers/utils/vector_io/embedding_config.py new file mode 100644 index 000000000..b27dceac2 --- /dev/null +++ b/llama_stack/providers/utils/vector_io/embedding_config.py @@ -0,0 +1,36 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from pydantic import BaseModel, Field + +from llama_stack.schema_utils import json_schema_type + + +@json_schema_type +class EmbeddingConfig(BaseModel): + """Configuration for embedding model used by vector-io providers. 
+ + This allows providers to specify default embedding models for use-case specific + vector stores, reducing the need for app developers to know embedding details. + + Example usage in provider config: + ```yaml + vector_io: + - provider_id: question-answer + provider_type: remote::pgvector + config: + embedding: + model: prod/question-answer-embedder + dimensions: 384 + ``` + """ + + model: str = Field(description="The embedding model identifier to use") + dimensions: int | None = Field(default=None, description="The embedding dimensions (optional, can be inferred)") + + def get_dimensions_or_default(self, default: int = 384) -> int: + """Get dimensions with fallback to default if not specified.""" + return self.dimensions if self.dimensions is not None else default