diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html
index f9af10165..b940c97cb 100644
--- a/docs/_static/llama-stack-spec.html
+++ b/docs/_static/llama-stack-spec.html
@@ -16306,15 +16306,15 @@
},
"embedding_model": {
"type": "string",
- "description": "The embedding model to use."
+ "description": "The embedding model to use (optional if provider has defaults)."
},
"embedding_dimension": {
"type": "integer",
- "description": "The dimension of the embedding model."
+ "description": "The dimension of the embedding model (optional if provider has defaults)."
},
"provider_id": {
"type": "string",
- "description": "The identifier of the provider."
+ "description": "The identifier of the provider (can provide embedding defaults)."
},
"vector_db_name": {
"type": "string",
@@ -16327,8 +16327,7 @@
},
"additionalProperties": false,
"required": [
- "vector_db_id",
- "embedding_model"
+ "vector_db_id"
],
"title": "RegisterVectorDbRequest"
},
diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml
index d2c41b2bf..c23991c63 100644
--- a/docs/_static/llama-stack-spec.yaml
+++ b/docs/_static/llama-stack-spec.yaml
@@ -12114,13 +12114,16 @@ components:
The identifier of the vector database to register.
embedding_model:
type: string
- description: The embedding model to use.
+ description: >-
+ The embedding model to use (optional if provider has defaults).
embedding_dimension:
type: integer
- description: The dimension of the embedding model.
+ description: >-
+ The dimension of the embedding model (optional if provider has defaults).
provider_id:
type: string
- description: The identifier of the provider.
+ description: >-
+ The identifier of the provider (can provide embedding defaults).
vector_db_name:
type: string
description: The name of the vector database.
@@ -12131,7 +12134,6 @@ components:
additionalProperties: false
required:
- vector_db_id
- - embedding_model
title: RegisterVectorDbRequest
ResumeAgentTurnRequest:
type: object
diff --git a/docs/source/providers/vector_io/inline_chromadb.md b/docs/source/providers/vector_io/inline_chromadb.md
index 679c82830..00c3e7f27 100644
--- a/docs/source/providers/vector_io/inline_chromadb.md
+++ b/docs/source/providers/vector_io/inline_chromadb.md
@@ -43,6 +43,7 @@ See [Chroma's documentation](https://docs.trychroma.com/docs/overview/introducti
|-------|------|----------|---------|-------------|
| `db_path` | `` | No | PydanticUndefined | |
| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | Config for KV store backend |
+| `embedding` | `utils.vector_io.embedding_config.EmbeddingConfig \| None` | No | | Default embedding configuration for this provider. When specified, vector databases created with this provider will use these embedding settings as defaults. |
## Sample Configuration
diff --git a/docs/source/providers/vector_io/inline_faiss.md b/docs/source/providers/vector_io/inline_faiss.md
index bcff66f3f..10209b9de 100644
--- a/docs/source/providers/vector_io/inline_faiss.md
+++ b/docs/source/providers/vector_io/inline_faiss.md
@@ -38,6 +38,7 @@ more details about Faiss in general.
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | |
+| `embedding` | `utils.vector_io.embedding_config.EmbeddingConfig \| None` | No | | Default embedding configuration for this provider. When specified, vector databases created with this provider will use these embedding settings as defaults. |
## Sample Configuration
diff --git a/docs/source/providers/vector_io/inline_meta-reference.md b/docs/source/providers/vector_io/inline_meta-reference.md
index 0aac445bd..86fb48b3a 100644
--- a/docs/source/providers/vector_io/inline_meta-reference.md
+++ b/docs/source/providers/vector_io/inline_meta-reference.md
@@ -9,6 +9,7 @@ Meta's reference implementation of a vector database.
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | |
+| `embedding` | `utils.vector_io.embedding_config.EmbeddingConfig \| None` | No | | Default embedding configuration for this provider. When specified, vector databases created with this provider will use these embedding settings as defaults. |
## Sample Configuration
diff --git a/docs/source/providers/vector_io/inline_milvus.md b/docs/source/providers/vector_io/inline_milvus.md
index 3b3aad3fc..d9a4febc2 100644
--- a/docs/source/providers/vector_io/inline_milvus.md
+++ b/docs/source/providers/vector_io/inline_milvus.md
@@ -13,6 +13,7 @@ Please refer to the remote provider documentation.
| `db_path` | `` | No | PydanticUndefined | |
| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | Config for KV store backend (SQLite only for now) |
| `consistency_level` | `` | No | Strong | The consistency level of the Milvus server |
+| `embedding` | `utils.vector_io.embedding_config.EmbeddingConfig \| None` | No | | Default embedding configuration for this provider. When specified, vector databases created with this provider will use these embedding settings as defaults. |
## Sample Configuration
diff --git a/docs/source/providers/vector_io/inline_sqlite-vec.md b/docs/source/providers/vector_io/inline_sqlite-vec.md
index ae7c45b21..831551d07 100644
--- a/docs/source/providers/vector_io/inline_sqlite-vec.md
+++ b/docs/source/providers/vector_io/inline_sqlite-vec.md
@@ -207,6 +207,7 @@ See [sqlite-vec's GitHub repo](https://github.com/asg017/sqlite-vec/tree/main) f
|-------|------|----------|---------|-------------|
| `db_path` | `` | No | PydanticUndefined | Path to the SQLite database file |
| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | Config for KV store backend (SQLite only for now) |
+| `embedding` | `utils.vector_io.embedding_config.EmbeddingConfig \| None` | No | | Default embedding configuration for this provider. When specified, vector databases created with this provider will use these embedding settings as defaults. |
## Sample Configuration
diff --git a/docs/source/providers/vector_io/inline_sqlite_vec.md b/docs/source/providers/vector_io/inline_sqlite_vec.md
index 7e14bb8bd..65d2f5eff 100644
--- a/docs/source/providers/vector_io/inline_sqlite_vec.md
+++ b/docs/source/providers/vector_io/inline_sqlite_vec.md
@@ -12,6 +12,7 @@ Please refer to the sqlite-vec provider documentation.
|-------|------|----------|---------|-------------|
| `db_path` | `` | No | PydanticUndefined | Path to the SQLite database file |
| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | Config for KV store backend (SQLite only for now) |
+| `embedding` | `utils.vector_io.embedding_config.EmbeddingConfig \| None` | No | | Default embedding configuration for this provider. When specified, vector databases created with this provider will use these embedding settings as defaults. |
## Sample Configuration
diff --git a/docs/source/providers/vector_io/remote_chromadb.md b/docs/source/providers/vector_io/remote_chromadb.md
index 447ea6cd6..daa115999 100644
--- a/docs/source/providers/vector_io/remote_chromadb.md
+++ b/docs/source/providers/vector_io/remote_chromadb.md
@@ -42,6 +42,7 @@ See [Chroma's documentation](https://docs.trychroma.com/docs/overview/introducti
|-------|------|----------|---------|-------------|
| `url` | `str \| None` | No | PydanticUndefined | |
| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | Config for KV store backend |
+| `embedding` | `utils.vector_io.embedding_config.EmbeddingConfig \| None` | No | | Default embedding configuration for this provider. When specified, vector databases created with this provider will use these embedding settings as defaults. |
## Sample Configuration
diff --git a/docs/source/providers/vector_io/remote_milvus.md b/docs/source/providers/vector_io/remote_milvus.md
index 6734d8315..84ce19da2 100644
--- a/docs/source/providers/vector_io/remote_milvus.md
+++ b/docs/source/providers/vector_io/remote_milvus.md
@@ -115,6 +115,7 @@ For more details on TLS configuration, refer to the [TLS setup guide](https://mi
| `token` | `str \| None` | No | PydanticUndefined | The token of the Milvus server |
| `consistency_level` | `` | No | Strong | The consistency level of the Milvus server |
| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | Config for KV store backend |
+| `embedding` | `utils.vector_io.embedding_config.EmbeddingConfig \| None` | No | | Default embedding configuration for this provider. When specified, vector databases created with this provider will use these embedding settings as defaults. |
| `config` | `dict` | No | {} | This configuration allows additional fields to be passed through to the underlying Milvus client. See the [Milvus](https://milvus.io/docs/install-overview.md) documentation for more details about Milvus in general. |
> **Note**: This configuration class accepts additional fields beyond those listed above. You can pass any additional configuration options that will be forwarded to the underlying provider.
diff --git a/docs/source/providers/vector_io/remote_pgvector.md b/docs/source/providers/vector_io/remote_pgvector.md
index 74f588a13..8b186c2d0 100644
--- a/docs/source/providers/vector_io/remote_pgvector.md
+++ b/docs/source/providers/vector_io/remote_pgvector.md
@@ -41,6 +41,7 @@ See [PGVector's documentation](https://github.com/pgvector/pgvector) for more de
| `user` | `str \| None` | No | postgres | |
| `password` | `str \| None` | No | mysecretpassword | |
| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig, annotation=NoneType, required=False, default='sqlite', discriminator='type'` | No | | Config for KV store backend (SQLite only for now) |
+| `embedding` | `utils.vector_io.embedding_config.EmbeddingConfig \| None` | No | | Default embedding configuration for this provider. When specified, vector databases created with this provider will use these embedding settings as defaults. |
## Sample Configuration
diff --git a/docs/source/providers/vector_io/remote_qdrant.md b/docs/source/providers/vector_io/remote_qdrant.md
index 043141007..4dbdb2820 100644
--- a/docs/source/providers/vector_io/remote_qdrant.md
+++ b/docs/source/providers/vector_io/remote_qdrant.md
@@ -21,6 +21,7 @@ Please refer to the inline provider documentation.
| `timeout` | `int \| None` | No | | |
| `host` | `str \| None` | No | | |
| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | |
+| `embedding` | `utils.vector_io.embedding_config.EmbeddingConfig \| None` | No | | Default embedding configuration for this provider. When specified, vector databases created with this provider will use these embedding settings as defaults. |
## Sample Configuration
diff --git a/docs/source/providers/vector_io/remote_weaviate.md b/docs/source/providers/vector_io/remote_weaviate.md
index c59487cf6..76a564ef1 100644
--- a/docs/source/providers/vector_io/remote_weaviate.md
+++ b/docs/source/providers/vector_io/remote_weaviate.md
@@ -40,6 +40,7 @@ See [Weaviate's documentation](https://weaviate.io/developers/weaviate) for more
| `weaviate_api_key` | `str \| None` | No | | The API key for the Weaviate instance |
| `weaviate_cluster_url` | `str \| None` | No | localhost:8080 | The URL of the Weaviate cluster |
| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig, annotation=NoneType, required=False, default='sqlite', discriminator='type'` | No | | Config for KV store backend (SQLite only for now) |
+| `embedding` | `utils.vector_io.embedding_config.EmbeddingConfig \| None` | No | | Default embedding configuration for this provider. When specified, vector databases created with this provider will use these embedding settings as defaults. |
## Sample Configuration
diff --git a/llama_stack/apis/vector_dbs/vector_dbs.py b/llama_stack/apis/vector_dbs/vector_dbs.py
index 47820fa0f..707228a23 100644
--- a/llama_stack/apis/vector_dbs/vector_dbs.py
+++ b/llama_stack/apis/vector_dbs/vector_dbs.py
@@ -41,14 +41,15 @@ class VectorDBInput(BaseModel):
"""Input parameters for creating or configuring a vector database.
:param vector_db_id: Unique identifier for the vector database
- :param embedding_model: Name of the embedding model to use for vector generation
- :param embedding_dimension: Dimension of the embedding vectors
+ :param embedding_model: Name of the embedding model to use for vector generation (optional if provider has defaults)
+ :param embedding_dimension: Dimension of the embedding vectors (optional if provider has defaults)
+ :param provider_id: Provider to use for this vector database (can inherit embedding defaults)
:param provider_vector_db_id: (Optional) Provider-specific identifier for the vector database
"""
vector_db_id: str
- embedding_model: str
- embedding_dimension: int
+ embedding_model: str | None = None
+ embedding_dimension: int | None = None
provider_id: str | None = None
provider_vector_db_id: str | None = None
@@ -89,8 +90,8 @@ class VectorDBs(Protocol):
async def register_vector_db(
self,
vector_db_id: str,
- embedding_model: str,
- embedding_dimension: int | None = 384,
+ embedding_model: str | None = None,
+ embedding_dimension: int | None = None,
provider_id: str | None = None,
vector_db_name: str | None = None,
provider_vector_db_id: str | None = None,
@@ -98,9 +99,9 @@ class VectorDBs(Protocol):
"""Register a vector database.
:param vector_db_id: The identifier of the vector database to register.
- :param embedding_model: The embedding model to use.
- :param embedding_dimension: The dimension of the embedding model.
- :param provider_id: The identifier of the provider.
+ :param embedding_model: The embedding model to use (optional if provider has defaults).
+ :param embedding_dimension: The dimension of the embedding model (optional if provider has defaults).
+ :param provider_id: The identifier of the provider (can provide embedding defaults).
:param vector_db_name: The name of the vector database.
:param provider_vector_db_id: The identifier of the vector database in the provider.
:returns: A VectorDB.
diff --git a/llama_stack/core/routers/vector_io.py b/llama_stack/core/routers/vector_io.py
index 3d0996c49..027537001 100644
--- a/llama_stack/core/routers/vector_io.py
+++ b/llama_stack/core/routers/vector_io.py
@@ -79,13 +79,23 @@ class VectorIORouter(VectorIO):
async def register_vector_db(
self,
vector_db_id: str,
- embedding_model: str,
- embedding_dimension: int | None = 384,
+ embedding_model: str | None = None,
+ embedding_dimension: int | None = None,
provider_id: str | None = None,
vector_db_name: str | None = None,
provider_vector_db_id: str | None = None,
) -> None:
- logger.debug(f"VectorIORouter.register_vector_db: {vector_db_id}, {embedding_model}")
+ logger.debug(
+ f"VectorIORouter.register_vector_db: {vector_db_id}, embedding_model={embedding_model}, provider_id={provider_id}"
+ )
+
+ # NOTE(review): when both values are omitted, the hard-coded fallbacks below are filled in before
+ # routing, so the provider presumably never sees None and cannot apply its own embedding defaults — confirm.
+ if embedding_model is None and embedding_dimension is None:
+ # Both are None on this path, so each "or" always selects the literal; if only one is omitted it stays None.
+ embedding_model = embedding_model or "all-MiniLM-L6-v2"
+ embedding_dimension = embedding_dimension or 384
+
await self.routing_table.register_vector_db(
vector_db_id,
embedding_model,
diff --git a/llama_stack/providers/inline/vector_io/chroma/config.py b/llama_stack/providers/inline/vector_io/chroma/config.py
index a9566f7ff..f7505d7a9 100644
--- a/llama_stack/providers/inline/vector_io/chroma/config.py
+++ b/llama_stack/providers/inline/vector_io/chroma/config.py
@@ -9,6 +9,7 @@ from typing import Any
from pydantic import BaseModel, Field
from llama_stack.providers.utils.kvstore.config import KVStoreConfig, SqliteKVStoreConfig
+from llama_stack.providers.utils.vector_io.embedding_config import EmbeddingConfig
from llama_stack.schema_utils import json_schema_type
@@ -16,6 +17,10 @@ from llama_stack.schema_utils import json_schema_type
class ChromaVectorIOConfig(BaseModel):
db_path: str
kvstore: KVStoreConfig = Field(description="Config for KV store backend")
+ embedding: EmbeddingConfig | None = Field(
+ default=None,
+ description="Default embedding configuration for this provider. When specified, vector databases created with this provider will use these embedding settings as defaults.",
+ )
@classmethod
def sample_run_config(
@@ -27,4 +32,9 @@ class ChromaVectorIOConfig(BaseModel):
__distro_dir__=__distro_dir__,
db_name="chroma_inline_registry.db",
),
+ # Optional: Configure default embedding model for this provider
+ # "embedding": {
+ # "model": "${env.CHROMA_EMBEDDING_MODEL:=all-MiniLM-L6-v2}",
+ # "dimensions": 384
+ # },
}
diff --git a/llama_stack/providers/inline/vector_io/faiss/config.py b/llama_stack/providers/inline/vector_io/faiss/config.py
index cbcbb1762..31e717b51 100644
--- a/llama_stack/providers/inline/vector_io/faiss/config.py
+++ b/llama_stack/providers/inline/vector_io/faiss/config.py
@@ -6,18 +6,23 @@
from typing import Any
-from pydantic import BaseModel
+from pydantic import BaseModel, Field
from llama_stack.providers.utils.kvstore.config import (
KVStoreConfig,
SqliteKVStoreConfig,
)
+from llama_stack.providers.utils.vector_io.embedding_config import EmbeddingConfig
from llama_stack.schema_utils import json_schema_type
@json_schema_type
class FaissVectorIOConfig(BaseModel):
kvstore: KVStoreConfig
+ embedding: EmbeddingConfig | None = Field(
+ default=None,
+ description="Default embedding configuration for this provider. When specified, vector databases created with this provider will use these embedding settings as defaults.",
+ )
@classmethod
def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> dict[str, Any]:
@@ -25,5 +30,10 @@ class FaissVectorIOConfig(BaseModel):
"kvstore": SqliteKVStoreConfig.sample_run_config(
__distro_dir__=__distro_dir__,
db_name="faiss_store.db",
- )
+ ),
+ # Optional: Configure default embedding model for this provider
+ # "embedding": {
+ # "model": "${env.FAISS_EMBEDDING_MODEL:=all-MiniLM-L6-v2}",
+ # "dimensions": 384
+ # },
}
diff --git a/llama_stack/providers/inline/vector_io/faiss/faiss.py b/llama_stack/providers/inline/vector_io/faiss/faiss.py
index c45651033..be613cc86 100644
--- a/llama_stack/providers/inline/vector_io/faiss/faiss.py
+++ b/llama_stack/providers/inline/vector_io/faiss/faiss.py
@@ -35,6 +35,7 @@ from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIV
from llama_stack.providers.utils.memory.vector_store import (
EmbeddingIndex,
VectorDBWithIndex,
+ apply_provider_embedding_defaults,
)
from .config import FaissVectorIOConfig
@@ -237,6 +238,9 @@ class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPr
self,
vector_db: VectorDB,
) -> None:
+ # Apply provider-level embedding defaults if configured
+ vector_db = apply_provider_embedding_defaults(vector_db, self.config.embedding)
+
assert self.kvstore is not None
key = f"{VECTOR_DBS_PREFIX}{vector_db.identifier}"
diff --git a/llama_stack/providers/inline/vector_io/milvus/config.py b/llama_stack/providers/inline/vector_io/milvus/config.py
index 8cbd056be..76e1a1454 100644
--- a/llama_stack/providers/inline/vector_io/milvus/config.py
+++ b/llama_stack/providers/inline/vector_io/milvus/config.py
@@ -12,6 +12,7 @@ from llama_stack.providers.utils.kvstore.config import (
KVStoreConfig,
SqliteKVStoreConfig,
)
+from llama_stack.providers.utils.vector_io.embedding_config import EmbeddingConfig
from llama_stack.schema_utils import json_schema_type
@@ -20,6 +21,10 @@ class MilvusVectorIOConfig(BaseModel):
db_path: str
kvstore: KVStoreConfig = Field(description="Config for KV store backend (SQLite only for now)")
consistency_level: str = Field(description="The consistency level of the Milvus server", default="Strong")
+ embedding: EmbeddingConfig | None = Field(
+ default=None,
+ description="Default embedding configuration for this provider. When specified, vector databases created with this provider will use these embedding settings as defaults.",
+ )
@classmethod
def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> dict[str, Any]:
@@ -29,4 +34,9 @@ class MilvusVectorIOConfig(BaseModel):
__distro_dir__=__distro_dir__,
db_name="milvus_registry.db",
),
+ # Optional: Configure default embedding model for this provider
+ # "embedding": {
+ # "model": "${env.MILVUS_EMBEDDING_MODEL:=all-MiniLM-L6-v2}",
+ # "dimensions": 384
+ # },
}
diff --git a/llama_stack/providers/inline/vector_io/sqlite_vec/config.py b/llama_stack/providers/inline/vector_io/sqlite_vec/config.py
index 525ed4b1f..253b7449c 100644
--- a/llama_stack/providers/inline/vector_io/sqlite_vec/config.py
+++ b/llama_stack/providers/inline/vector_io/sqlite_vec/config.py
@@ -12,11 +12,18 @@ from llama_stack.providers.utils.kvstore.config import (
KVStoreConfig,
SqliteKVStoreConfig,
)
+from llama_stack.providers.utils.vector_io.embedding_config import EmbeddingConfig
+from llama_stack.schema_utils import json_schema_type
+@json_schema_type
class SQLiteVectorIOConfig(BaseModel):
db_path: str = Field(description="Path to the SQLite database file")
kvstore: KVStoreConfig = Field(description="Config for KV store backend (SQLite only for now)")
+ embedding: EmbeddingConfig | None = Field(
+ default=None,
+ description="Default embedding configuration for this provider. When specified, vector databases created with this provider will use these embedding settings as defaults.",
+ )
@classmethod
def sample_run_config(cls, __distro_dir__: str) -> dict[str, Any]:
@@ -26,4 +33,9 @@ class SQLiteVectorIOConfig(BaseModel):
__distro_dir__=__distro_dir__,
db_name="sqlite_vec_registry.db",
),
+ # Optional: Configure default embedding model for this provider
+ # "embedding": {
+ # "model": "${env.SQLITE_VEC_EMBEDDING_MODEL:=all-MiniLM-L6-v2}",
+ # "dimensions": 384
+ # },
}
diff --git a/llama_stack/providers/remote/vector_io/chroma/config.py b/llama_stack/providers/remote/vector_io/chroma/config.py
index a1193905a..0ab10a372 100644
--- a/llama_stack/providers/remote/vector_io/chroma/config.py
+++ b/llama_stack/providers/remote/vector_io/chroma/config.py
@@ -9,6 +9,7 @@ from typing import Any
from pydantic import BaseModel, Field
from llama_stack.providers.utils.kvstore.config import KVStoreConfig, SqliteKVStoreConfig
+from llama_stack.providers.utils.vector_io.embedding_config import EmbeddingConfig
from llama_stack.schema_utils import json_schema_type
@@ -16,6 +17,10 @@ from llama_stack.schema_utils import json_schema_type
class ChromaVectorIOConfig(BaseModel):
url: str | None
kvstore: KVStoreConfig = Field(description="Config for KV store backend")
+ embedding: EmbeddingConfig | None = Field(
+ default=None,
+ description="Default embedding configuration for this provider. When specified, vector databases created with this provider will use these embedding settings as defaults.",
+ )
@classmethod
def sample_run_config(cls, __distro_dir__: str, url: str = "${env.CHROMADB_URL}", **kwargs: Any) -> dict[str, Any]:
@@ -25,4 +30,9 @@ class ChromaVectorIOConfig(BaseModel):
__distro_dir__=__distro_dir__,
db_name="chroma_remote_registry.db",
),
+ # Optional: Configure default embedding model for this provider
+ # "embedding": {
+ # "model": "${env.CHROMA_EMBEDDING_MODEL:=all-MiniLM-L6-v2}",
+ # "dimensions": 384
+ # },
}
diff --git a/llama_stack/providers/remote/vector_io/milvus/config.py b/llama_stack/providers/remote/vector_io/milvus/config.py
index 899d3678d..54402b647 100644
--- a/llama_stack/providers/remote/vector_io/milvus/config.py
+++ b/llama_stack/providers/remote/vector_io/milvus/config.py
@@ -9,6 +9,7 @@ from typing import Any
from pydantic import BaseModel, ConfigDict, Field
from llama_stack.providers.utils.kvstore.config import KVStoreConfig, SqliteKVStoreConfig
+from llama_stack.providers.utils.vector_io.embedding_config import EmbeddingConfig
from llama_stack.schema_utils import json_schema_type
@@ -18,6 +19,10 @@ class MilvusVectorIOConfig(BaseModel):
token: str | None = Field(description="The token of the Milvus server")
consistency_level: str = Field(description="The consistency level of the Milvus server", default="Strong")
kvstore: KVStoreConfig = Field(description="Config for KV store backend")
+ embedding: EmbeddingConfig | None = Field(
+ default=None,
+ description="Default embedding configuration for this provider. When specified, vector databases created with this provider will use these embedding settings as defaults.",
+ )
# This configuration allows additional fields to be passed through to the underlying Milvus client.
# See the [Milvus](https://milvus.io/docs/install-overview.md) documentation for more details about Milvus in general.
@@ -32,4 +37,9 @@ class MilvusVectorIOConfig(BaseModel):
__distro_dir__=__distro_dir__,
db_name="milvus_remote_registry.db",
),
+ # Optional: Configure default embedding model for this provider
+ # "embedding": {
+ # "model": "${env.MILVUS_EMBEDDING_MODEL:=all-MiniLM-L6-v2}",
+ # "dimensions": 384
+ # },
}
diff --git a/llama_stack/providers/remote/vector_io/milvus/milvus.py b/llama_stack/providers/remote/vector_io/milvus/milvus.py
index db58bf6d3..47486e533 100644
--- a/llama_stack/providers/remote/vector_io/milvus/milvus.py
+++ b/llama_stack/providers/remote/vector_io/milvus/milvus.py
@@ -29,6 +29,7 @@ from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIV
from llama_stack.providers.utils.memory.vector_store import (
EmbeddingIndex,
VectorDBWithIndex,
+ apply_provider_embedding_defaults,
)
from llama_stack.providers.utils.vector_io.vector_utils import sanitize_collection_name
@@ -305,6 +306,9 @@ class MilvusVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP
self,
vector_db: VectorDB,
) -> None:
+ # Apply provider-level embedding defaults if configured
+ vector_db = apply_provider_embedding_defaults(vector_db, self.config.embedding)
+
if isinstance(self.config, RemoteMilvusVectorIOConfig):
consistency_level = self.config.consistency_level
else:
diff --git a/llama_stack/providers/remote/vector_io/pgvector/config.py b/llama_stack/providers/remote/vector_io/pgvector/config.py
index 334cbe5be..3b3fd9fee 100644
--- a/llama_stack/providers/remote/vector_io/pgvector/config.py
+++ b/llama_stack/providers/remote/vector_io/pgvector/config.py
@@ -12,6 +12,7 @@ from llama_stack.providers.utils.kvstore.config import (
KVStoreConfig,
SqliteKVStoreConfig,
)
+from llama_stack.providers.utils.vector_io.embedding_config import EmbeddingConfig
from llama_stack.schema_utils import json_schema_type
@@ -23,6 +24,10 @@ class PGVectorVectorIOConfig(BaseModel):
user: str | None = Field(default="postgres")
password: str | None = Field(default="mysecretpassword")
kvstore: KVStoreConfig | None = Field(description="Config for KV store backend (SQLite only for now)", default=None)
+ embedding: EmbeddingConfig | None = Field(
+ default=None,
+ description="Default embedding configuration for this provider. When specified, vector databases created with this provider will use these embedding settings as defaults.",
+ )
@classmethod
def sample_run_config(
@@ -45,4 +50,9 @@ class PGVectorVectorIOConfig(BaseModel):
__distro_dir__=__distro_dir__,
db_name="pgvector_registry.db",
),
+ # Optional: Configure default embedding model for this provider
+ # "embedding": {
+ # "model": "${env.PGVECTOR_EMBEDDING_MODEL:=all-MiniLM-L6-v2}",
+ # "dimensions": 384
+ # },
}
diff --git a/llama_stack/providers/remote/vector_io/pgvector/pgvector.py b/llama_stack/providers/remote/vector_io/pgvector/pgvector.py
index 28af2b911..fde0de32f 100644
--- a/llama_stack/providers/remote/vector_io/pgvector/pgvector.py
+++ b/llama_stack/providers/remote/vector_io/pgvector/pgvector.py
@@ -29,6 +29,7 @@ from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIV
from llama_stack.providers.utils.memory.vector_store import (
EmbeddingIndex,
VectorDBWithIndex,
+ apply_provider_embedding_defaults,
)
from .config import PGVectorVectorIOConfig
@@ -222,6 +223,9 @@ class PGVectorVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtoco
log.info("Connection to PGVector database server closed")
async def register_vector_db(self, vector_db: VectorDB) -> None:
+ # Apply provider-level embedding defaults if configured
+ vector_db = apply_provider_embedding_defaults(vector_db, self.config.embedding)
+
# Persist vector DB metadata in the KV store
assert self.kvstore is not None
# Upsert model metadata in Postgres
diff --git a/llama_stack/providers/remote/vector_io/qdrant/config.py b/llama_stack/providers/remote/vector_io/qdrant/config.py
index ff5506236..5cd8ede01 100644
--- a/llama_stack/providers/remote/vector_io/qdrant/config.py
+++ b/llama_stack/providers/remote/vector_io/qdrant/config.py
@@ -6,12 +6,13 @@
from typing import Any
-from pydantic import BaseModel
+from pydantic import BaseModel, Field
from llama_stack.providers.utils.kvstore.config import (
KVStoreConfig,
SqliteKVStoreConfig,
)
+from llama_stack.providers.utils.vector_io.embedding_config import EmbeddingConfig
from llama_stack.schema_utils import json_schema_type
@@ -28,6 +29,10 @@ class QdrantVectorIOConfig(BaseModel):
timeout: int | None = None
host: str | None = None
kvstore: KVStoreConfig
+ embedding: EmbeddingConfig | None = Field(
+ default=None,
+ description="Default embedding configuration for this provider. When specified, vector databases created with this provider will use these embedding settings as defaults.",
+ )
@classmethod
def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> dict[str, Any]:
@@ -37,4 +42,10 @@ class QdrantVectorIOConfig(BaseModel):
__distro_dir__=__distro_dir__,
db_name="qdrant_registry.db",
),
+ "api_key": "${env.QDRANT_API_KEY}",
+ # Optional: Configure default embedding model for this provider
+ # "embedding": {
+ # "model": "${env.QDRANT_EMBEDDING_MODEL:=all-MiniLM-L6-v2}",
+ # "dimensions": 384
+ # },
}
diff --git a/llama_stack/providers/remote/vector_io/weaviate/config.py b/llama_stack/providers/remote/vector_io/weaviate/config.py
index b693e294e..5d292f3bf 100644
--- a/llama_stack/providers/remote/vector_io/weaviate/config.py
+++ b/llama_stack/providers/remote/vector_io/weaviate/config.py
@@ -12,6 +12,7 @@ from llama_stack.providers.utils.kvstore.config import (
KVStoreConfig,
SqliteKVStoreConfig,
)
+from llama_stack.providers.utils.vector_io.embedding_config import EmbeddingConfig
from llama_stack.schema_utils import json_schema_type
@@ -21,6 +22,12 @@ class WeaviateVectorIOConfig(BaseModel):
     weaviate_cluster_url: str | None = Field(description="The URL of the Weaviate cluster", default="localhost:8080")
     kvstore: KVStoreConfig | None = Field(description="Config for KV store backend (SQLite only for now)", default=None)
+    # Optional provider-level embedding defaults (unset by default).
+    embedding: EmbeddingConfig | None = Field(
+        default=None,
+        description="Default embedding configuration for this provider. When specified, vector databases created with this provider will use these embedding settings as defaults.",
+    )
+
     @classmethod
     def sample_run_config(
         cls,
@@ -34,4 +41,9 @@ class WeaviateVectorIOConfig(BaseModel):
__distro_dir__=__distro_dir__,
db_name="weaviate_registry.db",
),
+ # Optional: Configure default embedding model for this provider
+ # "embedding": {
+ # "model": "${env.WEAVIATE_EMBEDDING_MODEL:=all-MiniLM-L6-v2}",
+ # "dimensions": 384
+ # },
}
diff --git a/llama_stack/providers/utils/memory/vector_store.py b/llama_stack/providers/utils/memory/vector_store.py
index 484475e9d..d9bef2710 100644
--- a/llama_stack/providers/utils/memory/vector_store.py
+++ b/llama_stack/providers/utils/memory/vector_store.py
@@ -31,6 +31,8 @@ from llama_stack.providers.utils.inference.prompt_adapter import (
interleaved_content_as_str,
)
from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id
+from llama_stack.providers.utils.vector_io.chunk_utils import generate_chunk_id
+from llama_stack.providers.utils.vector_io.embedding_config import EmbeddingConfig
log = logging.getLogger(__name__)
@@ -39,6 +41,41 @@ RERANKER_TYPE_RRF = "rrf"
RERANKER_TYPE_WEIGHTED = "weighted"
+def apply_provider_embedding_defaults(
+    vector_db: VectorDB, provider_embedding_config: EmbeddingConfig | None
+) -> VectorDB:
+    """Fill in missing embedding settings on a VectorDB from provider defaults.
+
+    Lets a provider carry the embedding model and dimension for its vector
+    stores so that callers registering a database may leave them unset.
+
+    Args:
+        vector_db: The VectorDB whose embedding fields may be unset.
+        provider_embedding_config: The provider's default embedding
+            configuration, or None when the provider declares no defaults.
+
+    Returns:
+        ``vector_db`` itself when there are no provider defaults; otherwise a
+        re-validated VectorDB with falsy embedding fields replaced by defaults.
+    """
+    defaults = provider_embedding_config
+    if defaults is None:
+        return vector_db
+
+    # Operate on a dumped dict so the caller's instance is left untouched.
+    fields = vector_db.model_dump()
+
+    # A model explicitly set on the vector DB always wins over the default.
+    if defaults.model and not fields.get("embedding_model"):
+        fields["embedding_model"] = defaults.model
+
+    # Same precedence for the dimension, ending in the config's own fallback.
+    if not fields.get("embedding_dimension"):
+        fields["embedding_dimension"] = defaults.dimensions or defaults.get_dimensions_or_default()
+
+    return VectorDB.model_validate(fields)
+
+
def parse_pdf(data: bytes) -> str:
# For PDF and DOC/DOCX files, we can't reliably convert to string
pdf_bytes = io.BytesIO(data)
diff --git a/llama_stack/providers/utils/vector_io/embedding_config.py b/llama_stack/providers/utils/vector_io/embedding_config.py
new file mode 100644
index 000000000..b27dceac2
--- /dev/null
+++ b/llama_stack/providers/utils/vector_io/embedding_config.py
@@ -0,0 +1,36 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from pydantic import BaseModel, Field
+
+from llama_stack.schema_utils import json_schema_type
+
+
+@json_schema_type
+class EmbeddingConfig(BaseModel):
+    """Configuration for embedding model used by vector-io providers.
+
+    This allows providers to specify default embedding models for use-case specific
+    vector stores, reducing the need for app developers to know embedding details.
+
+    Example usage in provider config:
+    ```yaml
+    vector_io:
+      - provider_id: question-answer
+        provider_type: remote::pgvector
+        config:
+          embedding:
+            model: prod/question-answer-embedder
+            dimensions: 384
+    ```
+    """
+
+    model: str = Field(description="The embedding model identifier to use")
+    dimensions: int | None = Field(default=None, description="The embedding dimensions (optional, can be inferred)")
+
+    def get_dimensions_or_default(self, default: int = 384) -> int:
+        """Return ``dimensions`` when it is set, otherwise the supplied ``default``."""
+        return default if self.dimensions is None else self.dimensions