feat(vector-io): configurable embedding models for all providers (v2)\n\nAdds embedding_model and embedding_dimension fields to all VectorIOConfig classes.\nRouter respects provider defaults with fallback.\nIntroduces embedding_utils helper.\nComprehensive docs & samples.\nResolves #2729

This commit is contained in:
skamenan7 2025-07-17 11:51:40 -04:00
parent c8f274347d
commit d55dd3e9a0
24 changed files with 482 additions and 14 deletions

View file

@ -0,0 +1,133 @@
# Sample Vector IO Configuration with Configurable Embedding Models
#
# This example demonstrates how to configure embedding models for different vector IO providers.
# Each provider can have its own default embedding model and dimension configuration.
#
# Every embedding_model value used below must match a model_id registered in the
# `models` section further down.

# Vector IO providers with different embedding configurations
vector_io:
  # Fast local search with lightweight embeddings
  - provider_id: fast_local_search
    provider_type: inline::faiss
    config:
      # inline::faiss persists through a kvstore backend; it has no top-level db_path field
      kvstore:
        type: sqlite
        db_path: ~/.llama/distributions/together/faiss_fast.db
      # Use lightweight embedding model for fast processing
      embedding_model: "all-MiniLM-L6-v2"
      embedding_dimension: 384  # Fixed dimension for this model

  # Compact storage with variable dimension embeddings
  - provider_id: compact_storage
    provider_type: inline::faiss
    config:
      # inline::faiss persists through a kvstore backend; it has no top-level db_path field
      kvstore:
        type: sqlite
        db_path: ~/.llama/distributions/together/faiss_compact.db
      # Use Matryoshka embeddings with custom dimension
      embedding_model: "nomic-embed-text"
      embedding_dimension: 256  # Reduced from default 768 for storage efficiency

  # High-quality persistent search
  - provider_id: persistent_search
    provider_type: inline::sqlite_vec
    config:
      db_path: ~/.llama/distributions/together/sqlite_vec.db
      # Use high-quality embedding model
      embedding_model: "sentence-transformers/all-mpnet-base-v2"
      embedding_dimension: 768  # Full dimension for best quality

  # Remote Qdrant with cloud embeddings
  - provider_id: cloud_search
    provider_type: remote::qdrant
    config:
      api_key: "${env.QDRANT_API_KEY}"
      url: "${env.QDRANT_URL}"
      # Use OpenAI embeddings for cloud deployment
      embedding_model: "text-embedding-3-small"
      embedding_dimension: 1536  # OpenAI's default dimension

  # Remote ChromaDB without explicit embedding config (uses system default)
  - provider_id: default_search
    provider_type: remote::chroma
    config:
      # remote::chroma is addressed through a single `url` field (not host/port)
      url: "${env.CHROMADB_URL:=http://localhost:8000}"
      # No embedding_model specified - will use system default from model registry

  # Milvus with production-grade configuration
  - provider_id: production_search
    provider_type: remote::milvus
    config:
      uri: "${env.MILVUS_ENDPOINT}"
      token: "${env.MILVUS_TOKEN}"
      kvstore:
        type: sqlite
        db_path: ~/.llama/distributions/together/milvus_registry.db
      # High-performance embedding model for production
      embedding_model: "text-embedding-3-large"
      embedding_dimension: 3072  # Large dimension for maximum quality

# Model registry - ensure embedding models are properly configured
models:
  # Lightweight embedding model (384 dimensions)
  - model_id: all-MiniLM-L6-v2
    provider_id: local_inference
    provider_model_id: sentence-transformers/all-MiniLM-L6-v2
    model_type: embedding
    metadata:
      embedding_dimension: 384
      description: "Fast, lightweight embeddings for general use"

  # Matryoshka embedding model (variable dimensions)
  - model_id: nomic-embed-text
    provider_id: local_inference
    provider_model_id: nomic-embed-text
    model_type: embedding
    metadata:
      embedding_dimension: 768  # Default, can be overridden
      description: "Flexible Matryoshka embeddings supporting variable dimensions"

  # High-quality embedding model (768 dimensions)
  - model_id: sentence-transformers/all-mpnet-base-v2
    provider_id: local_inference
    provider_model_id: sentence-transformers/all-mpnet-base-v2
    model_type: embedding
    metadata:
      embedding_dimension: 768
      description: "High-quality embeddings for semantic search"

  # OpenAI embedding models (for cloud usage)
  - model_id: text-embedding-3-small
    provider_id: openai_inference  # Would need OpenAI provider configured
    provider_model_id: text-embedding-3-small
    model_type: embedding
    metadata:
      embedding_dimension: 1536  # Default OpenAI dimension
      description: "OpenAI's efficient embedding model"

  - model_id: text-embedding-3-large
    provider_id: openai_inference
    provider_model_id: text-embedding-3-large
    model_type: embedding
    metadata:
      embedding_dimension: 3072  # Large dimension for maximum quality
      description: "OpenAI's highest quality embedding model"

# Optional: Configure specific vector databases (will use provider defaults)
vector_dbs:
  # Uses fast_local_search provider defaults (all-MiniLM-L6-v2, 384 dims)
  - vector_db_id: general_docs
    provider_id: fast_local_search

  # Uses compact_storage provider defaults (nomic-embed-text, 256 dims)
  - vector_db_id: compressed_knowledge
    provider_id: compact_storage

  # Uses persistent_search provider defaults (sentence-transformers/all-mpnet-base-v2, 768 dims)
  - vector_db_id: semantic_library
    provider_id: persistent_search

# Server configuration
server:
  host: 0.0.0.0
  port: 5000

# Logging configuration
logging:
  level: INFO

View file

@ -0,0 +1,180 @@
# Vector IO Embedding Model Configuration
This guide explains how to configure embedding models for vector IO providers in Llama Stack, enabling you to use different embedding models for different use cases and optimize performance and storage requirements.
## Overview
Vector IO providers now support configurable embedding models at the provider level. This allows you to:
- **Use different embedding models** for different vector databases based on your use case
- **Optimize for performance** with lightweight models for fast retrieval
- **Optimize for quality** with high-dimensional models for semantic search
- **Save storage space** with variable-dimension embeddings (Matryoshka embeddings)
- **Ensure consistency** with provider-level defaults
## Configuration Options
Each vector IO provider configuration can include:
- `embedding_model`: The default embedding model ID to use for this provider
- `embedding_dimension`: Optional dimension override for models with variable dimensions
## Priority Order
The system uses the following priority order for embedding model selection:
1. **Explicit API parameters** (highest priority)
2. **Provider configuration defaults** (new feature)
3. **System default**: the first available embedding model in the model registry (fallback)
## Example Configurations
### Fast Local Search with Lightweight Embeddings
```yaml
vector_io:
  - provider_id: fast_search
    provider_type: inline::faiss
    config:
      kvstore:
        type: sqlite
        db_path: ~/.llama/faiss_fast.db
      embedding_model: "all-MiniLM-L6-v2"  # Fast, 384-dimensional
      embedding_dimension: 384
```
### High-Quality Semantic Search
```yaml
vector_io:
- provider_id: quality_search
provider_type: inline::sqlite_vec
config:
db_path: ~/.llama/sqlite_quality.db
embedding_model: "sentence-transformers/all-mpnet-base-v2" # High quality, 768-dimensional
embedding_dimension: 768
```
### Storage-Optimized with Matryoshka Embeddings
```yaml
vector_io:
  - provider_id: compact_search
    provider_type: inline::faiss
    config:
      kvstore:
        type: sqlite
        db_path: ~/.llama/faiss_compact.db
      embedding_model: "nomic-embed-text"  # Matryoshka model
      embedding_dimension: 256  # Reduced from default 768 for storage efficiency
```
### Cloud Deployment with OpenAI Embeddings
```yaml
vector_io:
- provider_id: cloud_search
provider_type: remote::qdrant
config:
api_key: "${env.QDRANT_API_KEY}"
url: "${env.QDRANT_URL}"
embedding_model: "text-embedding-3-small"
embedding_dimension: 1536
```
## Model Registry Setup
Ensure your embedding models are properly configured in the model registry:
```yaml
models:
# Lightweight model
- model_id: all-MiniLM-L6-v2
provider_id: local_inference
provider_model_id: sentence-transformers/all-MiniLM-L6-v2
model_type: embedding
metadata:
embedding_dimension: 384
description: "Fast, lightweight embeddings"
# High-quality model
- model_id: sentence-transformers/all-mpnet-base-v2
provider_id: local_inference
provider_model_id: sentence-transformers/all-mpnet-base-v2
model_type: embedding
metadata:
embedding_dimension: 768
description: "High-quality embeddings"
# Matryoshka model
- model_id: nomic-embed-text
provider_id: local_inference
provider_model_id: nomic-embed-text
model_type: embedding
metadata:
embedding_dimension: 768 # Default dimension
description: "Variable-dimension Matryoshka embeddings"
```
## Use Cases
### Multi-Environment Setup
Configure different providers for different environments:
```yaml
vector_io:
  # Development - fast, lightweight
  - provider_id: dev_search
    provider_type: inline::faiss
    config:
      kvstore:
        type: sqlite
        db_path: ~/.llama/dev_faiss.db
      embedding_model: "all-MiniLM-L6-v2"
      embedding_dimension: 384
  # Production - high quality, scalable
  - provider_id: prod_search
    provider_type: remote::qdrant
    config:
      api_key: "${env.QDRANT_API_KEY}"
      embedding_model: "text-embedding-3-large"
      embedding_dimension: 3072
```
### Domain-Specific Models
Use different models for different content types:
```yaml
vector_io:
# Code search - specialized model
- provider_id: code_search
provider_type: inline::sqlite_vec
config:
db_path: ~/.llama/code_vectors.db
embedding_model: "microsoft/codebert-base"
embedding_dimension: 768
# General documents - general-purpose model
- provider_id: doc_search
provider_type: inline::sqlite_vec
config:
db_path: ~/.llama/doc_vectors.db
embedding_model: "sentence-transformers/all-mpnet-base-v2"
embedding_dimension: 768
```
## Backward Compatibility
If no embedding model is passed in the API call and none is set in the provider configuration, the system falls back to the existing behavior: it uses the first available embedding model from the model registry.
## Supported Providers
The configurable embedding models feature is supported by:
- **Inline providers**: Faiss, SQLite-vec, Milvus, ChromaDB, Qdrant
- **Remote providers**: Qdrant, Milvus, ChromaDB, PGVector, Weaviate
## Best Practices
1. **Match dimensions**: Ensure `embedding_dimension` matches your model's output
2. **Use variable dimensions wisely**: Only override dimensions for Matryoshka models that support it
3. **Consider performance trade-offs**: Smaller dimensions = faster search, larger = better quality
4. **Test configurations**: Validate your setup with sample queries before production use
5. **Document your choices**: Comment your configurations to explain model selection rationale

View file

@ -42,6 +42,8 @@ See [Chroma's documentation](https://docs.trychroma.com/docs/overview/introducti
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `db_path` | `<class 'str'>` | No | PydanticUndefined | |
| `embedding_model` | `str \| None` | No | | Optional default embedding model for this provider. If not specified, will use system default. |
| `embedding_dimension` | `int \| None` | No | | Optional embedding dimension override. Only needed for models with variable dimensions (e.g., Matryoshka embeddings). If not specified, will auto-lookup from model registry. |
## Sample Configuration

View file

@ -38,6 +38,8 @@ more details about Faiss in general.
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | |
| `embedding_model` | `str \| None` | No | | Optional default embedding model for this provider. If not specified, will use system default. |
| `embedding_dimension` | `int \| None` | No | | Optional embedding dimension override. Only needed for models with variable dimensions (e.g., Matryoshka embeddings). If not specified, will auto-lookup from model registry. |
## Sample Configuration

View file

@ -9,6 +9,8 @@ Meta's reference implementation of a vector database.
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | |
| `embedding_model` | `str \| None` | No | | Optional default embedding model for this provider. If not specified, will use system default. |
| `embedding_dimension` | `int \| None` | No | | Optional embedding dimension override. Only needed for models with variable dimensions (e.g., Matryoshka embeddings). If not specified, will auto-lookup from model registry. |
## Sample Configuration

View file

@ -13,6 +13,8 @@ Please refer to the remote provider documentation.
| `db_path` | `<class 'str'>` | No | PydanticUndefined | |
| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | Config for KV store backend (SQLite only for now) |
| `consistency_level` | `<class 'str'>` | No | Strong | The consistency level of the Milvus server |
| `embedding_model` | `str \| None` | No | | Optional default embedding model for this provider. If not specified, will use system default. |
| `embedding_dimension` | `int \| None` | No | | Optional embedding dimension override. Only needed for models with variable dimensions (e.g., Matryoshka embeddings). If not specified, will auto-lookup from model registry. |
## Sample Configuration

View file

@ -207,6 +207,8 @@ See [sqlite-vec's GitHub repo](https://github.com/asg017/sqlite-vec/tree/main) f
|-------|------|----------|---------|-------------|
| `db_path` | `<class 'str'>` | No | PydanticUndefined | Path to the SQLite database file |
| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | Config for KV store backend (SQLite only for now) |
| `embedding_model` | `str \| None` | No | | Optional default embedding model for this provider. If not specified, will use system default. |
| `embedding_dimension` | `int \| None` | No | | Optional embedding dimension override. Only needed for models with variable dimensions (e.g., Matryoshka embeddings). If not specified, will auto-lookup from model registry. |
## Sample Configuration

View file

@ -12,6 +12,8 @@ Please refer to the sqlite-vec provider documentation.
|-------|------|----------|---------|-------------|
| `db_path` | `<class 'str'>` | No | PydanticUndefined | Path to the SQLite database file |
| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | Config for KV store backend (SQLite only for now) |
| `embedding_model` | `str \| None` | No | | Optional default embedding model for this provider. If not specified, will use system default. |
| `embedding_dimension` | `int \| None` | No | | Optional embedding dimension override. Only needed for models with variable dimensions (e.g., Matryoshka embeddings). If not specified, will auto-lookup from model registry. |
## Sample Configuration

View file

@ -41,6 +41,8 @@ See [Chroma's documentation](https://docs.trychroma.com/docs/overview/introducti
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `url` | `str \| None` | No | PydanticUndefined | |
| `embedding_model` | `str \| None` | No | | Optional default embedding model for this provider. If not specified, will use system default. |
| `embedding_dimension` | `int \| None` | No | | Optional embedding dimension override. Only needed for models with variable dimensions (e.g., Matryoshka embeddings). If not specified, will auto-lookup from model registry. |
## Sample Configuration

View file

@ -115,6 +115,8 @@ For more details on TLS configuration, refer to the [TLS setup guide](https://mi
| `token` | `str \| None` | No | PydanticUndefined | The token of the Milvus server |
| `consistency_level` | `<class 'str'>` | No | Strong | The consistency level of the Milvus server |
| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | Config for KV store backend |
| `embedding_model` | `str \| None` | No | | Optional default embedding model for this provider. If not specified, will use system default. |
| `embedding_dimension` | `int \| None` | No | | Optional embedding dimension override. Only needed for models with variable dimensions (e.g., Matryoshka embeddings). If not specified, will auto-lookup from model registry. |
| `config` | `dict` | No | {} | This configuration allows additional fields to be passed through to the underlying Milvus client. See the [Milvus](https://milvus.io/docs/install-overview.md) documentation for more details about Milvus in general. |
> **Note**: This configuration class accepts additional fields beyond those listed above. You can pass any additional configuration options that will be forwarded to the underlying provider.

View file

@ -41,6 +41,8 @@ See [PGVector's documentation](https://github.com/pgvector/pgvector) for more de
| `user` | `str \| None` | No | postgres | |
| `password` | `str \| None` | No | mysecretpassword | |
| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig, annotation=NoneType, required=False, default='sqlite', discriminator='type'` | No | | Config for KV store backend (SQLite only for now) |
| `embedding_model` | `str \| None` | No | | Optional default embedding model for this provider. If not specified, will use system default. |
| `embedding_dimension` | `int \| None` | No | | Optional embedding dimension override. Only needed for models with variable dimensions (e.g., Matryoshka embeddings). If not specified, will auto-lookup from model registry. |
## Sample Configuration

View file

@ -20,6 +20,8 @@ Please refer to the inline provider documentation.
| `prefix` | `str \| None` | No | | |
| `timeout` | `int \| None` | No | | |
| `host` | `str \| None` | No | | |
| `embedding_model` | `str \| None` | No | | Optional default embedding model for this provider. If not specified, will use system default. |
| `embedding_dimension` | `int \| None` | No | | Optional embedding dimension override. Only needed for models with variable dimensions (e.g., Matryoshka embeddings). If not specified, will auto-lookup from model registry. |
## Sample Configuration

View file

@ -33,6 +33,13 @@ To install Weaviate see the [Weaviate quickstart documentation](https://weaviate
See [Weaviate's documentation](https://weaviate.io/developers/weaviate) for more details about Weaviate in general.
## Configuration
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `embedding_model` | `str \| None` | No | | Optional default embedding model for this provider. If not specified, will use system default. |
| `embedding_dimension` | `int \| None` | No | | Optional embedding dimension override. Only needed for models with variable dimensions (e.g., Matryoshka embeddings). If not specified, will auto-lookup from model registry. |
## Sample Configuration
```yaml

View file

@ -129,13 +129,32 @@ class VectorIORouter(VectorIO):
) -> VectorStoreObject:
logger.debug(f"VectorIORouter.openai_create_vector_store: name={name}, provider_id={provider_id}")
# If no embedding model is provided, use the first available one
# If no embedding model is provided, try provider defaults then fallback
if embedding_model is None:
embedding_model_info = await self._get_first_embedding_model()
if embedding_model_info is None:
raise ValueError("No embedding model provided and no embedding models available in the system")
embedding_model, embedding_dimension = embedding_model_info
logger.info(f"No embedding model specified, using first available: {embedding_model}")
# Try to get provider-specific embedding model configuration
if provider_id:
try:
provider_impl = self.routing_table.get_provider_impl(provider_id)
provider_config = getattr(provider_impl, "config", None)
if provider_config:
if hasattr(provider_config, "embedding_model") and provider_config.embedding_model:
embedding_model = provider_config.embedding_model
logger.info(f"Using provider config default embedding model: {embedding_model}")
if hasattr(provider_config, "embedding_dimension") and provider_config.embedding_dimension:
embedding_dimension = provider_config.embedding_dimension
logger.info(f"Using provider config embedding dimension: {embedding_dimension}")
except Exception as e:
logger.debug(f"Could not get provider config for {provider_id}: {e}")
# If still no embedding model, use system fallback
if embedding_model is None:
embedding_model_info = await self._get_first_embedding_model()
if embedding_model_info is None:
raise ValueError("No embedding model provided and no embedding models available in the system")
embedding_model, embedding_dimension = embedding_model_info
logger.info(f"No embedding model specified, using first available: {embedding_model}")
vector_db_id = f"vs_{uuid.uuid4()}"
registered_vector_db = await self.routing_table.register_vector_db(

View file

@ -6,12 +6,25 @@
from typing import Any
from pydantic import BaseModel
from pydantic import BaseModel, Field
class ChromaVectorIOConfig(BaseModel):
db_path: str
embedding_model: str | None = Field(
default=None,
description="Optional default embedding model for this provider. If not specified, will use system default.",
)
embedding_dimension: int | None = Field(
default=None,
description="Optional embedding dimension override. Only needed for models with variable dimensions (e.g., Matryoshka embeddings). If not specified, will auto-lookup from model registry.",
)
@classmethod
def sample_run_config(cls, db_path: str = "${env.CHROMADB_PATH}", **kwargs: Any) -> dict[str, Any]:
return {"db_path": db_path}
return {
"db_path": db_path,
# Optional: Configure default embedding model for this provider
# "embedding_model": "all-MiniLM-L6-v2",
# "embedding_dimension": 384, # Only needed for variable-dimension models
}

View file

@ -6,7 +6,7 @@
from typing import Any
from pydantic import BaseModel
from pydantic import BaseModel, Field
from llama_stack.providers.utils.kvstore.config import (
KVStoreConfig,
@ -18,6 +18,14 @@ from llama_stack.schema_utils import json_schema_type
@json_schema_type
class FaissVectorIOConfig(BaseModel):
kvstore: KVStoreConfig
embedding_model: str | None = Field(
default=None,
description="Optional default embedding model for this provider. If not specified, will use system default.",
)
embedding_dimension: int | None = Field(
default=None,
description="Optional embedding dimension override. Only needed for models with variable dimensions (e.g., Matryoshka embeddings). If not specified, will auto-lookup from model registry.",
)
@classmethod
def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> dict[str, Any]:
@ -25,5 +33,8 @@ class FaissVectorIOConfig(BaseModel):
"kvstore": SqliteKVStoreConfig.sample_run_config(
__distro_dir__=__distro_dir__,
db_name="faiss_store.db",
)
),
# Optional: Configure default embedding model for this provider
# "embedding_model": "all-MiniLM-L6-v2",
# "embedding_dimension": 384, # Only needed for variable-dimension models
}

View file

@ -20,6 +20,14 @@ class MilvusVectorIOConfig(BaseModel):
db_path: str
kvstore: KVStoreConfig = Field(description="Config for KV store backend (SQLite only for now)")
consistency_level: str = Field(description="The consistency level of the Milvus server", default="Strong")
embedding_model: str | None = Field(
default=None,
description="Optional default embedding model for this provider. If not specified, will use system default.",
)
embedding_dimension: int | None = Field(
default=None,
description="Optional embedding dimension override. Only needed for models with variable dimensions (e.g., Matryoshka embeddings). If not specified, will auto-lookup from model registry.",
)
@classmethod
def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> dict[str, Any]:
@ -29,4 +37,7 @@ class MilvusVectorIOConfig(BaseModel):
__distro_dir__=__distro_dir__,
db_name="milvus_registry.db",
),
# Optional: Configure default embedding model for this provider
# "embedding_model": "all-MiniLM-L6-v2",
# "embedding_dimension": 384, # Only needed for variable-dimension models
}

View file

@ -17,6 +17,14 @@ from llama_stack.providers.utils.kvstore.config import (
class SQLiteVectorIOConfig(BaseModel):
db_path: str = Field(description="Path to the SQLite database file")
kvstore: KVStoreConfig = Field(description="Config for KV store backend (SQLite only for now)")
embedding_model: str | None = Field(
default=None,
description="Optional default embedding model for this provider. If not specified, will use system default.",
)
embedding_dimension: int | None = Field(
default=None,
description="Optional embedding dimension override. Only needed for models with variable dimensions (e.g., Matryoshka embeddings). If not specified, will auto-lookup from model registry.",
)
@classmethod
def sample_run_config(cls, __distro_dir__: str) -> dict[str, Any]:
@ -26,4 +34,7 @@ class SQLiteVectorIOConfig(BaseModel):
__distro_dir__=__distro_dir__,
db_name="sqlite_vec_registry.db",
),
# Optional: Configure default embedding model for this provider
# "embedding_model": "all-MiniLM-L6-v2",
# "embedding_dimension": 384, # Only needed for variable-dimension models
}

View file

@ -6,12 +6,25 @@
from typing import Any
from pydantic import BaseModel
from pydantic import BaseModel, Field
class ChromaVectorIOConfig(BaseModel):
url: str | None
embedding_model: str | None = Field(
default=None,
description="Optional default embedding model for this provider. If not specified, will use system default.",
)
embedding_dimension: int | None = Field(
default=None,
description="Optional embedding dimension override. Only needed for models with variable dimensions (e.g., Matryoshka embeddings). If not specified, will auto-lookup from model registry.",
)
@classmethod
def sample_run_config(cls, url: str = "${env.CHROMADB_URL}", **kwargs: Any) -> dict[str, Any]:
return {"url": url}
return {
"url": url,
# Optional: Configure default embedding model for this provider
# "embedding_model": "all-MiniLM-L6-v2",
# "embedding_dimension": 384, # Only needed for variable-dimension models
}

View file

@ -18,6 +18,14 @@ class MilvusVectorIOConfig(BaseModel):
token: str | None = Field(description="The token of the Milvus server")
consistency_level: str = Field(description="The consistency level of the Milvus server", default="Strong")
kvstore: KVStoreConfig = Field(description="Config for KV store backend")
embedding_model: str | None = Field(
default=None,
description="Optional default embedding model for this provider. If not specified, will use system default.",
)
embedding_dimension: int | None = Field(
default=None,
description="Optional embedding dimension override. Only needed for models with variable dimensions (e.g., Matryoshka embeddings). If not specified, will auto-lookup from model registry.",
)
# This configuration allows additional fields to be passed through to the underlying Milvus client.
# See the [Milvus](https://milvus.io/docs/install-overview.md) documentation for more details about Milvus in general.
@ -32,4 +40,7 @@ class MilvusVectorIOConfig(BaseModel):
__distro_dir__=__distro_dir__,
db_name="milvus_remote_registry.db",
),
# Optional: Configure default embedding model for this provider
# "embedding_model": "all-MiniLM-L6-v2",
# "embedding_dimension": 384, # Only needed for variable-dimension models
}

View file

@ -23,13 +23,21 @@ class PGVectorVectorIOConfig(BaseModel):
user: str | None = Field(default="postgres")
password: str | None = Field(default="mysecretpassword")
kvstore: KVStoreConfig | None = Field(description="Config for KV store backend (SQLite only for now)", default=None)
embedding_model: str | None = Field(
default=None,
description="Optional default embedding model for this provider. If not specified, will use system default.",
)
embedding_dimension: int | None = Field(
default=None,
description="Optional embedding dimension override. Only needed for models with variable dimensions (e.g., Matryoshka embeddings). If not specified, will auto-lookup from model registry.",
)
@classmethod
def sample_run_config(
cls,
__distro_dir__: str,
host: str = "${env.PGVECTOR_HOST:=localhost}",
port: int = "${env.PGVECTOR_PORT:=5432}",
port: int | str = "${env.PGVECTOR_PORT:=5432}",
db: str = "${env.PGVECTOR_DB}",
user: str = "${env.PGVECTOR_USER}",
password: str = "${env.PGVECTOR_PASSWORD}",
@ -45,4 +53,7 @@ class PGVectorVectorIOConfig(BaseModel):
__distro_dir__=__distro_dir__,
db_name="pgvector_registry.db",
),
# Optional: Configure default embedding model for this provider
# "embedding_model": "all-MiniLM-L6-v2",
# "embedding_dimension": 384, # Only needed for variable-dimension models
}

View file

@ -6,7 +6,7 @@
from typing import Any
from pydantic import BaseModel
from pydantic import BaseModel, Field
from llama_stack.schema_utils import json_schema_type
@ -23,9 +23,20 @@ class QdrantVectorIOConfig(BaseModel):
prefix: str | None = None
timeout: int | None = None
host: str | None = None
embedding_model: str | None = Field(
default=None,
description="Optional default embedding model for this provider. If not specified, will use system default.",
)
embedding_dimension: int | None = Field(
default=None,
description="Optional embedding dimension override. Only needed for models with variable dimensions (e.g., Matryoshka embeddings). If not specified, will auto-lookup from model registry.",
)
@classmethod
def sample_run_config(cls, **kwargs: Any) -> dict[str, Any]:
return {
"api_key": "${env.QDRANT_API_KEY}",
# Optional: Configure default embedding model for this provider
# "embedding_model": "all-MiniLM-L6-v2",
# "embedding_dimension": 384, # Only needed for variable-dimension models
}

View file

@ -21,6 +21,15 @@ class WeaviateRequestProviderData(BaseModel):
class WeaviateVectorIOConfig(BaseModel):
embedding_model: str | None = Field(
default=None,
description="Optional default embedding model for this provider. If not specified, will use system default.",
)
embedding_dimension: int | None = Field(
default=None,
description="Optional embedding dimension override. Only needed for models with variable dimensions (e.g., Matryoshka embeddings). If not specified, will auto-lookup from model registry.",
)
@classmethod
def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> dict[str, Any]:
return {
@ -28,4 +37,7 @@ class WeaviateVectorIOConfig(BaseModel):
__distro_dir__=__distro_dir__,
db_name="weaviate_registry.db",
),
# Optional: Configure default embedding model for this provider
# "embedding_model": "all-MiniLM-L6-v2",
# "embedding_dimension": 384, # Only needed for variable-dimension models
}

View file

@ -0,0 +1,5 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.