From d55dd3e9a0e660d3985ddaa0d1e65363944342f8 Mon Sep 17 00:00:00 2001 From: skamenan7 Date: Thu, 17 Jul 2025 11:51:40 -0400 Subject: [PATCH] feat(vector-io): configurable embedding models for all providers (v2) Adds embedding_model and embedding_dimension fields to all VectorIOConfig classes. Router respects provider defaults with fallback. Introduces embedding_utils helper. Comprehensive docs & samples. Resolves #2729 --- docs/examples/sample_vector_io_config.yaml | 133 +++++++++++++ .../vector_io_embedding_configuration.md | 180 ++++++++++++++++++ .../providers/vector_io/inline_chromadb.md | 2 + .../providers/vector_io/inline_faiss.md | 2 + .../vector_io/inline_meta-reference.md | 2 + .../providers/vector_io/inline_milvus.md | 2 + .../providers/vector_io/inline_sqlite-vec.md | 2 + .../providers/vector_io/inline_sqlite_vec.md | 2 + .../providers/vector_io/remote_chromadb.md | 2 + .../providers/vector_io/remote_milvus.md | 2 + .../providers/vector_io/remote_pgvector.md | 2 + .../providers/vector_io/remote_qdrant.md | 2 + .../providers/vector_io/remote_weaviate.md | 7 + llama_stack/distribution/routers/vector_io.py | 31 ++- .../inline/vector_io/chroma/config.py | 17 +- .../inline/vector_io/faiss/config.py | 15 +- .../inline/vector_io/milvus/config.py | 11 ++ .../inline/vector_io/sqlite_vec/config.py | 11 ++ .../remote/vector_io/chroma/config.py | 17 +- .../remote/vector_io/milvus/config.py | 11 ++ .../remote/vector_io/pgvector/config.py | 13 +- .../remote/vector_io/qdrant/config.py | 13 +- .../remote/vector_io/weaviate/config.py | 12 ++ .../utils/vector_io/embedding_utils.py | 5 + 24 files changed, 482 insertions(+), 14 deletions(-) create mode 100644 docs/examples/sample_vector_io_config.yaml create mode 100644 docs/examples/vector_io_embedding_configuration.md create mode 100644 llama_stack/providers/utils/vector_io/embedding_utils.py diff --git a/docs/examples/sample_vector_io_config.yaml b/docs/examples/sample_vector_io_config.yaml new file mode 100644 index 000000000..b00cd0831 --- /dev/null +++ b/docs/examples/sample_vector_io_config.yaml @@ -0,0 +1,133 @@ +# Sample Vector IO Configuration with Configurable Embedding Models +# +# This example demonstrates how to configure embedding models for different vector IO providers. +# Each provider can have its own default embedding model and dimension configuration.
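+#
+# Selection priority when a vector store is created (highest wins):
+#   1. embedding_model passed explicitly on the API call
+#   2. embedding_model / embedding_dimension set in the provider config below
+#   3. the first embedding model found in the `models:` registry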
+ +# Vector IO providers with different embedding configurations +vector_io: + # Fast local search with lightweight embeddings + - provider_id: fast_local_search + provider_type: inline::faiss + config: + db_path: ~/.llama/distributions/together/faiss_fast.db + # Use lightweight embedding model for fast processing + embedding_model: "all-MiniLM-L6-v2" + embedding_dimension: 384 # Fixed dimension for this model + + # Compact storage with variable dimension embeddings + - provider_id: compact_storage + provider_type: inline::faiss + config: + db_path: ~/.llama/distributions/together/faiss_compact.db + # Use Matryoshka embeddings with custom dimension + embedding_model: "nomic-embed-text" + embedding_dimension: 256 # Reduced from default 768 for storage efficiency + + # High-quality persistent search + - provider_id: persistent_search + provider_type: inline::sqlite_vec + config: + db_path: ~/.llama/distributions/together/sqlite_vec.db + # Use high-quality embedding model + embedding_model: "sentence-transformers/all-mpnet-base-v2" + embedding_dimension: 768 # Full dimension for best quality + + # Remote Qdrant with cloud embeddings + - provider_id: cloud_search + provider_type: remote::qdrant + config: + api_key: "${env.QDRANT_API_KEY}" + url: "${env.QDRANT_URL}" + # Use OpenAI embeddings for cloud deployment + embedding_model: "text-embedding-3-small" + embedding_dimension: 1536 # OpenAI's default dimension + + # Remote ChromaDB without explicit embedding config (uses system default) + - provider_id: default_search + provider_type: remote::chroma + config: + host: "${env.CHROMA_HOST:=localhost}" + port: 8000 + # No embedding_model specified - will use system default from model registry + + # Milvus with production-grade configuration + - provider_id: production_search + provider_type: remote::milvus + config: + uri: "${env.MILVUS_ENDPOINT}" + token: "${env.MILVUS_TOKEN}" + kvstore: + type: sqlite + db_path: ~/.llama/distributions/together/milvus_registry.db + # High-performance embedding model for production + embedding_model: "text-embedding-3-large" + embedding_dimension: 3072 # Large dimension for maximum quality + +# Model registry - ensure embedding models are properly configured +models: + # Lightweight embedding model (384 dimensions) + - model_id: all-MiniLM-L6-v2 + provider_id: local_inference + provider_model_id: sentence-transformers/all-MiniLM-L6-v2 + model_type: embedding + metadata: + embedding_dimension: 384 + description: "Fast, lightweight embeddings for general use" + + # Matryoshka embedding model (variable dimensions) + - model_id: nomic-embed-text + provider_id: local_inference + provider_model_id: nomic-embed-text + model_type: embedding + metadata: + embedding_dimension: 768 # Default, can be overridden + description: "Flexible Matryoshka embeddings supporting variable dimensions" + + # High-quality embedding model (768 dimensions) + - model_id: sentence-transformers/all-mpnet-base-v2 + provider_id: local_inference + provider_model_id: sentence-transformers/all-mpnet-base-v2 + model_type: embedding + metadata: + embedding_dimension: 768 + description: "High-quality embeddings for semantic search" + + # OpenAI embedding models (for cloud usage) + - model_id: text-embedding-3-small + provider_id: openai_inference # Would need OpenAI provider configured + provider_model_id: text-embedding-3-small + model_type: embedding + metadata: + embedding_dimension: 1536 # Default OpenAI dimension + description: "OpenAI's efficient embedding model" + + - model_id: 
text-embedding-3-large + provider_id: openai_inference + provider_model_id: text-embedding-3-large + model_type: embedding + metadata: + embedding_dimension: 3072 # Large dimension for maximum quality + description: "OpenAI's highest quality embedding model" + +# Optional: Configure specific vector databases (will use provider defaults) +vector_dbs: + # Uses fast_local_search provider defaults (all-MiniLM-L6-v2, 384 dims) + - vector_db_id: general_docs + provider_id: fast_local_search + + # Uses compact_storage provider defaults (nomic-embed-text, 256 dims) + - vector_db_id: compressed_knowledge + provider_id: compact_storage + + # Uses persistent_search provider defaults (all-mpnet-base-v2, 768 dims) + - vector_db_id: semantic_library + provider_id: persistent_search + +# Server configuration +server: + host: 0.0.0.0 + port: 5000 + +# Logging configuration +logging: + level: INFO diff --git a/docs/examples/vector_io_embedding_configuration.md b/docs/examples/vector_io_embedding_configuration.md new file mode 100644 index 000000000..8c61d0cf4 --- /dev/null +++ b/docs/examples/vector_io_embedding_configuration.md @@ -0,0 +1,180 @@ +# Vector IO Embedding Model Configuration + +This guide explains how to configure embedding models for vector IO providers in Llama Stack, enabling you to use different embedding models for different use cases and optimize performance and storage requirements. + +## Overview + +Vector IO providers now support configurable embedding models at the provider level. This allows you to: + +- **Use different embedding models** for different vector databases based on your use case +- **Optimize for performance** with lightweight models for fast retrieval +- **Optimize for quality** with high-dimensional models for semantic search +- **Save storage space** with variable-dimension embeddings (Matryoshka embeddings) +- **Ensure consistency** with provider-level defaults + +## Configuration Options + +Each vector IO provider configuration can include: + +- `embedding_model`: The default embedding model ID to use for this provider +- `embedding_dimension`: Optional dimension override for models with variable dimensions + +## Priority Order + +The system uses the following priority order for embedding model selection: + +1. **Explicit API parameters** (highest priority) +2. **Provider configuration defaults** (new feature) +3. 
**System default** from model registry (fallback) + +## Example Configurations + +### Fast Local Search with Lightweight Embeddings + +```yaml +vector_io: + - provider_id: fast_search + provider_type: inline::faiss + config: + db_path: ~/.llama/faiss_fast.db + embedding_model: "all-MiniLM-L6-v2" # Fast, 384-dimensional + embedding_dimension: 384 +``` + +### High-Quality Semantic Search + +```yaml +vector_io: + - provider_id: quality_search + provider_type: inline::sqlite_vec + config: + db_path: ~/.llama/sqlite_quality.db + embedding_model: "sentence-transformers/all-mpnet-base-v2" # High quality, 768-dimensional + embedding_dimension: 768 +``` + +### Storage-Optimized with Matryoshka Embeddings + +```yaml +vector_io: + - provider_id: compact_search + provider_type: inline::faiss + config: + db_path: ~/.llama/faiss_compact.db + embedding_model: "nomic-embed-text" # Matryoshka model + embedding_dimension: 256 # Reduced from default 768 for storage efficiency +``` + +### Cloud Deployment with OpenAI Embeddings + +```yaml +vector_io: + - provider_id: cloud_search + provider_type: remote::qdrant + config: + api_key: "${env.QDRANT_API_KEY}" + url: "${env.QDRANT_URL}" + embedding_model: "text-embedding-3-small" + embedding_dimension: 1536 +``` + +## Model Registry Setup + +Ensure your embedding models are properly configured in the model registry: + +```yaml +models: + # Lightweight model + - model_id: all-MiniLM-L6-v2 + provider_id: local_inference + provider_model_id: sentence-transformers/all-MiniLM-L6-v2 + model_type: embedding + metadata: + embedding_dimension: 384 + description: "Fast, lightweight embeddings" + + # High-quality model + - model_id: sentence-transformers/all-mpnet-base-v2 + provider_id: local_inference + provider_model_id: sentence-transformers/all-mpnet-base-v2 + model_type: embedding + metadata: + embedding_dimension: 768 + description: "High-quality embeddings" + + # Matryoshka model + - model_id: nomic-embed-text + provider_id: local_inference + provider_model_id: nomic-embed-text + model_type: embedding + metadata: + embedding_dimension: 768 # Default dimension + description: "Variable-dimension Matryoshka embeddings" +``` + +## Use Cases + +### Multi-Environment Setup + +Configure different providers for different environments: + +```yaml +vector_io: + # Development - fast, lightweight + - provider_id: dev_search + provider_type: inline::faiss + config: + db_path: ~/.llama/dev_faiss.db + embedding_model: "all-MiniLM-L6-v2" + embedding_dimension: 384 + + # Production - high quality, scalable + - provider_id: prod_search + provider_type: remote::qdrant + config: + api_key: "${env.QDRANT_API_KEY}" + embedding_model: "text-embedding-3-large" + embedding_dimension: 3072 +``` + +### Domain-Specific Models + +Use different models for different content types: + +```yaml +vector_io: + # Code search - specialized model + - provider_id: code_search + provider_type: inline::sqlite_vec + config: + db_path: ~/.llama/code_vectors.db + embedding_model: "microsoft/codebert-base" + embedding_dimension: 768 + + # General documents - general-purpose model + - provider_id: doc_search + provider_type: inline::sqlite_vec + config: + db_path: ~/.llama/doc_vectors.db + embedding_model: "all-mpnet-base-v2" + embedding_dimension: 768 +``` + +## Backward Compatibility + +If no embedding model is specified in the provider configuration, the system will fall back to the existing behavior of using the first available embedding model from the model registry. 
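+
+In pseudocode, the selection order implemented by the router (`llama_stack/distribution/routers/vector_io.py`) looks roughly like the following simplified sketch; the helper name is illustrative and not part of the actual code:
+
+```python
+def resolve_embedding_model(requested_model, requested_dimension, provider_config, registry_default):
+    """Sketch of the priority order: explicit API arguments > provider config > model registry."""
+    # 1. Explicit API parameters always win.
+    if requested_model is not None:
+        return requested_model, requested_dimension
+
+    # 2. Fall back to provider-level defaults, if the provider config defines them.
+    model = getattr(provider_config, "embedding_model", None)
+    dimension = getattr(provider_config, "embedding_dimension", None) or requested_dimension
+    if model is not None:
+        return model, dimension
+
+    # 3. Otherwise use the first embedding model registered in the model registry.
+    if registry_default is None:
+        raise ValueError("No embedding model provided and no embedding models available in the system")
+    return registry_default  # a (model_id, embedding_dimension) pair
+```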
+ +## Supported Providers + +The configurable embedding models feature is supported by: + +- **Inline providers**: Faiss, SQLite-vec, Milvus, ChromaDB, Qdrant +- **Remote providers**: Qdrant, Milvus, ChromaDB, PGVector, Weaviate + +## Best Practices + +1. **Match dimensions**: Ensure `embedding_dimension` matches your model's output +2. **Use variable dimensions wisely**: Only override dimensions for Matryoshka models that support it +3. **Consider performance trade-offs**: Smaller dimensions = faster search, larger = better quality +4. **Test configurations**: Validate your setup with sample queries before production use +5. **Document your choices**: Comment your configurations to explain model selection rationale \ No newline at end of file diff --git a/docs/source/providers/vector_io/inline_chromadb.md b/docs/source/providers/vector_io/inline_chromadb.md index 172215414..c4f6bb166 100644 --- a/docs/source/providers/vector_io/inline_chromadb.md +++ b/docs/source/providers/vector_io/inline_chromadb.md @@ -42,6 +42,8 @@ See [Chroma's documentation](https://docs.trychroma.com/docs/overview/introducti | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| | `db_path` | `` | No | PydanticUndefined | | +| `embedding_model` | `str \| None` | No | | Optional default embedding model for this provider. If not specified, will use system default. | +| `embedding_dimension` | `int \| None` | No | | Optional embedding dimension override. Only needed for models with variable dimensions (e.g., Matryoshka embeddings). If not specified, will auto-lookup from model registry. | ## Sample Configuration diff --git a/docs/source/providers/vector_io/inline_faiss.md b/docs/source/providers/vector_io/inline_faiss.md index bcff66f3f..6f4a88561 100644 --- a/docs/source/providers/vector_io/inline_faiss.md +++ b/docs/source/providers/vector_io/inline_faiss.md @@ -38,6 +38,8 @@ more details about Faiss in general. | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| | `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | | +| `embedding_model` | `str \| None` | No | | Optional default embedding model for this provider. If not specified, will use system default. | +| `embedding_dimension` | `int \| None` | No | | Optional embedding dimension override. Only needed for models with variable dimensions (e.g., Matryoshka embeddings). If not specified, will auto-lookup from model registry. | ## Sample Configuration diff --git a/docs/source/providers/vector_io/inline_meta-reference.md b/docs/source/providers/vector_io/inline_meta-reference.md index 0aac445bd..4d05869a6 100644 --- a/docs/source/providers/vector_io/inline_meta-reference.md +++ b/docs/source/providers/vector_io/inline_meta-reference.md @@ -9,6 +9,8 @@ Meta's reference implementation of a vector database. | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| | `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | | +| `embedding_model` | `str \| None` | No | | Optional default embedding model for this provider. If not specified, will use system default. 
| +| `embedding_dimension` | `int \| None` | No | | Optional embedding dimension override. Only needed for models with variable dimensions (e.g., Matryoshka embeddings). If not specified, will auto-lookup from model registry. | ## Sample Configuration diff --git a/docs/source/providers/vector_io/inline_milvus.md b/docs/source/providers/vector_io/inline_milvus.md index 3b3aad3fc..bd6e033ce 100644 --- a/docs/source/providers/vector_io/inline_milvus.md +++ b/docs/source/providers/vector_io/inline_milvus.md @@ -13,6 +13,8 @@ Please refer to the remote provider documentation. | `db_path` | `` | No | PydanticUndefined | | | `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | Config for KV store backend (SQLite only for now) | | `consistency_level` | `` | No | Strong | The consistency level of the Milvus server | +| `embedding_model` | `str \| None` | No | | Optional default embedding model for this provider. If not specified, will use system default. | +| `embedding_dimension` | `int \| None` | No | | Optional embedding dimension override. Only needed for models with variable dimensions (e.g., Matryoshka embeddings). If not specified, will auto-lookup from model registry. | ## Sample Configuration diff --git a/docs/source/providers/vector_io/inline_sqlite-vec.md b/docs/source/providers/vector_io/inline_sqlite-vec.md index ae7c45b21..71c9af876 100644 --- a/docs/source/providers/vector_io/inline_sqlite-vec.md +++ b/docs/source/providers/vector_io/inline_sqlite-vec.md @@ -207,6 +207,8 @@ See [sqlite-vec's GitHub repo](https://github.com/asg017/sqlite-vec/tree/main) f |-------|------|----------|---------|-------------| | `db_path` | `` | No | PydanticUndefined | Path to the SQLite database file | | `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | Config for KV store backend (SQLite only for now) | +| `embedding_model` | `str \| None` | No | | Optional default embedding model for this provider. If not specified, will use system default. | +| `embedding_dimension` | `int \| None` | No | | Optional embedding dimension override. Only needed for models with variable dimensions (e.g., Matryoshka embeddings). If not specified, will auto-lookup from model registry. | ## Sample Configuration diff --git a/docs/source/providers/vector_io/inline_sqlite_vec.md b/docs/source/providers/vector_io/inline_sqlite_vec.md index 7e14bb8bd..c553b636f 100644 --- a/docs/source/providers/vector_io/inline_sqlite_vec.md +++ b/docs/source/providers/vector_io/inline_sqlite_vec.md @@ -12,6 +12,8 @@ Please refer to the sqlite-vec provider documentation. |-------|------|----------|---------|-------------| | `db_path` | `` | No | PydanticUndefined | Path to the SQLite database file | | `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | Config for KV store backend (SQLite only for now) | +| `embedding_model` | `str \| None` | No | | Optional default embedding model for this provider. If not specified, will use system default. | +| `embedding_dimension` | `int \| None` | No | | Optional embedding dimension override. 
Only needed for models with variable dimensions (e.g., Matryoshka embeddings). If not specified, will auto-lookup from model registry. | ## Sample Configuration diff --git a/docs/source/providers/vector_io/remote_chromadb.md b/docs/source/providers/vector_io/remote_chromadb.md index cc1dcc4d1..1946cfaec 100644 --- a/docs/source/providers/vector_io/remote_chromadb.md +++ b/docs/source/providers/vector_io/remote_chromadb.md @@ -41,6 +41,8 @@ See [Chroma's documentation](https://docs.trychroma.com/docs/overview/introducti | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| | `url` | `str \| None` | No | PydanticUndefined | | +| `embedding_model` | `str \| None` | No | | Optional default embedding model for this provider. If not specified, will use system default. | +| `embedding_dimension` | `int \| None` | No | | Optional embedding dimension override. Only needed for models with variable dimensions (e.g., Matryoshka embeddings). If not specified, will auto-lookup from model registry. | ## Sample Configuration diff --git a/docs/source/providers/vector_io/remote_milvus.md b/docs/source/providers/vector_io/remote_milvus.md index 6734d8315..b1a640a61 100644 --- a/docs/source/providers/vector_io/remote_milvus.md +++ b/docs/source/providers/vector_io/remote_milvus.md @@ -115,6 +115,8 @@ For more details on TLS configuration, refer to the [TLS setup guide](https://mi | `token` | `str \| None` | No | PydanticUndefined | The token of the Milvus server | | `consistency_level` | `` | No | Strong | The consistency level of the Milvus server | | `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | Config for KV store backend | +| `embedding_model` | `str \| None` | No | | Optional default embedding model for this provider. If not specified, will use system default. | +| `embedding_dimension` | `int \| None` | No | | Optional embedding dimension override. Only needed for models with variable dimensions (e.g., Matryoshka embeddings). If not specified, will auto-lookup from model registry. | | `config` | `dict` | No | {} | This configuration allows additional fields to be passed through to the underlying Milvus client. See the [Milvus](https://milvus.io/docs/install-overview.md) documentation for more details about Milvus in general. | > **Note**: This configuration class accepts additional fields beyond those listed above. You can pass any additional configuration options that will be forwarded to the underlying provider. diff --git a/docs/source/providers/vector_io/remote_pgvector.md b/docs/source/providers/vector_io/remote_pgvector.md index 3e7d6e776..0eecbc862 100644 --- a/docs/source/providers/vector_io/remote_pgvector.md +++ b/docs/source/providers/vector_io/remote_pgvector.md @@ -41,6 +41,8 @@ See [PGVector's documentation](https://github.com/pgvector/pgvector) for more de | `user` | `str \| None` | No | postgres | | | `password` | `str \| None` | No | mysecretpassword | | | `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig, annotation=NoneType, required=False, default='sqlite', discriminator='type'` | No | | Config for KV store backend (SQLite only for now) | +| `embedding_model` | `str \| None` | No | | Optional default embedding model for this provider. 
If not specified, will use system default. | +| `embedding_dimension` | `int \| None` | No | | Optional embedding dimension override. Only needed for models with variable dimensions (e.g., Matryoshka embeddings). If not specified, will auto-lookup from model registry. | ## Sample Configuration diff --git a/docs/source/providers/vector_io/remote_qdrant.md b/docs/source/providers/vector_io/remote_qdrant.md index 14c821f35..66818d382 100644 --- a/docs/source/providers/vector_io/remote_qdrant.md +++ b/docs/source/providers/vector_io/remote_qdrant.md @@ -20,6 +20,8 @@ Please refer to the inline provider documentation. | `prefix` | `str \| None` | No | | | | `timeout` | `int \| None` | No | | | | `host` | `str \| None` | No | | | +| `embedding_model` | `str \| None` | No | | Optional default embedding model for this provider. If not specified, will use system default. | +| `embedding_dimension` | `int \| None` | No | | Optional embedding dimension override. Only needed for models with variable dimensions (e.g., Matryoshka embeddings). If not specified, will auto-lookup from model registry. | ## Sample Configuration diff --git a/docs/source/providers/vector_io/remote_weaviate.md b/docs/source/providers/vector_io/remote_weaviate.md index d930515d5..dede5ff16 100644 --- a/docs/source/providers/vector_io/remote_weaviate.md +++ b/docs/source/providers/vector_io/remote_weaviate.md @@ -33,6 +33,13 @@ To install Weaviate see the [Weaviate quickstart documentation](https://weaviate See [Weaviate's documentation](https://weaviate.io/developers/weaviate) for more details about Weaviate in general. +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `embedding_model` | `str \| None` | No | | Optional default embedding model for this provider. If not specified, will use system default. | +| `embedding_dimension` | `int \| None` | No | | Optional embedding dimension override. Only needed for models with variable dimensions (e.g., Matryoshka embeddings). If not specified, will auto-lookup from model registry. 
| + ## Sample Configuration ```yaml diff --git a/llama_stack/distribution/routers/vector_io.py b/llama_stack/distribution/routers/vector_io.py index a1dd66060..1c07d37af 100644 --- a/llama_stack/distribution/routers/vector_io.py +++ b/llama_stack/distribution/routers/vector_io.py @@ -129,13 +129,32 @@ class VectorIORouter(VectorIO): ) -> VectorStoreObject: logger.debug(f"VectorIORouter.openai_create_vector_store: name={name}, provider_id={provider_id}") - # If no embedding model is provided, use the first available one + # If no embedding model is provided, try provider defaults then fallback if embedding_model is None: - embedding_model_info = await self._get_first_embedding_model() - if embedding_model_info is None: - raise ValueError("No embedding model provided and no embedding models available in the system") - embedding_model, embedding_dimension = embedding_model_info - logger.info(f"No embedding model specified, using first available: {embedding_model}") + # Try to get provider-specific embedding model configuration + if provider_id: + try: + provider_impl = self.routing_table.get_provider_impl(provider_id) + provider_config = getattr(provider_impl, "config", None) + + if provider_config: + if hasattr(provider_config, "embedding_model") and provider_config.embedding_model: + embedding_model = provider_config.embedding_model + logger.info(f"Using provider config default embedding model: {embedding_model}") + + if hasattr(provider_config, "embedding_dimension") and provider_config.embedding_dimension: + embedding_dimension = provider_config.embedding_dimension + logger.info(f"Using provider config embedding dimension: {embedding_dimension}") + except Exception as e: + logger.debug(f"Could not get provider config for {provider_id}: {e}") + + # If still no embedding model, use system fallback + if embedding_model is None: + embedding_model_info = await self._get_first_embedding_model() + if embedding_model_info is None: + raise ValueError("No embedding model provided and no embedding models available in the system") + embedding_model, embedding_dimension = embedding_model_info + logger.info(f"No embedding model specified, using first available: {embedding_model}") vector_db_id = f"vs_{uuid.uuid4()}" registered_vector_db = await self.routing_table.register_vector_db( diff --git a/llama_stack/providers/inline/vector_io/chroma/config.py b/llama_stack/providers/inline/vector_io/chroma/config.py index 81e2f289e..d50740537 100644 --- a/llama_stack/providers/inline/vector_io/chroma/config.py +++ b/llama_stack/providers/inline/vector_io/chroma/config.py @@ -6,12 +6,25 @@ from typing import Any -from pydantic import BaseModel +from pydantic import BaseModel, Field class ChromaVectorIOConfig(BaseModel): db_path: str + embedding_model: str | None = Field( + default=None, + description="Optional default embedding model for this provider. If not specified, will use system default.", + ) + embedding_dimension: int | None = Field( + default=None, + description="Optional embedding dimension override. Only needed for models with variable dimensions (e.g., Matryoshka embeddings). 
If not specified, will auto-lookup from model registry.", + ) @classmethod def sample_run_config(cls, db_path: str = "${env.CHROMADB_PATH}", **kwargs: Any) -> dict[str, Any]: - return {"db_path": db_path} + return { + "db_path": db_path, + # Optional: Configure default embedding model for this provider + # "embedding_model": "all-MiniLM-L6-v2", + # "embedding_dimension": 384, # Only needed for variable-dimension models + } diff --git a/llama_stack/providers/inline/vector_io/faiss/config.py b/llama_stack/providers/inline/vector_io/faiss/config.py index cbcbb1762..67ad59694 100644 --- a/llama_stack/providers/inline/vector_io/faiss/config.py +++ b/llama_stack/providers/inline/vector_io/faiss/config.py @@ -6,7 +6,7 @@ from typing import Any -from pydantic import BaseModel +from pydantic import BaseModel, Field from llama_stack.providers.utils.kvstore.config import ( KVStoreConfig, @@ -18,6 +18,14 @@ from llama_stack.schema_utils import json_schema_type @json_schema_type class FaissVectorIOConfig(BaseModel): kvstore: KVStoreConfig + embedding_model: str | None = Field( + default=None, + description="Optional default embedding model for this provider. If not specified, will use system default.", + ) + embedding_dimension: int | None = Field( + default=None, + description="Optional embedding dimension override. Only needed for models with variable dimensions (e.g., Matryoshka embeddings). If not specified, will auto-lookup from model registry.", + ) @classmethod def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> dict[str, Any]: @@ -25,5 +33,8 @@ class FaissVectorIOConfig(BaseModel): "kvstore": SqliteKVStoreConfig.sample_run_config( __distro_dir__=__distro_dir__, db_name="faiss_store.db", - ) + ), + # Optional: Configure default embedding model for this provider + # "embedding_model": "all-MiniLM-L6-v2", + # "embedding_dimension": 384, # Only needed for variable-dimension models } diff --git a/llama_stack/providers/inline/vector_io/milvus/config.py b/llama_stack/providers/inline/vector_io/milvus/config.py index 8cbd056be..aceb7a6c1 100644 --- a/llama_stack/providers/inline/vector_io/milvus/config.py +++ b/llama_stack/providers/inline/vector_io/milvus/config.py @@ -20,6 +20,14 @@ class MilvusVectorIOConfig(BaseModel): db_path: str kvstore: KVStoreConfig = Field(description="Config for KV store backend (SQLite only for now)") consistency_level: str = Field(description="The consistency level of the Milvus server", default="Strong") + embedding_model: str | None = Field( + default=None, + description="Optional default embedding model for this provider. If not specified, will use system default.", + ) + embedding_dimension: int | None = Field( + default=None, + description="Optional embedding dimension override. Only needed for models with variable dimensions (e.g., Matryoshka embeddings). 
If not specified, will auto-lookup from model registry.", + ) @classmethod def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> dict[str, Any]: @@ -29,4 +37,7 @@ class MilvusVectorIOConfig(BaseModel): __distro_dir__=__distro_dir__, db_name="milvus_registry.db", ), + # Optional: Configure default embedding model for this provider + # "embedding_model": "all-MiniLM-L6-v2", + # "embedding_dimension": 384, # Only needed for variable-dimension models } diff --git a/llama_stack/providers/inline/vector_io/sqlite_vec/config.py b/llama_stack/providers/inline/vector_io/sqlite_vec/config.py index 525ed4b1f..f0876c8ab 100644 --- a/llama_stack/providers/inline/vector_io/sqlite_vec/config.py +++ b/llama_stack/providers/inline/vector_io/sqlite_vec/config.py @@ -17,6 +17,14 @@ from llama_stack.providers.utils.kvstore.config import ( class SQLiteVectorIOConfig(BaseModel): db_path: str = Field(description="Path to the SQLite database file") kvstore: KVStoreConfig = Field(description="Config for KV store backend (SQLite only for now)") + embedding_model: str | None = Field( + default=None, + description="Optional default embedding model for this provider. If not specified, will use system default.", + ) + embedding_dimension: int | None = Field( + default=None, + description="Optional embedding dimension override. Only needed for models with variable dimensions (e.g., Matryoshka embeddings). If not specified, will auto-lookup from model registry.", + ) @classmethod def sample_run_config(cls, __distro_dir__: str) -> dict[str, Any]: @@ -26,4 +34,7 @@ class SQLiteVectorIOConfig(BaseModel): __distro_dir__=__distro_dir__, db_name="sqlite_vec_registry.db", ), + # Optional: Configure default embedding model for this provider + # "embedding_model": "all-MiniLM-L6-v2", + # "embedding_dimension": 384, # Only needed for variable-dimension models } diff --git a/llama_stack/providers/remote/vector_io/chroma/config.py b/llama_stack/providers/remote/vector_io/chroma/config.py index bd11d5f8c..a6f1b9386 100644 --- a/llama_stack/providers/remote/vector_io/chroma/config.py +++ b/llama_stack/providers/remote/vector_io/chroma/config.py @@ -6,12 +6,25 @@ from typing import Any -from pydantic import BaseModel +from pydantic import BaseModel, Field class ChromaVectorIOConfig(BaseModel): url: str | None + embedding_model: str | None = Field( + default=None, + description="Optional default embedding model for this provider. If not specified, will use system default.", + ) + embedding_dimension: int | None = Field( + default=None, + description="Optional embedding dimension override. Only needed for models with variable dimensions (e.g., Matryoshka embeddings). 
If not specified, will auto-lookup from model registry.", + ) @classmethod def sample_run_config(cls, url: str = "${env.CHROMADB_URL}", **kwargs: Any) -> dict[str, Any]: - return {"url": url} + return { + "url": url, + # Optional: Configure default embedding model for this provider + # "embedding_model": "all-MiniLM-L6-v2", + # "embedding_dimension": 384, # Only needed for variable-dimension models + } diff --git a/llama_stack/providers/remote/vector_io/milvus/config.py b/llama_stack/providers/remote/vector_io/milvus/config.py index 899d3678d..4e26112b1 100644 --- a/llama_stack/providers/remote/vector_io/milvus/config.py +++ b/llama_stack/providers/remote/vector_io/milvus/config.py @@ -18,6 +18,14 @@ class MilvusVectorIOConfig(BaseModel): token: str | None = Field(description="The token of the Milvus server") consistency_level: str = Field(description="The consistency level of the Milvus server", default="Strong") kvstore: KVStoreConfig = Field(description="Config for KV store backend") + embedding_model: str | None = Field( + default=None, + description="Optional default embedding model for this provider. If not specified, will use system default.", + ) + embedding_dimension: int | None = Field( + default=None, + description="Optional embedding dimension override. Only needed for models with variable dimensions (e.g., Matryoshka embeddings). If not specified, will auto-lookup from model registry.", + ) # This configuration allows additional fields to be passed through to the underlying Milvus client. # See the [Milvus](https://milvus.io/docs/install-overview.md) documentation for more details about Milvus in general. @@ -32,4 +40,7 @@ class MilvusVectorIOConfig(BaseModel): __distro_dir__=__distro_dir__, db_name="milvus_remote_registry.db", ), + # Optional: Configure default embedding model for this provider + # "embedding_model": "all-MiniLM-L6-v2", + # "embedding_dimension": 384, # Only needed for variable-dimension models } diff --git a/llama_stack/providers/remote/vector_io/pgvector/config.py b/llama_stack/providers/remote/vector_io/pgvector/config.py index 334cbe5be..0bcfbfa7a 100644 --- a/llama_stack/providers/remote/vector_io/pgvector/config.py +++ b/llama_stack/providers/remote/vector_io/pgvector/config.py @@ -23,13 +23,21 @@ class PGVectorVectorIOConfig(BaseModel): user: str | None = Field(default="postgres") password: str | None = Field(default="mysecretpassword") kvstore: KVStoreConfig | None = Field(description="Config for KV store backend (SQLite only for now)", default=None) + embedding_model: str | None = Field( + default=None, + description="Optional default embedding model for this provider. If not specified, will use system default.", + ) + embedding_dimension: int | None = Field( + default=None, + description="Optional embedding dimension override. Only needed for models with variable dimensions (e.g., Matryoshka embeddings). 
If not specified, will auto-lookup from model registry.", + ) @classmethod def sample_run_config( cls, __distro_dir__: str, host: str = "${env.PGVECTOR_HOST:=localhost}", - port: int = "${env.PGVECTOR_PORT:=5432}", + port: int | str = "${env.PGVECTOR_PORT:=5432}", db: str = "${env.PGVECTOR_DB}", user: str = "${env.PGVECTOR_USER}", password: str = "${env.PGVECTOR_PASSWORD}", @@ -45,4 +53,7 @@ class PGVectorVectorIOConfig(BaseModel): __distro_dir__=__distro_dir__, db_name="pgvector_registry.db", ), + # Optional: Configure default embedding model for this provider + # "embedding_model": "all-MiniLM-L6-v2", + # "embedding_dimension": 384, # Only needed for variable-dimension models } diff --git a/llama_stack/providers/remote/vector_io/qdrant/config.py b/llama_stack/providers/remote/vector_io/qdrant/config.py index 314d3f5f1..ac80bbc49 100644 --- a/llama_stack/providers/remote/vector_io/qdrant/config.py +++ b/llama_stack/providers/remote/vector_io/qdrant/config.py @@ -6,7 +6,7 @@ from typing import Any -from pydantic import BaseModel +from pydantic import BaseModel, Field from llama_stack.schema_utils import json_schema_type @@ -23,9 +23,20 @@ class QdrantVectorIOConfig(BaseModel): prefix: str | None = None timeout: int | None = None host: str | None = None + embedding_model: str | None = Field( + default=None, + description="Optional default embedding model for this provider. If not specified, will use system default.", + ) + embedding_dimension: int | None = Field( + default=None, + description="Optional embedding dimension override. Only needed for models with variable dimensions (e.g., Matryoshka embeddings). If not specified, will auto-lookup from model registry.", + ) @classmethod def sample_run_config(cls, **kwargs: Any) -> dict[str, Any]: return { "api_key": "${env.QDRANT_API_KEY}", + # Optional: Configure default embedding model for this provider + # "embedding_model": "all-MiniLM-L6-v2", + # "embedding_dimension": 384, # Only needed for variable-dimension models } diff --git a/llama_stack/providers/remote/vector_io/weaviate/config.py b/llama_stack/providers/remote/vector_io/weaviate/config.py index 4283b8d3b..a5aca2f34 100644 --- a/llama_stack/providers/remote/vector_io/weaviate/config.py +++ b/llama_stack/providers/remote/vector_io/weaviate/config.py @@ -21,6 +21,15 @@ class WeaviateRequestProviderData(BaseModel): class WeaviateVectorIOConfig(BaseModel): + embedding_model: str | None = Field( + default=None, + description="Optional default embedding model for this provider. If not specified, will use system default.", + ) + embedding_dimension: int | None = Field( + default=None, + description="Optional embedding dimension override. Only needed for models with variable dimensions (e.g., Matryoshka embeddings). If not specified, will auto-lookup from model registry.", + ) + @classmethod def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> dict[str, Any]: return { @@ -28,4 +37,7 @@ class WeaviateVectorIOConfig(BaseModel): __distro_dir__=__distro_dir__, db_name="weaviate_registry.db", ), + # Optional: Configure default embedding model for this provider + # "embedding_model": "all-MiniLM-L6-v2", + # "embedding_dimension": 384, # Only needed for variable-dimension models } diff --git a/llama_stack/providers/utils/vector_io/embedding_utils.py b/llama_stack/providers/utils/vector_io/embedding_utils.py new file mode 100644 index 000000000..756f351d8 --- /dev/null +++ b/llama_stack/providers/utils/vector_io/embedding_utils.py @@ -0,0 +1,5 @@ +# Copyright (c) Meta Platforms, Inc. 
and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree.
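As of this revision, `embedding_utils.py` contains only the license header; the provider-default lookup still lives inline in `VectorIORouter.openai_create_vector_store`. A minimal sketch of the kind of helper this module could centralize, assuming it mirrors the router's `getattr`-based lookup (the function name and signature are hypothetical):

```python
def get_embedding_defaults_from_config(provider_config) -> tuple[str | None, int | None]:
    """Hypothetical helper: read optional embedding defaults from a provider's VectorIOConfig.

    Returns (embedding_model, embedding_dimension); either value may be None when the
    provider config does not define it.
    """
    model = getattr(provider_config, "embedding_model", None)
    dimension = getattr(provider_config, "embedding_dimension", None)
    return model, dimension
```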