mirror of https://github.com/meta-llama/llama-stack.git
synced 2025-12-23 09:02:27 +00:00
feat(vector-io): configurable embedding models for all providers (v2)

Adds embedding_model and embedding_dimension fields to all VectorIOConfig classes.
Router respects provider defaults with fallback.
Introduces embedding_utils helper.
Comprehensive docs & samples.

Resolves #2729
This commit is contained in:
parent c8f274347d
commit d55dd3e9a0
24 changed files with 482 additions and 14 deletions
133 docs/examples/sample_vector_io_config.yaml Normal file
@@ -0,0 +1,133 @@
# Sample Vector IO Configuration with Configurable Embedding Models
#
# This example demonstrates how to configure embedding models for different vector IO providers.
# Each provider can have its own default embedding model and dimension configuration.

# Vector IO providers with different embedding configurations
vector_io:
  # Fast local search with lightweight embeddings
  - provider_id: fast_local_search
    provider_type: inline::faiss
    config:
      db_path: ~/.llama/distributions/together/faiss_fast.db
      # Use a lightweight embedding model for fast processing
      embedding_model: "all-MiniLM-L6-v2"
      embedding_dimension: 384  # Fixed dimension for this model

  # Compact storage with variable-dimension embeddings
  - provider_id: compact_storage
    provider_type: inline::faiss
    config:
      db_path: ~/.llama/distributions/together/faiss_compact.db
      # Use Matryoshka embeddings with a custom dimension
      embedding_model: "nomic-embed-text"
      embedding_dimension: 256  # Reduced from the default 768 for storage efficiency

  # High-quality persistent search
  - provider_id: persistent_search
    provider_type: inline::sqlite_vec
    config:
      db_path: ~/.llama/distributions/together/sqlite_vec.db
      # Use a high-quality embedding model
      embedding_model: "sentence-transformers/all-mpnet-base-v2"
      embedding_dimension: 768  # Full dimension for best quality

  # Remote Qdrant with cloud embeddings
  - provider_id: cloud_search
    provider_type: remote::qdrant
    config:
      api_key: "${env.QDRANT_API_KEY}"
      url: "${env.QDRANT_URL}"
      # Use OpenAI embeddings for cloud deployment
      embedding_model: "text-embedding-3-small"
      embedding_dimension: 1536  # OpenAI's default dimension

  # Remote ChromaDB without explicit embedding config (uses the system default)
  - provider_id: default_search
    provider_type: remote::chroma
    config:
      host: "${env.CHROMA_HOST:=localhost}"
      port: 8000
      # No embedding_model specified: the system default from the model registry is used

  # Milvus with production-grade configuration
  - provider_id: production_search
    provider_type: remote::milvus
    config:
      uri: "${env.MILVUS_ENDPOINT}"
      token: "${env.MILVUS_TOKEN}"
      kvstore:
        type: sqlite
        db_path: ~/.llama/distributions/together/milvus_registry.db
      # High-performance embedding model for production
      embedding_model: "text-embedding-3-large"
      embedding_dimension: 3072  # Large dimension for maximum quality

# Model registry: ensure embedding models are properly configured
models:
  # Lightweight embedding model (384 dimensions)
  - model_id: all-MiniLM-L6-v2
    provider_id: local_inference
    provider_model_id: sentence-transformers/all-MiniLM-L6-v2
    model_type: embedding
    metadata:
      embedding_dimension: 384
      description: "Fast, lightweight embeddings for general use"

  # Matryoshka embedding model (variable dimensions)
  - model_id: nomic-embed-text
    provider_id: local_inference
    provider_model_id: nomic-embed-text
    model_type: embedding
    metadata:
      embedding_dimension: 768  # Default, can be overridden
      description: "Flexible Matryoshka embeddings supporting variable dimensions"

  # High-quality embedding model (768 dimensions)
  - model_id: sentence-transformers/all-mpnet-base-v2
    provider_id: local_inference
    provider_model_id: sentence-transformers/all-mpnet-base-v2
    model_type: embedding
    metadata:
      embedding_dimension: 768
      description: "High-quality embeddings for semantic search"

  # OpenAI embedding models (for cloud usage)
  - model_id: text-embedding-3-small
    provider_id: openai_inference  # Requires an OpenAI provider to be configured
    provider_model_id: text-embedding-3-small
    model_type: embedding
    metadata:
      embedding_dimension: 1536  # Default OpenAI dimension
      description: "OpenAI's efficient embedding model"

  - model_id: text-embedding-3-large
    provider_id: openai_inference
    provider_model_id: text-embedding-3-large
    model_type: embedding
    metadata:
      embedding_dimension: 3072  # Large dimension for maximum quality
      description: "OpenAI's highest-quality embedding model"

# Optional: configure specific vector databases (they will use provider defaults)
vector_dbs:
  # Uses fast_local_search provider defaults (all-MiniLM-L6-v2, 384 dims)
  - vector_db_id: general_docs
    provider_id: fast_local_search

  # Uses compact_storage provider defaults (nomic-embed-text, 256 dims)
  - vector_db_id: compressed_knowledge
    provider_id: compact_storage

  # Uses persistent_search provider defaults (all-mpnet-base-v2, 768 dims)
  - vector_db_id: semantic_library
    provider_id: persistent_search

# Server configuration
server:
  host: 0.0.0.0
  port: 5000

# Logging configuration
logging:
  level: INFO
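A quick way to sanity-check a config like the sample above is to load it and list each provider's embedding settings. The sketch below is illustrative only: the file path and key names follow the sample YAML, and nothing here is llama-stack API.

```python
# Illustrative only: load the sample config and print each vector IO
# provider's embedding settings, labeling providers that rely on the
# system default. Key names follow the sample YAML above.
import yaml

with open("docs/examples/sample_vector_io_config.yaml") as f:
    cfg = yaml.safe_load(f)

for provider in cfg["vector_io"]:
    conf = provider.get("config", {})
    print(
        provider["provider_id"],
        conf.get("embedding_model", "<system default>"),
        conf.get("embedding_dimension", "-"),
    )
```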
180 docs/examples/vector_io_embedding_configuration.md Normal file

@@ -0,0 +1,180 @@
# Vector IO Embedding Model Configuration
This guide explains how to configure embedding models for vector IO providers in Llama Stack, enabling you to choose different embedding models for different use cases and to balance performance against storage requirements.

## Overview

Vector IO providers now support configurable embedding models at the provider level. This allows you to:

- **Use different embedding models** for different vector databases based on your use case
- **Optimize for performance** with lightweight models for fast retrieval
- **Optimize for quality** with high-dimensional models for semantic search
- **Save storage space** with variable-dimension embeddings (Matryoshka embeddings)
- **Ensure consistency** with provider-level defaults

## Configuration Options

Each vector IO provider configuration can include two new fields (see the config-class sketch after this list):

- `embedding_model`: the default embedding model ID to use for this provider
- `embedding_dimension`: an optional dimension override for models with variable dimensions
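In code, these options amount to two optional fields on each provider's config class. A minimal sketch, assuming a Pydantic model as llama-stack uses for provider configs; the class name below is hypothetical, and only the two field names come from this change:

```python
# Minimal sketch of the new provider config fields. The class name is
# hypothetical; only `embedding_model` and `embedding_dimension` come
# from this change.
from typing import Optional

from pydantic import BaseModel


class FaissVectorIOConfigSketch(BaseModel):
    db_path: str
    # Default embedding model ID for vector DBs created on this provider.
    embedding_model: Optional[str] = None
    # Optional dimension override, e.g. for Matryoshka models.
    embedding_dimension: Optional[int] = None
```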
## Priority Order

The system uses the following priority order for embedding model selection (a rough sketch of the logic follows the list):

1. **Explicit API parameters** (highest priority)
2. **Provider configuration defaults** (new in this change)
3. **System default** from the model registry (fallback)
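The commit introduces an `embedding_utils` helper for this fallback; its actual implementation is not reproduced here, but the priority order above can be sketched roughly as follows (all names in this snippet are illustrative):

```python
# Rough sketch of the priority order above; all names here are
# illustrative, not the actual embedding_utils implementation.
from typing import Any, Optional


def resolve_embedding_model(
    requested_model: Optional[str],
    provider_config: Any,
    registry_models: list[Any],
) -> str:
    # 1. Explicit API parameters take highest priority.
    if requested_model is not None:
        return requested_model
    # 2. Then the provider's configured default.
    model = getattr(provider_config, "embedding_model", None)
    if model is not None:
        return model
    # 3. Finally, fall back to the first embedding model in the registry.
    for m in registry_models:
        if m.model_type == "embedding":
            return m.model_id
    raise ValueError("No embedding model available for vector IO")
```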
## Example Configurations

### Fast Local Search with Lightweight Embeddings

```yaml
vector_io:
  - provider_id: fast_search
    provider_type: inline::faiss
    config:
      db_path: ~/.llama/faiss_fast.db
      embedding_model: "all-MiniLM-L6-v2"  # Fast, 384-dimensional
      embedding_dimension: 384
```

### High-Quality Semantic Search

```yaml
vector_io:
  - provider_id: quality_search
    provider_type: inline::sqlite_vec
    config:
      db_path: ~/.llama/sqlite_quality.db
      embedding_model: "sentence-transformers/all-mpnet-base-v2"  # High quality, 768-dimensional
      embedding_dimension: 768
```

### Storage-Optimized with Matryoshka Embeddings

```yaml
vector_io:
  - provider_id: compact_search
    provider_type: inline::faiss
    config:
      db_path: ~/.llama/faiss_compact.db
      embedding_model: "nomic-embed-text"  # Matryoshka model
      embedding_dimension: 256  # Reduced from the default 768 for storage efficiency
```

### Cloud Deployment with OpenAI Embeddings

```yaml
vector_io:
  - provider_id: cloud_search
    provider_type: remote::qdrant
    config:
      api_key: "${env.QDRANT_API_KEY}"
      url: "${env.QDRANT_URL}"
      embedding_model: "text-embedding-3-small"
      embedding_dimension: 1536
```

## Model Registry Setup

Ensure your embedding models are properly configured in the model registry:

```yaml
models:
  # Lightweight model
  - model_id: all-MiniLM-L6-v2
    provider_id: local_inference
    provider_model_id: sentence-transformers/all-MiniLM-L6-v2
    model_type: embedding
    metadata:
      embedding_dimension: 384
      description: "Fast, lightweight embeddings"

  # High-quality model
  - model_id: sentence-transformers/all-mpnet-base-v2
    provider_id: local_inference
    provider_model_id: sentence-transformers/all-mpnet-base-v2
    model_type: embedding
    metadata:
      embedding_dimension: 768
      description: "High-quality embeddings"

  # Matryoshka model
  - model_id: nomic-embed-text
    provider_id: local_inference
    provider_model_id: nomic-embed-text
    model_type: embedding
    metadata:
      embedding_dimension: 768  # Default dimension
      description: "Variable-dimension Matryoshka embeddings"
```
## Use Cases

### Multi-Environment Setup

Configure different providers for different environments:

```yaml
vector_io:
  # Development: fast, lightweight
  - provider_id: dev_search
    provider_type: inline::faiss
    config:
      db_path: ~/.llama/dev_faiss.db
      embedding_model: "all-MiniLM-L6-v2"
      embedding_dimension: 384

  # Production: high quality, scalable
  - provider_id: prod_search
    provider_type: remote::qdrant
    config:
      api_key: "${env.QDRANT_API_KEY}"
      embedding_model: "text-embedding-3-large"
      embedding_dimension: 3072
```

### Domain-Specific Models

Use different models for different content types:

```yaml
vector_io:
  # Code search: specialized model
  - provider_id: code_search
    provider_type: inline::sqlite_vec
    config:
      db_path: ~/.llama/code_vectors.db
      embedding_model: "microsoft/codebert-base"
      embedding_dimension: 768

  # General documents: general-purpose model
  - provider_id: doc_search
    provider_type: inline::sqlite_vec
    config:
      db_path: ~/.llama/doc_vectors.db
      embedding_model: "all-mpnet-base-v2"
      embedding_dimension: 768
```

## Backward Compatibility

If no embedding model is specified in the provider configuration, the system falls back to the existing behavior: it uses the first available embedding model from the model registry.
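Concretely, that means client code which never passes embedding parameters keeps working unchanged. A hedged usage sketch, assuming the `llama_stack_client` `vector_dbs.register` call and that its embedding parameters are optional after this change:

```python
# Hedged sketch: assumes llama_stack_client's vector_dbs.register and
# that its embedding parameters are optional after this change.
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:5000")

# Explicit parameters: highest priority, overrides any provider default.
client.vector_dbs.register(
    vector_db_id="general_docs",
    provider_id="fast_local_search",
    embedding_model="all-MiniLM-L6-v2",
    embedding_dimension=384,
)

# No embedding parameters: the provider's configured default applies,
# or the first registry embedding model if the provider sets none.
client.vector_dbs.register(
    vector_db_id="compressed_knowledge",
    provider_id="compact_storage",
)
```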
## Supported Providers

The configurable embedding models feature is supported by:

- **Inline providers**: Faiss, SQLite-vec, Milvus, ChromaDB, Qdrant
- **Remote providers**: Qdrant, Milvus, ChromaDB, PGVector, Weaviate

## Best Practices

1. **Match dimensions**: ensure `embedding_dimension` matches your model's output (see the sanity-check sketch after this list)
2. **Use variable dimensions wisely**: only override dimensions for Matryoshka models that support it
3. **Consider performance trade-offs**: smaller dimensions mean faster search and less storage; larger dimensions mean better quality
4. **Test configurations**: validate your setup with sample queries before production use
5. **Document your choices**: comment your configurations to explain the rationale behind each model selection
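For best practice 1, a tiny probe at startup catches dimension mismatches before any data is indexed. An illustrative sketch; the `embed_fn` callable is a stand-in for whatever inference client you use:

```python
# Illustrative sanity check for best practice 1: embed a probe string and
# confirm the vector length matches the configured dimension. `embed_fn`
# is a stand-in for your inference client's embedding call.
from typing import Callable


def check_embedding_dimension(
    embed_fn: Callable[[str], list[float]],
    configured_dimension: int,
) -> None:
    vector = embed_fn("dimension probe")
    if len(vector) != configured_dimension:
        raise ValueError(
            f"configured embedding_dimension={configured_dimension}, "
            f"but the model returned {len(vector)} dimensions"
        )
```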