Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-12-24 04:48:04 +00:00)
Add configurable embedding models for vector IO providers
This change lets users configure default embedding models at the provider level instead of always relying on system defaults. Each vector store provider can now specify an embedding_model and an optional embedding_dimension in its config.

Key features:
- Auto-dimension lookup for standard models from the model registry
- Support for Matryoshka embeddings with custom dimensions
- Three-tier priority: explicit params > provider config > system fallback
- Full backward compatibility: existing setups work unchanged
- Comprehensive test coverage with 20 test cases

All vector IO providers (FAISS, Chroma, Milvus, Qdrant, etc.) were updated with the new config fields, and detailed documentation with examples was added.

Fixes #2729
This commit is contained in:
parent
2298d2473c
commit
474b50b422
28 changed files with 1160 additions and 24 deletions
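For reviewers, a minimal sketch of the three-tier resolution described in the commit message; resolve_embedding_model, ProviderConfig, and the registry mapping here are illustrative stand-ins, not the actual implementation:

```python
# Illustrative sketch (not the actual implementation) of the three-tier
# priority: explicit params > provider config > system fallback.
from dataclasses import dataclass


@dataclass
class ProviderConfig:  # hypothetical shape, for illustration
    embedding_model: str | None = None
    embedding_dimension: int | None = None


# Hypothetical registry mapping model_id -> default embedding dimension.
MODEL_REGISTRY = {"all-MiniLM-L6-v2": 384, "nomic-embed-text": 768}

SYSTEM_DEFAULT_MODEL = "all-MiniLM-L6-v2"  # assumed system fallback


def resolve_embedding_model(
    explicit_model: str | None,
    explicit_dimension: int | None,
    provider: ProviderConfig,
) -> tuple[str, int]:
    # 1. Explicit parameters passed at vector-store creation win.
    # 2. Otherwise the provider-level config applies.
    # 3. Otherwise the system default is used.
    model = explicit_model or provider.embedding_model or SYSTEM_DEFAULT_MODEL
    # Dimension follows the same ladder, ending in registry auto-lookup.
    # (Real code would validate unknown models instead of raising KeyError.)
    dimension = (
        explicit_dimension
        or provider.embedding_dimension
        or MODEL_REGISTRY[model]
    )
    return model, dimension
```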
127 docs/examples/sample_vector_io_config.yaml (new file)
@@ -0,0 +1,127 @@
# Sample Vector IO Configuration with Embedding Model Defaults
# This example demonstrates the new provider-level embedding configuration features.

# Image and version info
version: 3
image_name: my-embedding-app

# APIs to serve
apis:
- inference
- vector_io
# Provider configurations
providers:
  # Inference provider for embedding models
  inference:
  - provider_id: local_inference
    provider_type: remote::ollama
    config:
      url: http://localhost:11434

  # Vector IO providers with embedding model defaults
  vector_io:
  # FAISS for fast local search with lightweight embeddings
  - provider_id: fast_local_search
    provider_type: inline::faiss
    config:
      kvstore:
        provider_type: sqlite
        config:
          db_path: ~/.llama/distributions/my-app/faiss_store.db
      # NEW: default embedding model for this provider
      embedding_model: "all-MiniLM-L6-v2"
      # embedding_dimension omitted: auto-lookup resolves 384 from the model registry

  # SQLite Vec for lightweight vector storage with Matryoshka embeddings
  - provider_id: compact_storage
    provider_type: inline::sqlite_vec
    config:
      db_path: ~/.llama/distributions/my-app/sqlite_vec.db
      kvstore:
        provider_type: sqlite
        config:
          db_name: sqlite_vec_registry.db
      # Matryoshka embedding with a custom dimension
      embedding_model: "nomic-embed-text"
      embedding_dimension: 256  # reduced from the default 768 for efficiency

  # Chroma for persistent local storage
  - provider_id: persistent_search
    provider_type: inline::chroma
    config:
      db_path: ~/.llama/distributions/my-app/chroma.db
      # High-quality embeddings for semantic search
      embedding_model: "sentence-transformers/all-mpnet-base-v2"
      # embedding_dimension omitted: auto-lookup from the model registry

  # Qdrant Cloud for production-scale search (when available)
  - provider_id: cloud_search
    provider_type: remote::qdrant
    config:
      api_key: "${env.QDRANT_API_KEY}"
      url: "${env.QDRANT_URL}"
      # Production-grade embedding model
      embedding_model: "text-embedding-3-small"
      embedding_dimension: 512  # custom dimension for performance
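The new fields are plain config additions on each provider. As a rough sketch of their shape (the actual config classes live in the llama-stack provider packages and may differ; FaissVectorIOConfig below is illustrative only):

```python
# Illustrative sketch only: a provider config carrying the two new optional
# fields. The real llama-stack config classes may differ in structure/naming.
from pydantic import BaseModel, Field


class FaissVectorIOConfig(BaseModel):  # hypothetical name and shape
    kvstore: dict  # metadata store config, simplified to a dict here
    # New in this change: provider-level embedding defaults.
    embedding_model: str | None = Field(
        default=None,
        description="Default embedding model for vector stores on this provider",
    )
    embedding_dimension: int | None = Field(
        default=None,
        description="Optional dimension override (e.g. Matryoshka truncation)",
    )
```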
# Model registry - ensure embedding models are properly configured
models:
# Lightweight embedding model (384 dimensions)
- model_id: all-MiniLM-L6-v2
  provider_id: local_inference
  provider_model_id: sentence-transformers/all-MiniLM-L6-v2
  model_type: embedding
  metadata:
    embedding_dimension: 384
    description: "Fast, lightweight embeddings for general use"

# Matryoshka embedding model (variable dimensions)
- model_id: nomic-embed-text
  provider_id: local_inference
  provider_model_id: nomic-embed-text
  model_type: embedding
  metadata:
    embedding_dimension: 768  # default; can be overridden per provider
    description: "Flexible Matryoshka embeddings supporting variable dimensions"

# High-quality embedding model (768 dimensions)
- model_id: sentence-transformers/all-mpnet-base-v2
  provider_id: local_inference
  provider_model_id: sentence-transformers/all-mpnet-base-v2
  model_type: embedding
  metadata:
    embedding_dimension: 768
    description: "High-quality embeddings for semantic search"

# OpenAI embedding model (for cloud usage)
- model_id: text-embedding-3-small
  provider_id: openai_inference  # requires an OpenAI inference provider to be configured
  provider_model_id: text-embedding-3-small
  model_type: embedding
  metadata:
    embedding_dimension: 1536  # OpenAI default dimension
    description: "OpenAI's efficient embedding model"
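A note on the Matryoshka entry above: models trained with Matryoshka representation learning (nomic-embed-text is one) allow truncating an embedding to a prefix of its dimensions. A minimal sketch of what embedding_dimension: 256 means in practice, assuming the provider truncates and re-normalizes:

```python
# Minimal sketch of Matryoshka-style truncation: keep the first k dimensions
# of a full embedding, then re-normalize. Assumes the model was trained with
# Matryoshka representation learning, so the prefix stays meaningful.
import numpy as np


def truncate_embedding(vec: np.ndarray, k: int = 256) -> np.ndarray:
    truncated = vec[:k]
    norm = np.linalg.norm(truncated)
    return truncated / norm if norm > 0 else truncated


full = np.random.rand(768).astype(np.float32)  # stand-in for a 768-dim nomic-embed-text vector
compact = truncate_embedding(full, 256)        # matches embedding_dimension: 256 above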
# Optional: configure specific vector databases (these use the provider defaults)
vector_dbs:
# Uses fast_local_search provider defaults (all-MiniLM-L6-v2, 384 dims)
- vector_db_id: general_docs
  provider_id: fast_local_search

# Uses compact_storage provider defaults (nomic-embed-text, 256 dims)
- vector_db_id: compressed_knowledge
  provider_id: compact_storage

# Uses persistent_search provider defaults (all-mpnet-base-v2, 768 dims)
- vector_db_id: semantic_library
  provider_id: persistent_search
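With the config above, a client can register a vector DB and let the provider defaults fill in the embedding settings. A sketch of the intended usage, assuming the llama-stack client API as of this commit (the point of the change is that the embedding model argument should now be omissible; previously it was effectively required):

```python
# Sketch of client-side usage against a stack running the config above.
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:5000")

# No embedding_model / embedding_dimension passed: with this change, the
# fast_local_search provider defaults (all-MiniLM-L6-v2, 384 dims) should
# presumably apply instead of a hard requirement on explicit params.
client.vector_dbs.register(
    vector_db_id="general_docs",
    provider_id="fast_local_search",
)
```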
# Server configuration
server:
  host: 0.0.0.0
  port: 5000

# Logging configuration
logging:
  level: INFO