mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-12-23 16:43:56 +00:00
This change lets users configure default embedding models at the provider level instead of always relying on system defaults. Each vector store provider can now specify an embedding_model and optional embedding_dimension in their config. Key features: - Auto-dimension lookup for standard models from the registry - Support for Matryoshka embeddings with custom dimensions - Three-tier priority: explicit params > provider config > system fallback - Full backward compatibility - existing setups work unchanged - Comprehensive test coverage with 20 test cases Updated all vector IO providers (FAISS, Chroma, Milvus, Qdrant, etc.) with the new config fields and added detailed documentation with examples. Fixes #2729
127 lines
4.2 KiB
YAML
---
# Sample Vector IO Configuration with Embedding Model Defaults
# This example demonstrates the new provider-level embedding configuration
# features: each vector_io provider may declare a default embedding_model and
# an optional embedding_dimension (explicit params > provider config > system
# fallback).

# Image and version info
version: 3
image_name: my-embedding-app

# APIs to serve
apis:
  - inference
  - vector_io

# Provider configurations
providers:
  # Inference provider for embedding models
  inference:
    - provider_id: local_inference
      provider_type: inline::ollama
      config:
        url: http://localhost:11434

  # Vector IO providers with embedding model defaults
  vector_io:
    # FAISS for fast local search with lightweight embeddings
    - provider_id: fast_local_search
      provider_type: inline::faiss
      config:
        kvstore:
          provider_type: sqlite
          config:
            db_path: ~/.llama/distributions/my-app/faiss_store.db
        # NEW: default embedding model for this provider
        embedding_model: "all-MiniLM-L6-v2"
        # Dimension auto-lookup: 384 (from model registry)

    # SQLite Vec for lightweight vector storage with Matryoshka embeddings
    - provider_id: compact_storage
      provider_type: inline::sqlite_vec
      config:
        db_path: ~/.llama/distributions/my-app/sqlite_vec.db
        kvstore:
          provider_type: sqlite
          config:
            db_name: sqlite_vec_registry.db
        # Matryoshka embedding with custom dimension
        embedding_model: "nomic-embed-text"
        embedding_dimension: 256  # reduced from default 768 for efficiency

    # Chroma for persistent local storage
    - provider_id: persistent_search
      provider_type: inline::chroma
      config:
        db_path: ~/.llama/distributions/my-app/chroma.db
        # High-quality embeddings for semantic search
        embedding_model: "sentence-transformers/all-mpnet-base-v2"
        # Dimension auto-lookup from the model registry (768)

    # Qdrant Cloud for production-scale search (when available)
    - provider_id: cloud_search
      provider_type: remote::qdrant
      config:
        api_key: "${env.QDRANT_API_KEY}"
        url: "${env.QDRANT_URL}"
        # Production-grade embedding model
        embedding_model: "text-embedding-3-small"
        embedding_dimension: 512  # custom dimension for performance

# Model registry - ensure embedding models are properly configured
models:
  # Lightweight embedding model (384 dimensions)
  - model_id: all-MiniLM-L6-v2
    provider_id: local_inference
    provider_model_id: sentence-transformers/all-MiniLM-L6-v2
    model_type: embedding
    metadata:
      embedding_dimension: 384
      description: "Fast, lightweight embeddings for general use"

  # Matryoshka embedding model (variable dimensions)
  - model_id: nomic-embed-text
    provider_id: local_inference
    provider_model_id: nomic-embed-text
    model_type: embedding
    metadata:
      embedding_dimension: 768  # default, can be overridden per provider
      description: "Flexible Matryoshka embeddings supporting variable dimensions"

  # High-quality embedding model (768 dimensions)
  - model_id: sentence-transformers/all-mpnet-base-v2
    provider_id: local_inference
    provider_model_id: sentence-transformers/all-mpnet-base-v2
    model_type: embedding
    metadata:
      embedding_dimension: 768
      description: "High-quality embeddings for semantic search"

  # OpenAI embedding model (for cloud usage)
  - model_id: text-embedding-3-small
    provider_id: openai_inference  # would need an OpenAI provider configured
    provider_model_id: text-embedding-3-small
    model_type: embedding
    metadata:
      embedding_dimension: 1536  # default OpenAI dimension
      description: "OpenAI's efficient embedding model"

# Optional: configure specific vector databases (each falls back to its
# provider's embedding defaults)
vector_dbs:
  # Uses fast_local_search provider defaults (all-MiniLM-L6-v2, 384 dims)
  - vector_db_id: general_docs
    provider_id: fast_local_search

  # Uses compact_storage provider defaults (nomic-embed-text, 256 dims)
  - vector_db_id: compressed_knowledge
    provider_id: compact_storage

  # Uses persistent_search provider defaults (all-mpnet-base-v2, 768 dims)
  - vector_db_id: semantic_library
    provider_id: persistent_search

# Server configuration
server:
  host: "0.0.0.0"  # quoted defensively so the address stays a string
  port: 5000

# Logging configuration
logging:
  level: INFO