llama-stack-mirror/docs/examples/sample_vector_io_config.yaml
Add configurable embedding models for vector IO providers
This change lets users configure default embedding models at the provider level instead of always relying on system defaults. Each vector store provider can now specify an embedding_model and an optional embedding_dimension in its config.

Key features:
- Auto-dimension lookup for standard models from the registry
- Support for Matryoshka embeddings with custom dimensions
- Three-tier priority: explicit params > provider config > system fallback (see the explicit-override entry at the end of the vector_dbs section below)
- Full backward compatibility: existing setups without an embedding_model keep working unchanged (see the legacy provider sketch in the vector_io section below)
- Comprehensive test coverage with 20 test cases

Updated all vector IO providers (FAISS, Chroma, Milvus, Qdrant, etc.) with the new config fields and added detailed documentation with examples.

Fixes #2729

# Sample Vector IO Configuration with Embedding Model Defaults
# This example demonstrates the new provider-level embedding configuration features

# Image and version info
version: 3
image_name: my-embedding-app

# APIs to serve
apis:
- inference
- vector_io

# Provider configurations
providers:
  # Inference provider for embedding models
  inference:
  - provider_id: local_inference
    provider_type: remote::ollama
    config:
      url: http://localhost:11434
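  # Optional: remote OpenAI provider for the cloud embedding model registered
  # below. A sketch, assuming the standard remote::openai provider type and an
  # OPENAI_API_KEY environment variable; omit this block for local-only setups.
  - provider_id: openai_inference
    provider_type: remote::openai
    config:
      api_key: "${env.OPENAI_API_KEY}"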
  # Vector IO providers with embedding model defaults
  vector_io:
  # FAISS for fast local search with lightweight embeddings
  - provider_id: fast_local_search
    provider_type: inline::faiss
    config:
      kvstore:
        provider_type: sqlite
        config:
          db_path: ~/.llama/distributions/my-app/faiss_store.db
      # NEW: Default embedding model for this provider
      embedding_model: "all-MiniLM-L6-v2"
      # Dimension auto-lookup: 384 (from model registry)

  # SQLite Vec for lightweight vector storage with Matryoshka embeddings
  - provider_id: compact_storage
    provider_type: inline::sqlite_vec
    config:
      db_path: ~/.llama/distributions/my-app/sqlite_vec.db
      kvstore:
        provider_type: sqlite
        config:
          db_path: ~/.llama/distributions/my-app/sqlite_vec_registry.db
      # Matryoshka embedding with custom dimension
      embedding_model: "nomic-embed-text"
      embedding_dimension: 256  # Reduced from default 768 for efficiency

  # Chroma for persistent local storage
  - provider_id: persistent_search
    provider_type: inline::chromadb
    config:
      db_path: ~/.llama/distributions/my-app/chroma.db
      # High-quality embeddings for semantic search
      embedding_model: "sentence-transformers/all-mpnet-base-v2"
      # Auto-lookup dimension from model registry

  # Qdrant Cloud for production-scale search (when available)
  - provider_id: cloud_search
    provider_type: remote::qdrant
    config:
      api_key: "${env.QDRANT_API_KEY}"
      url: "${env.QDRANT_URL}"
      # Production-grade embedding model
      embedding_model: "text-embedding-3-small"
      embedding_dimension: 512  # Custom dimension for performance
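
  # Backward compatibility (system fallback tier): a provider configured
  # without embedding_model behaves exactly as before this change. A sketch
  # with a hypothetical provider_id; the system default embedding model applies.
  - provider_id: legacy_store
    provider_type: inline::faiss
    config:
      kvstore:
        provider_type: sqlite
        config:
          db_path: ~/.llama/distributions/my-app/legacy_faiss.db
      # No embedding_model / embedding_dimension -> system defaults are used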
# Model registry - ensure embedding models are properly configured
models:
# Lightweight embedding model (384 dimensions)
- model_id: all-MiniLM-L6-v2
  provider_id: local_inference
  provider_model_id: sentence-transformers/all-MiniLM-L6-v2
  model_type: embedding
  metadata:
    embedding_dimension: 384
    description: "Fast, lightweight embeddings for general use"

# Matryoshka embedding model (variable dimensions)
- model_id: nomic-embed-text
  provider_id: local_inference
  provider_model_id: nomic-embed-text
  model_type: embedding
  metadata:
    embedding_dimension: 768  # Default, can be overridden
    description: "Flexible Matryoshka embeddings supporting variable dimensions"

# High-quality embedding model (768 dimensions)
- model_id: sentence-transformers/all-mpnet-base-v2
  provider_id: local_inference
  provider_model_id: sentence-transformers/all-mpnet-base-v2
  model_type: embedding
  metadata:
    embedding_dimension: 768
    description: "High-quality embeddings for semantic search"

# OpenAI embedding model (for cloud usage)
- model_id: text-embedding-3-small
  provider_id: openai_inference  # requires an OpenAI provider; see the optional sketch above
  provider_model_id: text-embedding-3-small
  model_type: embedding
  metadata:
    embedding_dimension: 1536  # Default OpenAI dimension
    description: "OpenAI's efficient embedding model"
# Optional: Configure specific vector databases (will use provider defaults)
vector_dbs:
# Uses fast_local_search provider defaults (all-MiniLM-L6-v2, 384 dims)
- vector_db_id: general_docs
  provider_id: fast_local_search

# Uses compact_storage provider defaults (nomic-embed-text, 256 dims)
- vector_db_id: compressed_knowledge
  provider_id: compact_storage

# Uses persistent_search provider defaults (all-mpnet-base-v2, 768 dims)
- vector_db_id: semantic_library
  provider_id: persistent_search
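
# Explicit parameters (highest priority tier): values set on the vector DB
# itself override the provider-level defaults above. A sketch with a
# hypothetical vector_db_id, assuming vector_dbs entries accept
# embedding_model / embedding_dimension directly.
- vector_db_id: override_example
  provider_id: fast_local_search
  embedding_model: nomic-embed-text  # overrides the provider's all-MiniLM-L6-v2
  embedding_dimension: 256           # overrides the auto-looked-up 384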
# Server configuration
server:
  host: 0.0.0.0
  port: 5000

# Logging configuration
logging:
  level: INFO