mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-10-04 04:04:14 +00:00
- Add VectorStoreConfig with global default_embedding_model and default_embedding_dimension
- Support environment variables LLAMA_STACK_DEFAULT_EMBEDDING_MODEL and LLAMA_STACK_DEFAULT_EMBEDDING_DIMENSION
- Implement precedence: explicit model > global default > clear error (no fallback)
- Update VectorIORouter with _resolve_embedding_model() precedence logic
- Remove non-deterministic 'first model in run.yaml' fallback behavior
- Add vector_store_config to StackRunConfig and all distribution templates
- Include comprehensive unit tests for config loading and router precedence
- Update documentation with configuration examples and usage patterns
- Fix error messages to include 'Failed to' prefix per coding standards

Makes vector store creation deterministic by eliminating unpredictable fallbacks and providing clear configuration options at the stack level.
49 lines
1.5 KiB
YAML
49 lines
1.5 KiB
YAML
# Build specification for a distribution that uses watsonx for LLM inference.
#
# NOTE(review): the nesting below was reconstructed from a copy whose
# indentation had been lost. Confirm it against the consuming schema, in
# particular whether vector_store_config belongs at the top level (as written
# here) or under distribution_spec.
version: 2
distribution_spec:
  description: Use watsonx for running LLM inference
  # One list of providers per API; each entry names a provider type and,
  # except under tool_runtime, a provider id.
  providers:
    inference:
      - provider_id: watsonx
        provider_type: remote::watsonx
      - provider_id: sentence-transformers
        provider_type: inline::sentence-transformers
    vector_io:
      - provider_id: faiss
        provider_type: inline::faiss
    safety:
      - provider_id: llama-guard
        provider_type: inline::llama-guard
    agents:
      - provider_id: meta-reference
        provider_type: inline::meta-reference
    telemetry:
      - provider_id: meta-reference
        provider_type: inline::meta-reference
    eval:
      - provider_id: meta-reference
        provider_type: inline::meta-reference
    datasetio:
      - provider_id: huggingface
        provider_type: remote::huggingface
      - provider_id: localfs
        provider_type: inline::localfs
    scoring:
      - provider_id: basic
        provider_type: inline::basic
      - provider_id: llm-as-judge
        provider_type: inline::llm-as-judge
      - provider_id: braintrust
        provider_type: inline::braintrust
    tool_runtime:
      - provider_type: remote::brave-search
      - provider_type: remote::tavily-search
      - provider_type: inline::rag-runtime
      - provider_type: remote::model-context-protocol
# Default embedding settings, read from the named environment variables.
# The text after `:=` is presumably the fallback used when the variable is
# unset - confirm against the stack's env-substitution rules.
vector_store_config:
  default_embedding_model: ${env.LLAMA_STACK_DEFAULT_EMBEDDING_MODEL:=all-MiniLM-L6-v2}
  default_embedding_dimension: ${env.LLAMA_STACK_DEFAULT_EMBEDDING_DIMENSION:=384}
image_type: venv
# Each package is listed once; the earlier copy repeated aiosqlite.
additional_pip_packages:
  - sqlalchemy[asyncio]
  - aiosqlite