From aec1df5a39eab972e72119bcc6d1cfdb664d25d1 Mon Sep 17 00:00:00 2001 From: skamenan7 Date: Wed, 30 Jul 2025 13:20:59 -0400 Subject: [PATCH] docs: update configuration documentation for global default embedding model - Clarified the optional nature of the default_embedding_dimension in the YAML configuration, specifying that it defaults to 384 if omitted. - Added a note in the VectorStoreConfig class to indicate that the router will fall back to 384 as the default dimension if not set. --- docs/source/distributions/configuration.md | 6 +++--- llama_stack/apis/common/vector_store_config.py | 1 + 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/docs/source/distributions/configuration.md b/docs/source/distributions/configuration.md index 2801fb115..b7d910869 100644 --- a/docs/source/distributions/configuration.md +++ b/docs/source/distributions/configuration.md @@ -690,14 +690,14 @@ shields: ### Global Vector-Store Defaults -Starting with Llama-Stack v2, you can provide a *stack-level* default embedding model that will be used whenever a new vector-store is created and the caller does **not** specify an `embedding_model` parameter. +You can provide a *stack-level* default embedding model that will be used whenever a new vector-store is created and the caller does **not** specify an `embedding_model` parameter. 
Add a top-level block next to `models:` and `vector_io:` in your build/run YAML: ```yaml vector_store_config: default_embedding_model: ${env.LLAMA_STACK_DEFAULT_EMBEDDING_MODEL:=all-MiniLM-L6-v2} - # optional but recommended + # optional - if omitted, defaults to 384 default_embedding_dimension: ${env.LLAMA_STACK_DEFAULT_EMBEDDING_DIMENSION:=384} ``` @@ -712,7 +712,7 @@ Precedence rules at runtime: | Variable | Purpose | Example | |----------|---------|---------| | `LLAMA_STACK_DEFAULT_EMBEDDING_MODEL` | Global default embedding model id | `all-MiniLM-L6-v2` | -| `LLAMA_STACK_DEFAULT_EMBEDDING_DIMENSION` | Dimension for embeddings (optional) | `384` | +| `LLAMA_STACK_DEFAULT_EMBEDDING_DIMENSION` | Dimension for embeddings (optional, defaults to 384) | `384` | If you include the `${env.…}` placeholder in `vector_store_config`, deployments can override the default without editing YAML: diff --git a/llama_stack/apis/common/vector_store_config.py b/llama_stack/apis/common/vector_store_config.py index 2d200bac8..2c396077a 100644 --- a/llama_stack/apis/common/vector_store_config.py +++ b/llama_stack/apis/common/vector_store_config.py @@ -41,5 +41,6 @@ class VectorStoreConfig(BaseModel): default_embedding_dimension: int | None = Field( default_factory=lambda: int(os.getenv("LLAMA_STACK_DEFAULT_EMBEDDING_DIMENSION", 0)) or None, ge=1 ) + # Note: if default_embedding_dimension is not set (it resolves to None), the router will fall back to 384 as the default dimension. model_config = ConfigDict(frozen=True)