From 2e3621f32b348be65dd005c87e8ed68dc8a0bc67 Mon Sep 17 00:00:00 2001 From: skamenan7 Date: Fri, 8 Aug 2025 16:41:17 -0400 Subject: [PATCH] Change default embedding model to all-MiniLM-L6-v2 --- docs/source/distributions/configuration.md | 4 ++-- llama_stack/core/routers/vector_io.py | 8 ++------ 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/docs/source/distributions/configuration.md b/docs/source/distributions/configuration.md index 760d4eccf..4132cab79 100644 --- a/docs/source/distributions/configuration.md +++ b/docs/source/distributions/configuration.md @@ -705,7 +705,7 @@ Precedence rules at runtime: 1. If `embedding_model` is explicitly passed in an API call, that value is used. 2. Otherwise the value in `vector_store_config.default_embedding_model` is used. -3. If neither is available the server will fall back to the system default (ibm-granite/granite-embedding-125m-english). +3. If neither is available the server will fall back to the system default (all-MiniLM-L6-v2). #### Environment variables @@ -721,4 +721,4 @@ export LLAMA_STACK_DEFAULT_EMBEDDING_MODEL="sentence-transformers/all-MiniLM-L6- llama stack run --config run.yaml ``` -> Tip: If you omit `vector_store_config` entirely and don't set `LLAMA_STACK_DEFAULT_EMBEDDING_MODEL`, the system will fall back to the default `ibm-granite/granite-embedding-125m-english` model with 384 dimensions for vector store creation. +> Tip: If you omit `vector_store_config` entirely and don't set `LLAMA_STACK_DEFAULT_EMBEDDING_MODEL`, the system will fall back to the default `all-MiniLM-L6-v2` model with 384 dimensions for vector store creation. diff --git a/llama_stack/core/routers/vector_io.py b/llama_stack/core/routers/vector_io.py index ac32e9243..e48c14e0e 100644 --- a/llama_stack/core/routers/vector_io.py +++ b/llama_stack/core/routers/vector_io.py @@ -98,8 +98,8 @@ class VectorIORouter(VectorIO): if config.default_embedding_model is not None: return config.default_embedding_model, config.default_embedding_dimension or 384 - # fallback to granite model - see issue #2418 for context - return "ibm-granite/granite-embedding-125m-english", 384 + # fallback to existing default model for compatibility + return "all-MiniLM-L6-v2", 384 async def register_vector_db( self, @@ -158,10 +158,6 @@ class VectorIORouter(VectorIO): # Determine which embedding model to use based on new precedence embedding_model, embedding_dimension = await self._resolve_embedding_model(embedding_model) - if embedding_dimension is None: - # try to fetch dimension from model metadata as fallback - embedding_model_info = await self._get_first_embedding_model() # may still help - embedding_dimension = embedding_model_info[1] if embedding_model_info else 384 vector_db_id = f"vs_{uuid.uuid4()}" registered_vector_db = await self.routing_table.register_vector_db(