Change default embedding model to all-MiniLM-L6-v2

This commit is contained in:
skamenan7 2025-08-08 16:41:17 -04:00 committed by Sumanth Kamenani
parent 70df4b7878
commit 2e3621f32b
2 changed files with 4 additions and 8 deletions

View file

@@ -705,7 +705,7 @@ Precedence rules at runtime:
1. If `embedding_model` is explicitly passed in an API call, that value is used. 1. If `embedding_model` is explicitly passed in an API call, that value is used.
2. Otherwise the value in `vector_store_config.default_embedding_model` is used. 2. Otherwise the value in `vector_store_config.default_embedding_model` is used.
3. If neither is available the server will fall back to the system default (ibm-granite/granite-embedding-125m-english). 3. If neither is available the server will fall back to the system default (all-MiniLM-L6-v2).
#### Environment variables #### Environment variables
@@ -721,4 +721,4 @@ export LLAMA_STACK_DEFAULT_EMBEDDING_MODEL="sentence-transformers/all-MiniLM-L6-
llama stack run --config run.yaml llama stack run --config run.yaml
``` ```
> Tip: If you omit `vector_store_config` entirely and don't set `LLAMA_STACK_DEFAULT_EMBEDDING_MODEL`, the system will fall back to the default `ibm-granite/granite-embedding-125m-english` model with 384 dimensions for vector store creation. > Tip: If you omit `vector_store_config` entirely and don't set `LLAMA_STACK_DEFAULT_EMBEDDING_MODEL`, the system will fall back to the default `all-MiniLM-L6-v2` model with 384 dimensions for vector store creation.

View file

@@ -98,8 +98,8 @@ class VectorIORouter(VectorIO):
if config.default_embedding_model is not None: if config.default_embedding_model is not None:
return config.default_embedding_model, config.default_embedding_dimension or 384 return config.default_embedding_model, config.default_embedding_dimension or 384
# fallback to granite model - see issue #2418 for context # fallback to existing default model for compatibility
return "ibm-granite/granite-embedding-125m-english", 384 return "all-MiniLM-L6-v2", 384
async def register_vector_db( async def register_vector_db(
self, self,
@@ -158,10 +158,6 @@ class VectorIORouter(VectorIO):
# Determine which embedding model to use based on new precedence # Determine which embedding model to use based on new precedence
embedding_model, embedding_dimension = await self._resolve_embedding_model(embedding_model) embedding_model, embedding_dimension = await self._resolve_embedding_model(embedding_model)
if embedding_dimension is None:
# try to fetch dimension from model metadata as fallback
embedding_model_info = await self._get_first_embedding_model() # may still help
embedding_dimension = embedding_model_info[1] if embedding_model_info else 384
vector_db_id = f"vs_{uuid.uuid4()}" vector_db_id = f"vs_{uuid.uuid4()}"
registered_vector_db = await self.routing_table.register_vector_db( registered_vector_db = await self.routing_table.register_vector_db(