diff --git a/llama_stack/distribution/routers/routing_tables.py b/llama_stack/distribution/routers/routing_tables.py index 2a48b8e7e..01edf4e5a 100644 --- a/llama_stack/distribution/routers/routing_tables.py +++ b/llama_stack/distribution/routers/routing_tables.py @@ -308,7 +308,14 @@ class MemoryBanksRoutingTable(CommonRoutingTableImpl, MemoryBanks): ) model = await self.get_object_by_identifier("model", params.embedding_model) if model is None: - raise ValueError(f"Model {params.embedding_model} not found") + if params.embedding_model == "all-MiniLM-L6-v2": + raise ValueError( + "Embeddings are now served via Inference providers. " + "Please upgrade your run.yaml to include inline::sentence-transformer as an additional inference provider. " + "See https://github.com/meta-llama/llama-stack/blob/main/llama_stack/templates/together/run.yaml for an example." + ) + else: + raise ValueError(f"Model {params.embedding_model} not found") if model.model_type != ModelType.embedding: raise ValueError( f"Model {params.embedding_model} is not an embedding model"