From 4c8a0fa8dc1fcb316de64f5ec71521192f6ff11f Mon Sep 17 00:00:00 2001
From: Ashwin Bharambe <ashwin.bharambe@gmail.com>
Date: Thu, 27 Feb 2025 22:49:06 -0800
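Subject: [PATCH] fix: ensure ollama embedding model is registered properly in
 the template

Add the all-MiniLM-L6-v2 embedding model to the default models of the
ollama template and to its run.yaml, where it is served by the ollama
provider as all-minilm:latest with an embedding dimension of 384.

Also drop the all-MiniLM-L6-v2 special case in VectorDBsRoutingTable:
an unregistered embedding model now always raises the generic
"Model <name> not found" error instead of the hint to add
inline::sentence-transformer as an inference provider.
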
---
 llama_stack/distribution/routers/routing_tables.py | 9 +--------
 llama_stack/templates/ollama/ollama.py             | 2 +-
 llama_stack/templates/ollama/run.yaml              | 6 ++++++
 3 files changed, 8 insertions(+), 9 deletions(-)

diff --git a/llama_stack/distribution/routers/routing_tables.py b/llama_stack/distribution/routers/routing_tables.py
index c2434e517..80e9ecb7c 100644
--- a/llama_stack/distribution/routers/routing_tables.py
+++ b/llama_stack/distribution/routers/routing_tables.py
@@ -318,14 +318,7 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl, VectorDBs):
                 )
         model = await self.get_object_by_identifier("model", embedding_model)
         if model is None:
-            if embedding_model == "all-MiniLM-L6-v2":
-                raise ValueError(
-                    "Embeddings are now served via Inference providers. "
-                    "Please upgrade your run.yaml to include inline::sentence-transformer as an additional inference provider. "
-                    "See https://github.com/meta-llama/llama-stack/blob/main/llama_stack/templates/together/run.yaml for an example."
-                )
-            else:
-                raise ValueError(f"Model {embedding_model} not found")
+            raise ValueError(f"Model {embedding_model} not found")
         if model.model_type != ModelType.embedding:
             raise ValueError(f"Model {embedding_model} is not an embedding model")
         if "embedding_dimension" not in model.metadata:
diff --git a/llama_stack/templates/ollama/ollama.py b/llama_stack/templates/ollama/ollama.py
index 83c7b1a63..3c24a41ba 100644
--- a/llama_stack/templates/ollama/ollama.py
+++ b/llama_stack/templates/ollama/ollama.py
@@ -93,7 +93,7 @@ def get_distribution_template() -> DistributionTemplate:
                     "inference": [inference_provider],
                     "vector_io": [vector_io_provider_sqlite],
                 },
-                default_models=[inference_model],
+                default_models=[inference_model, embedding_model],
                 default_tool_groups=default_tool_groups,
             ),
             "run-with-safety.yaml": RunConfigSettings(
diff --git a/llama_stack/templates/ollama/run.yaml b/llama_stack/templates/ollama/run.yaml
index 0c82552c6..a2428688e 100644
--- a/llama_stack/templates/ollama/run.yaml
+++ b/llama_stack/templates/ollama/run.yaml
@@ -90,6 +90,12 @@ models:
   model_id: ${env.INFERENCE_MODEL}
   provider_id: ollama
   model_type: llm
+- metadata:
+    embedding_dimension: 384
+  model_id: all-MiniLM-L6-v2
+  provider_id: ollama
+  provider_model_id: all-minilm:latest
+  model_type: embedding
 shields: []
 vector_dbs: []
 datasets: []