From 4c8a0fa8dc1fcb316de64f5ec71521192f6ff11f Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Thu, 27 Feb 2025 22:49:06 -0800 Subject: [PATCH] fix: ensure ollama embedding model is registered properly in the template --- llama_stack/distribution/routers/routing_tables.py | 9 +-------- llama_stack/templates/ollama/ollama.py | 2 +- llama_stack/templates/ollama/run.yaml | 6 ++++++ 3 files changed, 8 insertions(+), 9 deletions(-) diff --git a/llama_stack/distribution/routers/routing_tables.py b/llama_stack/distribution/routers/routing_tables.py index c2434e517..80e9ecb7c 100644 --- a/llama_stack/distribution/routers/routing_tables.py +++ b/llama_stack/distribution/routers/routing_tables.py @@ -318,14 +318,7 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl, VectorDBs): ) model = await self.get_object_by_identifier("model", embedding_model) if model is None: - if embedding_model == "all-MiniLM-L6-v2": - raise ValueError( - "Embeddings are now served via Inference providers. " - "Please upgrade your run.yaml to include inline::sentence-transformer as an additional inference provider. " - "See https://github.com/meta-llama/llama-stack/blob/main/llama_stack/templates/together/run.yaml for an example." - ) - else: - raise ValueError(f"Model {embedding_model} not found") + raise ValueError(f"Model {embedding_model} not found") if model.model_type != ModelType.embedding: raise ValueError(f"Model {embedding_model} is not an embedding model") if "embedding_dimension" not in model.metadata: diff --git a/llama_stack/templates/ollama/ollama.py b/llama_stack/templates/ollama/ollama.py index 83c7b1a63..3c24a41ba 100644 --- a/llama_stack/templates/ollama/ollama.py +++ b/llama_stack/templates/ollama/ollama.py @@ -93,7 +93,7 @@ def get_distribution_template() -> DistributionTemplate: "inference": [inference_provider], "vector_io": [vector_io_provider_sqlite], }, - default_models=[inference_model], + default_models=[inference_model, embedding_model], default_tool_groups=default_tool_groups, ), "run-with-safety.yaml": RunConfigSettings( diff --git a/llama_stack/templates/ollama/run.yaml b/llama_stack/templates/ollama/run.yaml index 0c82552c6..a2428688e 100644 --- a/llama_stack/templates/ollama/run.yaml +++ b/llama_stack/templates/ollama/run.yaml @@ -90,6 +90,12 @@ models: model_id: ${env.INFERENCE_MODEL} provider_id: ollama model_type: llm +- metadata: + embedding_dimension: 384 + model_id: all-MiniLM-L6-v2 + provider_id: ollama + provider_model_id: all-minilm:latest + model_type: embedding shields: [] vector_dbs: [] datasets: []