add embedding model by default to distribution templates (#617)

# What does this PR do?
Adds the sentence-transformers provider and registers the `all-MiniLM-L6-v2`
embedding model as one of the default models in the generated run.yaml for
all distribution templates.

## Test Plan
llama stack build --template together --image-type conda
llama stack run
~/.llama/distributions/llamastack-together/together-run.yaml
This commit is contained in:
Dinesh Yeduguru 2024-12-13 12:48:00 -08:00 committed by GitHub
parent e893b22868
commit 516e1a3e59
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
41 changed files with 473 additions and 64 deletions

View file

@@ -109,7 +109,7 @@ class InferenceRouter(Inference):
model = await self.routing_table.get_model(model_id)
if model is None:
raise ValueError(f"Model '{model_id}' not found")
if model.model_type == ModelType.embedding_model:
if model.model_type == ModelType.embedding:
raise ValueError(
f"Model '{model_id}' is an embedding model and does not support chat completions"
)
@@ -142,7 +142,7 @@ class InferenceRouter(Inference):
model = await self.routing_table.get_model(model_id)
if model is None:
raise ValueError(f"Model '{model_id}' not found")
if model.model_type == ModelType.embedding_model:
if model.model_type == ModelType.embedding:
raise ValueError(
f"Model '{model_id}' is an embedding model and does not support chat completions"
)