add embedding model by default to distribution templates (#617)

# What does this PR do?
Adds the sentence-transformers provider and the `all-MiniLM-L6-v2`
embedding model to the default models registered in the run.yaml of
every provider's distribution template.
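
For illustration, the default entry each template gains would look roughly like the following `ModelInput` (a minimal sketch: the `provider_id` and import paths are assumptions, while the 384-dimension output is a documented property of `all-MiniLM-L6-v2`):

```python
# Hypothetical sketch of the embedding model every template now registers by
# default; ModelInput/ModelType mirror the diffs below, import paths assumed.
from llama_stack.apis.models import ModelType
from llama_stack.distribution.datatypes import ModelInput

default_embedding_model = ModelInput(
    model_id="all-MiniLM-L6-v2",
    provider_id="sentence-transformers",    # assumed provider id
    model_type=ModelType.embedding,
    metadata={"embedding_dimension": 384},  # all-MiniLM-L6-v2 emits 384-dim vectors
)
```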

## Test Plan
```
llama stack build --template together --image-type conda
llama stack run ~/.llama/distributions/llamastack-together/together-run.yaml
```
Dinesh Yeduguru · 2024-12-13 · commit 516e1a3e59

```diff
@@ -83,7 +83,7 @@ class MetaReferenceInferenceImpl(
     async def register_model(self, model: Model) -> Model:
         model = await self.model_registry_helper.register_model(model)
-        if model.model_type == ModelType.embedding_model:
+        if model.model_type == ModelType.embedding:
             self._load_sentence_transformer_model(model.provider_resource_id)
         return model
```
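
Most hunks in this commit are the same mechanical rename of the enum member. For context, a minimal sketch of what that amounts to (the enum's actual base classes and values are assumptions; only the member names are confirmed by the diffs):

```python
from enum import Enum

class ModelType(str, Enum):  # definition assumed for illustration
    llm = "llm"
    embedding = "embedding"  # renamed from the previous `embedding_model`
```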

```diff
@@ -4,7 +4,13 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
+from typing import Any, Dict
 from pydantic import BaseModel
-class SentenceTransformersInferenceConfig(BaseModel): ...
+class SentenceTransformersInferenceConfig(BaseModel):
+    @classmethod
+    def sample_run_config(cls) -> Dict[str, Any]:
+        return {}
```
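
A hedged sketch of how a template generator could consume `sample_run_config()` when emitting the provider entry for a run.yaml (the `provider_type` string and the surrounding dict structure are assumptions):

```python
# Hypothetical template-generation snippet; only sample_run_config() itself
# comes from the diff above.
provider_entry = {
    "provider_id": "sentence-transformers",            # assumed id
    "provider_type": "inline::sentence-transformers",  # assumed type string
    "config": SentenceTransformersInferenceConfig.sample_run_config(),  # -> {}
}
```

Returning an empty dict signals that the provider needs no configuration beyond being listed in the run.yaml.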

```diff
@@ -337,7 +337,7 @@ class OllamaInferenceAdapter(Inference, ModelsProtocolPrivate):
     async def register_model(self, model: Model) -> Model:
         # ollama does not have embedding models running. Check if the model is in list of available models.
-        if model.model_type == ModelType.embedding_model:
+        if model.model_type == ModelType.embedding:
             response = await self.client.list()
             available_models = [m["model"] for m in response["models"]]
             if model.provider_resource_id not in available_models:
```
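
The hunk is truncated after the membership check; rendered as a standalone helper, the logic presumably amounts to something like this (the exact exception type and message are assumptions):

```python
# Hypothetical standalone rendering of the check above: registration should
# fail fast when the embedding model has not been pulled into ollama yet.
def ensure_model_available(provider_resource_id: str, available_models: list) -> None:
    if provider_resource_id not in available_models:
        raise ValueError(
            f"Model '{provider_resource_id}' is not available in ollama; "
            f"pull it first, e.g. `ollama pull {provider_resource_id}`."
        )
```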

```diff
@@ -207,7 +207,7 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate):
         model = await self.model_store.get_model(model_id)
         kwargs = {}
-        assert model.model_type == ModelType.embedding_model
+        assert model.model_type == ModelType.embedding
         assert model.metadata.get("embedding_dimensions")
         kwargs["dimensions"] = model.metadata.get("embedding_dimensions")
         assert all(
```
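
Given these asserts, an embedding model served through the vLLM adapter has to carry its dimensions in metadata. Note the key here is `embedding_dimensions` (plural), unlike the `embedding_dimension` key used by the test fixtures below. A registration satisfying the asserts might look like this sketch, reusing the hypothetical `ModelInput` from above:

```python
# Hypothetical registration that satisfies the asserts in this hunk; the key
# spelling `embedding_dimensions` is taken verbatim from the adapter code.
vllm_embedding_model = ModelInput(
    model_id="all-MiniLM-L6-v2",
    model_type=ModelType.embedding,
    metadata={"embedding_dimensions": 384},
)
```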

```diff
@@ -238,7 +238,7 @@ async def inference_stack(request, inference_model):
     model_type = ModelType.llm
     metadata = {}
     if os.getenv("EMBEDDING_DIMENSION"):
-        model_type = ModelType.embedding_model
+        model_type = ModelType.embedding
         metadata["embedding_dimension"] = get_env_or_fail("EMBEDDING_DIMENSION")
     test_stack = await construct_stack_for_test(
```
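
In other words, setting `EMBEDDING_DIMENSION` flips the test stack into embedding mode; something like the following (the value is illustrative):

```python
import os

# With this set, the fixture above registers the test model with
# model_type=ModelType.embedding and metadata={"embedding_dimension": "384"}.
os.environ["EMBEDDING_DIMENSION"] = "384"
```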

```diff
@@ -18,7 +18,7 @@ class TestEmbeddings:
         inference_impl, models_impl = inference_stack
         model = await models_impl.get_model(inference_model)
-        if model.model_type != ModelType.embedding_model:
+        if model.model_type != ModelType.embedding:
             pytest.skip("This test is only applicable for embedding models")
         response = await inference_impl.embeddings(
@@ -39,7 +39,7 @@ class TestEmbeddings:
         inference_impl, models_impl = inference_stack
         model = await models_impl.get_model(inference_model)
-        if model.model_type != ModelType.embedding_model:
+        if model.model_type != ModelType.embedding:
             pytest.skip("This test is only applicable for embedding models")
         texts = ["Hello, world!", "This is a test", "Testing embeddings"]
```
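
The `embeddings()` call being set up in these tests presumably completes along these lines (parameter names and the response shape are assumptions inferred from the surrounding test code):

```python
# Hypothetical completion of the truncated call in the first test.
response = await inference_impl.embeddings(
    model_id=inference_model,
    contents=texts,
)
assert len(response.embeddings) == len(texts)  # one vector per input text
```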

```diff
@@ -125,7 +125,7 @@ async def memory_stack(inference_model, request):
         models=[
             ModelInput(
                 model_id=inference_model,
-                model_type=ModelType.embedding_model,
+                model_type=ModelType.embedding,
                 metadata={
                     "embedding_dimension": get_env_or_fail("EMBEDDING_DIMENSION"),
                 },
```

```diff
@@ -78,7 +78,7 @@ class ModelRegistryHelper(ModelsProtocolPrivate):
         return None

     async def register_model(self, model: Model) -> Model:
-        if model.model_type == ModelType.embedding_model:
+        if model.model_type == ModelType.embedding:
             # embedding models are always registered by their provider model id and does not need to be mapped to a llama model
             provider_resource_id = model.provider_resource_id
         else:
```
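
The hunk cuts off at the `else:` branch; presumably LLMs still resolve through the registry's alias mapping that embedding models now skip. Rendered standalone (the helper name is an assumption):

```python
# Hypothetical standalone rendering of the branch above: embedding models keep
# their provider id verbatim, LLMs resolve through the alias table.
def resolve_provider_resource_id(helper, model) -> str:
    if model.model_type == ModelType.embedding:
        return model.provider_resource_id
    return helper.get_provider_model_id(model.provider_resource_id)
```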