add embedding model by default to distribution templates (#617)

# What does this PR do?

Adds the `sentence-transformers` provider and the `all-MiniLM-L6-v2` embedding model to the default models registered in the run.yaml for all providers.

## Test Plan

```
llama stack build --template together --image-type conda
llama stack run ~/.llama/distributions/llamastack-together/together-run.yaml
```
2025-12-05 02:17:31 +00:00 · 2024-12-13 12:48:00 -08:00 · 2024-12-13 12:48:00 -08:00 · 516e1a3e59
commit 516e1a3e59
parent e893b22868
41 changed files with 473 additions and 64 deletions
--- a/llama_stack/templates/meta-reference-gpu/run.yaml
+++ b/llama_stack/templates/meta-reference-gpu/run.yaml
@@ -19,6 +19,9 @@ providers:
      model: ${env.INFERENCE_MODEL}
      max_seq_len: 4096
      checkpoint_dir: ${env.INFERENCE_CHECKPOINT_DIR:null}
+  - provider_id: sentence-transformers
+    provider_type: inline::sentence-transformers
+    config: {}
  memory:
  - provider_id: faiss
    provider_type: inline::faiss
@@ -77,6 +80,13 @@ models:
  model_id: ${env.INFERENCE_MODEL}
  provider_id: meta-reference-inference
  provider_model_id: null
+  model_type: llm
+- metadata:
+    embedding_dimension: 384
+  model_id: all-MiniLM-L6-v2
+  provider_id: sentence-transformers
+  provider_model_id: null
+  model_type: embedding
 shields: []
 memory_banks: []
 datasets: []