add embedding model by default to distribution templates (#617)

# What does this PR do?

Adds the sentence-transformers provider and the `all-MiniLM-L6-v2` embedding model to the default models registered in the run.yaml of each distribution template.

## Test Plan

```
llama stack build --template together --image-type conda
llama stack run ~/.llama/distributions/llamastack-together/together-run.yaml
```
2024-12-13 12:48:00 -08:00 · 2024-12-13 12:48:00 -08:00 · 516e1a3e59
commit 516e1a3e59
parent e893b22868
41 changed files with 473 additions and 64 deletions
--- a/llama_stack/templates/vllm-gpu/run.yaml
+++ b/llama_stack/templates/vllm-gpu/run.yaml
@@ -21,6 +21,9 @@ providers:
      max_tokens: ${env.MAX_TOKENS:4096}
      enforce_eager: ${env.ENFORCE_EAGER:False}
      gpu_memory_utilization: ${env.GPU_MEMORY_UTILIZATION:0.7}
+  - provider_id: sentence-transformers
+    provider_type: inline::sentence-transformers
+    config: {}
  memory:
  - provider_id: faiss
    provider_type: inline::faiss
@@ -79,6 +82,13 @@ models:
  model_id: ${env.INFERENCE_MODEL}
  provider_id: vllm
  provider_model_id: null
+  model_type: llm
+- metadata:
+    embedding_dimension: 384
+  model_id: all-MiniLM-L6-v2
+  provider_id: sentence-transformers
+  provider_model_id: null
+  model_type: embedding
 shields: []
 memory_banks: []
 datasets: []