Move run-*.yaml to templates/ so they can be packaged

2025-08-01 16:24:44 +00:00 · 2024-11-18 14:54:20 -08:00 · 2024-11-18 14:54:20 -08:00 · 5dce17668c
commit 5dce17668c
parent dd732f037f
23 changed files with 682 additions and 722 deletions
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -12,6 +12,11 @@ We actively welcome your pull requests.
 5. Make sure your code lints.
 6. If you haven't already, complete the Contributor License Agreement ("CLA").
 ### Updating Provider Configurations
 If you have made changes to a provider's configuration in any form (introducing a new config key, or changing models, etc.), you should run `python llama_stack/scripts/distro_codegen.py` to re-generate various YAML files as well as the documentation. You should not change `docs/source/.../distributions/` files manually as they are auto-generated.
 ### Building the Documentation
 If you are making changes to the documentation at [https://llama-stack.readthedocs.io/en/latest/](https://llama-stack.readthedocs.io/en/latest/), you can use the following command to build the documentation and preview your changes. You will need [Sphinx](https://www.sphinx-doc.org/en/master/) and the readthedocs theme.
--- a/distributions/fireworks/run.yaml
+++ b/distributions/fireworks/run.yaml
@@ -1,91 +0,0 @@
 version: '2'
 image_name: fireworks
 docker_image: null
 conda_env: null
 apis:
 - agents
 - inference
 - memory
 - safety
 - telemetry
 providers:
  inference:
  - provider_id: fireworks
    provider_type: remote::fireworks
    config:
      url: https://api.fireworks.ai/inference
      api_key: ${env.FIREWORKS_API_KEY}
  memory:
  - provider_id: faiss
    provider_type: inline::faiss
    config:
      kvstore:
        type: sqlite
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/faiss_store.db
  safety:
  - provider_id: llama-guard
    provider_type: inline::llama-guard
    config: {}
  agents:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
      persistence_store:
        type: sqlite
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/agents_store.db
  telemetry:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config: {}
 metadata_store:
  namespace: null
  type: sqlite
  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/registry.db
 models:
 - metadata: {}
  model_id: fireworks/llama-v3p1-8b-instruct
  provider_id: null
  provider_model_id: null
 - metadata: {}
  model_id: fireworks/llama-v3p1-70b-instruct
  provider_id: null
  provider_model_id: null
 - metadata: {}
  model_id: fireworks/llama-v3p1-405b-instruct
  provider_id: null
  provider_model_id: null
 - metadata: {}
  model_id: fireworks/llama-v3p2-1b-instruct
  provider_id: null
  provider_model_id: null
 - metadata: {}
  model_id: fireworks/llama-v3p2-3b-instruct
  provider_id: null
  provider_model_id: null
 - metadata: {}
  model_id: fireworks/llama-v3p2-11b-vision-instruct
  provider_id: null
  provider_model_id: null
 - metadata: {}
  model_id: fireworks/llama-v3p2-90b-vision-instruct
  provider_id: null
  provider_model_id: null
 - metadata: {}
  model_id: fireworks/llama-guard-3-8b
  provider_id: null
  provider_model_id: null
 - metadata: {}
  model_id: fireworks/llama-guard-3-11b-vision
  provider_id: null
  provider_model_id: null
 shields:
 - params: null
  shield_id: meta-llama/Llama-Guard-3-8B
  provider_id: null
  provider_shield_id: null
 memory_banks: []
 datasets: []
 scoring_fns: []
 eval_tasks: []
--- a/distributions/fireworks/run.yaml
+++ b/distributions/fireworks/run.yaml
@@ -0,0 +1 @@
 ../../llama_stack/templates/fireworks/run.yaml
--- a/distributions/meta-reference-gpu/run-with-safety.yaml
+++ b/distributions/meta-reference-gpu/run-with-safety.yaml
@@ -1,70 +0,0 @@
 version: '2'
 image_name: meta-reference-gpu
 docker_image: null
 conda_env: null
 apis:
 - agents
 - inference
 - memory
 - safety
 - telemetry
 providers:
  inference:
  - provider_id: meta-reference-inference
    provider_type: inline::meta-reference
    config:
      model: ${env.INFERENCE_MODEL}
      max_seq_len: 4096
      checkpoint_dir: ${env.INFERENCE_CHECKPOINT_DIR:null}
  - provider_id: meta-reference-safety
    provider_type: inline::meta-reference
    config:
      model: ${env.SAFETY_MODEL}
      max_seq_len: 4096
      checkpoint_dir: ${env.SAFETY_CHECKPOINT_DIR:null}
  memory:
  - provider_id: faiss
    provider_type: inline::faiss
    config:
      kvstore:
        type: sqlite
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/faiss_store.db
  safety:
  - provider_id: llama-guard
    provider_type: inline::llama-guard
    config: {}
  agents:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
      persistence_store:
        type: sqlite
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/agents_store.db
  telemetry:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config: {}
 metadata_store:
  namespace: null
  type: sqlite
  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/registry.db
 models:
 - metadata: {}
  model_id: ${env.INFERENCE_MODEL}
  provider_id: meta-reference-inference
  provider_model_id: null
 - metadata: {}
  model_id: ${env.SAFETY_MODEL}
  provider_id: meta-reference-safety
  provider_model_id: null
 shields:
 - params: null
  shield_id: ${env.SAFETY_MODEL}
  provider_id: null
  provider_shield_id: null
 memory_banks: []
 datasets: []
 scoring_fns: []
 eval_tasks: []
--- a/distributions/meta-reference-gpu/run-with-safety.yaml
+++ b/distributions/meta-reference-gpu/run-with-safety.yaml
@@ -0,0 +1 @@
 ../../llama_stack/templates/meta-reference-gpu/run-with-safety.yaml
--- a/distributions/meta-reference-gpu/run.yaml
+++ b/distributions/meta-reference-gpu/run.yaml
@@ -1,56 +0,0 @@
 version: '2'
 image_name: meta-reference-gpu
 docker_image: null
 conda_env: null
 apis:
 - agents
 - inference
 - memory
 - safety
 - telemetry
 providers:
  inference:
  - provider_id: meta-reference-inference
    provider_type: inline::meta-reference
    config:
      model: ${env.INFERENCE_MODEL}
      max_seq_len: 4096
      checkpoint_dir: ${env.INFERENCE_CHECKPOINT_DIR:null}
  memory:
  - provider_id: faiss
    provider_type: inline::faiss
    config:
      kvstore:
        type: sqlite
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/faiss_store.db
  safety:
  - provider_id: llama-guard
    provider_type: inline::llama-guard
    config: {}
  agents:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
      persistence_store:
        type: sqlite
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/agents_store.db
  telemetry:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config: {}
 metadata_store:
  namespace: null
  type: sqlite
  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/registry.db
 models:
 - metadata: {}
  model_id: ${env.INFERENCE_MODEL}
  provider_id: meta-reference-inference
  provider_model_id: null
 shields: []
 memory_banks: []
 datasets: []
 scoring_fns: []
 eval_tasks: []
--- a/distributions/meta-reference-gpu/run.yaml
+++ b/distributions/meta-reference-gpu/run.yaml
@@ -0,0 +1 @@
 ../../llama_stack/templates/meta-reference-gpu/run.yaml
--- a/distributions/ollama/run-with-safety.yaml
+++ b/distributions/ollama/run-with-safety.yaml
@@ -1,62 +0,0 @@
 version: '2'
 image_name: ollama
 docker_image: null
 conda_env: null
 apis:
 - agents
 - inference
 - memory
 - safety
 - telemetry
 providers:
  inference:
  - provider_id: ollama
    provider_type: remote::ollama
    config:
      url: ${env.OLLAMA_URL:http://localhost:11434}
  memory:
  - provider_id: faiss
    provider_type: inline::faiss
    config:
      kvstore:
        type: sqlite
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/faiss_store.db
  safety:
  - provider_id: llama-guard
    provider_type: inline::llama-guard
    config: {}
  agents:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
      persistence_store:
        type: sqlite
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/agents_store.db
  telemetry:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config: {}
 metadata_store:
  namespace: null
  type: sqlite
  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/registry.db
 models:
 - metadata: {}
  model_id: ${env.INFERENCE_MODEL}
  provider_id: ollama
  provider_model_id: null
 - metadata: {}
  model_id: ${env.SAFETY_MODEL}
  provider_id: ollama
  provider_model_id: null
 shields:
 - params: null
  shield_id: ${env.SAFETY_MODEL}
  provider_id: null
  provider_shield_id: null
 memory_banks: []
 datasets: []
 scoring_fns: []
 eval_tasks: []
--- a/distributions/ollama/run-with-safety.yaml
+++ b/distributions/ollama/run-with-safety.yaml
@@ -0,0 +1 @@
 ../../llama_stack/templates/ollama/run-with-safety.yaml
--- a/distributions/ollama/run.yaml
+++ b/distributions/ollama/run.yaml
@@ -1,54 +0,0 @@
 version: '2'
 image_name: ollama
 docker_image: null
 conda_env: null
 apis:
 - agents
 - inference
 - memory
 - safety
 - telemetry
 providers:
  inference:
  - provider_id: ollama
    provider_type: remote::ollama
    config:
      url: ${env.OLLAMA_URL:http://localhost:11434}
  memory:
  - provider_id: faiss
    provider_type: inline::faiss
    config:
      kvstore:
        type: sqlite
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/faiss_store.db
  safety:
  - provider_id: llama-guard
    provider_type: inline::llama-guard
    config: {}
  agents:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
      persistence_store:
        type: sqlite
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/agents_store.db
  telemetry:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config: {}
 metadata_store:
  namespace: null
  type: sqlite
  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/registry.db
 models:
 - metadata: {}
  model_id: ${env.INFERENCE_MODEL}
  provider_id: ollama
  provider_model_id: null
 shields: []
 memory_banks: []
 datasets: []
 scoring_fns: []
 eval_tasks: []
--- a/distributions/ollama/run.yaml
+++ b/distributions/ollama/run.yaml
@@ -0,0 +1 @@
 ../../llama_stack/templates/ollama/run.yaml
--- a/distributions/remote-vllm/run-with-safety.yaml
+++ b/distributions/remote-vllm/run-with-safety.yaml
@@ -1,70 +0,0 @@
 version: '2'
 image_name: remote-vllm
 docker_image: null
 conda_env: null
 apis:
 - agents
 - inference
 - memory
 - safety
 - telemetry
 providers:
  inference:
  - provider_id: vllm-inference
    provider_type: remote::vllm
    config:
      url: ${env.VLLM_URL}
      max_tokens: ${env.VLLM_MAX_TOKENS:4096}
      api_token: ${env.VLLM_API_TOKEN:fake}
  - provider_id: vllm-safety
    provider_type: remote::vllm
    config:
      url: ${env.SAFETY_VLLM_URL}
      max_tokens: ${env.VLLM_MAX_TOKENS:4096}
      api_token: ${env.VLLM_API_TOKEN:fake}
  memory:
  - provider_id: faiss
    provider_type: inline::faiss
    config:
      kvstore:
        type: sqlite
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/faiss_store.db
  safety:
  - provider_id: llama-guard
    provider_type: inline::llama-guard
    config: {}
  agents:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
      persistence_store:
        type: sqlite
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/agents_store.db
  telemetry:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config: {}
 metadata_store:
  namespace: null
  type: sqlite
  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/registry.db
 models:
 - metadata: {}
  model_id: ${env.INFERENCE_MODEL}
  provider_id: vllm-inference
  provider_model_id: null
 - metadata: {}
  model_id: ${env.SAFETY_MODEL}
  provider_id: vllm-safety
  provider_model_id: null
 shields:
 - params: null
  shield_id: ${env.SAFETY_MODEL}
  provider_id: null
  provider_shield_id: null
 memory_banks: []
 datasets: []
 scoring_fns: []
 eval_tasks: []
--- a/distributions/remote-vllm/run-with-safety.yaml
+++ b/distributions/remote-vllm/run-with-safety.yaml
@@ -0,0 +1 @@
 ../../llama_stack/templates/remote-vllm/run-with-safety.yaml
--- a/distributions/remote-vllm/run.yaml
+++ b/distributions/remote-vllm/run.yaml
@@ -1,56 +0,0 @@
 version: '2'
 image_name: remote-vllm
 docker_image: null
 conda_env: null
 apis:
 - agents
 - inference
 - memory
 - safety
 - telemetry
 providers:
  inference:
  - provider_id: vllm-inference
    provider_type: remote::vllm
    config:
      url: ${env.VLLM_URL}
      max_tokens: ${env.VLLM_MAX_TOKENS:4096}
      api_token: ${env.VLLM_API_TOKEN:fake}
  memory:
  - provider_id: faiss
    provider_type: inline::faiss
    config:
      kvstore:
        type: sqlite
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/faiss_store.db
  safety:
  - provider_id: llama-guard
    provider_type: inline::llama-guard
    config: {}
  agents:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
      persistence_store:
        type: sqlite
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/agents_store.db
  telemetry:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config: {}
 metadata_store:
  namespace: null
  type: sqlite
  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/registry.db
 models:
 - metadata: {}
  model_id: ${env.INFERENCE_MODEL}
  provider_id: vllm-inference
  provider_model_id: null
 shields: []
 memory_banks: []
 datasets: []
 scoring_fns: []
 eval_tasks: []
--- a/distributions/remote-vllm/run.yaml
+++ b/distributions/remote-vllm/run.yaml
@@ -0,0 +1 @@
 ../../llama_stack/templates/remote-vllm/run.yaml
--- a/distributions/tgi/run-with-safety.yaml
+++ b/distributions/tgi/run-with-safety.yaml
@@ -1,66 +0,0 @@
 version: '2'
 image_name: tgi
 docker_image: llamastack/distribution-tgi:test-0.0.52rc3
 conda_env: null
 apis:
 - agents
 - inference
 - memory
 - safety
 - telemetry
 providers:
  inference:
  - provider_id: tgi-inference
    provider_type: remote::tgi
    config:
      url: ${env.TGI_URL}
  - provider_id: tgi-safety
    provider_type: remote::tgi
    config:
      url: ${env.TGI_SAFETY_URL}
  memory:
  - provider_id: faiss
    provider_type: inline::faiss
    config:
      kvstore:
        type: sqlite
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/faiss_store.db
  safety:
  - provider_id: llama-guard
    provider_type: inline::llama-guard
    config: {}
  agents:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
      persistence_store:
        type: sqlite
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/agents_store.db
  telemetry:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config: {}
 metadata_store:
  namespace: null
  type: sqlite
  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/registry.db
 models:
 - metadata: {}
  model_id: ${env.INFERENCE_MODEL}
  provider_id: tgi-inference
  provider_model_id: null
 - metadata: {}
  model_id: ${env.SAFETY_MODEL}
  provider_id: tgi-safety
  provider_model_id: null
 shields:
 - params: null
  shield_id: ${env.SAFETY_MODEL}
  provider_id: null
  provider_shield_id: null
 memory_banks: []
 datasets: []
 scoring_fns: []
 eval_tasks: []
--- a/distributions/tgi/run-with-safety.yaml
+++ b/distributions/tgi/run-with-safety.yaml
@@ -0,0 +1 @@
 ../../llama_stack/templates/tgi/run-with-safety.yaml
--- a/distributions/tgi/run.yaml
+++ b/distributions/tgi/run.yaml
@@ -1,54 +0,0 @@
 version: '2'
 image_name: tgi
 docker_image: llamastack/distribution-tgi:test-0.0.52rc3
 conda_env: null
 apis:
 - agents
 - inference
 - memory
 - safety
 - telemetry
 providers:
  inference:
  - provider_id: tgi-inference
    provider_type: remote::tgi
    config:
      url: ${env.TGI_URL}
  memory:
  - provider_id: faiss
    provider_type: inline::faiss
    config:
      kvstore:
        type: sqlite
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/faiss_store.db
  safety:
  - provider_id: llama-guard
    provider_type: inline::llama-guard
    config: {}
  agents:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
      persistence_store:
        type: sqlite
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/agents_store.db
  telemetry:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config: {}
 metadata_store:
  namespace: null
  type: sqlite
  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/registry.db
 models:
 - metadata: {}
  model_id: ${env.INFERENCE_MODEL}
  provider_id: tgi-inference
  provider_model_id: null
 shields: []
 memory_banks: []
 datasets: []
 scoring_fns: []
 eval_tasks: []
--- a/distributions/tgi/run.yaml
+++ b/distributions/tgi/run.yaml
@@ -0,0 +1 @@
 ../../llama_stack/templates/tgi/run.yaml
--- a/distributions/together/run.yaml
+++ b/distributions/together/run.yaml
@@ -1,87 +0,0 @@
 version: '2'
 image_name: together
 docker_image: null
 conda_env: null
 apis:
 - agents
 - inference
 - memory
 - safety
 - telemetry
 providers:
  inference:
  - provider_id: together
    provider_type: remote::together
    config:
      url: https://api.together.xyz/v1
      api_key: ${env.TOGETHER_API_KEY}
  memory:
  - provider_id: faiss
    provider_type: inline::faiss
    config:
      kvstore:
        type: sqlite
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/faiss_store.db
  safety:
  - provider_id: llama-guard
    provider_type: inline::llama-guard
    config: {}
  agents:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
      persistence_store:
        type: sqlite
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/agents_store.db
  telemetry:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config: {}
 metadata_store:
  namespace: null
  type: sqlite
  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/registry.db
 models:
 - metadata: {}
  model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo
  provider_id: null
  provider_model_id: null
 - metadata: {}
  model_id: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo
  provider_id: null
  provider_model_id: null
 - metadata: {}
  model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo
  provider_id: null
  provider_model_id: null
 - metadata: {}
  model_id: meta-llama/Llama-3.2-3B-Instruct-Turbo
  provider_id: null
  provider_model_id: null
 - metadata: {}
  model_id: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo
  provider_id: null
  provider_model_id: null
 - metadata: {}
  model_id: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo
  provider_id: null
  provider_model_id: null
 - metadata: {}
  model_id: meta-llama/Meta-Llama-Guard-3-8B
  provider_id: null
  provider_model_id: null
 - metadata: {}
  model_id: meta-llama/Llama-Guard-3-11B-Vision-Turbo
  provider_id: null
  provider_model_id: null
 shields:
 - params: null
  shield_id: meta-llama/Llama-Guard-3-1B
  provider_id: null
  provider_shield_id: null
 memory_banks: []
 datasets: []
 scoring_fns: []
 eval_tasks: []
--- a/distributions/together/run.yaml
+++ b/distributions/together/run.yaml
@@ -0,0 +1 @@
 ../../llama_stack/templates/together/run.yaml
--- a/llama_stack/providers/remote/inference/ollama/docker_compose.yaml
+++ b/llama_stack/providers/remote/inference/ollama/docker_compose.yaml
@@ -1,55 +0,0 @@
 services:
  ${SERVICE_NAME:-ollama}:
    image: ollama/ollama:latest
    ports:
      - ${OLLAMA_PORT:-11434}:${OLLAMA_PORT:-11434}
    volumes:
      - $HOME/.ollama:/root/.ollama
    devices:
      - nvidia.com/gpu=all
    runtime: nvidia
    healthcheck:
      test: ["CMD", "curl", "-f", "http://ollama:11434"]
      interval: 10s
      timeout: 5s
      retries: 5
  ${SERVICE_NAME:-ollama}-init:
    image: ollama/ollama
    depends_on:
      - ${SERVICE_NAME:-ollama}:
          condition: service_healthy
    environment:
      - OLLAMA_HOST=ollama
      - OLLAMA_MODELS=${OLLAMA_MODELS}
    volumes:
      - $HOME/.ollama:/root/.ollama
    entrypoint: >
      sh -c '
        max_attempts=30;
        attempt=0;
        echo "Waiting for Ollama server...";
        until curl -s http://ollama:11434 > /dev/null; do
          attempt=$((attempt + 1));
          if [ $attempt -ge $max_attempts ]; then
            echo "Timeout waiting for Ollama server";
            exit 1;
          fi;
          echo "Attempt $attempt: Server not ready yet...";
          sleep 5;
        done;
        echo "Server ready. Pulling models...";
        models="${OLLAMA_MODELS}";
        for model in $models; do
          echo "Pulling $model...";
          if ! ollama pull "$model"; then
            echo "Failed to pull $model";
            exit 1;
          fi;
        done;
        echo "All models pulled successfully"
      '
--- a/llama_stack/scripts/distro_codegen.py
+++ b/llama_stack/scripts/distro_codegen.py
@@ -40,7 +40,7 @@ def process_template(template_dir: Path, progress) -> None:
            template = template_func()
            template.save_distribution(
-                yaml_output_dir=REPO_ROOT / "distributions" / template.name,
+                yaml_output_dir=REPO_ROOT / "llama_stack" / "templates" / template.name,
                doc_output_dir=REPO_ROOT
                / "docs/source/getting_started/distributions"
                / f"{template.distro_type}_distro",
--- a/llama_stack/templates/fireworks/run.yaml
+++ b/llama_stack/templates/fireworks/run.yaml
@@ -0,0 +1,91 @@
 version: '2'
 image_name: fireworks
 docker_image: null
 conda_env: null
 apis:
 - agents
 - inference
 - memory
 - safety
 - telemetry
 providers:
  inference:
  - provider_id: fireworks
    provider_type: remote::fireworks
    config:
      url: https://api.fireworks.ai/inference
      api_key: ${env.FIREWORKS_API_KEY}
  memory:
  - provider_id: faiss
    provider_type: inline::faiss
    config:
      kvstore:
        type: sqlite
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/faiss_store.db
  safety:
  - provider_id: llama-guard
    provider_type: inline::llama-guard
    config: {}
  agents:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
      persistence_store:
        type: sqlite
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/agents_store.db
  telemetry:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config: {}
 metadata_store:
  namespace: null
  type: sqlite
  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/registry.db
 models:
 - metadata: {}
  model_id: fireworks/llama-v3p1-8b-instruct
  provider_id: null
  provider_model_id: null
 - metadata: {}
  model_id: fireworks/llama-v3p1-70b-instruct
  provider_id: null
  provider_model_id: null
 - metadata: {}
  model_id: fireworks/llama-v3p1-405b-instruct
  provider_id: null
  provider_model_id: null
 - metadata: {}
  model_id: fireworks/llama-v3p2-1b-instruct
  provider_id: null
  provider_model_id: null
 - metadata: {}
  model_id: fireworks/llama-v3p2-3b-instruct
  provider_id: null
  provider_model_id: null
 - metadata: {}
  model_id: fireworks/llama-v3p2-11b-vision-instruct
  provider_id: null
  provider_model_id: null
 - metadata: {}
  model_id: fireworks/llama-v3p2-90b-vision-instruct
  provider_id: null
  provider_model_id: null
 - metadata: {}
  model_id: fireworks/llama-guard-3-8b
  provider_id: null
  provider_model_id: null
 - metadata: {}
  model_id: fireworks/llama-guard-3-11b-vision
  provider_id: null
  provider_model_id: null
 shields:
 - params: null
  shield_id: meta-llama/Llama-Guard-3-8B
  provider_id: null
  provider_shield_id: null
 memory_banks: []
 datasets: []
 scoring_fns: []
 eval_tasks: []
--- a/llama_stack/templates/meta-reference-gpu/run-with-safety.yaml
+++ b/llama_stack/templates/meta-reference-gpu/run-with-safety.yaml
@@ -0,0 +1,70 @@
 version: '2'
 image_name: meta-reference-gpu
 docker_image: null
 conda_env: null
 apis:
 - agents
 - inference
 - memory
 - safety
 - telemetry
 providers:
  inference:
  - provider_id: meta-reference-inference
    provider_type: inline::meta-reference
    config:
      model: ${env.INFERENCE_MODEL}
      max_seq_len: 4096
      checkpoint_dir: ${env.INFERENCE_CHECKPOINT_DIR:null}
  - provider_id: meta-reference-safety
    provider_type: inline::meta-reference
    config:
      model: ${env.SAFETY_MODEL}
      max_seq_len: 4096
      checkpoint_dir: ${env.SAFETY_CHECKPOINT_DIR:null}
  memory:
  - provider_id: faiss
    provider_type: inline::faiss
    config:
      kvstore:
        type: sqlite
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/faiss_store.db
  safety:
  - provider_id: llama-guard
    provider_type: inline::llama-guard
    config: {}
  agents:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
      persistence_store:
        type: sqlite
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/agents_store.db
  telemetry:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config: {}
 metadata_store:
  namespace: null
  type: sqlite
  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/registry.db
 models:
 - metadata: {}
  model_id: ${env.INFERENCE_MODEL}
  provider_id: meta-reference-inference
  provider_model_id: null
 - metadata: {}
  model_id: ${env.SAFETY_MODEL}
  provider_id: meta-reference-safety
  provider_model_id: null
 shields:
 - params: null
  shield_id: ${env.SAFETY_MODEL}
  provider_id: null
  provider_shield_id: null
 memory_banks: []
 datasets: []
 scoring_fns: []
 eval_tasks: []
--- a/llama_stack/templates/meta-reference-gpu/run.yaml
+++ b/llama_stack/templates/meta-reference-gpu/run.yaml
@@ -0,0 +1,56 @@
 version: '2'
 image_name: meta-reference-gpu
 docker_image: null
 conda_env: null
 apis:
 - agents
 - inference
 - memory
 - safety
 - telemetry
 providers:
  inference:
  - provider_id: meta-reference-inference
    provider_type: inline::meta-reference
    config:
      model: ${env.INFERENCE_MODEL}
      max_seq_len: 4096
      checkpoint_dir: ${env.INFERENCE_CHECKPOINT_DIR:null}
  memory:
  - provider_id: faiss
    provider_type: inline::faiss
    config:
      kvstore:
        type: sqlite
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/faiss_store.db
  safety:
  - provider_id: llama-guard
    provider_type: inline::llama-guard
    config: {}
  agents:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
      persistence_store:
        type: sqlite
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/agents_store.db
  telemetry:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config: {}
 metadata_store:
  namespace: null
  type: sqlite
  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/registry.db
 models:
 - metadata: {}
  model_id: ${env.INFERENCE_MODEL}
  provider_id: meta-reference-inference
  provider_model_id: null
 shields: []
 memory_banks: []
 datasets: []
 scoring_fns: []
 eval_tasks: []
--- a/llama_stack/templates/ollama/run-with-safety.yaml
+++ b/llama_stack/templates/ollama/run-with-safety.yaml
@@ -0,0 +1,62 @@
 version: '2'
 image_name: ollama
 docker_image: null
 conda_env: null
 apis:
 - agents
 - inference
 - memory
 - safety
 - telemetry
 providers:
  inference:
  - provider_id: ollama
    provider_type: remote::ollama
    config:
      url: ${env.OLLAMA_URL:http://localhost:11434}
  memory:
  - provider_id: faiss
    provider_type: inline::faiss
    config:
      kvstore:
        type: sqlite
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/faiss_store.db
  safety:
  - provider_id: llama-guard
    provider_type: inline::llama-guard
    config: {}
  agents:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
      persistence_store:
        type: sqlite
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/agents_store.db
  telemetry:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config: {}
 metadata_store:
  namespace: null
  type: sqlite
  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/registry.db
 models:
 - metadata: {}
  model_id: ${env.INFERENCE_MODEL}
  provider_id: ollama
  provider_model_id: null
 - metadata: {}
  model_id: ${env.SAFETY_MODEL}
  provider_id: ollama
  provider_model_id: null
 shields:
 - params: null
  shield_id: ${env.SAFETY_MODEL}
  provider_id: null
  provider_shield_id: null
 memory_banks: []
 datasets: []
 scoring_fns: []
 eval_tasks: []
--- a/llama_stack/templates/ollama/run.yaml
+++ b/llama_stack/templates/ollama/run.yaml
@@ -0,0 +1,54 @@
 version: '2'
 image_name: ollama
 docker_image: null
 conda_env: null
 apis:
 - agents
 - inference
 - memory
 - safety
 - telemetry
 providers:
  inference:
  - provider_id: ollama
    provider_type: remote::ollama
    config:
      url: ${env.OLLAMA_URL:http://localhost:11434}
  memory:
  - provider_id: faiss
    provider_type: inline::faiss
    config:
      kvstore:
        type: sqlite
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/faiss_store.db
  safety:
  - provider_id: llama-guard
    provider_type: inline::llama-guard
    config: {}
  agents:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
      persistence_store:
        type: sqlite
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/agents_store.db
  telemetry:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config: {}
 metadata_store:
  namespace: null
  type: sqlite
  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/registry.db
 models:
 - metadata: {}
  model_id: ${env.INFERENCE_MODEL}
  provider_id: ollama
  provider_model_id: null
 shields: []
 memory_banks: []
 datasets: []
 scoring_fns: []
 eval_tasks: []
--- a/llama_stack/templates/remote-vllm/run-with-safety.yaml
+++ b/llama_stack/templates/remote-vllm/run-with-safety.yaml
@ -0,0 +1,70 @@
 version: '2'
 image_name: remote-vllm
 docker_image: null
 conda_env: null
 apis:
 - agents
 - inference
 - memory
 - safety
 - telemetry
 providers:
  inference:
  - provider_id: vllm-inference
    provider_type: remote::vllm
    config:
      url: ${env.VLLM_URL}
      max_tokens: ${env.VLLM_MAX_TOKENS:4096}
      api_token: ${env.VLLM_API_TOKEN:fake}
  - provider_id: vllm-safety
    provider_type: remote::vllm
    config:
      url: ${env.SAFETY_VLLM_URL}
      max_tokens: ${env.VLLM_MAX_TOKENS:4096}
      api_token: ${env.VLLM_API_TOKEN:fake}
  memory:
  - provider_id: faiss
    provider_type: inline::faiss
    config:
      kvstore:
        type: sqlite
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/faiss_store.db
  safety:
  - provider_id: llama-guard
    provider_type: inline::llama-guard
    config: {}
  agents:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
      persistence_store:
        type: sqlite
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/agents_store.db
  telemetry:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config: {}
 metadata_store:
  namespace: null
  type: sqlite
  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/registry.db
 models:
 - metadata: {}
  model_id: ${env.INFERENCE_MODEL}
  provider_id: vllm-inference
  provider_model_id: null
 - metadata: {}
  model_id: ${env.SAFETY_MODEL}
  provider_id: vllm-safety
  provider_model_id: null
 shields:
 - params: null
  shield_id: ${env.SAFETY_MODEL}
  provider_id: null
  provider_shield_id: null
 memory_banks: []
 datasets: []
 scoring_fns: []
 eval_tasks: []
--- a/llama_stack/templates/remote-vllm/run.yaml
+++ b/llama_stack/templates/remote-vllm/run.yaml
@ -0,0 +1,56 @@
 version: '2'
 image_name: remote-vllm
 docker_image: null
 conda_env: null
 apis:
 - agents
 - inference
 - memory
 - safety
 - telemetry
 providers:
  inference:
  - provider_id: vllm-inference
    provider_type: remote::vllm
    config:
      url: ${env.VLLM_URL}
      max_tokens: ${env.VLLM_MAX_TOKENS:4096}
      api_token: ${env.VLLM_API_TOKEN:fake}
  memory:
  - provider_id: faiss
    provider_type: inline::faiss
    config:
      kvstore:
        type: sqlite
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/faiss_store.db
  safety:
  - provider_id: llama-guard
    provider_type: inline::llama-guard
    config: {}
  agents:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
      persistence_store:
        type: sqlite
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/agents_store.db
  telemetry:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config: {}
 metadata_store:
  namespace: null
  type: sqlite
  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/registry.db
 models:
 - metadata: {}
  model_id: ${env.INFERENCE_MODEL}
  provider_id: vllm-inference
  provider_model_id: null
 shields: []
 memory_banks: []
 datasets: []
 scoring_fns: []
 eval_tasks: []
--- a/llama_stack/templates/tgi/run-with-safety.yaml
+++ b/llama_stack/templates/tgi/run-with-safety.yaml
@ -0,0 +1,66 @@
 version: '2'
 image_name: tgi
 docker_image: llamastack/distribution-tgi:test-0.0.52rc3
 conda_env: null
 apis:
 - agents
 - inference
 - memory
 - safety
 - telemetry
 providers:
  inference:
  - provider_id: tgi-inference
    provider_type: remote::tgi
    config:
      url: ${env.TGI_URL}
  - provider_id: tgi-safety
    provider_type: remote::tgi
    config:
      url: ${env.TGI_SAFETY_URL}
  memory:
  - provider_id: faiss
    provider_type: inline::faiss
    config:
      kvstore:
        type: sqlite
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/faiss_store.db
  safety:
  - provider_id: llama-guard
    provider_type: inline::llama-guard
    config: {}
  agents:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
      persistence_store:
        type: sqlite
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/agents_store.db
  telemetry:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config: {}
 metadata_store:
  namespace: null
  type: sqlite
  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/registry.db
 models:
 - metadata: {}
  model_id: ${env.INFERENCE_MODEL}
  provider_id: tgi-inference
  provider_model_id: null
 - metadata: {}
  model_id: ${env.SAFETY_MODEL}
  provider_id: tgi-safety
  provider_model_id: null
 shields:
 - params: null
  shield_id: ${env.SAFETY_MODEL}
  provider_id: null
  provider_shield_id: null
 memory_banks: []
 datasets: []
 scoring_fns: []
 eval_tasks: []
--- a/llama_stack/templates/tgi/run.yaml
+++ b/llama_stack/templates/tgi/run.yaml
@ -0,0 +1,54 @@
 version: '2'
 image_name: tgi
 docker_image: llamastack/distribution-tgi:test-0.0.52rc3
 conda_env: null
 apis:
 - agents
 - inference
 - memory
 - safety
 - telemetry
 providers:
  inference:
  - provider_id: tgi-inference
    provider_type: remote::tgi
    config:
      url: ${env.TGI_URL}
  memory:
  - provider_id: faiss
    provider_type: inline::faiss
    config:
      kvstore:
        type: sqlite
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/faiss_store.db
  safety:
  - provider_id: llama-guard
    provider_type: inline::llama-guard
    config: {}
  agents:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
      persistence_store:
        type: sqlite
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/agents_store.db
  telemetry:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config: {}
 metadata_store:
  namespace: null
  type: sqlite
  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/registry.db
 models:
 - metadata: {}
  model_id: ${env.INFERENCE_MODEL}
  provider_id: tgi-inference
  provider_model_id: null
 shields: []
 memory_banks: []
 datasets: []
 scoring_fns: []
 eval_tasks: []
--- a/llama_stack/templates/together/run.yaml
+++ b/llama_stack/templates/together/run.yaml
@ -0,0 +1,87 @@
 version: '2'
 image_name: together
 docker_image: null
 conda_env: null
 apis:
 - agents
 - inference
 - memory
 - safety
 - telemetry
 providers:
  inference:
  - provider_id: together
    provider_type: remote::together
    config:
      url: https://api.together.xyz/v1
      api_key: ${env.TOGETHER_API_KEY}
  memory:
  - provider_id: faiss
    provider_type: inline::faiss
    config:
      kvstore:
        type: sqlite
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/faiss_store.db
  safety:
  - provider_id: llama-guard
    provider_type: inline::llama-guard
    config: {}
  agents:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
      persistence_store:
        type: sqlite
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/agents_store.db
  telemetry:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config: {}
 metadata_store:
  namespace: null
  type: sqlite
  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/registry.db
 models:
 - metadata: {}
  model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo
  provider_id: null
  provider_model_id: null
 - metadata: {}
  model_id: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo
  provider_id: null
  provider_model_id: null
 - metadata: {}
  model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo
  provider_id: null
  provider_model_id: null
 - metadata: {}
  model_id: meta-llama/Llama-3.2-3B-Instruct-Turbo
  provider_id: null
  provider_model_id: null
 - metadata: {}
  model_id: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo
  provider_id: null
  provider_model_id: null
 - metadata: {}
  model_id: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo
  provider_id: null
  provider_model_id: null
 - metadata: {}
  model_id: meta-llama/Meta-Llama-Guard-3-8B
  provider_id: null
  provider_model_id: null
 - metadata: {}
  model_id: meta-llama/Llama-Guard-3-11B-Vision-Turbo
  provider_id: null
  provider_model_id: null
 shields:
 - params: null
  shield_id: meta-llama/Llama-Guard-3-1B
  provider_id: null
  provider_shield_id: null
 memory_banks: []
 datasets: []
 scoring_fns: []
 eval_tasks: []
		`@ -0,0 +1 @@`
							`../../llama_stack/templates/fireworks/run.yaml`
		`@ -0,0 +1 @@`
							`../../llama_stack/templates/meta-reference-gpu/run-with-safety.yaml`
		`@ -0,0 +1 @@`
							`../../llama_stack/templates/ollama/run-with-safety.yaml`
		`@ -0,0 +1 @@`
							`../../llama_stack/templates/remote-vllm/run-with-safety.yaml`
		`@ -0,0 +1 @@`
							`../../llama_stack/templates/tgi/run-with-safety.yaml`
		`@ -0,0 +1 @@`
							`../../llama_stack/templates/together/run.yaml`