From 07f9bf723fdf3ae054af959df7d776f8b1653b97 Mon Sep 17 00:00:00 2001 From: Xi Yan Date: Fri, 25 Oct 2024 12:51:22 -0700 Subject: [PATCH] fix broken --list-templates with adding build.yaml files for packaging (#327) * add build files to templates * fix templates * manifest * symlink * symlink * precommit * change everything to docker build.yaml * remove image_type in templates * fix build from templates CLI * fix readmes --- MANIFEST.in | 2 +- distributions/bedrock/build.yaml | 11 +------ distributions/databricks/build.yaml | 11 +------ distributions/fireworks/README.md | 2 +- distributions/fireworks/build.yaml | 11 +------ distributions/hf-endpoint/build.yaml | 11 +------ distributions/hf-serverless/build.yaml | 11 +------ distributions/meta-reference-gpu/build.yaml | 15 +-------- .../meta-reference-quantized-gpu/build.yaml | 15 +-------- distributions/ollama/README.md | 2 +- distributions/ollama/build.yaml | 14 +------- distributions/tgi/README.md | 2 +- distributions/tgi/build.yaml | 14 +------- distributions/together/README.md | 2 +- distributions/together/build.yaml | 11 +------ distributions/vllm/build.yaml | 11 +------ docs/cli_reference.md | 10 +++--- docs/getting_started.md | 6 +--- llama_stack/cli/stack/build.py | 32 +++++++++---------- llama_stack/distribution/build.py | 7 ++-- .../quantization/scripts/build_conda.sh | 6 ++++ llama_stack/templates/bedrock/build.yaml | 9 ++++++ llama_stack/templates/databricks/build.yaml | 9 ++++++ llama_stack/templates/fireworks/build.yaml | 9 ++++++ llama_stack/templates/hf-endpoint/build.yaml | 9 ++++++ .../templates/hf-serverless/build.yaml | 9 ++++++ .../templates/meta-reference-gpu/build.yaml | 13 ++++++++ .../meta-reference-quantized-gpu/build.yaml | 13 ++++++++ llama_stack/templates/ollama/build.yaml | 12 +++++++ llama_stack/templates/tgi/build.yaml | 12 +++++++ llama_stack/templates/together/build.yaml | 9 ++++++ llama_stack/templates/vllm/build.yaml | 9 ++++++ 32 files changed, 161 insertions(+), 158 deletions(-) mode change 100644 => 120000 distributions/bedrock/build.yaml mode change 100644 => 120000 distributions/databricks/build.yaml mode change 100644 => 120000 distributions/fireworks/build.yaml mode change 100644 => 120000 distributions/hf-endpoint/build.yaml mode change 100644 => 120000 distributions/hf-serverless/build.yaml mode change 100644 => 120000 distributions/meta-reference-gpu/build.yaml mode change 100644 => 120000 distributions/meta-reference-quantized-gpu/build.yaml mode change 100644 => 120000 distributions/ollama/build.yaml mode change 100644 => 120000 distributions/tgi/build.yaml mode change 100644 => 120000 distributions/together/build.yaml mode change 100644 => 120000 distributions/vllm/build.yaml create mode 100644 llama_stack/templates/bedrock/build.yaml create mode 100644 llama_stack/templates/databricks/build.yaml create mode 100644 llama_stack/templates/fireworks/build.yaml create mode 100644 llama_stack/templates/hf-endpoint/build.yaml create mode 100644 llama_stack/templates/hf-serverless/build.yaml create mode 100644 llama_stack/templates/meta-reference-gpu/build.yaml create mode 100644 llama_stack/templates/meta-reference-quantized-gpu/build.yaml create mode 100644 llama_stack/templates/ollama/build.yaml create mode 100644 llama_stack/templates/tgi/build.yaml create mode 100644 llama_stack/templates/together/build.yaml create mode 100644 llama_stack/templates/vllm/build.yaml diff --git a/MANIFEST.in b/MANIFEST.in index 7426a3abd..0517b86a8 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,4 +1,4 @@ 
include requirements.txt include llama_stack/distribution/*.sh include llama_stack/cli/scripts/*.sh -include distributions/*/build.yaml +include llama_stack/templates/*/build.yaml diff --git a/distributions/bedrock/build.yaml b/distributions/bedrock/build.yaml deleted file mode 100644 index ae7b27d49..000000000 --- a/distributions/bedrock/build.yaml +++ /dev/null @@ -1,10 +0,0 @@ -name: bedrock -distribution_spec: - description: Use Amazon Bedrock APIs. - providers: - inference: remote::bedrock - memory: meta-reference - safety: meta-reference - agents: meta-reference - telemetry: meta-reference -image_type: conda diff --git a/distributions/bedrock/build.yaml b/distributions/bedrock/build.yaml new file mode 120000 index 000000000..72402ef8d --- /dev/null +++ b/distributions/bedrock/build.yaml @@ -0,0 +1 @@ +../../llama_stack/templates/bedrock/build.yaml \ No newline at end of file diff --git a/distributions/databricks/build.yaml b/distributions/databricks/build.yaml deleted file mode 100644 index 2188dd0a0..000000000 --- a/distributions/databricks/build.yaml +++ /dev/null @@ -1,10 +0,0 @@ -name: databricks -distribution_spec: - description: Use Databricks for running LLM inference - providers: - inference: remote::databricks - memory: meta-reference - safety: meta-reference - agents: meta-reference - telemetry: meta-reference -image_type: conda diff --git a/distributions/databricks/build.yaml b/distributions/databricks/build.yaml new file mode 120000 index 000000000..66342fe6f --- /dev/null +++ b/distributions/databricks/build.yaml @@ -0,0 +1 @@ +../../llama_stack/templates/databricks/build.yaml \ No newline at end of file diff --git a/distributions/fireworks/README.md b/distributions/fireworks/README.md index fcf74d809..e3987e1e2 100644 --- a/distributions/fireworks/README.md +++ b/distributions/fireworks/README.md @@ -49,7 +49,7 @@ inference: **Via Conda** ```bash -llama stack build --config ./build.yaml +llama stack build --template fireworks --image-type conda # -- modify run.yaml to a valid Fireworks server endpoint llama stack run ./run.yaml ``` diff --git a/distributions/fireworks/build.yaml b/distributions/fireworks/build.yaml deleted file mode 100644 index 2e5cf0753..000000000 --- a/distributions/fireworks/build.yaml +++ /dev/null @@ -1,10 +0,0 @@ -name: fireworks -distribution_spec: - description: Use Fireworks.ai for running LLM inference - providers: - inference: remote::fireworks - memory: meta-reference - safety: meta-reference - agents: meta-reference - telemetry: meta-reference -image_type: docker diff --git a/distributions/fireworks/build.yaml b/distributions/fireworks/build.yaml new file mode 120000 index 000000000..32a5bd869 --- /dev/null +++ b/distributions/fireworks/build.yaml @@ -0,0 +1 @@ +../../llama_stack/templates/fireworks/build.yaml \ No newline at end of file diff --git a/distributions/hf-endpoint/build.yaml b/distributions/hf-endpoint/build.yaml deleted file mode 100644 index 750bebcb5..000000000 --- a/distributions/hf-endpoint/build.yaml +++ /dev/null @@ -1,10 +0,0 @@ -name: hf-endpoint -distribution_spec: - description: "Like local, but use Hugging Face Inference Endpoints for running LLM inference.\nSee https://hf.co/docs/api-endpoints." 
- providers: - inference: remote::hf::endpoint - memory: meta-reference - safety: meta-reference - agents: meta-reference - telemetry: meta-reference -image_type: conda diff --git a/distributions/hf-endpoint/build.yaml b/distributions/hf-endpoint/build.yaml new file mode 120000 index 000000000..a73c70c05 --- /dev/null +++ b/distributions/hf-endpoint/build.yaml @@ -0,0 +1 @@ +../../llama_stack/templates/hf-endpoint/build.yaml \ No newline at end of file diff --git a/distributions/hf-serverless/build.yaml b/distributions/hf-serverless/build.yaml deleted file mode 100644 index f6da3ad4d..000000000 --- a/distributions/hf-serverless/build.yaml +++ /dev/null @@ -1,10 +0,0 @@ -name: hf-serverless -distribution_spec: - description: "Like local, but use Hugging Face Inference API (serverless) for running LLM inference.\nSee https://hf.co/docs/api-inference." - providers: - inference: remote::hf::serverless - memory: meta-reference - safety: meta-reference - agents: meta-reference - telemetry: meta-reference -image_type: conda diff --git a/distributions/hf-serverless/build.yaml b/distributions/hf-serverless/build.yaml new file mode 120000 index 000000000..f2db0fd55 --- /dev/null +++ b/distributions/hf-serverless/build.yaml @@ -0,0 +1 @@ +../../llama_stack/templates/hf-serverless/build.yaml \ No newline at end of file diff --git a/distributions/meta-reference-gpu/build.yaml b/distributions/meta-reference-gpu/build.yaml deleted file mode 100644 index 08e034154..000000000 --- a/distributions/meta-reference-gpu/build.yaml +++ /dev/null @@ -1,14 +0,0 @@ -name: meta-reference-gpu -distribution_spec: - docker_image: pytorch/pytorch:2.5.0-cuda12.4-cudnn9-runtime - description: Use code from `llama_stack` itself to serve all llama stack APIs - providers: - inference: meta-reference - memory: - - meta-reference - - remote::chromadb - - remote::pgvector - safety: meta-reference - agents: meta-reference - telemetry: meta-reference -image_type: docker diff --git a/distributions/meta-reference-gpu/build.yaml b/distributions/meta-reference-gpu/build.yaml new file mode 120000 index 000000000..4418195eb --- /dev/null +++ b/distributions/meta-reference-gpu/build.yaml @@ -0,0 +1 @@ +../../llama_stack/templates/meta-reference-gpu/build.yaml \ No newline at end of file diff --git a/distributions/meta-reference-quantized-gpu/build.yaml b/distributions/meta-reference-quantized-gpu/build.yaml deleted file mode 100644 index e9ddb4aad..000000000 --- a/distributions/meta-reference-quantized-gpu/build.yaml +++ /dev/null @@ -1,14 +0,0 @@ -name: meta-reference-quantized-gpu -distribution_spec: - docker_image: pytorch/pytorch:2.5.0-cuda12.4-cudnn9-runtime - description: Use code from `llama_stack` itself to serve all llama stack APIs - providers: - inference: meta-reference-quantized - memory: - - meta-reference - - remote::chromadb - - remote::pgvector - safety: meta-reference - agents: meta-reference - telemetry: meta-reference -image_type: docker diff --git a/distributions/meta-reference-quantized-gpu/build.yaml b/distributions/meta-reference-quantized-gpu/build.yaml new file mode 120000 index 000000000..f3dbe996f --- /dev/null +++ b/distributions/meta-reference-quantized-gpu/build.yaml @@ -0,0 +1 @@ +../../llama_stack/templates/meta-reference-quantized-gpu/build.yaml \ No newline at end of file diff --git a/distributions/ollama/README.md b/distributions/ollama/README.md index d59c3f9e1..70bc27a85 100644 --- a/distributions/ollama/README.md +++ b/distributions/ollama/README.md @@ -86,6 +86,6 @@ inference: **Via Conda** ``` 
-llama stack build --config ./build.yaml +llama stack build --template ollama --image-type conda llama stack run ./gpu/run.yaml ``` diff --git a/distributions/ollama/build.yaml b/distributions/ollama/build.yaml deleted file mode 100644 index c27f40929..000000000 --- a/distributions/ollama/build.yaml +++ /dev/null @@ -1,13 +0,0 @@ -name: ollama -distribution_spec: - description: Use ollama for running LLM inference - providers: - inference: remote::ollama - memory: - - meta-reference - - remote::chromadb - - remote::pgvector - safety: meta-reference - agents: meta-reference - telemetry: meta-reference -image_type: docker diff --git a/distributions/ollama/build.yaml b/distributions/ollama/build.yaml new file mode 120000 index 000000000..8772548e0 --- /dev/null +++ b/distributions/ollama/build.yaml @@ -0,0 +1 @@ +../../llama_stack/templates/ollama/build.yaml \ No newline at end of file diff --git a/distributions/tgi/README.md b/distributions/tgi/README.md index 86d2636d7..886252ecd 100644 --- a/distributions/tgi/README.md +++ b/distributions/tgi/README.md @@ -88,7 +88,7 @@ inference: **Via Conda** ```bash -llama stack build --config ./build.yaml +llama stack build --template tgi --image-type conda # -- start a TGI server endpoint llama stack run ./gpu/run.yaml ``` diff --git a/distributions/tgi/build.yaml b/distributions/tgi/build.yaml deleted file mode 100644 index 2c0ca1d33..000000000 --- a/distributions/tgi/build.yaml +++ /dev/null @@ -1,13 +0,0 @@ -name: tgi -distribution_spec: - description: Use TGI for running LLM inference - providers: - inference: remote::tgi - memory: - - meta-reference - - remote::chromadb - - remote::pgvector - safety: meta-reference - agents: meta-reference - telemetry: meta-reference -image_type: docker diff --git a/distributions/tgi/build.yaml b/distributions/tgi/build.yaml new file mode 120000 index 000000000..73e59ad84 --- /dev/null +++ b/distributions/tgi/build.yaml @@ -0,0 +1 @@ +../../llama_stack/templates/tgi/build.yaml \ No newline at end of file diff --git a/distributions/together/README.md b/distributions/together/README.md index 227c7a450..b964673e0 100644 --- a/distributions/together/README.md +++ b/distributions/together/README.md @@ -62,7 +62,7 @@ memory: **Via Conda** ```bash -llama stack build --config ./build.yaml +llama stack build --template together --image-type conda # -- modify run.yaml to a valid Together server endpoint llama stack run ./run.yaml ``` diff --git a/distributions/together/build.yaml b/distributions/together/build.yaml deleted file mode 100644 index 49eab859d..000000000 --- a/distributions/together/build.yaml +++ /dev/null @@ -1,10 +0,0 @@ -name: together -distribution_spec: - description: Use Together.ai for running LLM inference - providers: - inference: remote::together - memory: remote::weaviate - safety: remote::together - agents: meta-reference - telemetry: meta-reference -image_type: docker diff --git a/distributions/together/build.yaml b/distributions/together/build.yaml new file mode 120000 index 000000000..3877a9c96 --- /dev/null +++ b/distributions/together/build.yaml @@ -0,0 +1 @@ +../../llama_stack/templates/together/build.yaml \ No newline at end of file diff --git a/distributions/vllm/build.yaml b/distributions/vllm/build.yaml deleted file mode 100644 index f41352eb1..000000000 --- a/distributions/vllm/build.yaml +++ /dev/null @@ -1,10 +0,0 @@ -name: vllm -distribution_spec: - description: Like local, but use vLLM for running LLM inference - providers: - inference: vllm - memory: meta-reference - safety: 
meta-reference - agents: meta-reference - telemetry: meta-reference -image_type: conda \ No newline at end of file diff --git a/distributions/vllm/build.yaml b/distributions/vllm/build.yaml new file mode 120000 index 000000000..dfc9401b6 --- /dev/null +++ b/distributions/vllm/build.yaml @@ -0,0 +1 @@ +../../llama_stack/templates/vllm/build.yaml \ No newline at end of file diff --git a/docs/cli_reference.md b/docs/cli_reference.md index f0f67192f..ddc8e6b3e 100644 --- a/docs/cli_reference.md +++ b/docs/cli_reference.md @@ -279,11 +279,11 @@ llama stack build --list-templates You may then pick a template to build your distribution with providers fitted to your liking. ``` -llama stack build --template local-tgi --name my-tgi-stack +llama stack build --template local-tgi --name my-tgi-stack --image-type conda ``` ``` -$ llama stack build --template local-tgi --name my-tgi-stack +$ llama stack build --template local-tgi --name my-tgi-stack --image-type conda ... ... Build spec configuration saved at ~/.conda/envs/llamastack-my-tgi-stack/my-tgi-stack-build.yaml You may now run `llama stack configure my-tgi-stack` or `llama stack configure ~ #### Building from config file - In addition to templates, you may customize the build to your liking through editing config files and build from config files with the following command. -- The config file will be of contents like the ones in `llama_stack/distributions/templates/`. +- The config file will have contents like the ones in `llama_stack/templates/`. ``` -$ cat llama_stack/distribution/templates/local-ollama-build.yaml +$ cat build.yaml name: local-ollama distribution_spec: @@ -311,7 +311,7 @@ image_type: conda ``` ``` -llama stack build --config llama_stack/distribution/templates/local-ollama-build.yaml +llama stack build --config build.yaml ``` #### How to build distribution with Docker image diff --git a/docs/getting_started.md b/docs/getting_started.md index 4f06f5d47..2a90301d0 100644 --- a/docs/getting_started.md +++ b/docs/getting_started.md @@ -35,11 +35,7 @@ You have two ways to start up Llama stack server: 1. **Starting up server via docker**: - We provide 2 pre-built Docker image of Llama Stack distribution, which can be found in the following links. - - [llamastack-local-gpu](https://hub.docker.com/repository/docker/llamastack/llamastack-local-gpu/general) - - This is a packaged version with our local meta-reference implementations, where you will be running inference locally with downloaded Llama model checkpoints. - - [llamastack-local-cpu](https://hub.docker.com/repository/docker/llamastack/llamastack-local-cpu/general) - - This is a lite version with remote inference where you can hook up to your favourite remote inference framework (e.g. ollama, fireworks, together, tgi) for running inference without GPU. + We provide pre-built Docker images of Llama Stack distributions, which can be found via the links in the [distributions](../distributions/) folder. > [!NOTE] > For GPU inference, you need to set these environment variables for specifying local directory containing your model checkpoints, and enable GPU inference to start running docker container.
diff --git a/llama_stack/cli/stack/build.py b/llama_stack/cli/stack/build.py index 098932f8a..40fca4c6d 100644 --- a/llama_stack/cli/stack/build.py +++ b/llama_stack/cli/stack/build.py @@ -12,9 +12,7 @@ import os from functools import lru_cache from pathlib import Path -TEMPLATES_PATH = ( - Path(os.path.relpath(__file__)).parent.parent.parent.parent / "distributions" -) +TEMPLATES_PATH = Path(os.path.relpath(__file__)).parent.parent.parent / "templates" @lru_cache() @@ -26,7 +24,6 @@ def available_templates_specs() -> List[BuildConfig]: with open(p, "r") as f: build_config = BuildConfig(**yaml.safe_load(f)) template_specs.append(build_config) - return template_specs @@ -99,19 +96,22 @@ class StackBuild(Subcommand): "You must specify a name for the build using --name when using a template" ) return - build_path = TEMPLATES_PATH / f"{args.template}-build.yaml" - if not build_path.exists(): - self.parser.error( - f"Could not find template {args.template}. Please run `llama stack build --list-templates` to check out the available templates" - ) - return - with open(build_path, "r") as f: - build_config = BuildConfig(**yaml.safe_load(f)) - build_config.name = args.name - if args.image_type: - build_config.image_type = args.image_type - self._run_stack_build_command_from_build_config(build_config) + available_templates = available_templates_specs() + for build_config in available_templates: + if build_config.name == args.template: + build_config.name = args.name + if args.image_type: + build_config.image_type = args.image_type + else: + self.parser.error( + f"Please specify a image-type (docker | conda) for {args.template}" + ) + self._run_stack_build_command_from_build_config(build_config) + return + self.parser.error( + f"Could not find template {args.template}. Please run `llama stack build --list-templates` to check out the available templates" + ) return # try to see if we can find a pre-existing build config file through name diff --git a/llama_stack/distribution/build.py b/llama_stack/distribution/build.py index 13c545723..e3a9d9186 100644 --- a/llama_stack/distribution/build.py +++ b/llama_stack/distribution/build.py @@ -8,18 +8,19 @@ from enum import Enum from typing import List, Optional import pkg_resources - -from llama_stack.distribution.utils.exec import run_with_pty from pydantic import BaseModel from termcolor import cprint +from llama_stack.distribution.utils.exec import run_with_pty + from llama_stack.distribution.datatypes import * # noqa: F403 from pathlib import Path -from llama_stack.distribution.utils.config_dirs import BUILDS_BASE_DIR from llama_stack.distribution.distribution import get_provider_registry +from llama_stack.distribution.utils.config_dirs import BUILDS_BASE_DIR + # These are the dependencies needed by the distribution server. # `llama-stack` is automatically installed by the installation script. diff --git a/llama_stack/providers/impls/meta_reference/inference/quantization/scripts/build_conda.sh b/llama_stack/providers/impls/meta_reference/inference/quantization/scripts/build_conda.sh index d3028f8e8..ae0ed0bac 100644 --- a/llama_stack/providers/impls/meta_reference/inference/quantization/scripts/build_conda.sh +++ b/llama_stack/providers/impls/meta_reference/inference/quantization/scripts/build_conda.sh @@ -1,5 +1,11 @@ #!/bin/bash +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
+ if [[ $# -ne 1 ]]; then echo "Error: Please provide the name of CONDA environment you wish to create" exit 1 diff --git a/llama_stack/templates/bedrock/build.yaml b/llama_stack/templates/bedrock/build.yaml new file mode 100644 index 000000000..a3ff27949 --- /dev/null +++ b/llama_stack/templates/bedrock/build.yaml @@ -0,0 +1,9 @@ +name: bedrock +distribution_spec: + description: Use Amazon Bedrock APIs. + providers: + inference: remote::bedrock + memory: meta-reference + safety: meta-reference + agents: meta-reference + telemetry: meta-reference diff --git a/llama_stack/templates/databricks/build.yaml b/llama_stack/templates/databricks/build.yaml new file mode 100644 index 000000000..f6c8b50a1 --- /dev/null +++ b/llama_stack/templates/databricks/build.yaml @@ -0,0 +1,9 @@ +name: databricks +distribution_spec: + description: Use Databricks for running LLM inference + providers: + inference: remote::databricks + memory: meta-reference + safety: meta-reference + agents: meta-reference + telemetry: meta-reference diff --git a/llama_stack/templates/fireworks/build.yaml b/llama_stack/templates/fireworks/build.yaml new file mode 100644 index 000000000..37129bef0 --- /dev/null +++ b/llama_stack/templates/fireworks/build.yaml @@ -0,0 +1,9 @@ +name: fireworks +distribution_spec: + description: Use Fireworks.ai for running LLM inference + providers: + inference: remote::fireworks + memory: meta-reference + safety: meta-reference + agents: meta-reference + telemetry: meta-reference diff --git a/llama_stack/templates/hf-endpoint/build.yaml b/llama_stack/templates/hf-endpoint/build.yaml new file mode 100644 index 000000000..6c84e5ccf --- /dev/null +++ b/llama_stack/templates/hf-endpoint/build.yaml @@ -0,0 +1,9 @@ +name: hf-endpoint +distribution_spec: + description: "Like local, but use Hugging Face Inference Endpoints for running LLM inference.\nSee https://hf.co/docs/api-endpoints." + providers: + inference: remote::hf::endpoint + memory: meta-reference + safety: meta-reference + agents: meta-reference + telemetry: meta-reference diff --git a/llama_stack/templates/hf-serverless/build.yaml b/llama_stack/templates/hf-serverless/build.yaml new file mode 100644 index 000000000..32561c1fa --- /dev/null +++ b/llama_stack/templates/hf-serverless/build.yaml @@ -0,0 +1,9 @@ +name: hf-serverless +distribution_spec: + description: "Like local, but use Hugging Face Inference API (serverless) for running LLM inference.\nSee https://hf.co/docs/api-inference." 
+ providers: + inference: remote::hf::serverless + memory: meta-reference + safety: meta-reference + agents: meta-reference + telemetry: meta-reference diff --git a/llama_stack/templates/meta-reference-gpu/build.yaml b/llama_stack/templates/meta-reference-gpu/build.yaml new file mode 100644 index 000000000..d0fe93aa3 --- /dev/null +++ b/llama_stack/templates/meta-reference-gpu/build.yaml @@ -0,0 +1,13 @@ +name: meta-reference-gpu +distribution_spec: + docker_image: pytorch/pytorch:2.5.0-cuda12.4-cudnn9-runtime + description: Use code from `llama_stack` itself to serve all llama stack APIs + providers: + inference: meta-reference + memory: + - meta-reference + - remote::chromadb + - remote::pgvector + safety: meta-reference + agents: meta-reference + telemetry: meta-reference diff --git a/llama_stack/templates/meta-reference-quantized-gpu/build.yaml b/llama_stack/templates/meta-reference-quantized-gpu/build.yaml new file mode 100644 index 000000000..20500ea5a --- /dev/null +++ b/llama_stack/templates/meta-reference-quantized-gpu/build.yaml @@ -0,0 +1,13 @@ +name: meta-reference-quantized-gpu +distribution_spec: + docker_image: pytorch/pytorch:2.5.0-cuda12.4-cudnn9-runtime + description: Use code from `llama_stack` itself to serve all llama stack APIs + providers: + inference: meta-reference-quantized + memory: + - meta-reference + - remote::chromadb + - remote::pgvector + safety: meta-reference + agents: meta-reference + telemetry: meta-reference diff --git a/llama_stack/templates/ollama/build.yaml b/llama_stack/templates/ollama/build.yaml new file mode 100644 index 000000000..06de2fc3c --- /dev/null +++ b/llama_stack/templates/ollama/build.yaml @@ -0,0 +1,12 @@ +name: ollama +distribution_spec: + description: Use ollama for running LLM inference + providers: + inference: remote::ollama + memory: + - meta-reference + - remote::chromadb + - remote::pgvector + safety: meta-reference + agents: meta-reference + telemetry: meta-reference diff --git a/llama_stack/templates/tgi/build.yaml b/llama_stack/templates/tgi/build.yaml new file mode 100644 index 000000000..c5e618bb6 --- /dev/null +++ b/llama_stack/templates/tgi/build.yaml @@ -0,0 +1,12 @@ +name: tgi +distribution_spec: + description: Use TGI for running LLM inference + providers: + inference: remote::tgi + memory: + - meta-reference + - remote::chromadb + - remote::pgvector + safety: meta-reference + agents: meta-reference + telemetry: meta-reference diff --git a/llama_stack/templates/together/build.yaml b/llama_stack/templates/together/build.yaml new file mode 100644 index 000000000..5232aeb93 --- /dev/null +++ b/llama_stack/templates/together/build.yaml @@ -0,0 +1,9 @@ +name: together +distribution_spec: + description: Use Together.ai for running LLM inference + providers: + inference: remote::together + memory: remote::weaviate + safety: remote::together + agents: meta-reference + telemetry: meta-reference diff --git a/llama_stack/templates/vllm/build.yaml b/llama_stack/templates/vllm/build.yaml new file mode 100644 index 000000000..d842896db --- /dev/null +++ b/llama_stack/templates/vllm/build.yaml @@ -0,0 +1,9 @@ +name: vllm +distribution_spec: + description: Like local, but use vLLM for running LLM inference + providers: + inference: vllm + memory: meta-reference + safety: meta-reference + agents: meta-reference + telemetry: meta-reference