From 56f9b7d5d6654462ac851a4c0183e91cc892456e Mon Sep 17 00:00:00 2001
From: Xi Yan
Date: Fri, 25 Oct 2024 11:54:09 -0700
Subject: [PATCH] move build.yaml to templates, symlink in distributions

---
 distributions/bedrock/build.yaml               | 11 +----------
 distributions/databricks/build.yaml            | 11 +----------
 distributions/fireworks/build.yaml             | 11 +----------
 distributions/hf-endpoint/build.yaml           | 11 +----------
 distributions/hf-serverless/build.yaml         | 11 +----------
 distributions/meta-reference-gpu/build.yaml    | 15 +--------------
 .../meta-reference-quantized-gpu/build.yaml    | 15 +--------------
 distributions/ollama/build.yaml                | 14 +-------------
 distributions/tgi/build.yaml                   | 14 +-------------
 distributions/together/build.yaml              | 11 +----------
 distributions/vllm/build.yaml                  | 11 +----------
 llama_stack/cli/stack/build.py                 | 10 +---------
 llama_stack/templates/bedrock/build.yaml       | 11 ++++++++++-
 llama_stack/templates/databricks/build.yaml    | 10 ++++++++++
 llama_stack/templates/fireworks/build.yaml     | 11 ++++++++++-
 llama_stack/templates/hf-endpoint/build.yaml   | 11 ++++++++++-
 llama_stack/templates/hf-serverless/build.yaml | 11 ++++++++++-
 .../templates/meta-reference-gpu/build.yaml    | 15 ++++++++++++++-
 .../meta-reference-quantized-gpu/build.yaml    | 14 ++++++++++++++
 llama_stack/templates/ollama/build.yaml        | 14 +++++++++++++-
 llama_stack/templates/tgi/build.yaml           | 14 +++++++++++++-
 llama_stack/templates/together/build.yaml      | 11 ++++++++++-
 llama_stack/templates/vllm/build.yaml          | 11 ++++++++++-
 23 files changed, 136 insertions(+), 142 deletions(-)
 mode change 100644 => 120000 distributions/bedrock/build.yaml
 mode change 100644 => 120000 distributions/databricks/build.yaml
 mode change 100644 => 120000 distributions/fireworks/build.yaml
 mode change 100644 => 120000 distributions/hf-endpoint/build.yaml
 mode change 100644 => 120000 distributions/hf-serverless/build.yaml
 mode change 100644 => 120000 distributions/meta-reference-gpu/build.yaml
 mode change 100644 => 120000 distributions/meta-reference-quantized-gpu/build.yaml
 mode change 100644 => 120000 distributions/ollama/build.yaml
 mode change 100644 => 120000 distributions/tgi/build.yaml
 mode change 100644 => 120000 distributions/together/build.yaml
 mode change 100644 => 120000 distributions/vllm/build.yaml
 mode change 120000 => 100644 llama_stack/templates/bedrock/build.yaml
 create mode 100644 llama_stack/templates/databricks/build.yaml
 mode change 120000 => 100644 llama_stack/templates/fireworks/build.yaml
 mode change 120000 => 100644 llama_stack/templates/hf-endpoint/build.yaml
 mode change 120000 => 100644 llama_stack/templates/hf-serverless/build.yaml
 mode change 120000 => 100644 llama_stack/templates/meta-reference-gpu/build.yaml
 create mode 100644 llama_stack/templates/meta-reference-quantized-gpu/build.yaml
 mode change 120000 => 100644 llama_stack/templates/ollama/build.yaml
 mode change 120000 => 100644 llama_stack/templates/tgi/build.yaml
 mode change 120000 => 100644 llama_stack/templates/together/build.yaml
 mode change 120000 => 100644 llama_stack/templates/vllm/build.yaml

diff --git a/distributions/bedrock/build.yaml b/distributions/bedrock/build.yaml
deleted file mode 100644
index ae7b27d49..000000000
--- a/distributions/bedrock/build.yaml
+++ /dev/null
@@ -1,10 +0,0 @@
-name: bedrock
-distribution_spec:
-  description: Use Amazon Bedrock APIs.
-  providers:
-    inference: remote::bedrock
-    memory: meta-reference
-    safety: meta-reference
-    agents: meta-reference
-    telemetry: meta-reference
-image_type: conda
diff --git a/distributions/bedrock/build.yaml b/distributions/bedrock/build.yaml
new file mode 120000
index 000000000..72402ef8d
--- /dev/null
+++ b/distributions/bedrock/build.yaml
@@ -0,0 +1 @@
+../../llama_stack/templates/bedrock/build.yaml
\ No newline at end of file
diff --git a/distributions/databricks/build.yaml b/distributions/databricks/build.yaml
deleted file mode 100644
index 2188dd0a0..000000000
--- a/distributions/databricks/build.yaml
+++ /dev/null
@@ -1,10 +0,0 @@
-name: databricks
-distribution_spec:
-  description: Use Databricks for running LLM inference
-  providers:
-    inference: remote::databricks
-    memory: meta-reference
-    safety: meta-reference
-    agents: meta-reference
-    telemetry: meta-reference
-image_type: conda
diff --git a/distributions/databricks/build.yaml b/distributions/databricks/build.yaml
new file mode 120000
index 000000000..66342fe6f
--- /dev/null
+++ b/distributions/databricks/build.yaml
@@ -0,0 +1 @@
+../../llama_stack/templates/databricks/build.yaml
\ No newline at end of file
diff --git a/distributions/fireworks/build.yaml b/distributions/fireworks/build.yaml
deleted file mode 100644
index 2e5cf0753..000000000
--- a/distributions/fireworks/build.yaml
+++ /dev/null
@@ -1,10 +0,0 @@
-name: fireworks
-distribution_spec:
-  description: Use Fireworks.ai for running LLM inference
-  providers:
-    inference: remote::fireworks
-    memory: meta-reference
-    safety: meta-reference
-    agents: meta-reference
-    telemetry: meta-reference
-image_type: docker
diff --git a/distributions/fireworks/build.yaml b/distributions/fireworks/build.yaml
new file mode 120000
index 000000000..32a5bd869
--- /dev/null
+++ b/distributions/fireworks/build.yaml
@@ -0,0 +1 @@
+../../llama_stack/templates/fireworks/build.yaml
\ No newline at end of file
diff --git a/distributions/hf-endpoint/build.yaml b/distributions/hf-endpoint/build.yaml
deleted file mode 100644
index 750bebcb5..000000000
--- a/distributions/hf-endpoint/build.yaml
+++ /dev/null
@@ -1,10 +0,0 @@
-name: hf-endpoint
-distribution_spec:
-  description: "Like local, but use Hugging Face Inference Endpoints for running LLM inference.\nSee https://hf.co/docs/api-endpoints."
-  providers:
-    inference: remote::hf::endpoint
-    memory: meta-reference
-    safety: meta-reference
-    agents: meta-reference
-    telemetry: meta-reference
-image_type: conda
diff --git a/distributions/hf-endpoint/build.yaml b/distributions/hf-endpoint/build.yaml
new file mode 120000
index 000000000..a73c70c05
--- /dev/null
+++ b/distributions/hf-endpoint/build.yaml
@@ -0,0 +1 @@
+../../llama_stack/templates/hf-endpoint/build.yaml
\ No newline at end of file
diff --git a/distributions/hf-serverless/build.yaml b/distributions/hf-serverless/build.yaml
deleted file mode 100644
index f6da3ad4d..000000000
--- a/distributions/hf-serverless/build.yaml
+++ /dev/null
@@ -1,10 +0,0 @@
-name: hf-serverless
-distribution_spec:
-  description: "Like local, but use Hugging Face Inference API (serverless) for running LLM inference.\nSee https://hf.co/docs/api-inference."
-  providers:
-    inference: remote::hf::serverless
-    memory: meta-reference
-    safety: meta-reference
-    agents: meta-reference
-    telemetry: meta-reference
-image_type: conda
diff --git a/distributions/hf-serverless/build.yaml b/distributions/hf-serverless/build.yaml
new file mode 120000
index 000000000..f2db0fd55
--- /dev/null
+++ b/distributions/hf-serverless/build.yaml
@@ -0,0 +1 @@
+../../llama_stack/templates/hf-serverless/build.yaml
\ No newline at end of file
diff --git a/distributions/meta-reference-gpu/build.yaml b/distributions/meta-reference-gpu/build.yaml
deleted file mode 100644
index 5b1521a92..000000000
--- a/distributions/meta-reference-gpu/build.yaml
+++ /dev/null
@@ -1,14 +0,0 @@
-name: meta-reference-gpu
-distribution_spec:
-  docker_image: pytorch/pytorch
-  description: Use code from `llama_stack` itself to serve all llama stack APIs
-  providers:
-    inference: meta-reference
-    memory:
-    - meta-reference
-    - remote::chromadb
-    - remote::pgvector
-    safety: meta-reference
-    agents: meta-reference
-    telemetry: meta-reference
-image_type: docker
diff --git a/distributions/meta-reference-gpu/build.yaml b/distributions/meta-reference-gpu/build.yaml
new file mode 120000
index 000000000..4418195eb
--- /dev/null
+++ b/distributions/meta-reference-gpu/build.yaml
@@ -0,0 +1 @@
+../../llama_stack/templates/meta-reference-gpu/build.yaml
\ No newline at end of file
diff --git a/distributions/meta-reference-quantized-gpu/build.yaml b/distributions/meta-reference-quantized-gpu/build.yaml
deleted file mode 100644
index e9ddb4aad..000000000
--- a/distributions/meta-reference-quantized-gpu/build.yaml
+++ /dev/null
@@ -1,14 +0,0 @@
-name: meta-reference-quantized-gpu
-distribution_spec:
-  docker_image: pytorch/pytorch:2.5.0-cuda12.4-cudnn9-runtime
-  description: Use code from `llama_stack` itself to serve all llama stack APIs
-  providers:
-    inference: meta-reference-quantized
-    memory:
-    - meta-reference
-    - remote::chromadb
-    - remote::pgvector
-    safety: meta-reference
-    agents: meta-reference
-    telemetry: meta-reference
-image_type: docker
diff --git a/distributions/meta-reference-quantized-gpu/build.yaml b/distributions/meta-reference-quantized-gpu/build.yaml
new file mode 120000
index 000000000..f3dbe996f
--- /dev/null
+++ b/distributions/meta-reference-quantized-gpu/build.yaml
@@ -0,0 +1 @@
+../../llama_stack/templates/meta-reference-quantized-gpu/build.yaml
\ No newline at end of file
diff --git a/distributions/ollama/build.yaml b/distributions/ollama/build.yaml
deleted file mode 100644
index c27f40929..000000000
--- a/distributions/ollama/build.yaml
+++ /dev/null
@@ -1,13 +0,0 @@
-name: ollama
-distribution_spec:
-  description: Use ollama for running LLM inference
-  providers:
-    inference: remote::ollama
-    memory:
-    - meta-reference
-    - remote::chromadb
-    - remote::pgvector
-    safety: meta-reference
-    agents: meta-reference
-    telemetry: meta-reference
-image_type: docker
diff --git a/distributions/ollama/build.yaml b/distributions/ollama/build.yaml
new file mode 120000
index 000000000..8772548e0
--- /dev/null
+++ b/distributions/ollama/build.yaml
@@ -0,0 +1 @@
+../../llama_stack/templates/ollama/build.yaml
\ No newline at end of file
diff --git a/distributions/tgi/build.yaml b/distributions/tgi/build.yaml
deleted file mode 100644
index 2c0ca1d33..000000000
--- a/distributions/tgi/build.yaml
+++ /dev/null
@@ -1,13 +0,0 @@
-name: tgi
-distribution_spec:
-  description: Use TGI for running LLM inference
-  providers:
-    inference: remote::tgi
-    memory:
-    - meta-reference
-    - remote::chromadb
-    - remote::pgvector
-    safety: meta-reference
-    agents: meta-reference
-    telemetry: meta-reference
-image_type: docker
diff --git a/distributions/tgi/build.yaml b/distributions/tgi/build.yaml
new file mode 120000
index 000000000..73e59ad84
--- /dev/null
+++ b/distributions/tgi/build.yaml
@@ -0,0 +1 @@
+../../llama_stack/templates/tgi/build.yaml
\ No newline at end of file
diff --git a/distributions/together/build.yaml b/distributions/together/build.yaml
deleted file mode 100644
index 49eab859d..000000000
--- a/distributions/together/build.yaml
+++ /dev/null
@@ -1,10 +0,0 @@
-name: together
-distribution_spec:
-  description: Use Together.ai for running LLM inference
-  providers:
-    inference: remote::together
-    memory: remote::weaviate
-    safety: remote::together
-    agents: meta-reference
-    telemetry: meta-reference
-image_type: docker
diff --git a/distributions/together/build.yaml b/distributions/together/build.yaml
new file mode 120000
index 000000000..3877a9c96
--- /dev/null
+++ b/distributions/together/build.yaml
@@ -0,0 +1 @@
+../../llama_stack/templates/together/build.yaml
\ No newline at end of file
diff --git a/distributions/vllm/build.yaml b/distributions/vllm/build.yaml
deleted file mode 100644
index f41352eb1..000000000
--- a/distributions/vllm/build.yaml
+++ /dev/null
@@ -1,10 +0,0 @@
-name: vllm
-distribution_spec:
-  description: Like local, but use vLLM for running LLM inference
-  providers:
-    inference: vllm
-    memory: meta-reference
-    safety: meta-reference
-    agents: meta-reference
-    telemetry: meta-reference
-image_type: conda
\ No newline at end of file
diff --git a/distributions/vllm/build.yaml b/distributions/vllm/build.yaml
new file mode 120000
index 000000000..dfc9401b6
--- /dev/null
+++ b/distributions/vllm/build.yaml
@@ -0,0 +1 @@
+../../llama_stack/templates/vllm/build.yaml
\ No newline at end of file
diff --git a/llama_stack/cli/stack/build.py b/llama_stack/cli/stack/build.py
index fde471986..2273e67fb 100644
--- a/llama_stack/cli/stack/build.py
+++ b/llama_stack/cli/stack/build.py
@@ -12,15 +12,7 @@
 import os
 from functools import lru_cache
 from pathlib import Path
 
-TEMPLATES_PATH = (
-    Path(os.path.relpath(__file__)).parent.parent.parent.parent / "distributions"
-)
-
-# build.yaml templates exist in the llama-stack/distributions while wheel installs llama-stack/llama_stack
-# we copied the distributions folder to llama-stack/llama_stack/cli/distributions for wheel builds,
-# so we need to check both locations
-if not TEMPLATES_PATH.exists():
-    TEMPLATES_PATH = Path(os.path.relpath(__file__)).parent.parent.parent / "templates"
+TEMPLATES_PATH = Path(os.path.relpath(__file__)).parent.parent.parent / "templates"
 
 @lru_cache()
diff --git a/llama_stack/templates/bedrock/build.yaml b/llama_stack/templates/bedrock/build.yaml
deleted file mode 120000
index 28a253c78..000000000
--- a/llama_stack/templates/bedrock/build.yaml
+++ /dev/null
@@ -1 +0,0 @@
-../../distributions/bedrock/build.yaml
\ No newline at end of file
diff --git a/llama_stack/templates/bedrock/build.yaml b/llama_stack/templates/bedrock/build.yaml
new file mode 100644
index 000000000..ae7b27d49
--- /dev/null
+++ b/llama_stack/templates/bedrock/build.yaml
@@ -0,0 +1,10 @@
+name: bedrock
+distribution_spec:
+  description: Use Amazon Bedrock APIs.
+  providers:
+    inference: remote::bedrock
+    memory: meta-reference
+    safety: meta-reference
+    agents: meta-reference
+    telemetry: meta-reference
+image_type: conda
diff --git a/llama_stack/templates/databricks/build.yaml b/llama_stack/templates/databricks/build.yaml
new file mode 100644
index 000000000..2188dd0a0
--- /dev/null
+++ b/llama_stack/templates/databricks/build.yaml
@@ -0,0 +1,10 @@
+name: databricks
+distribution_spec:
+  description: Use Databricks for running LLM inference
+  providers:
+    inference: remote::databricks
+    memory: meta-reference
+    safety: meta-reference
+    agents: meta-reference
+    telemetry: meta-reference
+image_type: conda
diff --git a/llama_stack/templates/fireworks/build.yaml b/llama_stack/templates/fireworks/build.yaml
deleted file mode 120000
index 14e6cb2fe..000000000
--- a/llama_stack/templates/fireworks/build.yaml
+++ /dev/null
@@ -1 +0,0 @@
-../../distributions/fireworks/build.yaml
\ No newline at end of file
diff --git a/llama_stack/templates/fireworks/build.yaml b/llama_stack/templates/fireworks/build.yaml
new file mode 100644
index 000000000..2e5cf0753
--- /dev/null
+++ b/llama_stack/templates/fireworks/build.yaml
@@ -0,0 +1,10 @@
+name: fireworks
+distribution_spec:
+  description: Use Fireworks.ai for running LLM inference
+  providers:
+    inference: remote::fireworks
+    memory: meta-reference
+    safety: meta-reference
+    agents: meta-reference
+    telemetry: meta-reference
+image_type: docker
diff --git a/llama_stack/templates/hf-endpoint/build.yaml b/llama_stack/templates/hf-endpoint/build.yaml
deleted file mode 120000
index 469d4da77..000000000
--- a/llama_stack/templates/hf-endpoint/build.yaml
+++ /dev/null
@@ -1 +0,0 @@
-../../distributions/hf-endpoint/build.yaml
\ No newline at end of file
diff --git a/llama_stack/templates/hf-endpoint/build.yaml b/llama_stack/templates/hf-endpoint/build.yaml
new file mode 100644
index 000000000..750bebcb5
--- /dev/null
+++ b/llama_stack/templates/hf-endpoint/build.yaml
@@ -0,0 +1,10 @@
+name: hf-endpoint
+distribution_spec:
+  description: "Like local, but use Hugging Face Inference Endpoints for running LLM inference.\nSee https://hf.co/docs/api-endpoints."
+  providers:
+    inference: remote::hf::endpoint
+    memory: meta-reference
+    safety: meta-reference
+    agents: meta-reference
+    telemetry: meta-reference
+image_type: conda
diff --git a/llama_stack/templates/hf-serverless/build.yaml b/llama_stack/templates/hf-serverless/build.yaml
deleted file mode 120000
index 6f14f7bc4..000000000
--- a/llama_stack/templates/hf-serverless/build.yaml
+++ /dev/null
@@ -1 +0,0 @@
-../../distributions/hf-serverless/build.yaml
\ No newline at end of file
diff --git a/llama_stack/templates/hf-serverless/build.yaml b/llama_stack/templates/hf-serverless/build.yaml
new file mode 100644
index 000000000..f6da3ad4d
--- /dev/null
+++ b/llama_stack/templates/hf-serverless/build.yaml
@@ -0,0 +1,10 @@
+name: hf-serverless
+distribution_spec:
+  description: "Like local, but use Hugging Face Inference API (serverless) for running LLM inference.\nSee https://hf.co/docs/api-inference."
+  providers:
+    inference: remote::hf::serverless
+    memory: meta-reference
+    safety: meta-reference
+    agents: meta-reference
+    telemetry: meta-reference
+image_type: conda
diff --git a/llama_stack/templates/meta-reference-gpu/build.yaml b/llama_stack/templates/meta-reference-gpu/build.yaml
deleted file mode 120000
index 7a7c35fb5..000000000
--- a/llama_stack/templates/meta-reference-gpu/build.yaml
+++ /dev/null
@@ -1 +0,0 @@
-../../distributions/meta-reference-gpu/build.yaml
\ No newline at end of file
diff --git a/llama_stack/templates/meta-reference-gpu/build.yaml b/llama_stack/templates/meta-reference-gpu/build.yaml
new file mode 100644
index 000000000..5b1521a92
--- /dev/null
+++ b/llama_stack/templates/meta-reference-gpu/build.yaml
@@ -0,0 +1,14 @@
+name: meta-reference-gpu
+distribution_spec:
+  docker_image: pytorch/pytorch
+  description: Use code from `llama_stack` itself to serve all llama stack APIs
+  providers:
+    inference: meta-reference
+    memory:
+    - meta-reference
+    - remote::chromadb
+    - remote::pgvector
+    safety: meta-reference
+    agents: meta-reference
+    telemetry: meta-reference
+image_type: docker
diff --git a/llama_stack/templates/meta-reference-quantized-gpu/build.yaml b/llama_stack/templates/meta-reference-quantized-gpu/build.yaml
new file mode 100644
index 000000000..e9ddb4aad
--- /dev/null
+++ b/llama_stack/templates/meta-reference-quantized-gpu/build.yaml
@@ -0,0 +1,14 @@
+name: meta-reference-quantized-gpu
+distribution_spec:
+  docker_image: pytorch/pytorch:2.5.0-cuda12.4-cudnn9-runtime
+  description: Use code from `llama_stack` itself to serve all llama stack APIs
+  providers:
+    inference: meta-reference-quantized
+    memory:
+    - meta-reference
+    - remote::chromadb
+    - remote::pgvector
+    safety: meta-reference
+    agents: meta-reference
+    telemetry: meta-reference
+image_type: docker
diff --git a/llama_stack/templates/ollama/build.yaml b/llama_stack/templates/ollama/build.yaml
deleted file mode 120000
index d8bf8a15d..000000000
--- a/llama_stack/templates/ollama/build.yaml
+++ /dev/null
@@ -1 +0,0 @@
-../../distributions/ollama/build.yaml
\ No newline at end of file
diff --git a/llama_stack/templates/ollama/build.yaml b/llama_stack/templates/ollama/build.yaml
new file mode 100644
index 000000000..c27f40929
--- /dev/null
+++ b/llama_stack/templates/ollama/build.yaml
@@ -0,0 +1,13 @@
+name: ollama
+distribution_spec:
+  description: Use ollama for running LLM inference
+  providers:
+    inference: remote::ollama
+    memory:
+    - meta-reference
+    - remote::chromadb
+    - remote::pgvector
+    safety: meta-reference
+    agents: meta-reference
+    telemetry: meta-reference
+image_type: docker
diff --git a/llama_stack/templates/tgi/build.yaml b/llama_stack/templates/tgi/build.yaml
deleted file mode 120000
index b3144911c..000000000
--- a/llama_stack/templates/tgi/build.yaml
+++ /dev/null
@@ -1 +0,0 @@
-../../distributions/tgi/build.yaml
\ No newline at end of file
diff --git a/llama_stack/templates/tgi/build.yaml b/llama_stack/templates/tgi/build.yaml
new file mode 100644
index 000000000..2c0ca1d33
--- /dev/null
+++ b/llama_stack/templates/tgi/build.yaml
@@ -0,0 +1,13 @@
+name: tgi
+distribution_spec:
+  description: Use TGI for running LLM inference
+  providers:
+    inference: remote::tgi
+    memory:
+    - meta-reference
+    - remote::chromadb
+    - remote::pgvector
+    safety: meta-reference
+    agents: meta-reference
+    telemetry: meta-reference
+image_type: docker
diff --git a/llama_stack/templates/together/build.yaml b/llama_stack/templates/together/build.yaml
deleted file mode 120000
index 69bc4510a..000000000
--- a/llama_stack/templates/together/build.yaml
+++ /dev/null
@@ -1 +0,0 @@
-../../distributions/together/build.yaml
\ No newline at end of file
diff --git a/llama_stack/templates/together/build.yaml b/llama_stack/templates/together/build.yaml
new file mode 100644
index 000000000..49eab859d
--- /dev/null
+++ b/llama_stack/templates/together/build.yaml
@@ -0,0 +1,10 @@
+name: together
+distribution_spec:
+  description: Use Together.ai for running LLM inference
+  providers:
+    inference: remote::together
+    memory: remote::weaviate
+    safety: remote::together
+    agents: meta-reference
+    telemetry: meta-reference
+image_type: docker
diff --git a/llama_stack/templates/vllm/build.yaml b/llama_stack/templates/vllm/build.yaml
deleted file mode 120000
index 8152145d3..000000000
--- a/llama_stack/templates/vllm/build.yaml
+++ /dev/null
@@ -1 +0,0 @@
-../../distributions/vllm/build.yaml
\ No newline at end of file
diff --git a/llama_stack/templates/vllm/build.yaml b/llama_stack/templates/vllm/build.yaml
new file mode 100644
index 000000000..f41352eb1
--- /dev/null
+++ b/llama_stack/templates/vllm/build.yaml
@@ -0,0 +1,10 @@
+name: vllm
+distribution_spec:
+  description: Like local, but use vLLM for running LLM inference
+  providers:
+    inference: vllm
+    memory: meta-reference
+    safety: meta-reference
+    agents: meta-reference
+    telemetry: meta-reference
+image_type: conda
\ No newline at end of file
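
Note on verifying the new layout: after this patch the canonical build.yaml
files live under llama_stack/templates/ (the tree that ships in the wheel, per
the comment removed from build.py above), and each distributions/<name>/build.yaml
is a relative symlink back to its template. Below is a minimal sketch for
checking that every symlink resolves as intended; it is illustrative rather
than part of the patch, assumes it runs from the repository root, and takes
only the directory layout from the diff above.

#!/usr/bin/env python3
# Illustrative check (not part of the patch): every
# distributions/<name>/build.yaml should now be a symlink (mode 120000)
# resolving to llama_stack/templates/<name>/build.yaml.
from pathlib import Path

DISTRIBUTIONS = Path("distributions")           # assumes cwd = repo root
TEMPLATES = Path("llama_stack") / "templates"

def check_symlinks() -> bool:
    ok = True
    for link in sorted(DISTRIBUTIONS.glob("*/build.yaml")):
        expected = TEMPLATES / link.parent.name / "build.yaml"
        if not link.is_symlink():
            print(f"FAIL {link}: expected a symlink")
            ok = False
        elif link.resolve() != expected.resolve():
            print(f"FAIL {link}: resolves to {link.resolve()}, expected {expected}")
            ok = False
        else:
            print(f"ok   {link} -> {expected}")
    return ok

if __name__ == "__main__":
    raise SystemExit(0 if check_symlinks() else 1)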