From 19adb4070abdebc826bf3de4ec3061fcec03d3e5 Mon Sep 17 00:00:00 2001
From: Xi Yan
Date: Fri, 25 Oct 2024 12:08:32 -0700
Subject: [PATCH] add build files to templates

---
 llama_stack/templates/bedrock/build.yaml           | 10 ++++++++++
 llama_stack/templates/databricks/build.yaml        | 10 ++++++++++
 llama_stack/templates/fireworks/build.yaml         | 10 ++++++++++
 llama_stack/templates/hf-endpoint/build.yaml       | 10 ++++++++++
 llama_stack/templates/hf-serverless/build.yaml     | 10 ++++++++++
 .../templates/meta-reference-gpu/build.yaml        | 14 ++++++++++++++
 .../meta-reference-quantized-gpu/build.yaml        | 14 ++++++++++++++
 llama_stack/templates/ollama/build.yaml            | 13 +++++++++++++
 llama_stack/templates/tgi/build.yaml               | 13 +++++++++++++
 llama_stack/templates/together/build.yaml          | 10 ++++++++++
 llama_stack/templates/vllm/build.yaml              | 10 ++++++++++
 11 files changed, 124 insertions(+)
 create mode 100644 llama_stack/templates/bedrock/build.yaml
 create mode 100644 llama_stack/templates/databricks/build.yaml
 create mode 100644 llama_stack/templates/fireworks/build.yaml
 create mode 100644 llama_stack/templates/hf-endpoint/build.yaml
 create mode 100644 llama_stack/templates/hf-serverless/build.yaml
 create mode 100644 llama_stack/templates/meta-reference-gpu/build.yaml
 create mode 100644 llama_stack/templates/meta-reference-quantized-gpu/build.yaml
 create mode 100644 llama_stack/templates/ollama/build.yaml
 create mode 100644 llama_stack/templates/tgi/build.yaml
 create mode 100644 llama_stack/templates/together/build.yaml
 create mode 100644 llama_stack/templates/vllm/build.yaml

diff --git a/llama_stack/templates/bedrock/build.yaml b/llama_stack/templates/bedrock/build.yaml
new file mode 100644
index 000000000..ae7b27d49
--- /dev/null
+++ b/llama_stack/templates/bedrock/build.yaml
@@ -0,0 +1,10 @@
+name: bedrock
+distribution_spec:
+  description: Use Amazon Bedrock APIs.
+  providers:
+    inference: remote::bedrock
+    memory: meta-reference
+    safety: meta-reference
+    agents: meta-reference
+    telemetry: meta-reference
+image_type: conda
diff --git a/llama_stack/templates/databricks/build.yaml b/llama_stack/templates/databricks/build.yaml
new file mode 100644
index 000000000..2188dd0a0
--- /dev/null
+++ b/llama_stack/templates/databricks/build.yaml
@@ -0,0 +1,10 @@
+name: databricks
+distribution_spec:
+  description: Use Databricks for running LLM inference
+  providers:
+    inference: remote::databricks
+    memory: meta-reference
+    safety: meta-reference
+    agents: meta-reference
+    telemetry: meta-reference
+image_type: conda
diff --git a/llama_stack/templates/fireworks/build.yaml b/llama_stack/templates/fireworks/build.yaml
new file mode 100644
index 000000000..2e5cf0753
--- /dev/null
+++ b/llama_stack/templates/fireworks/build.yaml
@@ -0,0 +1,10 @@
+name: fireworks
+distribution_spec:
+  description: Use Fireworks.ai for running LLM inference
+  providers:
+    inference: remote::fireworks
+    memory: meta-reference
+    safety: meta-reference
+    agents: meta-reference
+    telemetry: meta-reference
+image_type: docker
diff --git a/llama_stack/templates/hf-endpoint/build.yaml b/llama_stack/templates/hf-endpoint/build.yaml
new file mode 100644
index 000000000..750bebcb5
--- /dev/null
+++ b/llama_stack/templates/hf-endpoint/build.yaml
@@ -0,0 +1,10 @@
+name: hf-endpoint
+distribution_spec:
+  description: "Like local, but use Hugging Face Inference Endpoints for running LLM inference.\nSee https://hf.co/docs/api-endpoints."
+  providers:
+    inference: remote::hf::endpoint
+    memory: meta-reference
+    safety: meta-reference
+    agents: meta-reference
+    telemetry: meta-reference
+image_type: conda
diff --git a/llama_stack/templates/hf-serverless/build.yaml b/llama_stack/templates/hf-serverless/build.yaml
new file mode 100644
index 000000000..f6da3ad4d
--- /dev/null
+++ b/llama_stack/templates/hf-serverless/build.yaml
@@ -0,0 +1,10 @@
+name: hf-serverless
+distribution_spec:
+  description: "Like local, but use Hugging Face Inference API (serverless) for running LLM inference.\nSee https://hf.co/docs/api-inference."
+  providers:
+    inference: remote::hf::serverless
+    memory: meta-reference
+    safety: meta-reference
+    agents: meta-reference
+    telemetry: meta-reference
+image_type: conda
diff --git a/llama_stack/templates/meta-reference-gpu/build.yaml b/llama_stack/templates/meta-reference-gpu/build.yaml
new file mode 100644
index 000000000..08e034154
--- /dev/null
+++ b/llama_stack/templates/meta-reference-gpu/build.yaml
@@ -0,0 +1,14 @@
+name: meta-reference-gpu
+distribution_spec:
+  docker_image: pytorch/pytorch:2.5.0-cuda12.4-cudnn9-runtime
+  description: Use code from `llama_stack` itself to serve all llama stack APIs
+  providers:
+    inference: meta-reference
+    memory:
+    - meta-reference
+    - remote::chromadb
+    - remote::pgvector
+    safety: meta-reference
+    agents: meta-reference
+    telemetry: meta-reference
+image_type: docker
diff --git a/llama_stack/templates/meta-reference-quantized-gpu/build.yaml b/llama_stack/templates/meta-reference-quantized-gpu/build.yaml
new file mode 100644
index 000000000..e9ddb4aad
--- /dev/null
+++ b/llama_stack/templates/meta-reference-quantized-gpu/build.yaml
@@ -0,0 +1,14 @@
+name: meta-reference-quantized-gpu
+distribution_spec:
+  docker_image: pytorch/pytorch:2.5.0-cuda12.4-cudnn9-runtime
+  description: Use code from `llama_stack` itself to serve all llama stack APIs
+  providers:
+    inference: meta-reference-quantized
+    memory:
+    - meta-reference
+    - remote::chromadb
+    - remote::pgvector
+    safety: meta-reference
+    agents: meta-reference
+    telemetry: meta-reference
+image_type: docker
diff --git a/llama_stack/templates/ollama/build.yaml b/llama_stack/templates/ollama/build.yaml
new file mode 100644
index 000000000..c27f40929
--- /dev/null
+++ b/llama_stack/templates/ollama/build.yaml
@@ -0,0 +1,13 @@
+name: ollama
+distribution_spec:
+  description: Use ollama for running LLM inference
+  providers:
+    inference: remote::ollama
+    memory:
+    - meta-reference
+    - remote::chromadb
+    - remote::pgvector
+    safety: meta-reference
+    agents: meta-reference
+    telemetry: meta-reference
+image_type: docker
diff --git a/llama_stack/templates/tgi/build.yaml b/llama_stack/templates/tgi/build.yaml
new file mode 100644
index 000000000..2c0ca1d33
--- /dev/null
+++ b/llama_stack/templates/tgi/build.yaml
@@ -0,0 +1,13 @@
+name: tgi
+distribution_spec:
+  description: Use TGI for running LLM inference
+  providers:
+    inference: remote::tgi
+    memory:
+    - meta-reference
+    - remote::chromadb
+    - remote::pgvector
+    safety: meta-reference
+    agents: meta-reference
+    telemetry: meta-reference
+image_type: docker
diff --git a/llama_stack/templates/together/build.yaml b/llama_stack/templates/together/build.yaml
new file mode 100644
index 000000000..49eab859d
--- /dev/null
+++ b/llama_stack/templates/together/build.yaml
@@ -0,0 +1,10 @@
+name: together
+distribution_spec:
+  description: Use Together.ai for running LLM inference
+  providers:
+    inference: remote::together
+    memory: remote::weaviate
+    safety: remote::together
+    agents: meta-reference
+    telemetry: meta-reference
+image_type: docker
diff --git a/llama_stack/templates/vllm/build.yaml b/llama_stack/templates/vllm/build.yaml
new file mode 100644
index 000000000..814fafd32
--- /dev/null
+++ b/llama_stack/templates/vllm/build.yaml
@@ -0,0 +1,10 @@
+name: vllm
+distribution_spec:
+  description: Like local, but use vLLM for running LLM inference
+  providers:
+    inference: vllm
+    memory: meta-reference
+    safety: meta-reference
+    agents: meta-reference
+    telemetry: meta-reference
+image_type: conda
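
Note: as a quick sanity check of the build files added above, here is a minimal Python sketch (an illustration, not part of the patch) that loads one of these templates and prints its distribution spec. It assumes PyYAML is installed and that the patch has been applied at the repository root, so the bedrock template path exists.

    import yaml

    # Load one of the template build files added in this patch; the path assumes
    # the patch has been applied at the repository root.
    with open("llama_stack/templates/bedrock/build.yaml") as f:
        build_config = yaml.safe_load(f)

    print(build_config["name"])        # bedrock
    print(build_config["image_type"])  # conda

    # providers maps each API to a provider id; note that "memory" is a list of
    # provider ids in some templates (ollama, tgi, meta-reference-gpu) rather
    # than a single string as in the bedrock template shown here.
    for api, provider in build_config["distribution_spec"]["providers"].items():
        print(f"{api}: {provider}")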