From 5eb53a7b779c52ff25c0047bc22288e05b4434bb Mon Sep 17 00:00:00 2001
From: Xi Yan <xiyan@meta.com>
Date: Wed, 16 Oct 2024 19:14:08 -0700
Subject: [PATCH] wip: move distribution templates into build_configs/ and run_configs/

---
 .../local-bedrock-conda-example-build.yaml    |  0
 .../local-cpu-docker-build.yaml}              |  0
 .../local-databricks-build.yaml               |  0
 .../local-fireworks-build.yaml                |  0
 .../local-gpu-docker-build.yaml}              |  4 +-
 .../local-hf-endpoint-build.yaml              |  0
 .../local-hf-serverless-build.yaml            |  0
 .../local-ollama-build.yaml                   |  0
 .../{ => build_configs}/local-tgi-build.yaml  |  0
 .../local-tgi-chroma-docker.yaml}             |  8 +--
 .../local-together-build.yaml                 |  0
 .../{ => build_configs}/local-vllm-build.yaml |  0
 .../docker/llamastack-local-cpu/run.yaml      | 42 ----------------
 .../run.yaml => run_configs/local-run.yaml}   | 27 ++++++----
 .../templates/run_configs/local-tgi-run.yaml  | 50 +++++++++++++++++++
 15 files changed, 72 insertions(+), 59 deletions(-)
 rename llama_stack/distribution/templates/{ => build_configs}/local-bedrock-conda-example-build.yaml (100%)
 rename llama_stack/distribution/templates/{docker/llamastack-local-cpu/build.yaml => build_configs/local-cpu-docker-build.yaml} (100%)
 rename llama_stack/distribution/templates/{ => build_configs}/local-databricks-build.yaml (100%)
 rename llama_stack/distribution/templates/{ => build_configs}/local-fireworks-build.yaml (100%)
 rename llama_stack/distribution/templates/{local-build.yaml => build_configs/local-gpu-docker-build.yaml} (87%)
 rename llama_stack/distribution/templates/{ => build_configs}/local-hf-endpoint-build.yaml (100%)
 rename llama_stack/distribution/templates/{ => build_configs}/local-hf-serverless-build.yaml (100%)
 rename llama_stack/distribution/templates/{ => build_configs}/local-ollama-build.yaml (100%)
 rename llama_stack/distribution/templates/{ => build_configs}/local-tgi-build.yaml (100%)
 rename llama_stack/distribution/templates/{docker/llamastack-local-gpu/build.yaml => build_configs/local-tgi-chroma-docker.yaml} (53%)
 rename llama_stack/distribution/templates/{ => build_configs}/local-together-build.yaml (100%)
 rename llama_stack/distribution/templates/{ => build_configs}/local-vllm-build.yaml (100%)
 delete mode 100644 llama_stack/distribution/templates/docker/llamastack-local-cpu/run.yaml
 rename llama_stack/distribution/templates/{docker/llamastack-local-gpu/run.yaml => run_configs/local-run.yaml} (67%)
 create mode 100644 llama_stack/distribution/templates/run_configs/local-tgi-run.yaml

diff --git a/llama_stack/distribution/templates/local-bedrock-conda-example-build.yaml b/llama_stack/distribution/templates/build_configs/local-bedrock-conda-example-build.yaml
similarity index 100%
rename from llama_stack/distribution/templates/local-bedrock-conda-example-build.yaml
rename to llama_stack/distribution/templates/build_configs/local-bedrock-conda-example-build.yaml
diff --git a/llama_stack/distribution/templates/docker/llamastack-local-cpu/build.yaml b/llama_stack/distribution/templates/build_configs/local-cpu-docker-build.yaml
similarity index 100%
rename from llama_stack/distribution/templates/docker/llamastack-local-cpu/build.yaml
rename to llama_stack/distribution/templates/build_configs/local-cpu-docker-build.yaml
diff --git a/llama_stack/distribution/templates/local-databricks-build.yaml b/llama_stack/distribution/templates/build_configs/local-databricks-build.yaml
similarity index 100%
rename from llama_stack/distribution/templates/local-databricks-build.yaml
rename to llama_stack/distribution/templates/build_configs/local-databricks-build.yaml
diff --git a/llama_stack/distribution/templates/local-fireworks-build.yaml b/llama_stack/distribution/templates/build_configs/local-fireworks-build.yaml
similarity index 100%
rename from llama_stack/distribution/templates/local-fireworks-build.yaml
rename to llama_stack/distribution/templates/build_configs/local-fireworks-build.yaml
diff --git a/llama_stack/distribution/templates/local-build.yaml b/llama_stack/distribution/templates/build_configs/local-gpu-docker-build.yaml
similarity index 87%
rename from llama_stack/distribution/templates/local-build.yaml
rename to llama_stack/distribution/templates/build_configs/local-gpu-docker-build.yaml
index f10461256..01af1021e 100644
--- a/llama_stack/distribution/templates/local-build.yaml
+++ b/llama_stack/distribution/templates/build_configs/local-gpu-docker-build.yaml
@@ -1,4 +1,4 @@
-name: local
+name: local-gpu
 distribution_spec:
   description: Use code from `llama_stack` itself to serve all llama stack APIs
   providers:
@@ -7,4 +7,4 @@ distribution_spec:
     safety: meta-reference
     agents: meta-reference
     telemetry: meta-reference
-image_type: conda
+image_type: docker
diff --git a/llama_stack/distribution/templates/local-hf-endpoint-build.yaml b/llama_stack/distribution/templates/build_configs/local-hf-endpoint-build.yaml
similarity index 100%
rename from llama_stack/distribution/templates/local-hf-endpoint-build.yaml
rename to llama_stack/distribution/templates/build_configs/local-hf-endpoint-build.yaml
diff --git a/llama_stack/distribution/templates/local-hf-serverless-build.yaml b/llama_stack/distribution/templates/build_configs/local-hf-serverless-build.yaml
similarity index 100%
rename from llama_stack/distribution/templates/local-hf-serverless-build.yaml
rename to llama_stack/distribution/templates/build_configs/local-hf-serverless-build.yaml
diff --git a/llama_stack/distribution/templates/local-ollama-build.yaml b/llama_stack/distribution/templates/build_configs/local-ollama-build.yaml
similarity index 100%
rename from llama_stack/distribution/templates/local-ollama-build.yaml
rename to llama_stack/distribution/templates/build_configs/local-ollama-build.yaml
diff --git a/llama_stack/distribution/templates/local-tgi-build.yaml b/llama_stack/distribution/templates/build_configs/local-tgi-build.yaml
similarity index 100%
rename from llama_stack/distribution/templates/local-tgi-build.yaml
rename to llama_stack/distribution/templates/build_configs/local-tgi-build.yaml
diff --git a/llama_stack/distribution/templates/docker/llamastack-local-gpu/build.yaml b/llama_stack/distribution/templates/build_configs/local-tgi-chroma-docker.yaml
similarity index 53%
rename from llama_stack/distribution/templates/docker/llamastack-local-gpu/build.yaml
rename to llama_stack/distribution/templates/build_configs/local-tgi-chroma-docker.yaml
index 11d1ac01c..30715c551 100644
--- a/llama_stack/distribution/templates/docker/llamastack-local-gpu/build.yaml
+++ b/llama_stack/distribution/templates/build_configs/local-tgi-chroma-docker.yaml
@@ -1,11 +1,11 @@
-name: local-gpu
+name: local-tgi-chroma
 distribution_spec:
-  description: local meta reference
+  description: remote tgi inference + chromadb memory
   docker_image: null
   providers:
-    inference: meta-reference
+    inference: remote::tgi
     safety: meta-reference
     agents: meta-reference
-    memory: meta-reference
+    memory: remote::chromadb
     telemetry: meta-reference
 image_type: docker
diff --git a/llama_stack/distribution/templates/local-together-build.yaml b/llama_stack/distribution/templates/build_configs/local-together-build.yaml
similarity index 100%
rename from llama_stack/distribution/templates/local-together-build.yaml
rename to llama_stack/distribution/templates/build_configs/local-together-build.yaml
diff --git a/llama_stack/distribution/templates/local-vllm-build.yaml b/llama_stack/distribution/templates/build_configs/local-vllm-build.yaml
similarity index 100%
rename from llama_stack/distribution/templates/local-vllm-build.yaml
rename to llama_stack/distribution/templates/build_configs/local-vllm-build.yaml
diff --git a/llama_stack/distribution/templates/docker/llamastack-local-cpu/run.yaml b/llama_stack/distribution/templates/docker/llamastack-local-cpu/run.yaml
deleted file mode 100644
index 6b107d972..000000000
--- a/llama_stack/distribution/templates/docker/llamastack-local-cpu/run.yaml
+++ /dev/null
@@ -1,42 +0,0 @@
-version: '2'
-built_at: '2024-10-08T17:42:07.505267'
-image_name: local-cpu
-docker_image: local-cpu
-conda_env: null
-apis:
-- agents
-- inference
-- models
-- memory
-- safety
-- shields
-- memory_banks
-providers:
-  inference:
-  - provider_id: remote::ollama
-    provider_type: remote::ollama
-    config:
-      host: localhost
-      port: 6000
-  safety:
-  - provider_id: meta-reference
-    provider_type: meta-reference
-    config:
-      llama_guard_shield: null
-      prompt_guard_shield: null
-  memory:
-  - provider_id: meta-reference
-    provider_type: meta-reference
-    config: {}
-  agents:
-  - provider_id: meta-reference
-    provider_type: meta-reference
-    config:
-      persistence_store:
-        namespace: null
-        type: sqlite
-        db_path: ~/.llama/runtime/kvstore.db
-  telemetry:
-  - provider_id: meta-reference
-    provider_type: meta-reference
-    config: {}
diff --git a/llama_stack/distribution/templates/docker/llamastack-local-gpu/run.yaml b/llama_stack/distribution/templates/run_configs/local-run.yaml
similarity index 67%
rename from llama_stack/distribution/templates/docker/llamastack-local-gpu/run.yaml
rename to llama_stack/distribution/templates/run_configs/local-run.yaml
index 8fb02711b..e12f6e852 100644
--- a/llama_stack/distribution/templates/docker/llamastack-local-gpu/run.yaml
+++ b/llama_stack/distribution/templates/run_configs/local-run.yaml
@@ -1,16 +1,16 @@
 version: '2'
-built_at: '2024-10-08T17:42:33.690666'
-image_name: local-gpu
-docker_image: local-gpu
-conda_env: null
+built_at: '2024-10-08T17:40:45.325529'
+image_name: local
+docker_image: null
+conda_env: local
 apis:
-- memory
-- inference
-- agents
 - shields
-- safety
+- agents
 - models
+- memory
 - memory_banks
+- inference
+- safety
 providers:
   inference:
   - provider_id: meta-reference
@@ -25,8 +25,13 @@ providers:
   - provider_id: meta-reference
     provider_type: meta-reference
     config:
-      llama_guard_shield: null
-      prompt_guard_shield: null
+      llama_guard_shield:
+        model: Llama-Guard-3-1B
+        excluded_categories: []
+        disable_input_check: false
+        disable_output_check: false
+      prompt_guard_shield:
+        model: Prompt-Guard-86M
   memory:
   - provider_id: meta-reference
     provider_type: meta-reference
diff --git a/llama_stack/distribution/templates/run_configs/local-tgi-run.yaml b/llama_stack/distribution/templates/run_configs/local-tgi-run.yaml
new file mode 100644
index 000000000..e12f6e852
--- /dev/null
+++ b/llama_stack/distribution/templates/run_configs/local-tgi-run.yaml
@@ -0,0 +1,54 @@
+# Run-config template for a conda-based `local` image.
+# NOTE(review): the provider settings below match run_configs/local-run.yaml,
+# including meta-reference inference; confirm whether this TGI template should
+# point inference at remote::tgi instead.
+version: '2'
+built_at: '2024-10-08T17:40:45.325529'
+image_name: local
+docker_image: null
+conda_env: local
+apis:
+- shields
+- agents
+- models
+- memory
+- memory_banks
+- inference
+- safety
+providers:
+  inference:
+  - provider_id: meta-reference
+    provider_type: meta-reference
+    config:
+      model: Llama3.1-8B-Instruct
+      quantization: null
+      torch_seed: null
+      max_seq_len: 4096
+      max_batch_size: 1
+  safety:
+  - provider_id: meta-reference
+    provider_type: meta-reference
+    config:
+      llama_guard_shield:
+        model: Llama-Guard-3-1B
+        excluded_categories: []
+        disable_input_check: false
+        disable_output_check: false
+      prompt_guard_shield:
+        model: Prompt-Guard-86M
+  memory:
+  - provider_id: meta-reference
+    provider_type: meta-reference
+    config: {}
+  agents:
+  - provider_id: meta-reference
+    provider_type: meta-reference
+    config:
+      persistence_store:
+        namespace: null
+        type: sqlite
+        db_path: ~/.llama/runtime/kvstore.db
+  telemetry:
+  - provider_id: meta-reference
+    provider_type: meta-reference
+    config: {}