From d1f3b032c9ee9e88dabe7b7125b3e3ffb7aa36a7 Mon Sep 17 00:00:00 2001
From: Xi Yan
Date: Thu, 16 Jan 2025 16:07:53 -0800
Subject: [PATCH] cerebras template update for memory (#792)

# What does this PR do?

- We no longer ship `inline::meta-reference` as a memory provider, so update the Cerebras template to use `inline::faiss`, `remote::chromadb`, and `remote::pgvector` instead.

## Test Plan

```
python llama_stack/scripts/distro_codegen.py
```

## Sources

Please link relevant resources if necessary.

## Before submitting

- [ ] This PR fixes a typo or improves the docs (you can dismiss the other checks if that's the case).
- [ ] Ran pre-commit to handle lint / formatting issues.
- [ ] Read the [contributor guideline](https://github.com/meta-llama/llama-stack/blob/main/CONTRIBUTING.md), Pull Request section?
- [ ] Updated relevant documentation.
- [ ] Wrote necessary unit or integration tests.
---
 distributions/dependencies.json | 1 +
 .../distributions/self_hosted_distro/cerebras.md | 2 +-
 llama_stack/templates/bedrock/build.yaml | 1 -
 llama_stack/templates/bedrock/run.yaml | 1 -
 llama_stack/templates/cerebras/build.yaml | 5 +++--
 llama_stack/templates/cerebras/cerebras.py | 2 +-
 llama_stack/templates/cerebras/run.yaml | 11 ++++++++---
 llama_stack/templates/fireworks/build.yaml | 1 -
 llama_stack/templates/fireworks/run-with-safety.yaml | 1 -
 llama_stack/templates/fireworks/run.yaml | 1 -
 llama_stack/templates/hf-endpoint/build.yaml | 1 -
 .../templates/hf-endpoint/run-with-safety.yaml | 1 -
 llama_stack/templates/hf-endpoint/run.yaml | 1 -
 llama_stack/templates/hf-serverless/build.yaml | 1 -
 .../templates/hf-serverless/run-with-safety.yaml | 1 -
 llama_stack/templates/hf-serverless/run.yaml | 1 -
 llama_stack/templates/meta-reference-gpu/build.yaml | 1 -
 .../templates/meta-reference-gpu/run-with-safety.yaml | 1 -
 llama_stack/templates/meta-reference-gpu/run.yaml | 1 -
 .../templates/meta-reference-quantized-gpu/build.yaml | 1 -
 .../templates/meta-reference-quantized-gpu/run.yaml | 1 -
 llama_stack/templates/nvidia/build.yaml | 1 -
 llama_stack/templates/nvidia/run.yaml | 1 -
 llama_stack/templates/ollama/build.yaml | 1 -
 llama_stack/templates/ollama/run-with-safety.yaml | 1 -
 llama_stack/templates/ollama/run.yaml | 1 -
 llama_stack/templates/remote-vllm/build.yaml | 1 -
 .../templates/remote-vllm/run-with-safety.yaml | 1 -
 llama_stack/templates/remote-vllm/run.yaml | 1 -
 llama_stack/templates/tgi/build.yaml | 1 -
 llama_stack/templates/tgi/run-with-safety.yaml | 1 -
 llama_stack/templates/tgi/run.yaml | 1 -
 llama_stack/templates/together/build.yaml | 1 -
 llama_stack/templates/together/run-with-safety.yaml | 1 -
 llama_stack/templates/together/run.yaml | 1 -
 llama_stack/templates/vllm-gpu/build.yaml | 1 -
 llama_stack/templates/vllm-gpu/run.yaml | 1 -
 37 files changed, 14 insertions(+), 39 deletions(-)

diff --git a/distributions/dependencies.json b/distributions/dependencies.json
index ab3a367f1..d6d60ef7c 100644
--- a/distributions/dependencies.json
+++ b/distributions/dependencies.json
@@ -346,6 +346,7 @@
     "blobfile",
     "cerebras_cloud_sdk",
     "chardet",
+    "chromadb-client",
     "datasets",
     "faiss-cpu",
     "fastapi",
diff --git a/docs/source/distributions/self_hosted_distro/cerebras.md b/docs/source/distributions/self_hosted_distro/cerebras.md
index 302d121dd..22e4125bd 100644
--- a/docs/source/distributions/self_hosted_distro/cerebras.md
+++ b/docs/source/distributions/self_hosted_distro/cerebras.md
@@ -8,7 +8,7 @@ The `llamastack/distribution-cerebras` distribution consists of the following pr
 | datasetio | `remote::huggingface`, `inline::localfs` |
 | eval | `inline::meta-reference` |
 | inference | `remote::cerebras` |
-| memory | `inline::meta-reference` |
+| memory | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
 | safety | `inline::llama-guard` |
 | scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
 | telemetry | `inline::meta-reference` |
diff --git a/llama_stack/templates/bedrock/build.yaml b/llama_stack/templates/bedrock/build.yaml
index a68a8f6fc..794e54306 100644
--- a/llama_stack/templates/bedrock/build.yaml
+++ b/llama_stack/templates/bedrock/build.yaml
@@ -1,5 +1,4 @@
 version: '2'
-name: bedrock
 distribution_spec:
   description: Use AWS Bedrock for running LLM inference and safety
   providers:
diff --git a/llama_stack/templates/bedrock/run.yaml b/llama_stack/templates/bedrock/run.yaml
index 1d0721773..3a6922ae7 100644
--- a/llama_stack/templates/bedrock/run.yaml
+++ b/llama_stack/templates/bedrock/run.yaml
@@ -1,6 +1,5 @@
 version: '2'
 image_name: bedrock
-conda_env: bedrock
 apis:
 - agents
 - datasetio
diff --git a/llama_stack/templates/cerebras/build.yaml b/llama_stack/templates/cerebras/build.yaml
index 0fe568d09..9f187d3c7 100644
--- a/llama_stack/templates/cerebras/build.yaml
+++ b/llama_stack/templates/cerebras/build.yaml
@@ -1,5 +1,4 @@
 version: '2'
-name: cerebras
 distribution_spec:
   description: Use Cerebras for running LLM inference
   providers:
@@ -8,7 +7,9 @@ distribution_spec:
     safety:
     - inline::llama-guard
     memory:
-    - inline::meta-reference
+    - inline::faiss
+    - remote::chromadb
+    - remote::pgvector
     agents:
     - inline::meta-reference
     eval:
diff --git a/llama_stack/templates/cerebras/cerebras.py b/llama_stack/templates/cerebras/cerebras.py
index 6571170dd..17fc26632 100644
--- a/llama_stack/templates/cerebras/cerebras.py
+++ b/llama_stack/templates/cerebras/cerebras.py
@@ -27,7 +27,7 @@ def get_distribution_template() -> DistributionTemplate:
     providers = {
         "inference": ["remote::cerebras"],
         "safety": ["inline::llama-guard"],
-        "memory": ["inline::meta-reference"],
+        "memory": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
         "agents": ["inline::meta-reference"],
         "eval": ["inline::meta-reference"],
         "datasetio": ["remote::huggingface", "inline::localfs"],
diff --git a/llama_stack/templates/cerebras/run.yaml b/llama_stack/templates/cerebras/run.yaml
index 42146ad4b..e0beab9cc 100644
--- a/llama_stack/templates/cerebras/run.yaml
+++ b/llama_stack/templates/cerebras/run.yaml
@@ -1,6 +1,5 @@
 version: '2'
 image_name: cerebras
-conda_env: cerebras
 apis:
 - agents
 - datasetio
@@ -26,13 +25,19 @@ providers:
     provider_type: inline::llama-guard
     config: {}
   memory:
-  - provider_id: meta-reference
-    provider_type: inline::meta-reference
+  - provider_id: faiss
+    provider_type: inline::faiss
     config:
       kvstore:
         type: sqlite
         namespace: null
         db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/cerebras}/faiss_store.db
+  - provider_id: chromadb
+    provider_type: remote::chromadb
+    config: {}
+  - provider_id: pgvector
+    provider_type: remote::pgvector
+    config: {}
   agents:
   - provider_id: meta-reference
     provider_type: inline::meta-reference
diff --git a/llama_stack/templates/fireworks/build.yaml b/llama_stack/templates/fireworks/build.yaml
index e76cc86f1..504c913bd 100644
--- a/llama_stack/templates/fireworks/build.yaml
+++ b/llama_stack/templates/fireworks/build.yaml
@@ -1,5 +1,4 @@
 version: '2'
-name: fireworks
 distribution_spec:
   description: Use Fireworks.AI for running LLM inference
   providers:
diff --git a/llama_stack/templates/fireworks/run-with-safety.yaml b/llama_stack/templates/fireworks/run-with-safety.yaml
index a279ab820..8fefbd98a 100644
--- a/llama_stack/templates/fireworks/run-with-safety.yaml
+++ b/llama_stack/templates/fireworks/run-with-safety.yaml
@@ -1,6 +1,5 @@
 version: '2'
 image_name: fireworks
-conda_env: fireworks
 apis:
 - agents
 - datasetio
diff --git a/llama_stack/templates/fireworks/run.yaml b/llama_stack/templates/fireworks/run.yaml
index 79fafe66c..53128f456 100644
--- a/llama_stack/templates/fireworks/run.yaml
+++ b/llama_stack/templates/fireworks/run.yaml
@@ -1,6 +1,5 @@
 version: '2'
 image_name: fireworks
-conda_env: fireworks
 apis:
 - agents
 - datasetio
diff --git a/llama_stack/templates/hf-endpoint/build.yaml b/llama_stack/templates/hf-endpoint/build.yaml
index c18689855..43486030e 100644
--- a/llama_stack/templates/hf-endpoint/build.yaml
+++ b/llama_stack/templates/hf-endpoint/build.yaml
@@ -1,5 +1,4 @@
 version: '2'
-name: hf-endpoint
 distribution_spec:
   description: Use (an external) Hugging Face Inference Endpoint for running LLM inference
   providers:
diff --git a/llama_stack/templates/hf-endpoint/run-with-safety.yaml b/llama_stack/templates/hf-endpoint/run-with-safety.yaml
index a9d895d23..6a52ca861 100644
--- a/llama_stack/templates/hf-endpoint/run-with-safety.yaml
+++ b/llama_stack/templates/hf-endpoint/run-with-safety.yaml
@@ -1,6 +1,5 @@
 version: '2'
 image_name: hf-endpoint
-conda_env: hf-endpoint
 apis:
 - agents
 - datasetio
diff --git a/llama_stack/templates/hf-endpoint/run.yaml b/llama_stack/templates/hf-endpoint/run.yaml
index e9b58c962..c019c587a 100644
--- a/llama_stack/templates/hf-endpoint/run.yaml
+++ b/llama_stack/templates/hf-endpoint/run.yaml
@@ -1,6 +1,5 @@
 version: '2'
 image_name: hf-endpoint
-conda_env: hf-endpoint
 apis:
 - agents
 - datasetio
diff --git a/llama_stack/templates/hf-serverless/build.yaml b/llama_stack/templates/hf-serverless/build.yaml
index a6b551e4a..e1328bd58 100644
--- a/llama_stack/templates/hf-serverless/build.yaml
+++ b/llama_stack/templates/hf-serverless/build.yaml
@@ -1,5 +1,4 @@
 version: '2'
-name: hf-serverless
 distribution_spec:
   description: Use (an external) Hugging Face Inference Endpoint for running LLM inference
   providers:
diff --git a/llama_stack/templates/hf-serverless/run-with-safety.yaml b/llama_stack/templates/hf-serverless/run-with-safety.yaml
index 415cec648..0a64de358 100644
--- a/llama_stack/templates/hf-serverless/run-with-safety.yaml
+++ b/llama_stack/templates/hf-serverless/run-with-safety.yaml
@@ -1,6 +1,5 @@
 version: '2'
 image_name: hf-serverless
-conda_env: hf-serverless
 apis:
 - agents
 - datasetio
diff --git a/llama_stack/templates/hf-serverless/run.yaml b/llama_stack/templates/hf-serverless/run.yaml
index ef9dedeed..f91e45fb6 100644
--- a/llama_stack/templates/hf-serverless/run.yaml
+++ b/llama_stack/templates/hf-serverless/run.yaml
@@ -1,6 +1,5 @@
 version: '2'
 image_name: hf-serverless
-conda_env: hf-serverless
 apis:
 - agents
 - datasetio
diff --git a/llama_stack/templates/meta-reference-gpu/build.yaml b/llama_stack/templates/meta-reference-gpu/build.yaml
index ba8413fa6..9ad7b26bf 100644
--- a/llama_stack/templates/meta-reference-gpu/build.yaml
+++ b/llama_stack/templates/meta-reference-gpu/build.yaml
@@ -1,5 +1,4 @@
 version: '2'
-name: meta-reference-gpu
 distribution_spec:
   description: Use Meta Reference for running LLM inference
   providers:
diff --git a/llama_stack/templates/meta-reference-gpu/run-with-safety.yaml b/llama_stack/templates/meta-reference-gpu/run-with-safety.yaml
index 4946fdab7..591afa2be 100644
--- a/llama_stack/templates/meta-reference-gpu/run-with-safety.yaml
+++ b/llama_stack/templates/meta-reference-gpu/run-with-safety.yaml
@@ -1,6 +1,5 @@
 version: '2'
 image_name: meta-reference-gpu
-conda_env: meta-reference-gpu
 apis:
 - agents
 - datasetio
diff --git a/llama_stack/templates/meta-reference-gpu/run.yaml b/llama_stack/templates/meta-reference-gpu/run.yaml
index 52345f3c1..cc22a514b 100644
--- a/llama_stack/templates/meta-reference-gpu/run.yaml
+++ b/llama_stack/templates/meta-reference-gpu/run.yaml
@@ -1,6 +1,5 @@
 version: '2'
 image_name: meta-reference-gpu
-conda_env: meta-reference-gpu
 apis:
 - agents
 - datasetio
diff --git a/llama_stack/templates/meta-reference-quantized-gpu/build.yaml b/llama_stack/templates/meta-reference-quantized-gpu/build.yaml
index 41ab44e38..e6b64ea1e 100644
--- a/llama_stack/templates/meta-reference-quantized-gpu/build.yaml
+++ b/llama_stack/templates/meta-reference-quantized-gpu/build.yaml
@@ -1,5 +1,4 @@
 version: '2'
-name: meta-reference-quantized-gpu
 distribution_spec:
   description: Use Meta Reference with fp8, int4 quantization for running LLM inference
   providers:
diff --git a/llama_stack/templates/meta-reference-quantized-gpu/run.yaml b/llama_stack/templates/meta-reference-quantized-gpu/run.yaml
index 02a5bacaa..ff0affafb 100644
--- a/llama_stack/templates/meta-reference-quantized-gpu/run.yaml
+++ b/llama_stack/templates/meta-reference-quantized-gpu/run.yaml
@@ -1,6 +1,5 @@
 version: '2'
 image_name: meta-reference-quantized-gpu
-conda_env: meta-reference-quantized-gpu
 apis:
 - agents
 - datasetio
diff --git a/llama_stack/templates/nvidia/build.yaml b/llama_stack/templates/nvidia/build.yaml
index 813502ada..56124552b 100644
--- a/llama_stack/templates/nvidia/build.yaml
+++ b/llama_stack/templates/nvidia/build.yaml
@@ -1,5 +1,4 @@
 version: '2'
-name: nvidia
 distribution_spec:
   description: Use NVIDIA NIM for running LLM inference
   providers:
diff --git a/llama_stack/templates/nvidia/run.yaml b/llama_stack/templates/nvidia/run.yaml
index d07eb25eb..1887a55d0 100644
--- a/llama_stack/templates/nvidia/run.yaml
+++ b/llama_stack/templates/nvidia/run.yaml
@@ -1,6 +1,5 @@
 version: '2'
 image_name: nvidia
-conda_env: nvidia
 apis:
 - agents
 - datasetio
diff --git a/llama_stack/templates/ollama/build.yaml b/llama_stack/templates/ollama/build.yaml
index cbd9101cf..5f2e010ee 100644
--- a/llama_stack/templates/ollama/build.yaml
+++ b/llama_stack/templates/ollama/build.yaml
@@ -1,5 +1,4 @@
 version: '2'
-name: ollama
 distribution_spec:
   description: Use (an external) Ollama server for running LLM inference
   providers:
diff --git a/llama_stack/templates/ollama/run-with-safety.yaml b/llama_stack/templates/ollama/run-with-safety.yaml
index 0792beddd..a808590c3 100644
--- a/llama_stack/templates/ollama/run-with-safety.yaml
+++ b/llama_stack/templates/ollama/run-with-safety.yaml
@@ -1,6 +1,5 @@
 version: '2'
 image_name: ollama
-conda_env: ollama
 apis:
 - agents
 - datasetio
diff --git a/llama_stack/templates/ollama/run.yaml b/llama_stack/templates/ollama/run.yaml
index 176465299..aa7b54a87 100644
--- a/llama_stack/templates/ollama/run.yaml
+++ b/llama_stack/templates/ollama/run.yaml
@@ -1,6 +1,5 @@
 version: '2'
 image_name: ollama
-conda_env: ollama
 apis:
 - agents
 - datasetio
diff --git a/llama_stack/templates/remote-vllm/build.yaml b/llama_stack/templates/remote-vllm/build.yaml
index 246e53db0..2659c8190 100644
--- a/llama_stack/templates/remote-vllm/build.yaml
+++ b/llama_stack/templates/remote-vllm/build.yaml
@@ -1,5 +1,4 @@
 version: '2'
-name: remote-vllm
 distribution_spec:
   description: Use (an external) vLLM server for running LLM inference
   providers:
diff --git a/llama_stack/templates/remote-vllm/run-with-safety.yaml b/llama_stack/templates/remote-vllm/run-with-safety.yaml
index 1babd04ac..4bf73bbda 100644
--- a/llama_stack/templates/remote-vllm/run-with-safety.yaml
+++ b/llama_stack/templates/remote-vllm/run-with-safety.yaml
@@ -1,6 +1,5 @@
 version: '2'
 image_name: remote-vllm
-conda_env: remote-vllm
 apis:
 - agents
 - inference
diff --git a/llama_stack/templates/remote-vllm/run.yaml b/llama_stack/templates/remote-vllm/run.yaml
index a3a571423..1743793a8 100644
--- a/llama_stack/templates/remote-vllm/run.yaml
+++ b/llama_stack/templates/remote-vllm/run.yaml
@@ -1,6 +1,5 @@
 version: '2'
 image_name: remote-vllm
-conda_env: remote-vllm
 apis:
 - agents
 - inference
diff --git a/llama_stack/templates/tgi/build.yaml b/llama_stack/templates/tgi/build.yaml
index 399d4a616..3bcacffb0 100644
--- a/llama_stack/templates/tgi/build.yaml
+++ b/llama_stack/templates/tgi/build.yaml
@@ -1,5 +1,4 @@
 version: '2'
-name: tgi
 distribution_spec:
   description: Use (an external) TGI server for running LLM inference
   providers:
diff --git a/llama_stack/templates/tgi/run-with-safety.yaml b/llama_stack/templates/tgi/run-with-safety.yaml
index 4134101f6..070daedc1 100644
--- a/llama_stack/templates/tgi/run-with-safety.yaml
+++ b/llama_stack/templates/tgi/run-with-safety.yaml
@@ -1,6 +1,5 @@
 version: '2'
 image_name: tgi
-conda_env: tgi
 apis:
 - agents
 - datasetio
diff --git a/llama_stack/templates/tgi/run.yaml b/llama_stack/templates/tgi/run.yaml
index b0b78e33b..9cfba37aa 100644
--- a/llama_stack/templates/tgi/run.yaml
+++ b/llama_stack/templates/tgi/run.yaml
@@ -1,6 +1,5 @@
 version: '2'
 image_name: tgi
-conda_env: tgi
 apis:
 - agents
 - datasetio
diff --git a/llama_stack/templates/together/build.yaml b/llama_stack/templates/together/build.yaml
index 96f9f758e..ad970f405 100644
--- a/llama_stack/templates/together/build.yaml
+++ b/llama_stack/templates/together/build.yaml
@@ -1,5 +1,4 @@
 version: '2'
-name: together
 distribution_spec:
   description: Use Together.AI for running LLM inference
   providers:
diff --git a/llama_stack/templates/together/run-with-safety.yaml b/llama_stack/templates/together/run-with-safety.yaml
index c415b0ec0..4e162aab3 100644
--- a/llama_stack/templates/together/run-with-safety.yaml
+++ b/llama_stack/templates/together/run-with-safety.yaml
@@ -1,6 +1,5 @@
 version: '2'
 image_name: together
-conda_env: together
 apis:
 - agents
 - datasetio
diff --git a/llama_stack/templates/together/run.yaml b/llama_stack/templates/together/run.yaml
index ed65ded57..3c4844447 100644
--- a/llama_stack/templates/together/run.yaml
+++ b/llama_stack/templates/together/run.yaml
@@ -1,6 +1,5 @@
 version: '2'
 image_name: together
-conda_env: together
 apis:
 - agents
 - datasetio
diff --git a/llama_stack/templates/vllm-gpu/build.yaml b/llama_stack/templates/vllm-gpu/build.yaml
index 959f91d3e..e068fa97e 100644
--- a/llama_stack/templates/vllm-gpu/build.yaml
+++ b/llama_stack/templates/vllm-gpu/build.yaml
@@ -1,5 +1,4 @@
 version: '2'
-name: vllm-gpu
 distribution_spec:
   description: Use a built-in vLLM engine for running LLM inference
   providers:
diff --git a/llama_stack/templates/vllm-gpu/run.yaml b/llama_stack/templates/vllm-gpu/run.yaml
index 48ec57cfb..1cb44b052 100644
--- a/llama_stack/templates/vllm-gpu/run.yaml
+++ b/llama_stack/templates/vllm-gpu/run.yaml
@@ -1,6 +1,5 @@
 version: '2'
 image_name: vllm-gpu
-conda_env: vllm-gpu
 apis:
 - agents
 - datasetio
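
Note: the regenerated cerebras run.yaml registers `remote::chromadb` and `remote::pgvector` with empty `config: {}` blocks, so a deployment still has to point them at running services. Below is a minimal sketch of what that might look like; the field names and environment variables are illustrative assumptions, not taken from this patch, so check the providers' actual config schemas before relying on them:

```yaml
memory:
- provider_id: chromadb
  provider_type: remote::chromadb
  config:
    # Assumed field: URL of a running Chroma server, e.g. http://localhost:8000
    url: ${env.CHROMADB_URL}
- provider_id: pgvector
  provider_type: remote::pgvector
  config:
    # Assumed fields: connection settings for a Postgres instance with the
    # pgvector extension installed
    host: ${env.PGVECTOR_HOST:localhost}
    port: 5432
    db: llamastack
    user: llamastack
    password: ${env.PGVECTOR_PASSWORD}
```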