cerebras template update for memory (#792)

# What does this PR do?

- We no longer have meta-reference as a memory provider, so update the cerebras template accordingly.

## Test Plan

```
python llama_stack/scripts/distro_codegen.py
```

## Sources

Please link relevant resources if necessary.

## Before submitting

- [ ] This PR fixes a typo or improves the docs (you can dismiss the other checks if that's the case).
- [ ] Ran pre-commit to handle lint / formatting issues.
- [ ] Read the [contributor guideline](https://github.com/meta-llama/llama-stack/blob/main/CONTRIBUTING.md), Pull Request section?
- [ ] Updated relevant documentation.
- [ ] Wrote necessary unit or integration tests.
2025-01-16 16:07:53 -08:00 · 2025-01-16 16:07:53 -08:00 · d1f3b032c9
commit d1f3b032c9
parent 48b12b9777
37 changed files with 14 additions and 39 deletions
--- a/distributions/dependencies.json
+++ b/distributions/dependencies.json
@ -346,6 +346,7 @@
    "blobfile",
    "cerebras_cloud_sdk",
    "chardet",
    "chromadb-client",
    "datasets",
    "faiss-cpu",
    "fastapi",
--- a/docs/source/distributions/self_hosted_distro/cerebras.md
+++ b/docs/source/distributions/self_hosted_distro/cerebras.md
@ -8,7 +8,7 @@ The `llamastack/distribution-cerebras` distribution consists of the following pr
 | datasetio | `remote::huggingface`, `inline::localfs` |
 | eval | `inline::meta-reference` |
 | inference | `remote::cerebras` |
-| memory | `inline::meta-reference` |
+| memory | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
 | safety | `inline::llama-guard` |
 | scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
 | telemetry | `inline::meta-reference` |
--- a/llama_stack/templates/bedrock/build.yaml
+++ b/llama_stack/templates/bedrock/build.yaml
@ -1,5 +1,4 @@
 version: '2'
 name: bedrock
 distribution_spec:
  description: Use AWS Bedrock for running LLM inference and safety
  providers:
--- a/llama_stack/templates/bedrock/run.yaml
+++ b/llama_stack/templates/bedrock/run.yaml
@ -1,6 +1,5 @@
 version: '2'
 image_name: bedrock
 conda_env: bedrock
 apis:
 - agents
 - datasetio
--- a/llama_stack/templates/cerebras/build.yaml
+++ b/llama_stack/templates/cerebras/build.yaml
@ -1,5 +1,4 @@
 version: '2'
 name: cerebras
 distribution_spec:
  description: Use Cerebras for running LLM inference
  providers:
@ -8,7 +7,9 @@ distribution_spec:
    safety:
    - inline::llama-guard
    memory:
-    - inline::meta-reference
+    - inline::faiss
+    - remote::chromadb
+    - remote::pgvector
    agents:
    - inline::meta-reference
    eval:
--- a/llama_stack/templates/cerebras/cerebras.py
+++ b/llama_stack/templates/cerebras/cerebras.py
@ -27,7 +27,7 @@ def get_distribution_template() -> DistributionTemplate:
    providers = {
        "inference": ["remote::cerebras"],
        "safety": ["inline::llama-guard"],
-        "memory": ["inline::meta-reference"],
+        "memory": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
        "agents": ["inline::meta-reference"],
        "eval": ["inline::meta-reference"],
        "datasetio": ["remote::huggingface", "inline::localfs"],
--- a/llama_stack/templates/cerebras/run.yaml
+++ b/llama_stack/templates/cerebras/run.yaml
@ -1,6 +1,5 @@
 version: '2'
 image_name: cerebras
 conda_env: cerebras
 apis:
 - agents
 - datasetio
@ -26,13 +25,19 @@ providers:
    provider_type: inline::llama-guard
    config: {}
  memory:
-  - provider_id: meta-reference
+  - provider_id: faiss
-    provider_type: inline::meta-reference
+    provider_type: inline::faiss
    config:
      kvstore:
        type: sqlite
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/cerebras}/faiss_store.db
+  - provider_id: chromadb
+    provider_type: remote::chromadb
+    config: {}
+  - provider_id: pgvector
+    provider_type: remote::pgvector
+    config: {}
  agents:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
--- a/llama_stack/templates/fireworks/build.yaml
+++ b/llama_stack/templates/fireworks/build.yaml
@ -1,5 +1,4 @@
 version: '2'
 name: fireworks
 distribution_spec:
  description: Use Fireworks.AI for running LLM inference
  providers:
--- a/llama_stack/templates/fireworks/run-with-safety.yaml
+++ b/llama_stack/templates/fireworks/run-with-safety.yaml
@ -1,6 +1,5 @@
 version: '2'
 image_name: fireworks
 conda_env: fireworks
 apis:
 - agents
 - datasetio
--- a/llama_stack/templates/fireworks/run.yaml
+++ b/llama_stack/templates/fireworks/run.yaml
@ -1,6 +1,5 @@
 version: '2'
 image_name: fireworks
 conda_env: fireworks
 apis:
 - agents
 - datasetio
--- a/llama_stack/templates/hf-endpoint/build.yaml
+++ b/llama_stack/templates/hf-endpoint/build.yaml
@ -1,5 +1,4 @@
 version: '2'
 name: hf-endpoint
 distribution_spec:
  description: Use (an external) Hugging Face Inference Endpoint for running LLM inference
  providers:
--- a/llama_stack/templates/hf-endpoint/run-with-safety.yaml
+++ b/llama_stack/templates/hf-endpoint/run-with-safety.yaml
@ -1,6 +1,5 @@
 version: '2'
 image_name: hf-endpoint
 conda_env: hf-endpoint
 apis:
 - agents
 - datasetio
--- a/llama_stack/templates/hf-endpoint/run.yaml
+++ b/llama_stack/templates/hf-endpoint/run.yaml
@ -1,6 +1,5 @@
 version: '2'
 image_name: hf-endpoint
 conda_env: hf-endpoint
 apis:
 - agents
 - datasetio
--- a/llama_stack/templates/hf-serverless/build.yaml
+++ b/llama_stack/templates/hf-serverless/build.yaml
@ -1,5 +1,4 @@
 version: '2'
 name: hf-serverless
 distribution_spec:
  description: Use (an external) Hugging Face Inference Endpoint for running LLM inference
  providers:
--- a/llama_stack/templates/hf-serverless/run-with-safety.yaml
+++ b/llama_stack/templates/hf-serverless/run-with-safety.yaml
@ -1,6 +1,5 @@
 version: '2'
 image_name: hf-serverless
 conda_env: hf-serverless
 apis:
 - agents
 - datasetio
--- a/llama_stack/templates/hf-serverless/run.yaml
+++ b/llama_stack/templates/hf-serverless/run.yaml
@ -1,6 +1,5 @@
 version: '2'
 image_name: hf-serverless
 conda_env: hf-serverless
 apis:
 - agents
 - datasetio
--- a/llama_stack/templates/meta-reference-gpu/build.yaml
+++ b/llama_stack/templates/meta-reference-gpu/build.yaml
@ -1,5 +1,4 @@
 version: '2'
 name: meta-reference-gpu
 distribution_spec:
  description: Use Meta Reference for running LLM inference
  providers:
--- a/llama_stack/templates/meta-reference-gpu/run-with-safety.yaml
+++ b/llama_stack/templates/meta-reference-gpu/run-with-safety.yaml
@ -1,6 +1,5 @@
 version: '2'
 image_name: meta-reference-gpu
 conda_env: meta-reference-gpu
 apis:
 - agents
 - datasetio
--- a/llama_stack/templates/meta-reference-gpu/run.yaml
+++ b/llama_stack/templates/meta-reference-gpu/run.yaml
@ -1,6 +1,5 @@
 version: '2'
 image_name: meta-reference-gpu
 conda_env: meta-reference-gpu
 apis:
 - agents
 - datasetio
--- a/llama_stack/templates/meta-reference-quantized-gpu/build.yaml
+++ b/llama_stack/templates/meta-reference-quantized-gpu/build.yaml
@ -1,5 +1,4 @@
 version: '2'
 name: meta-reference-quantized-gpu
 distribution_spec:
  description: Use Meta Reference with fp8, int4 quantization for running LLM inference
  providers:
--- a/llama_stack/templates/meta-reference-quantized-gpu/run.yaml
+++ b/llama_stack/templates/meta-reference-quantized-gpu/run.yaml
@ -1,6 +1,5 @@
 version: '2'
 image_name: meta-reference-quantized-gpu
 conda_env: meta-reference-quantized-gpu
 apis:
 - agents
 - datasetio
--- a/llama_stack/templates/nvidia/build.yaml
+++ b/llama_stack/templates/nvidia/build.yaml
@ -1,5 +1,4 @@
 version: '2'
 name: nvidia
 distribution_spec:
  description: Use NVIDIA NIM for running LLM inference
  providers:
--- a/llama_stack/templates/nvidia/run.yaml
+++ b/llama_stack/templates/nvidia/run.yaml
@ -1,6 +1,5 @@
 version: '2'
 image_name: nvidia
 conda_env: nvidia
 apis:
 - agents
 - datasetio
--- a/llama_stack/templates/ollama/build.yaml
+++ b/llama_stack/templates/ollama/build.yaml
@ -1,5 +1,4 @@
 version: '2'
 name: ollama
 distribution_spec:
  description: Use (an external) Ollama server for running LLM inference
  providers:
--- a/llama_stack/templates/ollama/run-with-safety.yaml
+++ b/llama_stack/templates/ollama/run-with-safety.yaml
@ -1,6 +1,5 @@
 version: '2'
 image_name: ollama
 conda_env: ollama
 apis:
 - agents
 - datasetio
--- a/llama_stack/templates/ollama/run.yaml
+++ b/llama_stack/templates/ollama/run.yaml
@ -1,6 +1,5 @@
 version: '2'
 image_name: ollama
 conda_env: ollama
 apis:
 - agents
 - datasetio
--- a/llama_stack/templates/remote-vllm/build.yaml
+++ b/llama_stack/templates/remote-vllm/build.yaml
@ -1,5 +1,4 @@
 version: '2'
 name: remote-vllm
 distribution_spec:
  description: Use (an external) vLLM server for running LLM inference
  providers:
--- a/llama_stack/templates/remote-vllm/run-with-safety.yaml
+++ b/llama_stack/templates/remote-vllm/run-with-safety.yaml
@ -1,6 +1,5 @@
 version: '2'
 image_name: remote-vllm
 conda_env: remote-vllm
 apis:
 - agents
 - inference
--- a/llama_stack/templates/remote-vllm/run.yaml
+++ b/llama_stack/templates/remote-vllm/run.yaml
@ -1,6 +1,5 @@
 version: '2'
 image_name: remote-vllm
 conda_env: remote-vllm
 apis:
 - agents
 - inference
--- a/llama_stack/templates/tgi/build.yaml
+++ b/llama_stack/templates/tgi/build.yaml
@ -1,5 +1,4 @@
 version: '2'
 name: tgi
 distribution_spec:
  description: Use (an external) TGI server for running LLM inference
  providers:
--- a/llama_stack/templates/tgi/run-with-safety.yaml
+++ b/llama_stack/templates/tgi/run-with-safety.yaml
@ -1,6 +1,5 @@
 version: '2'
 image_name: tgi
 conda_env: tgi
 apis:
 - agents
 - datasetio
--- a/llama_stack/templates/tgi/run.yaml
+++ b/llama_stack/templates/tgi/run.yaml
@ -1,6 +1,5 @@
 version: '2'
 image_name: tgi
 conda_env: tgi
 apis:
 - agents
 - datasetio
--- a/llama_stack/templates/together/build.yaml
+++ b/llama_stack/templates/together/build.yaml
@ -1,5 +1,4 @@
 version: '2'
 name: together
 distribution_spec:
  description: Use Together.AI for running LLM inference
  providers:
--- a/llama_stack/templates/together/run-with-safety.yaml
+++ b/llama_stack/templates/together/run-with-safety.yaml
@ -1,6 +1,5 @@
 version: '2'
 image_name: together
 conda_env: together
 apis:
 - agents
 - datasetio
--- a/llama_stack/templates/together/run.yaml
+++ b/llama_stack/templates/together/run.yaml
@ -1,6 +1,5 @@
 version: '2'
 image_name: together
 conda_env: together
 apis:
 - agents
 - datasetio
--- a/llama_stack/templates/vllm-gpu/build.yaml
+++ b/llama_stack/templates/vllm-gpu/build.yaml
@ -1,5 +1,4 @@
 version: '2'
 name: vllm-gpu
 distribution_spec:
  description: Use a built-in vLLM engine for running LLM inference
  providers:
--- a/llama_stack/templates/vllm-gpu/run.yaml
+++ b/llama_stack/templates/vllm-gpu/run.yaml
@ -1,6 +1,5 @@
 version: '2'
 image_name: vllm-gpu
 conda_env: vllm-gpu
 apis:
 - agents
 - datasetio