From 7103892f5401a1332e3dbd3e4e17f64cf31e3779 Mon Sep 17 00:00:00 2001
From: Xi Yan
Date: Tue, 3 Dec 2024 20:49:30 -0800
Subject: [PATCH] all distros

---
 llama_stack/templates/bedrock/bedrock.py     | 17 +++++++++++++++--
 llama_stack/templates/bedrock/run.yaml       |  6 ------
 llama_stack/templates/fireworks/fireworks.py | 11 ++++++++++-
 llama_stack/templates/fireworks/run.yaml     |  6 ------
 .../templates/hf-endpoint/hf_endpoint.py     | 11 +++++++++--
 llama_stack/templates/hf-endpoint/run.yaml   |  6 ------
 .../templates/hf-serverless/hf_serverless.py | 10 +++++++++-
 llama_stack/templates/hf-serverless/run.yaml |  6 ------
 .../meta-reference-gpu/meta_reference.py     | 11 +++++++++--
 .../templates/meta-reference-gpu/run.yaml    |  6 ------
 .../meta_reference.py                        | 11 +++++++++--
 .../meta-reference-quantized-gpu/run.yaml    |  6 ------
 llama_stack/templates/remote-vllm/run.yaml   |  6 ------
 llama_stack/templates/remote-vllm/vllm.py    | 11 +++++++++--
 llama_stack/templates/tgi/run.yaml           |  6 ------
 llama_stack/templates/tgi/tgi.py             | 11 +++++++++--
 16 files changed, 79 insertions(+), 62 deletions(-)

diff --git a/llama_stack/templates/bedrock/bedrock.py b/llama_stack/templates/bedrock/bedrock.py
index b7f4ae97d..c52b56612 100644
--- a/llama_stack/templates/bedrock/bedrock.py
+++ b/llama_stack/templates/bedrock/bedrock.py
@@ -6,6 +6,9 @@
 
 from pathlib import Path
 
+from llama_stack.distribution.datatypes import Provider
+
+from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig
 from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
 
 
@@ -20,9 +23,15 @@ def get_distribution_template() -> DistributionTemplate:
         "datasetio": ["remote::huggingface", "inline::localfs"],
         "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
     }
+    name = "bedrock"
+    memory_provider = Provider(
+        provider_id="faiss",
+        provider_type="inline::faiss",
+        config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
+    )
 
     return DistributionTemplate(
-        name="bedrock",
+        name=name,
         distro_type="self_hosted",
         description="Use AWS Bedrock for running LLM inference and safety",
         docker_image=None,
@@ -30,7 +39,11 @@ def get_distribution_template() -> DistributionTemplate:
         providers=providers,
         default_models=[],
         run_configs={
-            "run.yaml": RunConfigSettings(),
+            "run.yaml": RunConfigSettings(
+                provider_overrides={
+                    "memory": [memory_provider],
+                },
+            ),
         },
         run_config_env_vars={
             "LLAMASTACK_PORT": (
diff --git a/llama_stack/templates/bedrock/run.yaml b/llama_stack/templates/bedrock/run.yaml
index 81f699d56..77d4f2248 100644
--- a/llama_stack/templates/bedrock/run.yaml
+++ b/llama_stack/templates/bedrock/run.yaml
@@ -24,12 +24,6 @@ providers:
         type: sqlite
         namespace: null
         db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/faiss_store.db
-  - provider_id: chromadb
-    provider_type: remote::chromadb
-    config: {}
-  - provider_id: pgvector
-    provider_type: remote::pgvector
-    config: {}
   safety:
   - provider_id: bedrock
     provider_type: remote::bedrock
diff --git a/llama_stack/templates/fireworks/fireworks.py b/llama_stack/templates/fireworks/fireworks.py
index 830698264..64387e4b7 100644
--- a/llama_stack/templates/fireworks/fireworks.py
+++ b/llama_stack/templates/fireworks/fireworks.py
@@ -9,6 +9,7 @@ from pathlib import Path
 from llama_models.sku_list import all_registered_models
 
 from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput
+from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig
 from llama_stack.providers.remote.inference.fireworks import FireworksImplConfig
 from llama_stack.providers.remote.inference.fireworks.fireworks import MODEL_ALIASES
 
@@ -27,11 +28,18 @@ def get_distribution_template() -> DistributionTemplate:
         "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
     }
 
+    name = "fireworks"
+
     inference_provider = Provider(
         provider_id="fireworks",
         provider_type="remote::fireworks",
         config=FireworksImplConfig.sample_run_config(),
     )
+    memory_provider = Provider(
+        provider_id="faiss",
+        provider_type="inline::faiss",
+        config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
+    )
 
     core_model_to_hf_repo = {
         m.descriptor(): m.huggingface_repo for m in all_registered_models()
@@ -45,7 +53,7 @@ def get_distribution_template() -> DistributionTemplate:
     ]
 
     return DistributionTemplate(
-        name="fireworks",
+        name=name,
         distro_type="self_hosted",
         description="Use Fireworks.AI for running LLM inference",
         docker_image=None,
@@ -56,6 +64,7 @@ def get_distribution_template() -> DistributionTemplate:
             "run.yaml": RunConfigSettings(
                 provider_overrides={
                     "inference": [inference_provider],
+                    "memory": [memory_provider],
                 },
                 default_models=default_models,
                 default_shields=[ShieldInput(shield_id="meta-llama/Llama-Guard-3-8B")],
diff --git a/llama_stack/templates/fireworks/run.yaml b/llama_stack/templates/fireworks/run.yaml
index 34f93eb54..9296be28f 100644
--- a/llama_stack/templates/fireworks/run.yaml
+++ b/llama_stack/templates/fireworks/run.yaml
@@ -26,12 +26,6 @@ providers:
         type: sqlite
         namespace: null
         db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/faiss_store.db
-  - provider_id: chromadb
-    provider_type: remote::chromadb
-    config: {}
-  - provider_id: pgvector
-    provider_type: remote::pgvector
-    config: {}
   safety:
   - provider_id: llama-guard
     provider_type: inline::llama-guard
diff --git a/llama_stack/templates/hf-endpoint/hf_endpoint.py b/llama_stack/templates/hf-endpoint/hf_endpoint.py
index e44d459dc..a645441e2 100644
--- a/llama_stack/templates/hf-endpoint/hf_endpoint.py
+++ b/llama_stack/templates/hf-endpoint/hf_endpoint.py
@@ -5,6 +5,7 @@
 # the root directory of this source tree.
 
 from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput
+from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig
 from llama_stack.providers.remote.inference.tgi import InferenceEndpointImplConfig
 from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
 
@@ -20,12 +21,17 @@ def get_distribution_template() -> DistributionTemplate:
         "datasetio": ["remote::huggingface", "inline::localfs"],
         "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
     }
-
+    name = "hf-endpoint"
     inference_provider = Provider(
         provider_id="hf-endpoint",
         provider_type="remote::hf::endpoint",
         config=InferenceEndpointImplConfig.sample_run_config(),
     )
+    memory_provider = Provider(
+        provider_id="faiss",
+        provider_type="inline::faiss",
+        config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
+    )
 
     inference_model = ModelInput(
         model_id="${env.INFERENCE_MODEL}",
@@ -37,7 +43,7 @@ def get_distribution_template() -> DistributionTemplate:
     )
 
     return DistributionTemplate(
-        name="hf-endpoint",
+        name=name,
         distro_type="self_hosted",
         description="Use (an external) Hugging Face Inference Endpoint for running LLM inference",
         docker_image=None,
@@ -48,6 +54,7 @@ def get_distribution_template() -> DistributionTemplate:
             "run.yaml": RunConfigSettings(
                 provider_overrides={
                     "inference": [inference_provider],
+                    "memory": [memory_provider],
                 },
                 default_models=[inference_model],
             ),
diff --git a/llama_stack/templates/hf-endpoint/run.yaml b/llama_stack/templates/hf-endpoint/run.yaml
index 793d6a4a9..bf0697bba 100644
--- a/llama_stack/templates/hf-endpoint/run.yaml
+++ b/llama_stack/templates/hf-endpoint/run.yaml
@@ -26,12 +26,6 @@ providers:
         type: sqlite
         namespace: null
         db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/faiss_store.db
-  - provider_id: chromadb
-    provider_type: remote::chromadb
-    config: {}
-  - provider_id: pgvector
-    provider_type: remote::pgvector
-    config: {}
   safety:
   - provider_id: llama-guard
     provider_type: inline::llama-guard
diff --git a/llama_stack/templates/hf-serverless/hf_serverless.py b/llama_stack/templates/hf-serverless/hf_serverless.py
index c74a83b10..d1d12a3b2 100644
--- a/llama_stack/templates/hf-serverless/hf_serverless.py
+++ b/llama_stack/templates/hf-serverless/hf_serverless.py
@@ -5,6 +5,7 @@
 # the root directory of this source tree.
 
 from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput
+from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig
 from llama_stack.providers.remote.inference.tgi import InferenceAPIImplConfig
 from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
 
@@ -21,11 +22,17 @@ def get_distribution_template() -> DistributionTemplate:
         "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
     }
 
+    name = "hf-serverless"
     inference_provider = Provider(
         provider_id="hf-serverless",
         provider_type="remote::hf::serverless",
         config=InferenceAPIImplConfig.sample_run_config(),
     )
+    memory_provider = Provider(
+        provider_id="faiss",
+        provider_type="inline::faiss",
+        config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
+    )
 
     inference_model = ModelInput(
         model_id="${env.INFERENCE_MODEL}",
@@ -37,7 +44,7 @@ def get_distribution_template() -> DistributionTemplate:
     )
 
     return DistributionTemplate(
-        name="hf-serverless",
+        name=name,
         distro_type="self_hosted",
         description="Use (an external) Hugging Face Inference Endpoint for running LLM inference",
         docker_image=None,
@@ -48,6 +55,7 @@ def get_distribution_template() -> DistributionTemplate:
             "run.yaml": RunConfigSettings(
                 provider_overrides={
                     "inference": [inference_provider],
+                    "memory": [memory_provider],
                 },
                 default_models=[inference_model],
             ),
diff --git a/llama_stack/templates/hf-serverless/run.yaml b/llama_stack/templates/hf-serverless/run.yaml
index 803805a16..13e2d7789 100644
--- a/llama_stack/templates/hf-serverless/run.yaml
+++ b/llama_stack/templates/hf-serverless/run.yaml
@@ -26,12 +26,6 @@ providers:
         type: sqlite
         namespace: null
         db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/faiss_store.db
-  - provider_id: chromadb
-    provider_type: remote::chromadb
-    config: {}
-  - provider_id: pgvector
-    provider_type: remote::pgvector
-    config: {}
   safety:
   - provider_id: llama-guard
     provider_type: inline::llama-guard
diff --git a/llama_stack/templates/meta-reference-gpu/meta_reference.py b/llama_stack/templates/meta-reference-gpu/meta_reference.py
index f0fecb47d..649234e46 100644
--- a/llama_stack/templates/meta-reference-gpu/meta_reference.py
+++ b/llama_stack/templates/meta-reference-gpu/meta_reference.py
@@ -10,6 +10,7 @@ from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput
 from llama_stack.providers.inline.inference.meta_reference import (
     MetaReferenceInferenceConfig,
 )
+from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig
 from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
 
 
@@ -24,7 +25,7 @@ def get_distribution_template() -> DistributionTemplate:
         "datasetio": ["remote::huggingface", "inline::localfs"],
         "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
     }
-
+    name = "meta-reference-gpu"
     inference_provider = Provider(
         provider_id="meta-reference-inference",
         provider_type="inline::meta-reference",
@@ -33,6 +34,11 @@ def get_distribution_template() -> DistributionTemplate:
             checkpoint_dir="${env.INFERENCE_CHECKPOINT_DIR:null}",
         ),
     )
+    memory_provider = Provider(
+        provider_id="faiss",
+        provider_type="inline::faiss",
+        config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
+    )
 
     inference_model = ModelInput(
         model_id="${env.INFERENCE_MODEL}",
@@ -44,7 +50,7 @@ def get_distribution_template() -> DistributionTemplate:
     )
 
     return DistributionTemplate(
-        name="meta-reference-gpu",
+        name=name,
         distro_type="self_hosted",
         description="Use Meta Reference for running LLM inference",
         template_path=Path(__file__).parent / "doc_template.md",
@@ -54,6 +60,7 @@ def get_distribution_template() -> DistributionTemplate:
             "run.yaml": RunConfigSettings(
                 provider_overrides={
                     "inference": [inference_provider],
+                    "memory": [memory_provider],
                 },
                 default_models=[inference_model],
             ),
diff --git a/llama_stack/templates/meta-reference-gpu/run.yaml b/llama_stack/templates/meta-reference-gpu/run.yaml
index bcefdf800..3675f4a58 100644
--- a/llama_stack/templates/meta-reference-gpu/run.yaml
+++ b/llama_stack/templates/meta-reference-gpu/run.yaml
@@ -27,12 +27,6 @@ providers:
         type: sqlite
         namespace: null
         db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/faiss_store.db
-  - provider_id: chromadb
-    provider_type: remote::chromadb
-    config: {}
-  - provider_id: pgvector
-    provider_type: remote::pgvector
-    config: {}
   safety:
   - provider_id: llama-guard
     provider_type: inline::llama-guard
diff --git a/llama_stack/templates/meta-reference-quantized-gpu/meta_reference.py b/llama_stack/templates/meta-reference-quantized-gpu/meta_reference.py
index a390e71e6..1d611ae5f 100644
--- a/llama_stack/templates/meta-reference-quantized-gpu/meta_reference.py
+++ b/llama_stack/templates/meta-reference-quantized-gpu/meta_reference.py
@@ -10,6 +10,7 @@ from llama_stack.distribution.datatypes import ModelInput, Provider
 from llama_stack.providers.inline.inference.meta_reference import (
     MetaReferenceQuantizedInferenceConfig,
 )
+from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig
 from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
 
 
@@ -24,7 +25,7 @@ def get_distribution_template() -> DistributionTemplate:
         "datasetio": ["remote::huggingface", "inline::localfs"],
         "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
     }
-
+    name = "meta-reference-quantized-gpu"
     inference_provider = Provider(
         provider_id="meta-reference-inference",
         provider_type="inline::meta-reference-quantized",
@@ -33,13 +34,18 @@ def get_distribution_template() -> DistributionTemplate:
             checkpoint_dir="${env.INFERENCE_CHECKPOINT_DIR:null}",
         ),
     )
+    memory_provider = Provider(
+        provider_id="faiss",
+        provider_type="inline::faiss",
+        config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
+    )
 
     inference_model = ModelInput(
         model_id="${env.INFERENCE_MODEL}",
         provider_id="meta-reference-inference",
     )
     return DistributionTemplate(
-        name="meta-reference-quantized-gpu",
+        name=name,
         distro_type="self_hosted",
         description="Use Meta Reference with fp8, int4 quantization for running LLM inference",
         template_path=Path(__file__).parent / "doc_template.md",
@@ -49,6 +55,7 @@ def get_distribution_template() -> DistributionTemplate:
             "run.yaml": RunConfigSettings(
                 provider_overrides={
                     "inference": [inference_provider],
+                    "memory": [memory_provider],
                 },
                 default_models=[inference_model],
             ),
diff --git a/llama_stack/templates/meta-reference-quantized-gpu/run.yaml b/llama_stack/templates/meta-reference-quantized-gpu/run.yaml
index 1579c26f9..081af0f59 100644
--- a/llama_stack/templates/meta-reference-quantized-gpu/run.yaml
+++ b/llama_stack/templates/meta-reference-quantized-gpu/run.yaml
@@ -29,12 +29,6 @@ providers:
         type: sqlite
         namespace: null
         db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-quantized-gpu}/faiss_store.db
-  - provider_id: chromadb
-    provider_type: remote::chromadb
-    config: {}
-  - provider_id: pgvector
-    provider_type: remote::pgvector
-    config: {}
   safety:
   - provider_id: llama-guard
     provider_type: inline::llama-guard
diff --git a/llama_stack/templates/remote-vllm/run.yaml b/llama_stack/templates/remote-vllm/run.yaml
index 84b244f15..3457afdd6 100644
--- a/llama_stack/templates/remote-vllm/run.yaml
+++ b/llama_stack/templates/remote-vllm/run.yaml
@@ -24,12 +24,6 @@ providers:
         type: sqlite
         namespace: null
         db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/faiss_store.db
-  - provider_id: chromadb
-    provider_type: remote::chromadb
-    config: {}
-  - provider_id: pgvector
-    provider_type: remote::pgvector
-    config: {}
   safety:
   - provider_id: llama-guard
     provider_type: inline::llama-guard
diff --git a/llama_stack/templates/remote-vllm/vllm.py b/llama_stack/templates/remote-vllm/vllm.py
index c3858f7e5..68ab8d348 100644
--- a/llama_stack/templates/remote-vllm/vllm.py
+++ b/llama_stack/templates/remote-vllm/vllm.py
@@ -7,6 +7,7 @@
 from pathlib import Path
 
 from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput
+from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig
 from llama_stack.providers.remote.inference.vllm import VLLMInferenceAdapterConfig
 from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
 
@@ -19,7 +20,7 @@ def get_distribution_template() -> DistributionTemplate:
         "agents": ["inline::meta-reference"],
         "telemetry": ["inline::meta-reference"],
     }
-
+    name = "remote-vllm"
     inference_provider = Provider(
         provider_id="vllm-inference",
         provider_type="remote::vllm",
@@ -27,6 +28,11 @@ def get_distribution_template() -> DistributionTemplate:
             url="${env.VLLM_URL}",
         ),
     )
+    memory_provider = Provider(
+        provider_id="faiss",
+        provider_type="inline::faiss",
+        config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
+    )
 
     inference_model = ModelInput(
         model_id="${env.INFERENCE_MODEL}",
@@ -38,7 +44,7 @@ def get_distribution_template() -> DistributionTemplate:
     )
 
     return DistributionTemplate(
-        name="remote-vllm",
+        name=name,
         distro_type="self_hosted",
         description="Use (an external) vLLM server for running LLM inference",
         template_path=Path(__file__).parent / "doc_template.md",
@@ -48,6 +54,7 @@ def get_distribution_template() -> DistributionTemplate:
             "run.yaml": RunConfigSettings(
                 provider_overrides={
                     "inference": [inference_provider],
+                    "memory": [memory_provider],
                 },
                 default_models=[inference_model],
             ),
diff --git a/llama_stack/templates/tgi/run.yaml b/llama_stack/templates/tgi/run.yaml
index 9eb40a789..c45e114ee 100644
--- a/llama_stack/templates/tgi/run.yaml
+++ b/llama_stack/templates/tgi/run.yaml
@@ -25,12 +25,6 @@ providers:
         type: sqlite
         namespace: null
         db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/faiss_store.db
-  - provider_id: chromadb
-    provider_type: remote::chromadb
-    config: {}
-  - provider_id: pgvector
-    provider_type: remote::pgvector
-    config: {}
   safety:
   - provider_id: llama-guard
     provider_type: inline::llama-guard
diff --git a/llama_stack/templates/tgi/tgi.py b/llama_stack/templates/tgi/tgi.py
index 53207d691..edba61804 100644
--- a/llama_stack/templates/tgi/tgi.py
+++ b/llama_stack/templates/tgi/tgi.py
@@ -7,6 +7,7 @@
 from pathlib import Path
 
 from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput
+from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig
 from llama_stack.providers.remote.inference.tgi import TGIImplConfig
 from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
 
@@ -22,7 +23,7 @@ def get_distribution_template() -> DistributionTemplate:
         "datasetio": ["remote::huggingface", "inline::localfs"],
         "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
     }
-
+    name = "tgi"
     inference_provider = Provider(
         provider_id="tgi-inference",
         provider_type="remote::tgi",
@@ -30,6 +31,11 @@ def get_distribution_template() -> DistributionTemplate:
             url="${env.TGI_URL}",
         ),
     )
+    memory_provider = Provider(
+        provider_id="faiss",
+        provider_type="inline::faiss",
+        config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
+    )
 
     inference_model = ModelInput(
         model_id="${env.INFERENCE_MODEL}",
@@ -41,7 +47,7 @@ def get_distribution_template() -> DistributionTemplate:
     )
 
     return DistributionTemplate(
-        name="tgi",
+        name=name,
         distro_type="self_hosted",
         description="Use (an external) TGI server for running LLM inference",
         docker_image=None,
@@ -52,6 +58,7 @@ def get_distribution_template() -> DistributionTemplate:
             "run.yaml": RunConfigSettings(
                 provider_overrides={
                     "inference": [inference_provider],
+                    "memory": [memory_provider],
                 },
                 default_models=[inference_model],
            ),