diff --git a/llama_stack/templates/vllm-gpu/run.yaml b/llama_stack/templates/vllm-gpu/run.yaml
index ddf069e8d..ea188777f 100644
--- a/llama_stack/templates/vllm-gpu/run.yaml
+++ b/llama_stack/templates/vllm-gpu/run.yaml
@@ -29,12 +29,6 @@ providers:
         type: sqlite
         namespace: null
         db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/vllm-gpu}/faiss_store.db
-  - provider_id: chromadb
-    provider_type: remote::chromadb
-    config: {}
-  - provider_id: pgvector
-    provider_type: remote::pgvector
-    config: {}
   safety:
   - provider_id: llama-guard
     provider_type: inline::llama-guard
diff --git a/llama_stack/templates/vllm-gpu/vllm.py b/llama_stack/templates/vllm-gpu/vllm.py
index 27665efa3..10b448b5c 100644
--- a/llama_stack/templates/vllm-gpu/vllm.py
+++ b/llama_stack/templates/vllm-gpu/vllm.py
@@ -6,6 +6,7 @@
 
 from llama_stack.distribution.datatypes import ModelInput, Provider
 from llama_stack.providers.inline.inference.vllm import VLLMConfig
+from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig
 from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
 
 
@@ -20,12 +21,17 @@ def get_distribution_template() -> DistributionTemplate:
         "datasetio": ["remote::huggingface", "inline::localfs"],
         "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
     }
-
+    name = "vllm-gpu"
     inference_provider = Provider(
         provider_id="vllm",
         provider_type="inline::vllm",
         config=VLLMConfig.sample_run_config(),
     )
+    memory_provider = Provider(
+        provider_id="faiss",
+        provider_type="inline::faiss",
+        config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
+    )
 
     inference_model = ModelInput(
         model_id="${env.INFERENCE_MODEL}",
@@ -33,7 +39,7 @@ def get_distribution_template() -> DistributionTemplate:
     )
 
     return DistributionTemplate(
-        name="vllm-gpu",
+        name=name,
         distro_type="self_hosted",
         description="Use a built-in vLLM engine for running LLM inference",
         docker_image=None,
@@ -44,6 +50,7 @@ def get_distribution_template() -> DistributionTemplate:
             "run.yaml": RunConfigSettings(
                 provider_overrides={
                     "inference": [inference_provider],
+                    "memory": [memory_provider],
                 },
                 default_models=[inference_model],
             ),
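
For reference, a sketch of the memory provider block the regenerated run.yaml is expected to contain after this change, assuming FaissImplConfig.sample_run_config("distributions/vllm-gpu") keeps the sqlite kvstore layout shown in the context lines of the first hunk:

  memory:
  - provider_id: faiss
    provider_type: inline::faiss
    config:
      kvstore:
        type: sqlite
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/vllm-gpu}/faiss_store.db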