diff --git a/docs/source/distributions/k8s/apply.sh b/docs/source/distributions/k8s/apply.sh
index 7ff7d28eb..b29e18d2a 100755
--- a/docs/source/distributions/k8s/apply.sh
+++ b/docs/source/distributions/k8s/apply.sh
@@ -16,6 +16,14 @@ export SAFETY_MODEL=${SAFETY_MODEL:-meta-llama/Llama-Guard-3-1B}
 set -euo pipefail
 set -x
 
+# Install NVIDIA device plugin for GPU support
+echo "Installing NVIDIA device plugin..."
+kubectl apply -f https://raw.githubusercontent.com/NVIDIA/k8s-device-plugin/refs/tags/v0.17.2/deployments/static/nvidia-device-plugin.yml
+
+# Wait for NVIDIA device plugin to be ready
+echo "Waiting for NVIDIA device plugin to be ready..."
+kubectl wait --for=condition=ready pod -l name=nvidia-device-plugin-ds -n kube-system --timeout=300s
+
 envsubst < ./vllm-k8s.yaml.template | kubectl apply -f -
 envsubst < ./vllm-safety-k8s.yaml.template | kubectl apply -f -
 envsubst < ./postgres-k8s.yaml.template | kubectl apply -f -
diff --git a/docs/source/distributions/k8s/chroma-k8s.yaml.template b/docs/source/distributions/k8s/chroma-k8s.yaml.template
index a2a5e3be3..2083a566b 100644
--- a/docs/source/distributions/k8s/chroma-k8s.yaml.template
+++ b/docs/source/distributions/k8s/chroma-k8s.yaml.template
@@ -5,6 +5,7 @@ metadata:
 spec:
   accessModes:
     - ReadWriteOnce
+  storageClassName: gp2
   resources:
     requests:
       storage: 20Gi
@@ -23,6 +24,8 @@ spec:
       labels:
         app: chromadb
     spec:
+      nodeSelector:
+        eks.amazonaws.com/nodegroup: cpu
       containers:
       - name: chromadb
         image: chromadb/chroma:latest
diff --git a/docs/source/distributions/k8s/postgres-k8s.yaml.template b/docs/source/distributions/k8s/postgres-k8s.yaml.template
index 86a765652..66e197b15 100644
--- a/docs/source/distributions/k8s/postgres-k8s.yaml.template
+++ b/docs/source/distributions/k8s/postgres-k8s.yaml.template
@@ -5,6 +5,7 @@ metadata:
 spec:
   accessModes:
     - ReadWriteOnce
+  storageClassName: gp2
   resources:
     requests:
       storage: 10Gi
@@ -23,6 +24,8 @@ spec:
       labels:
         app.kubernetes.io/name: postgres
     spec:
+      nodeSelector:
+        eks.amazonaws.com/nodegroup: cpu
       containers:
       - name: postgres
         image: postgres:15
diff --git a/docs/source/distributions/k8s/stack-k8s.yaml.template b/docs/source/distributions/k8s/stack-k8s.yaml.template
index 1cfc63ef5..44f69f69f 100644
--- a/docs/source/distributions/k8s/stack-k8s.yaml.template
+++ b/docs/source/distributions/k8s/stack-k8s.yaml.template
@@ -5,6 +5,7 @@ metadata:
 spec:
   accessModes:
     - ReadWriteOnce
+  storageClassName: gp2
   resources:
     requests:
       storage: 1Gi
@@ -25,9 +26,11 @@ spec:
         app.kubernetes.io/name: llama-stack
         app.kubernetes.io/component: server
     spec:
+      nodeSelector:
+        eks.amazonaws.com/nodegroup: cpu
       containers:
       - name: llama-stack
-        image: llamastack/distribution-remote-vllm:latest
+        image: llamastack/distribution-postgres-demo:latest
         imagePullPolicy: Always # since we have specified latest instead of a version
         env:
         - name: ENABLE_CHROMADB
diff --git a/docs/source/distributions/k8s/ui-k8s.yaml.template b/docs/source/distributions/k8s/ui-k8s.yaml.template
index ef1bf0c55..ca429c029 100644
--- a/docs/source/distributions/k8s/ui-k8s.yaml.template
+++ b/docs/source/distributions/k8s/ui-k8s.yaml.template
@@ -17,6 +17,8 @@ spec:
         app.kubernetes.io/name: llama-stack
         app.kubernetes.io/component: ui
     spec:
+      nodeSelector:
+        eks.amazonaws.com/nodegroup: cpu
       containers:
       - name: llama-stack-ui
         image: node:18-alpine
diff --git a/docs/source/distributions/k8s/vllm-k8s.yaml.template b/docs/source/distributions/k8s/vllm-k8s.yaml.template
index 6256cc7e1..3988066b2 100755
--- a/docs/source/distributions/k8s/vllm-k8s.yaml.template
+++ b/docs/source/distributions/k8s/vllm-k8s.yaml.template
@@ -6,6 +6,7 @@ spec:
   accessModes:
     - ReadWriteOnce
   volumeMode: Filesystem
+  storageClassName: gp2
   resources:
     requests:
       storage: 50Gi
@@ -25,16 +26,8 @@ spec:
         app.kubernetes.io/name: vllm
         workload-type: inference
     spec:
-      affinity:
-        podAntiAffinity:
-          requiredDuringSchedulingIgnoredDuringExecution:
-          - labelSelector:
-              matchExpressions:
-              - key: workload-type
-                operator: In
-                values:
-                - inference
-            topologyKey: kubernetes.io/hostname # Ensures no two inference pods on same node
+      nodeSelector:
+        eks.amazonaws.com/nodegroup: gpu
       containers:
       - name: vllm
         image: vllm/vllm-openai:latest
@@ -49,6 +42,11 @@ spec:
               key: token
         ports:
         - containerPort: 8000
+        resources:
+          requests:
+            nvidia.com/gpu: 1
+          limits:
+            nvidia.com/gpu: 1
         volumeMounts:
         - name: llama-storage
           mountPath: /root/.cache/huggingface
diff --git a/docs/source/distributions/k8s/vllm-safety-k8s.yaml.template b/docs/source/distributions/k8s/vllm-safety-k8s.yaml.template
index 8857e83b6..9bce4aa95 100644
--- a/docs/source/distributions/k8s/vllm-safety-k8s.yaml.template
+++ b/docs/source/distributions/k8s/vllm-safety-k8s.yaml.template
@@ -26,16 +26,8 @@ spec:
         app.kubernetes.io/name: vllm-safety
         workload-type: inference
     spec:
-      affinity:
-        podAntiAffinity:
-          requiredDuringSchedulingIgnoredDuringExecution:
-          - labelSelector:
-              matchExpressions:
-              - key: workload-type
-                operator: In
-                values:
-                - inference
-            topologyKey: kubernetes.io/hostname # Ensures no two inference pods on same node
+      nodeSelector:
+        eks.amazonaws.com/nodegroup: gpu
       containers:
       - name: vllm-safety
         image: vllm/vllm-openai:latest
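
Note on assumptions: the nodeSelector stanzas above schedule pods by the eks.amazonaws.com/nodegroup label, which EKS attaches automatically to nodes in a managed node group, so this patch presumes node groups literally named "cpu" and "gpu". A minimal eksctl sketch under that assumption; the cluster name and instance types below are hypothetical placeholders, not part of the patch:

    # Hypothetical node groups; only the names must match the nodeSelector values.
    eksctl create nodegroup --cluster llama-stack-demo --name cpu \
      --node-type m5.xlarge --nodes 2
    eksctl create nodegroup --cluster llama-stack-demo --name gpu \
      --node-type g5.2xlarge --nodes 2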
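
The PVC templates also now pin storageClassName: gp2 (the default EBS-backed class on EKS), and the vLLM pods will sit in Pending until the device plugin reports nvidia.com/gpu on the GPU nodes. A quick pre-flight sketch, assuming the node groups and storage class above already exist:

    # Both node groups should appear with the expected label values.
    kubectl get nodes -L eks.amazonaws.com/nodegroup

    # The gp2 StorageClass referenced by the PVCs must exist.
    kubectl get storageclass gp2

    # After apply.sh installs the device plugin, GPU nodes should list
    # nvidia.com/gpu under Capacity and Allocatable.
    kubectl describe nodes -l eks.amazonaws.com/nodegroup=gpu | grep 'nvidia.com/gpu'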