diff --git a/docs/source/distributions/eks/apply.sh b/docs/source/distributions/eks/apply.sh
new file mode 100755
index 000000000..3ad3dd263
--- /dev/null
+++ b/docs/source/distributions/eks/apply.sh
@@ -0,0 +1,19 @@
+#!/usr/bin/env bash
+
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+set -euo pipefail
+
+SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+K8S_DIR="${SCRIPT_DIR}/../k8s"
+
+echo "Setting up AWS EKS-specific storage class..."
+kubectl apply -f "${SCRIPT_DIR}/gp3-topology-aware.yaml"
+
+echo "Running main Kubernetes deployment..."
+cd "${K8S_DIR}"
+./apply.sh "$@"
diff --git a/docs/source/distributions/eks/gp3-topology-aware.yaml b/docs/source/distributions/eks/gp3-topology-aware.yaml
new file mode 100644
index 000000000..1192ba18c
--- /dev/null
+++ b/docs/source/distributions/eks/gp3-topology-aware.yaml
@@ -0,0 +1,15 @@
+# Set up default storage class on AWS EKS
+apiVersion: storage.k8s.io/v1
+kind: StorageClass
+metadata:
+  name: gp3-topology-aware
+  annotations:
+    storageclass.kubernetes.io/is-default-class: "true"
+parameters:
+  type: gp3
+  iops: "3000"
+  throughput: "125"
+provisioner: ebs.csi.aws.com
+reclaimPolicy: Delete
+volumeBindingMode: WaitForFirstConsumer
+allowVolumeExpansion: true
diff --git a/docs/source/distributions/k8s/apply.sh b/docs/source/distributions/k8s/apply.sh
index 7ff7d28eb..06b1ea10c 100755
--- a/docs/source/distributions/k8s/apply.sh
+++ b/docs/source/distributions/k8s/apply.sh
@@ -13,9 +13,22 @@ export POSTGRES_PASSWORD=${POSTGRES_PASSWORD:-llamastack}
 export INFERENCE_MODEL=${INFERENCE_MODEL:-meta-llama/Llama-3.2-3B-Instruct}
 export SAFETY_MODEL=${SAFETY_MODEL:-meta-llama/Llama-Guard-3-1B}
 
+# HF_TOKEN should be set by the user; base64 encode it for the secret
+if [ -n "${HF_TOKEN:-}" ]; then
+  export HF_TOKEN_BASE64=$(echo -n "$HF_TOKEN" | base64)
+else
+  echo "ERROR: HF_TOKEN not set. You need it for vLLM to download models from Hugging Face."
+  exit 1
+fi
+
 set -euo pipefail
 set -x
 
+# Apply the HF token secret if HF_TOKEN is provided
+if [ -n "${HF_TOKEN:-}" ]; then
+  envsubst < ./hf-token-secret.yaml.template | kubectl apply -f -
+fi
+
 envsubst < ./vllm-k8s.yaml.template | kubectl apply -f -
 envsubst < ./vllm-safety-k8s.yaml.template | kubectl apply -f -
 envsubst < ./postgres-k8s.yaml.template | kubectl apply -f -
diff --git a/docs/source/distributions/k8s/hf-token-secret.yaml.template b/docs/source/distributions/k8s/hf-token-secret.yaml.template
new file mode 100644
index 000000000..b6db8e7bc
--- /dev/null
+++ b/docs/source/distributions/k8s/hf-token-secret.yaml.template
@@ -0,0 +1,7 @@
+apiVersion: v1
+kind: Secret
+metadata:
+  name: hf-token-secret
+type: Opaque
+data:
+  token: ${HF_TOKEN_BASE64}
diff --git a/docs/source/distributions/k8s/stack-configmap.yaml b/docs/source/distributions/k8s/stack-configmap.yaml
index 0a08bca03..129471862 100644
--- a/docs/source/distributions/k8s/stack-configmap.yaml
+++ b/docs/source/distributions/k8s/stack-configmap.yaml
@@ -22,10 +22,10 @@ data:
       - provider_id: vllm-safety
         provider_type: remote::vllm
         config:
-          url: ${env.VLLM_SAFETY_URL:http://localhost:8000/v1}
-          max_tokens: ${env.VLLM_MAX_TOKENS:4096}
-          api_token: ${env.VLLM_API_TOKEN:fake}
-          tls_verify: ${env.VLLM_TLS_VERIFY:true}
+          url: ${env.VLLM_SAFETY_URL:=http://localhost:8000/v1}
+          max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
+          api_token: ${env.VLLM_API_TOKEN:=fake}
+          tls_verify: ${env.VLLM_TLS_VERIFY:=true}
       - provider_id: sentence-transformers
         provider_type: inline::sentence-transformers
         config: {}
@@ -33,7 +33,7 @@ data:
       - provider_id: ${env.ENABLE_CHROMADB:+chromadb}
         provider_type: remote::chromadb
         config:
-          url: ${env.CHROMADB_URL:+}
+          url: ${env.CHROMADB_URL:=}
       safety:
       - provider_id: llama-guard
         provider_type: inline::llama-guard
@@ -48,7 +48,7 @@ data:
         host: ${env.POSTGRES_HOST:=localhost}
         port: ${env.POSTGRES_PORT:=5432}
         db: ${env.POSTGRES_DB:=llamastack}
-        user: ${env.POSTGRES_USER:llamastack}
+        user: ${env.POSTGRES_USER:=llamastack}
         password: ${env.POSTGRES_PASSWORD:=llamastack}
       responses_store:
         type: postgres
@@ -61,8 +61,8 @@ data:
      - provider_id: meta-reference
        provider_type: inline::meta-reference
        config:
-         service_name: ${env.OTEL_SERVICE_NAME:+}
-         sinks: ${env.TELEMETRY_SINKS:console}
+         service_name: "${env.OTEL_SERVICE_NAME:=\u200B}"
+         sinks: ${env.TELEMETRY_SINKS:=console}
      tool_runtime:
      - provider_id: brave-search
        provider_type: remote::brave-search
diff --git a/docs/source/distributions/k8s/stack_run_config.yaml b/docs/source/distributions/k8s/stack_run_config.yaml
index 5ac08134c..23993ca5d 100644
--- a/docs/source/distributions/k8s/stack_run_config.yaml
+++ b/docs/source/distributions/k8s/stack_run_config.yaml
@@ -30,7 +30,7 @@ providers:
   - provider_id: ${env.ENABLE_CHROMADB:+chromadb}
     provider_type: remote::chromadb
     config:
-      url: ${env.CHROMADB_URL:+}
+      url: ${env.CHROMADB_URL:=}
   safety:
   - provider_id: llama-guard
     provider_type: inline::llama-guard
@@ -58,8 +58,8 @@ providers:
   - provider_id: meta-reference
     provider_type: inline::meta-reference
     config:
-      service_name: ${env.OTEL_SERVICE_NAME:+console}
-      sinks: ${env.TELEMETRY_SINKS:+console}
+      service_name: "${env.OTEL_SERVICE_NAME:=\u200B}"
+      sinks: ${env.TELEMETRY_SINKS:=console}
   tool_runtime:
   - provider_id: brave-search
     provider_type: remote::brave-search
diff --git a/docs/source/distributions/k8s/vllm-k8s.yaml.template b/docs/source/distributions/k8s/vllm-k8s.yaml.template
index 6256cc7e1..03f3759c3 100644
--- a/docs/source/distributions/k8s/vllm-k8s.yaml.template
+++ b/docs/source/distributions/k8s/vllm-k8s.yaml.template
@@ -25,16 +25,8 @@ spec:
         app.kubernetes.io/name: vllm
         workload-type: inference
     spec:
-      affinity:
-        podAntiAffinity:
-          requiredDuringSchedulingIgnoredDuringExecution:
-          - labelSelector:
-              matchExpressions:
-              - key: workload-type
-                operator: In
-                values:
-                - inference
-            topologyKey: kubernetes.io/hostname # Ensures no two inference pods on same node
+      nodeSelector:
+        eks.amazonaws.com/nodegroup: gpu
       containers:
       - name: vllm
         image: vllm/vllm-openai:latest
@@ -42,6 +34,8 @@ spec:
         args:
         - "vllm serve ${INFERENCE_MODEL} --dtype float16 --enforce-eager --max-model-len 4096 --gpu-memory-utilization 0.6"
         env:
+        - name: INFERENCE_MODEL
+          value: "${INFERENCE_MODEL}"
        - name: HUGGING_FACE_HUB_TOKEN
          valueFrom:
            secretKeyRef:
@@ -49,6 +43,11 @@ spec:
              key: token
        ports:
        - containerPort: 8000
+        resources:
+          limits:
+            nvidia.com/gpu: 1
+          requests:
+            nvidia.com/gpu: 1
        volumeMounts:
        - name: llama-storage
          mountPath: /root/.cache/huggingface
diff --git a/docs/source/distributions/k8s/vllm-safety-k8s.yaml.template b/docs/source/distributions/k8s/vllm-safety-k8s.yaml.template
index 8857e83b6..37b2b9a6b 100644
--- a/docs/source/distributions/k8s/vllm-safety-k8s.yaml.template
+++ b/docs/source/distributions/k8s/vllm-safety-k8s.yaml.template
@@ -6,7 +6,6 @@ spec:
   accessModes:
     - ReadWriteOnce
   volumeMode: Filesystem
-  storageClassName: gp2
   resources:
     requests:
       storage: 30Gi
@@ -26,16 +25,8 @@ spec:
         app.kubernetes.io/name: vllm-safety
         workload-type: inference
     spec:
-      affinity:
-        podAntiAffinity:
-          requiredDuringSchedulingIgnoredDuringExecution:
-          - labelSelector:
-              matchExpressions:
-              - key: workload-type
-                operator: In
-                values:
-                - inference
-            topologyKey: kubernetes.io/hostname # Ensures no two inference pods on same node
+      nodeSelector:
+        eks.amazonaws.com/nodegroup: gpu
       containers:
       - name: vllm-safety
         image: vllm/vllm-openai:latest
@@ -44,6 +35,8 @@ spec:
           "vllm serve ${SAFETY_MODEL} --dtype float16 --enforce-eager --max-model-len 4096 --port 8001 --gpu-memory-utilization 0.3"
         ]
         env:
+        - name: SAFETY_MODEL
+          value: "${SAFETY_MODEL}"
        - name: HUGGING_FACE_HUB_TOKEN
          valueFrom:
            secretKeyRef:
@@ -51,6 +44,11 @@ spec:
              key: token
        ports:
        - containerPort: 8001
+        resources:
+          limits:
+            nvidia.com/gpu: 1
+          requests:
+            nvidia.com/gpu: 1
        volumeMounts:
        - name: llama-storage
          mountPath: /root/.cache/huggingface
diff --git a/docs/source/distributions/kubernetes_deployment.md b/docs/source/distributions/kubernetes_deployment.md
index f43039824..3a8dccd07 100644
--- a/docs/source/distributions/kubernetes_deployment.md
+++ b/docs/source/distributions/kubernetes_deployment.md
@@ -5,6 +5,8 @@ Instead of starting the Llama Stack and vLLM servers locally. We can deploy them
 ### Prerequisites
 In this guide, we'll use a local [Kind](https://kind.sigs.k8s.io/) cluster and a vLLM inference service in the same cluster for demonstration purposes.
 
+Note: You can also deploy the Llama Stack server in an AWS EKS cluster. See [Deploying Llama Stack Server in AWS EKS](#deploying-llama-stack-server-in-aws-eks) for more details.
+
 First, create a local Kubernetes cluster via Kind:
 
 ```
@@ -217,3 +219,18 @@ Finally, we forward the Kubernetes service to a local port and test some inferen
 kubectl port-forward service/llama-stack-service 5000:5000
 llama-stack-client --endpoint http://localhost:5000 inference chat-completion --message "hello, what model are you?"
 ```
+
+## Deploying Llama Stack Server in AWS EKS
+
+We've also provided a script to deploy the Llama Stack server in an AWS EKS cluster. Once you have an [EKS cluster](https://docs.aws.amazon.com/eks/latest/userguide/getting-started.html), you can deploy the server by running:
+
+
+```
+cd docs/source/distributions/eks
+./apply.sh
+```
+
+This script will:
+
+- Set up a default storage class for AWS EKS
+- Deploy the Llama Stack server in a Kubernetes Pod and Service
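+
+The script expects `HF_TOKEN` to be exported in your shell; it base64-encodes the token and applies it as the `hf-token-secret` Secret that the vLLM pods read. If you prefer to create that secret yourself, an equivalent manual step (a sketch only; `apply.sh` already does this for you) is:
+
+```
+kubectl create secret generic hf-token-secret --from-literal=token="${HF_TOKEN}"
+```
+
+To confirm the deployment came up, checks along these lines should work (the service name below matches the `llama-stack-service` used earlier in this guide; adjust it if you have customized the templates):
+
+```
+# Verify the gp3 storage class exists and is marked as default
+kubectl get storageclass gp3-topology-aware
+
+# Watch the vLLM, Postgres, and Llama Stack pods become Ready
+kubectl get pods -w
+
+# Forward the service and run a test request
+kubectl port-forward service/llama-stack-service 5000:5000
+llama-stack-client --endpoint http://localhost:5000 inference chat-completion --message "hello, what model are you?"
+```
\ No newline at end of file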