diff --git a/docs/source/distributions/k8s/apply.sh b/docs/source/distributions/k8s/apply.sh
index 4b025d20f..67754bdb7 100755
--- a/docs/source/distributions/k8s/apply.sh
+++ b/docs/source/distributions/k8s/apply.sh
@@ -12,7 +12,7 @@
 export POSTGRES_USER=llamastack
 export POSTGRES_DB=llamastack
 export POSTGRES_PASSWORD=llamastack
-export INFERENCE_MODEL=meta-llama/Llama-3.3-70B-Instruct
+export INFERENCE_MODEL=meta-llama/Llama-3.1-8B-Instruct
 export CODE_MODEL=bigcode/starcoder2-7b
 export OLLAMA_MODEL=llama-guard3:1b
 # Set USE_EBS to false if you don't have permission to use EKS EBS
diff --git a/docs/source/distributions/k8s/stack-k8s.yaml.template b/docs/source/distributions/k8s/stack-k8s.yaml.template
index 7d24a8853..4b85a2063 100644
--- a/docs/source/distributions/k8s/stack-k8s.yaml.template
+++ b/docs/source/distributions/k8s/stack-k8s.yaml.template
@@ -60,11 +60,11 @@ spec:
         resources:
           requests:
             memory: "2Gi"
-            cpu: "8000m"
+            cpu: "4000m"
             ephemeral-storage: "6Gi"
           limits:
             memory: "2Gi"
-            cpu: "8000m"
+            cpu: "4000m"
             ephemeral-storage: "6Gi"
         env:
         - name: ENABLE_CHROMADB
@@ -106,7 +106,7 @@ spec:
           apt-get update && apt-get install -y git
           # Clone the repository
           git clone https://github.com/meta-llama/llama-stack.git /app
-          git checkout k8s_demo
+          git checkout 7f83433
           cd /app/llama_stack/

           # Install llama-stack
diff --git a/docs/source/distributions/k8s/ui-k8s.yaml.template b/docs/source/distributions/k8s/ui-k8s.yaml.template
index b92aadb4d..ad83120f8 100644
--- a/docs/source/distributions/k8s/ui-k8s.yaml.template
+++ b/docs/source/distributions/k8s/ui-k8s.yaml.template
@@ -37,6 +37,8 @@ spec:
           git clone https://github.com/meta-llama/llama-stack.git /app
           git checkout k8s_demo
           # Navigate to the playground directory
+          cd /app
+          pip install -e .
           cd /app/llama_stack/distribution/ui

           # Install requirements
diff --git a/docs/source/distributions/k8s/vllm-k8s.yaml.template b/docs/source/distributions/k8s/vllm-k8s.yaml.template
index 36a946d16..5bd091d8c 100644
--- a/docs/source/distributions/k8s/vllm-k8s.yaml.template
+++ b/docs/source/distributions/k8s/vllm-k8s.yaml.template
@@ -39,7 +39,7 @@ spec:
         image: vllm/vllm-openai:latest
         command: ["/bin/sh", "-c"]
         args:
-          - "vllm serve ${INFERENCE_MODEL} --enforce-eager -tp 4 --max-model-len 80000 --gpu-memory-utilization 0.92 --enable-auto-tool-choice --tool-call-parser llama3_json --max-num-seqs 1 --port 8001"
+          - "vllm serve ${INFERENCE_MODEL} --enforce-eager -tp 1 --max-model-len 80000 --gpu-memory-utilization 0.92 --enable-auto-tool-choice --tool-call-parser llama3_json --max-num-seqs 1 --port 8001"
         env:
         - name: NCCL_DEBUG
           value: "INFO"
@@ -55,9 +55,9 @@ spec:
           name: http
         resources:
           limits:
-            nvidia.com/gpu: 4
+            nvidia.com/gpu: 1
           requests:
-            nvidia.com/gpu: 4
+            nvidia.com/gpu: 1
         volumeMounts:
         - name: llama-storage
           mountPath: /root/.cache/huggingface
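
For context on how these `*.yaml.template` files are consumed, here is a minimal sketch, assuming apply.sh follows the usual envsubst-plus-kubectl pattern for templated manifests; the diff does not show apply.sh's actual rendering steps, so everything except the template names and the new INFERENCE_MODEL default is illustrative.

```sh
# Minimal sketch, NOT the repo's actual apply.sh: renders the three templates
# touched by this diff and applies them. Assumes envsubst (from gettext) and
# kubectl are installed and a cluster context is configured.
export INFERENCE_MODEL=meta-llama/Llama-3.1-8B-Instruct   # new default from this diff

for t in vllm-k8s stack-k8s ui-k8s; do
  # Substitute ${INFERENCE_MODEL} and friends into the template, then apply it.
  envsubst < "docs/source/distributions/k8s/${t}.yaml.template" | kubectl apply -f -
done
```

Note that the diff keeps vLLM's `-tp` flag in step with the pod's `nvidia.com/gpu` resource: the tensor-parallel size must match the number of GPUs visible to the container, which is why both values drop from 4 to 1 together (and why the default model shrinks from a 70B to an 8B variant that fits on a single GPU).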