first draft

2025-10-23 16:37:28 +00:00 · 2025-07-25 10:41:06 -07:00 · 2025-07-25 10:41:06 -07:00 · e614241876
commit e614241876
parent 025163d8e6
9 changed files with 64 additions and 60 deletions
--- a/docs/source/distributions/k8s/apply.sh
+++ b/docs/source/distributions/k8s/apply.sh
@ -13,6 +13,9 @@ export POSTGRES_PASSWORD=llamastack
 export INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct
 export SAFETY_MODEL=meta-llama/Llama-Guard-3-1B

+# Set USE_EBS=true to use EKS EBS-backed persistent volumes; the default (false) uses emptyDir, for when you don't have permission to use EBS
+export USE_EBS=${USE_EBS:-false}
+
 # HF_TOKEN should be set by the user; base64 encode it for the secret
 if [ -n "${HF_TOKEN:-}" ]; then
  export HF_TOKEN_BASE64=$(echo -n "$HF_TOKEN" | base64)
@ -47,17 +50,37 @@ if [ -n "${HF_TOKEN:-}" ]; then
  envsubst < ./hf-token-secret.yaml.template | kubectl apply -f -
 fi

-envsubst < ./vllm-k8s.yaml.template | kubectl apply -f -
-envsubst < ./vllm-safety-k8s.yaml.template | kubectl apply -f -
-envsubst < ./postgres-k8s.yaml.template | kubectl apply -f -
-envsubst < ./chroma-k8s.yaml.template | kubectl apply -f -
+# Apply templates with appropriate storage configuration based on USE_EBS setting
+if [ "$USE_EBS" = "true" ]; then
+  echo "Using EBS storage for persistent volumes"
+  envsubst < ./vllm-k8s.yaml.template | kubectl apply -f -
+  envsubst < ./vllm-safety-k8s.yaml.template | kubectl apply -f -
+  envsubst < ./postgres-k8s.yaml.template | kubectl apply -f -
+  envsubst < ./chroma-k8s.yaml.template | kubectl apply -f -

-kubectl create configmap llama-stack-config --from-file=stack_run_config.yaml \
-  --dry-run=client -o yaml > stack-configmap.yaml
+  kubectl create configmap llama-stack-config --from-file=stack_run_config.yaml \
+    --dry-run=client -o yaml > stack-configmap.yaml

-kubectl apply -f stack-configmap.yaml
+  kubectl apply -f stack-configmap.yaml

-envsubst < ./stack-k8s.yaml.template | kubectl apply -f -
-envsubst < ./ingress-k8s.yaml.template | kubectl apply -f -
+  envsubst < ./stack-k8s.yaml.template | kubectl apply -f -
+  envsubst < ./ingress-k8s.yaml.template | kubectl apply -f -
+  envsubst < ./ui-k8s.yaml.template | kubectl apply -f -
+else
+  echo "Using emptyDir for storage (data will not persist across pod restarts)"
+  # Rewrite each rendered template: replace persistentVolumeClaim volume sources with emptyDir: {} and drop the claimName lines
+  envsubst < ./vllm-k8s.yaml.template | sed 's/persistentVolumeClaim:/emptyDir: {}/g' | sed '/claimName:/d' | kubectl apply -f -
+  envsubst < ./vllm-safety-k8s.yaml.template | sed 's/persistentVolumeClaim:/emptyDir: {}/g' | sed '/claimName:/d' | kubectl apply -f -
+  envsubst < ./postgres-k8s.yaml.template | sed 's/persistentVolumeClaim:/emptyDir: {}/g' | sed '/claimName:/d' | kubectl apply -f -
+  envsubst < ./chroma-k8s.yaml.template | sed 's/persistentVolumeClaim:/emptyDir: {}/g' | sed '/claimName:/d' | kubectl apply -f -

-envsubst < ./ui-k8s.yaml.template | kubectl apply -f -
+  kubectl create configmap llama-stack-config --from-file=stack_run_config.yaml \
+    --dry-run=client -o yaml > stack-configmap.yaml
+
+  kubectl apply -f stack-configmap.yaml
+
+  # Apply the same emptyDir transformation to the remaining templates
+  envsubst < ./stack-k8s.yaml.template | sed 's/persistentVolumeClaim:/emptyDir: {}/g' | sed '/claimName:/d' | kubectl apply -f -
+  envsubst < ./ingress-k8s.yaml.template | sed 's/persistentVolumeClaim:/emptyDir: {}/g' | sed '/claimName:/d' | kubectl apply -f -
+  envsubst < ./ui-k8s.yaml.template | sed 's/persistentVolumeClaim:/emptyDir: {}/g' | sed '/claimName:/d' | kubectl apply -f -
+fi