From b63982ef00d2f302b65d69fc9d135be033098b10 Mon Sep 17 00:00:00 2001 From: Kai Wu Date: Thu, 31 Jul 2025 10:19:53 -0700 Subject: [PATCH] k8s: verify secret creation, expose UI via its own Service, pin starter image apply.sh: enable strict mode and tracing before the HF_TOKEN handling, always apply set-secret.yaml.template and abort with the captured kubectl output on failure, verify that hf-token-secret, ngc-docker-registry and ngc-api exist, and apply ui-service-k8s.yaml.template in both storage modes. ingress: select only the server component and drop the UI port 8322. set-secret: split the three Secrets into separate YAML documents and use apiVersion v1 for all of them. stack: pin the image to llamastack/distribution-starter:0.2.15 and set the ephemeral-storage request and limit to 6Gi. ui: check out the k8s_demo branch inside the cloned repository (/app) and declare containerPort 8322 to match the Streamlit port. ui-service: new LoadBalancer Service exposing the UI on port 8322. --- docs/source/distributions/k8s/apply.sh | 39 +++++++++++++++---- .../k8s/ingress-k8s.yaml.template | 5 +-- .../k8s/set-secret.yaml.template | 13 ++++--- .../distributions/k8s/stack-k8s.yaml.template | 7 ++-- .../distributions/k8s/ui-k8s.yaml.template | 4 +- .../k8s/ui-service-k8s.yaml.template | 14 +++++++ 6 files changed, 59 insertions(+), 23 deletions(-) create mode 100644 docs/source/distributions/k8s/ui-service-k8s.yaml.template diff --git a/docs/source/distributions/k8s/apply.sh b/docs/source/distributions/k8s/apply.sh index a38144898..012b3e609 100755 --- a/docs/source/distributions/k8s/apply.sh +++ b/docs/source/distributions/k8s/apply.sh @@ -21,7 +21,8 @@ export CODE_MODEL=bigcode/starcoder2-7b # Set USE_EBS to false if you don't have permission to use EKS EBS export USE_EBS=${USE_EBS:-false} - +set -euo pipefail +set -x # HF_TOKEN should be set by the user; base64 encode it for the secret if [ -n "${HF_TOKEN:-}" ]; then export HF_TOKEN_BASE64=$(echo -n "$HF_TOKEN" | base64) @@ -50,13 +51,35 @@ fi -# Apply the HF token secret if HF_TOKEN is provided -if [ -n "${HF_TOKEN:-}" ]; then - envsubst < ./set-secret.yaml.template | kubectl apply -f - -fi +# Create secrets and capture any errors +echo "Creating Kubernetes secrets..." +SECRET_OUTPUT=$(envsubst < ./set-secret.yaml.template | kubectl apply -f - 2>&1) || { + echo "ERROR: Failed to create secrets. Error output:" + echo "$SECRET_OUTPUT" + exit 1 +} -set -euo pipefail -set -x +# Wait a moment for Kubernetes to process the secrets +echo "Waiting for secrets to be processed..." +sleep 2 + +# Verify that the secrets were created successfully +echo "Verifying that secrets were created successfully..." 
+ +# Check each secret with better error handling +for SECRET_NAME in "hf-token-secret" "ngc-docker-registry" "ngc-api"; do + echo "Checking for secret: $SECRET_NAME" + if ! kubectl get secret "$SECRET_NAME" &> /dev/null; then + echo "ERROR: Secret '$SECRET_NAME' not found in the cluster." + echo "Secret creation output was:" + echo "$SECRET_OUTPUT" + echo "Current secrets in the namespace:" + kubectl get secrets + exit 1 + fi +done + +echo "Secret verification successful. All required secrets are present." # Apply templates with appropriate storage configuration based on USE_EBS setting if [ "$USE_EBS" = "true" ]; then echo "Using EBS storage for persistent volumes" @@ -73,6 +96,7 @@ if [ "$USE_EBS" = "true" ]; then envsubst < ./stack-k8s.yaml.template | kubectl apply -f - envsubst < ./ingress-k8s.yaml.template | kubectl apply -f - envsubst < ./ui-k8s.yaml.template | kubectl apply -f - + envsubst < ./ui-service-k8s.yaml.template | kubectl apply -f - else echo "Using emptyDir for storage (data will not persist across pod restarts)" # Process templates to replace EBS storage with emptyDir @@ -90,4 +114,5 @@ else envsubst < ./stack-k8s.yaml.template | sed 's/persistentVolumeClaim:/emptyDir: {}/g' | sed '/claimName:/d' | kubectl apply -f - envsubst < ./ingress-k8s.yaml.template | sed 's/persistentVolumeClaim:/emptyDir: {}/g' | sed '/claimName:/d' | kubectl apply -f - envsubst < ./ui-k8s.yaml.template | sed 's/persistentVolumeClaim:/emptyDir: {}/g' | sed '/claimName:/d' | kubectl apply -f - + envsubst < ./ui-service-k8s.yaml.template | kubectl apply -f - fi diff --git a/docs/source/distributions/k8s/ingress-k8s.yaml.template b/docs/source/distributions/k8s/ingress-k8s.yaml.template index 9ebe86b69..f0e74af83 100644 --- a/docs/source/distributions/k8s/ingress-k8s.yaml.template +++ b/docs/source/distributions/k8s/ingress-k8s.yaml.template @@ -6,12 +6,9 @@ spec: type: LoadBalancer selector: app.kubernetes.io/name: llama-stack + app.kubernetes.io/component: server ports: - 
name: llama-stack-api port: 8321 targetPort: 8321 protocol: TCP - - name: llama-stack-ui - port: 8322 - targetPort: 8322 - protocol: TCP diff --git a/docs/source/distributions/k8s/set-secret.yaml.template b/docs/source/distributions/k8s/set-secret.yaml.template index e020e3076..c35710668 100644 --- a/docs/source/distributions/k8s/set-secret.yaml.template +++ b/docs/source/distributions/k8s/set-secret.yaml.template @@ -1,3 +1,6 @@ +# ------------------------------------------------- +# Hugging Face Token Secret +# ------------------------------------------------- apiVersion: v1 kind: Secret metadata: @@ -5,24 +8,22 @@ metadata: type: Opaque data: token: ${HF_TOKEN_BASE64} - +--- # ------------------------------------------------- # NGC Docker Registry Secret # ------------------------------------------------- - -apiVersion: apps/v1 +apiVersion: v1 kind: Secret metadata: name: ngc-docker-registry type: kubernetes.io/dockerconfigjson data: .dockerconfigjson: ${NGC_DOCKER_CONFIG_JSON} - +--- # ------------------------------------------------- # NGC API Secret # ------------------------------------------------- - -apiVersion: apps/v1 +apiVersion: v1 kind: Secret metadata: name: ngc-api diff --git a/docs/source/distributions/k8s/stack-k8s.yaml.template b/docs/source/distributions/k8s/stack-k8s.yaml.template index 1ac8d743e..9f2d7fc55 100644 --- a/docs/source/distributions/k8s/stack-k8s.yaml.template +++ b/docs/source/distributions/k8s/stack-k8s.yaml.template @@ -34,17 +34,16 @@ spec: command: ['sh', '-c', 'until nc -z llm-nim-code.default.svc.cluster.local 8000; do echo waiting for llm-nim-code on port 8001; sleep 2; done;'] containers: - name: llama-stack - image: llamastack/distribution-starter:latest - imagePullPolicy: Always # since we have specified latest instead of a version + image: llamastack/distribution-starter:0.2.15 resources: requests: memory: "512Mi" cpu: "500m" - ephemeral-storage: "2Gi" + ephemeral-storage: "6Gi" limits: memory: "1Gi" cpu: "1000m" - 
ephemeral-storage: "5Gi" + ephemeral-storage: "6Gi" env: - name: ENABLE_CHROMADB value: "true" diff --git a/docs/source/distributions/k8s/ui-k8s.yaml.template b/docs/source/distributions/k8s/ui-k8s.yaml.template index a83544567..b92aadb4d 100644 --- a/docs/source/distributions/k8s/ui-k8s.yaml.template +++ b/docs/source/distributions/k8s/ui-k8s.yaml.template @@ -35,7 +35,7 @@ spec: apt-get update && apt-get install -y git # Clone the repository git clone https://github.com/meta-llama/llama-stack.git /app - + git -C /app checkout k8s_demo # Navigate to the playground directory cd /app/llama_stack/distribution/ui @@ -45,4 +45,4 @@ spec: # Run the Streamlit app streamlit run app.py --server.port=8322 --server.address=0.0.0.0 ports: - - containerPort: 8501 + - containerPort: 8322 diff --git a/docs/source/distributions/k8s/ui-service-k8s.yaml.template b/docs/source/distributions/k8s/ui-service-k8s.yaml.template new file mode 100644 index 000000000..712716f65 --- /dev/null +++ b/docs/source/distributions/k8s/ui-service-k8s.yaml.template @@ -0,0 +1,14 @@ +apiVersion: v1 +kind: Service +metadata: + name: llama-stack-ui-service +spec: + type: LoadBalancer + selector: + app.kubernetes.io/name: llama-stack + app.kubernetes.io/component: ui + ports: + - name: llama-stack-ui + port: 8322 + targetPort: 8322 + protocol: TCP