working now

2025-10-23 16:37:28 +00:00 · 2025-07-31 10:19:53 -07:00 · 2025-07-31 10:19:53 -07:00 · b63982ef00
commit b63982ef00
parent 1cb9d3bca2
6 changed files with 59 additions and 23 deletions
--- a/docs/source/distributions/k8s/apply.sh
+++ b/docs/source/distributions/k8s/apply.sh
@ -21,7 +21,8 @@ export CODE_MODEL=bigcode/starcoder2-7b

 # Set USE_EBS to false if you don't have permission to use EKS EBS
 export USE_EBS=${USE_EBS:-false}
-
+set -euo pipefail  # abort on command errors, unset variables and failed pipeline stages
+# No 'set -x' here: xtrace would print the raw HF_TOKEN in the step below; run 'bash -x apply.sh' to debug
 # HF_TOKEN should be set by the user; base64 encode it for the secret
 if [ -n "${HF_TOKEN:-}" ]; then
  export HF_TOKEN_BASE64=$(echo -n "$HF_TOKEN" | base64)
@ -50,13 +51,35 @@ fi



-# Apply the HF token secret if HF_TOKEN is provided
-if [ -n "${HF_TOKEN:-}" ]; then
-  envsubst < ./set-secret.yaml.template | kubectl apply -f -
-fi
+# Create secrets, capturing kubectl's combined output so it can be shown on failure
+echo "Creating Kubernetes secrets..."
+SECRET_OUTPUT=$(envsubst < ./set-secret.yaml.template | kubectl apply -f - 2>&1) || {
+  echo "ERROR: Failed to create secrets. Error output:" >&2
+  echo "$SECRET_OUTPUT" >&2
+  exit 1
+}

-set -euo pipefail
-set -x
+# Wait a moment for Kubernetes to process the secrets
+echo "Waiting for secrets to be processed..."
+sleep 2
+
+# Verify that the secrets were created successfully
+echo "Verifying that secrets were created successfully..."
+
+# Fail fast if any expected secret is missing; diagnostics go to stderr
+for SECRET_NAME in "hf-token-secret" "ngc-docker-registry" "ngc-api"; do
+  echo "Checking for secret: $SECRET_NAME"
+  if ! kubectl get secret "$SECRET_NAME" &> /dev/null; then
+    echo "ERROR: Secret '$SECRET_NAME' not found in the cluster." >&2
+    echo "Secret creation output was:" >&2
+    echo "$SECRET_OUTPUT" >&2
+    echo "Current secrets in the namespace:" >&2
+    kubectl get secrets >&2
+    exit 1
+  fi
+done
+
+echo "Secret verification successful. All required secrets are present."
 # Apply templates with appropriate storage configuration based on USE_EBS setting
 if [ "$USE_EBS" = "true" ]; then
  echo "Using EBS storage for persistent volumes"
@ -73,6 +96,7 @@ if [ "$USE_EBS" = "true" ]; then
  envsubst < ./stack-k8s.yaml.template | kubectl apply -f -
  envsubst < ./ingress-k8s.yaml.template | kubectl apply -f -
  envsubst < ./ui-k8s.yaml.template | kubectl apply -f -
+  envsubst < ./ui-service-k8s.yaml.template | kubectl apply -f -  # LoadBalancer Service for the UI (port 8322)
 else
  echo "Using emptyDir for storage (data will not persist across pod restarts)"
  # Process templates to replace EBS storage with emptyDir
@ -90,4 +114,5 @@ else
  envsubst < ./stack-k8s.yaml.template | sed 's/persistentVolumeClaim:/emptyDir: {}/g' | sed '/claimName:/d' | kubectl apply -f -
  envsubst < ./ingress-k8s.yaml.template | sed 's/persistentVolumeClaim:/emptyDir: {}/g' | sed '/claimName:/d' | kubectl apply -f -
  envsubst < ./ui-k8s.yaml.template | sed 's/persistentVolumeClaim:/emptyDir: {}/g' | sed '/claimName:/d' | kubectl apply -f -
+  envsubst < ./ui-service-k8s.yaml.template | kubectl apply -f -  # no sed rewrite: the Service template declares no volumes
 fi
--- a/docs/source/distributions/k8s/ingress-k8s.yaml.template
+++ b/docs/source/distributions/k8s/ingress-k8s.yaml.template
@ -6,12 +6,9 @@ spec:
  type: LoadBalancer
  selector:
    app.kubernetes.io/name: llama-stack
+    app.kubernetes.io/component: server  # narrow to server pods; the UI is selected via component: ui by its own Service
  ports:
    - name: llama-stack-api
      port: 8321
      targetPort: 8321
      protocol: TCP
-    - name: llama-stack-ui
-      port: 8322
-      targetPort: 8322
-      protocol: TCP
--- a/docs/source/distributions/k8s/set-secret.yaml.template
+++ b/docs/source/distributions/k8s/set-secret.yaml.template
@ -1,3 +1,6 @@
+# -------------------------------------------------
+# Hugging Face Token Secret
+# -------------------------------------------------
 apiVersion: v1
 kind: Secret
 metadata:
@ -5,24 +8,22 @@ metadata:
 type: Opaque
 data:
  token: ${HF_TOKEN_BASE64}
-
+---
 # -------------------------------------------------
 # NGC Docker Registry Secret
 # -------------------------------------------------
-
-apiVersion: apps/v1
+apiVersion: v1  # Secret is a core v1 resource; apps/v1 does not serve it
 kind: Secret
 metadata:
  name: ngc-docker-registry
 type: kubernetes.io/dockerconfigjson
 data:
  .dockerconfigjson: ${NGC_DOCKER_CONFIG_JSON}
-
+---
 # -------------------------------------------------
 # NGC API Secret
 # -------------------------------------------------
-
-apiVersion: apps/v1
+apiVersion: v1  # Secret is a core v1 resource; apps/v1 does not serve it
 kind: Secret
 metadata:
  name: ngc-api
--- a/docs/source/distributions/k8s/stack-k8s.yaml.template
+++ b/docs/source/distributions/k8s/stack-k8s.yaml.template
@ -34,17 +34,16 @@ spec:
        command: ['sh', '-c', 'until nc -z llm-nim-code.default.svc.cluster.local 8000; do echo waiting for llm-nim-code on port 8001; sleep 2; done;']
      containers:
      - name: llama-stack
-        image: llamastack/distribution-starter:latest
-        imagePullPolicy: Always # since we have specified latest instead of a version
+        image: llamastack/distribution-starter:0.2.15  # pinned tag; with imagePullPolicy omitted Kubernetes defaults to IfNotPresent
        resources:
          requests:
            memory: "512Mi"
            cpu: "500m"
-            ephemeral-storage: "2Gi"
+            ephemeral-storage: "6Gi"  # request now equals the limit below
          limits:
            memory: "1Gi"
            cpu: "1000m"
-            ephemeral-storage: "5Gi"
+            ephemeral-storage: "6Gi"
        env:
        - name: ENABLE_CHROMADB
          value: "true"
--- a/docs/source/distributions/k8s/ui-k8s.yaml.template
+++ b/docs/source/distributions/k8s/ui-k8s.yaml.template
@ -35,7 +35,7 @@ spec:
            apt-get update && apt-get install -y git
            # Clone the repository
            git clone https://github.com/meta-llama/llama-stack.git /app
-
+            git -C /app checkout k8s_demo  # -C targets the clone; this runs before any cd into /app
            # Navigate to the playground directory
            cd /app/llama_stack/distribution/ui

@ -45,4 +45,4 @@ spec:
            # Run the Streamlit app
            streamlit run app.py --server.port=8322 --server.address=0.0.0.0
        ports:
-        - containerPort: 8501
+        - containerPort: 8322  # matches the --server.port passed to streamlit above
--- a/docs/source/distributions/k8s/ui-service-k8s.yaml.template
+++ b/docs/source/distributions/k8s/ui-service-k8s.yaml.template
@ -0,0 +1,14 @@
+apiVersion: v1
+kind: Service
+metadata:
+  name: llama-stack-ui-service
+spec:
+  type: LoadBalancer  # same Service type as the API Service in ingress-k8s.yaml.template
+  selector:
+    app.kubernetes.io/name: llama-stack
+    app.kubernetes.io/component: ui  # selects only UI pods; the API Service selects component: server
+  ports:
+    - name: llama-stack-ui
+      port: 8322
+      targetPort: 8322  # container port declared in ui-k8s.yaml.template
+      protocol: TCP