working now

This commit is contained in:
Kai Wu 2025-07-31 10:19:53 -07:00
parent 1cb9d3bca2
commit b63982ef00
6 changed files with 59 additions and 23 deletions

View file

@ -21,7 +21,8 @@ export CODE_MODEL=bigcode/starcoder2-7b
# Set USE_EBS to false if you don't have permission to use EKS EBS
export USE_EBS=${USE_EBS:-false}
set -euo pipefail
set -x
# HF_TOKEN should be set by the user; base64 encode it for the secret
if [ -n "${HF_TOKEN:-}" ]; then
export HF_TOKEN_BASE64=$(echo -n "$HF_TOKEN" | base64)
@ -50,13 +51,35 @@ fi
# Apply the HF token secret if HF_TOKEN is provided
if [ -n "${HF_TOKEN:-}" ]; then
envsubst < ./set-secret.yaml.template | kubectl apply -f -
fi
# Create secrets and capture any errors
echo "Creating Kubernetes secrets..."
SECRET_OUTPUT=$(envsubst < ./set-secret.yaml.template | kubectl apply -f - 2>&1) || {
echo "ERROR: Failed to create secrets. Error output:"
echo "$SECRET_OUTPUT"
exit 1
}
set -euo pipefail
set -x
# Wait a moment for Kubernetes to process the secrets
echo "Waiting for secrets to be processed..."
sleep 2
# Verify that the secrets were created successfully
echo "Verifying that secrets were created successfully..."
# Check each secret with better error handling
for SECRET_NAME in "hf-token-secret" "ngc-docker-registry" "ngc-api"; do
echo "Checking for secret: $SECRET_NAME"
if ! kubectl get secret "$SECRET_NAME" &> /dev/null; then
echo "ERROR: Secret '$SECRET_NAME' not found in the cluster."
echo "Secret creation output was:"
echo "$SECRET_OUTPUT"
echo "Current secrets in the namespace:"
kubectl get secrets
exit 1
fi
done
echo "Secret verification successful. All required secrets are present."
# Apply templates with appropriate storage configuration based on USE_EBS setting
if [ "$USE_EBS" = "true" ]; then
echo "Using EBS storage for persistent volumes"
@ -73,6 +96,7 @@ if [ "$USE_EBS" = "true" ]; then
envsubst < ./stack-k8s.yaml.template | kubectl apply -f -
envsubst < ./ingress-k8s.yaml.template | kubectl apply -f -
envsubst < ./ui-k8s.yaml.template | kubectl apply -f -
envsubst < ./ui-service-k8s.yaml.template | kubectl apply -f -
else
echo "Using emptyDir for storage (data will not persist across pod restarts)"
# Process templates to replace EBS storage with emptyDir
@ -90,4 +114,5 @@ else
envsubst < ./stack-k8s.yaml.template | sed 's/persistentVolumeClaim:/emptyDir: {}/g' | sed '/claimName:/d' | kubectl apply -f -
envsubst < ./ingress-k8s.yaml.template | sed 's/persistentVolumeClaim:/emptyDir: {}/g' | sed '/claimName:/d' | kubectl apply -f -
envsubst < ./ui-k8s.yaml.template | sed 's/persistentVolumeClaim:/emptyDir: {}/g' | sed '/claimName:/d' | kubectl apply -f -
envsubst < ./ui-service-k8s.yaml.template | kubectl apply -f -
fi

View file

@ -6,12 +6,9 @@ spec:
type: LoadBalancer
selector:
app.kubernetes.io/name: llama-stack
app.kubernetes.io/component: server
ports:
- name: llama-stack-api
port: 8321
targetPort: 8321
protocol: TCP
- name: llama-stack-ui
port: 8322
targetPort: 8322
protocol: TCP

View file

@ -1,3 +1,6 @@
# -------------------------------------------------
# Hugging Face Token Secret
# -------------------------------------------------
apiVersion: v1
kind: Secret
metadata:
@ -5,24 +8,22 @@ metadata:
type: Opaque
data:
token: ${HF_TOKEN_BASE64}
---
# -------------------------------------------------
# NGC Docker Registry Secret
# -------------------------------------------------
apiVersion: apps/v1
apiVersion: v1
kind: Secret
metadata:
name: ngc-docker-registry
type: kubernetes.io/dockerconfigjson
data:
.dockerconfigjson: ${NGC_DOCKER_CONFIG_JSON}
---
# -------------------------------------------------
# NGC API Secret
# -------------------------------------------------
apiVersion: apps/v1
apiVersion: v1
kind: Secret
metadata:
name: ngc-api

View file

@ -34,17 +34,16 @@ spec:
command: ['sh', '-c', 'until nc -z llm-nim-code.default.svc.cluster.local 8000; do echo waiting for llm-nim-code on port 8001; sleep 2; done;']
containers:
- name: llama-stack
image: llamastack/distribution-starter:latest
imagePullPolicy: Always # since we have specified latest instead of a version
image: llamastack/distribution-starter:0.2.15
resources:
requests:
memory: "512Mi"
cpu: "500m"
ephemeral-storage: "2Gi"
ephemeral-storage: "6Gi"
limits:
memory: "1Gi"
cpu: "1000m"
ephemeral-storage: "5Gi"
ephemeral-storage: "6Gi"
env:
- name: ENABLE_CHROMADB
value: "true"

View file

@ -35,7 +35,7 @@ spec:
apt-get update && apt-get install -y git
# Clone the repository
git clone https://github.com/meta-llama/llama-stack.git /app
git checkout k8s_demo
# Navigate to the playground directory
cd /app/llama_stack/distribution/ui
@ -45,4 +45,4 @@ spec:
# Run the Streamlit app
streamlit run app.py --server.port=8322 --server.address=0.0.0.0
ports:
- containerPort: 8501
- containerPort: 8322

View file

@ -0,0 +1,14 @@
# -------------------------------------------------
# Llama Stack UI Service
# -------------------------------------------------
# Standalone Service dedicated to the UI: it publishes TCP port 8322 for the
# pods matched by the selector below.
apiVersion: v1
kind: Service
metadata:
name: llama-stack-ui-service
spec:
# LoadBalancer asks the cluster's provider for an externally reachable
# endpoint instead of a cluster-internal-only address.
type: LoadBalancer
selector:
# Both labels must match, so only pods labelled as the "ui" component of
# llama-stack receive traffic from this Service.
app.kubernetes.io/name: llama-stack
app.kubernetes.io/component: ui
ports:
- name: llama-stack-ui
# Service port and pod port are kept identical (8322 -> 8322).
# NOTE(review): 8322 is expected to be the port the UI container listens on;
# confirm it stays in sync with the UI deployment's containerPort.
port: 8322
targetPort: 8322
protocol: TCP