mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-08-02 00:34:44 +00:00
working now
This commit is contained in:
parent
1cb9d3bca2
commit
b63982ef00
6 changed files with 59 additions and 23 deletions
|
@ -21,7 +21,8 @@ export CODE_MODEL=bigcode/starcoder2-7b
|
||||||
|
|
||||||
# Set USE_EBS to false if you don't have permission to use EKS EBS
|
# Set USE_EBS to false if you don't have permission to use EKS EBS
|
||||||
export USE_EBS=${USE_EBS:-false}
|
export USE_EBS=${USE_EBS:-false}
|
||||||
|
set -euo pipefail
|
||||||
|
set -x
|
||||||
# HF_TOKEN should be set by the user; base64 encode it for the secret
|
# HF_TOKEN should be set by the user; base64 encode it for the secret
|
||||||
if [ -n "${HF_TOKEN:-}" ]; then
|
if [ -n "${HF_TOKEN:-}" ]; then
|
||||||
export HF_TOKEN_BASE64=$(echo -n "$HF_TOKEN" | base64)
|
export HF_TOKEN_BASE64=$(echo -n "$HF_TOKEN" | base64)
|
||||||
|
@ -50,13 +51,35 @@ fi
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# Apply the HF token secret if HF_TOKEN is provided
|
# Create secrets and capture any errors
|
||||||
if [ -n "${HF_TOKEN:-}" ]; then
|
echo "Creating Kubernetes secrets..."
|
||||||
envsubst < ./set-secret.yaml.template | kubectl apply -f -
|
SECRET_OUTPUT=$(envsubst < ./set-secret.yaml.template | kubectl apply -f - 2>&1) || {
|
||||||
fi
|
echo "ERROR: Failed to create secrets. Error output:"
|
||||||
|
echo "$SECRET_OUTPUT"
|
||||||
|
exit 1
|
||||||
|
}
|
||||||
|
|
||||||
set -euo pipefail
|
# Wait a moment for Kubernetes to process the secrets
|
||||||
set -x
|
echo "Waiting for secrets to be processed..."
|
||||||
|
sleep 2
|
||||||
|
|
||||||
|
# Verify that the secrets were created successfully
|
||||||
|
echo "Verifying that secrets were created successfully..."
|
||||||
|
|
||||||
|
# Check each secret with better error handling
|
||||||
|
for SECRET_NAME in "hf-token-secret" "ngc-docker-registry" "ngc-api"; do
|
||||||
|
echo "Checking for secret: $SECRET_NAME"
|
||||||
|
if ! kubectl get secret "$SECRET_NAME" &> /dev/null; then
|
||||||
|
echo "ERROR: Secret '$SECRET_NAME' not found in the cluster."
|
||||||
|
echo "Secret creation output was:"
|
||||||
|
echo "$SECRET_OUTPUT"
|
||||||
|
echo "Current secrets in the namespace:"
|
||||||
|
kubectl get secrets
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
|
||||||
|
echo "Secret verification successful. All required secrets are present."
|
||||||
# Apply templates with appropriate storage configuration based on USE_EBS setting
|
# Apply templates with appropriate storage configuration based on USE_EBS setting
|
||||||
if [ "$USE_EBS" = "true" ]; then
|
if [ "$USE_EBS" = "true" ]; then
|
||||||
echo "Using EBS storage for persistent volumes"
|
echo "Using EBS storage for persistent volumes"
|
||||||
|
@ -73,6 +96,7 @@ if [ "$USE_EBS" = "true" ]; then
|
||||||
envsubst < ./stack-k8s.yaml.template | kubectl apply -f -
|
envsubst < ./stack-k8s.yaml.template | kubectl apply -f -
|
||||||
envsubst < ./ingress-k8s.yaml.template | kubectl apply -f -
|
envsubst < ./ingress-k8s.yaml.template | kubectl apply -f -
|
||||||
envsubst < ./ui-k8s.yaml.template | kubectl apply -f -
|
envsubst < ./ui-k8s.yaml.template | kubectl apply -f -
|
||||||
|
envsubst < ./ui-service-k8s.yaml.template | kubectl apply -f -
|
||||||
else
|
else
|
||||||
echo "Using emptyDir for storage (data will not persist across pod restarts)"
|
echo "Using emptyDir for storage (data will not persist across pod restarts)"
|
||||||
# Process templates to replace EBS storage with emptyDir
|
# Process templates to replace EBS storage with emptyDir
|
||||||
|
@ -90,4 +114,5 @@ else
|
||||||
envsubst < ./stack-k8s.yaml.template | sed 's/persistentVolumeClaim:/emptyDir: {}/g' | sed '/claimName:/d' | kubectl apply -f -
|
envsubst < ./stack-k8s.yaml.template | sed 's/persistentVolumeClaim:/emptyDir: {}/g' | sed '/claimName:/d' | kubectl apply -f -
|
||||||
envsubst < ./ingress-k8s.yaml.template | sed 's/persistentVolumeClaim:/emptyDir: {}/g' | sed '/claimName:/d' | kubectl apply -f -
|
envsubst < ./ingress-k8s.yaml.template | sed 's/persistentVolumeClaim:/emptyDir: {}/g' | sed '/claimName:/d' | kubectl apply -f -
|
||||||
envsubst < ./ui-k8s.yaml.template | sed 's/persistentVolumeClaim:/emptyDir: {}/g' | sed '/claimName:/d' | kubectl apply -f -
|
envsubst < ./ui-k8s.yaml.template | sed 's/persistentVolumeClaim:/emptyDir: {}/g' | sed '/claimName:/d' | kubectl apply -f -
|
||||||
|
envsubst < ./ui-service-k8s.yaml.template | kubectl apply -f -
|
||||||
fi
|
fi
|
||||||
|
|
|
@ -6,12 +6,9 @@ spec:
|
||||||
type: LoadBalancer
|
type: LoadBalancer
|
||||||
selector:
|
selector:
|
||||||
app.kubernetes.io/name: llama-stack
|
app.kubernetes.io/name: llama-stack
|
||||||
|
app.kubernetes.io/component: server
|
||||||
ports:
|
ports:
|
||||||
- name: llama-stack-api
|
- name: llama-stack-api
|
||||||
port: 8321
|
port: 8321
|
||||||
targetPort: 8321
|
targetPort: 8321
|
||||||
protocol: TCP
|
protocol: TCP
|
||||||
- name: llama-stack-ui
|
|
||||||
port: 8322
|
|
||||||
targetPort: 8322
|
|
||||||
protocol: TCP
|
|
||||||
|
|
|
@ -1,3 +1,6 @@
|
||||||
|
# -------------------------------------------------
|
||||||
|
# Hugging Face Token Secret
|
||||||
|
# -------------------------------------------------
|
||||||
apiVersion: v1
|
apiVersion: v1
|
||||||
kind: Secret
|
kind: Secret
|
||||||
metadata:
|
metadata:
|
||||||
|
@ -5,24 +8,22 @@ metadata:
|
||||||
type: Opaque
|
type: Opaque
|
||||||
data:
|
data:
|
||||||
token: ${HF_TOKEN_BASE64}
|
token: ${HF_TOKEN_BASE64}
|
||||||
|
---
|
||||||
# -------------------------------------------------
|
# -------------------------------------------------
|
||||||
# NGC Docker Registry Secret
|
# NGC Docker Registry Secret
|
||||||
# -------------------------------------------------
|
# -------------------------------------------------
|
||||||
|
apiVersion: v1
|
||||||
apiVersion: apps/v1
|
|
||||||
kind: Secret
|
kind: Secret
|
||||||
metadata:
|
metadata:
|
||||||
name: ngc-docker-registry
|
name: ngc-docker-registry
|
||||||
type: kubernetes.io/dockerconfigjson
|
type: kubernetes.io/dockerconfigjson
|
||||||
data:
|
data:
|
||||||
.dockerconfigjson: ${NGC_DOCKER_CONFIG_JSON}
|
.dockerconfigjson: ${NGC_DOCKER_CONFIG_JSON}
|
||||||
|
---
|
||||||
# -------------------------------------------------
|
# -------------------------------------------------
|
||||||
# NGC API Secret
|
# NGC API Secret
|
||||||
# -------------------------------------------------
|
# -------------------------------------------------
|
||||||
|
apiVersion: v1
|
||||||
apiVersion: apps/v1
|
|
||||||
kind: Secret
|
kind: Secret
|
||||||
metadata:
|
metadata:
|
||||||
name: ngc-api
|
name: ngc-api
|
||||||
|
|
|
@ -34,17 +34,16 @@ spec:
|
||||||
command: ['sh', '-c', 'until nc -z llm-nim-code.default.svc.cluster.local 8000; do echo waiting for llm-nim-code on port 8001; sleep 2; done;']
|
command: ['sh', '-c', 'until nc -z llm-nim-code.default.svc.cluster.local 8000; do echo waiting for llm-nim-code on port 8001; sleep 2; done;']
|
||||||
containers:
|
containers:
|
||||||
- name: llama-stack
|
- name: llama-stack
|
||||||
image: llamastack/distribution-starter:latest
|
image: llamastack/distribution-starter:0.2.15
|
||||||
imagePullPolicy: Always # since we have specified latest instead of a version
|
|
||||||
resources:
|
resources:
|
||||||
requests:
|
requests:
|
||||||
memory: "512Mi"
|
memory: "512Mi"
|
||||||
cpu: "500m"
|
cpu: "500m"
|
||||||
ephemeral-storage: "2Gi"
|
ephemeral-storage: "6Gi"
|
||||||
limits:
|
limits:
|
||||||
memory: "1Gi"
|
memory: "1Gi"
|
||||||
cpu: "1000m"
|
cpu: "1000m"
|
||||||
ephemeral-storage: "5Gi"
|
ephemeral-storage: "6Gi"
|
||||||
env:
|
env:
|
||||||
- name: ENABLE_CHROMADB
|
- name: ENABLE_CHROMADB
|
||||||
value: "true"
|
value: "true"
|
||||||
|
|
|
@ -35,7 +35,7 @@ spec:
|
||||||
apt-get update && apt-get install -y git
|
apt-get update && apt-get install -y git
|
||||||
# Clone the repository
|
# Clone the repository
|
||||||
git clone https://github.com/meta-llama/llama-stack.git /app
|
git clone https://github.com/meta-llama/llama-stack.git /app
|
||||||
|
git checkout k8s_demo
|
||||||
# Navigate to the playground directory
|
# Navigate to the playground directory
|
||||||
cd /app/llama_stack/distribution/ui
|
cd /app/llama_stack/distribution/ui
|
||||||
|
|
||||||
|
@ -45,4 +45,4 @@ spec:
|
||||||
# Run the Streamlit app
|
# Run the Streamlit app
|
||||||
streamlit run app.py --server.port=8322 --server.address=0.0.0.0
|
streamlit run app.py --server.port=8322 --server.address=0.0.0.0
|
||||||
ports:
|
ports:
|
||||||
- containerPort: 8501
|
- containerPort: 8322
|
||||||
|
|
14
docs/source/distributions/k8s/ui-service-k8s.yaml.template
Normal file
14
docs/source/distributions/k8s/ui-service-k8s.yaml.template
Normal file
|
@ -0,0 +1,14 @@
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: llama-stack-ui-service
|
||||||
|
spec:
|
||||||
|
type: LoadBalancer
|
||||||
|
selector:
|
||||||
|
app.kubernetes.io/name: llama-stack
|
||||||
|
app.kubernetes.io/component: ui
|
||||||
|
ports:
|
||||||
|
- name: llama-stack-ui
|
||||||
|
port: 8322
|
||||||
|
targetPort: 8322
|
||||||
|
protocol: TCP
|
Loading…
Add table
Add a link
Reference in a new issue