Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-07-13 00:26:10 +00:00)
make it work on gpus
Commit f99ca37f91 (parent ee96c4891b)
7 changed files with 30 additions and 21 deletions
@@ -16,6 +16,14 @@ export SAFETY_MODEL=${SAFETY_MODEL:-meta-llama/Llama-Guard-3-1B}
 set -euo pipefail
 set -x
 
+# Install NVIDIA device plugin for GPU support
+echo "Installing NVIDIA device plugin..."
+kubectl apply -f https://raw.githubusercontent.com/NVIDIA/k8s-device-plugin/refs/tags/v0.17.2/deployments/static/nvidia-device-plugin.yml
+
+# Wait for NVIDIA device plugin to be ready
+echo "Waiting for NVIDIA device plugin to be ready..."
+kubectl wait --for=condition=ready pod -l name=nvidia-device-plugin-ds -n kube-system --timeout=300s
+
 envsubst < ./vllm-k8s.yaml.template | kubectl apply -f -
 envsubst < ./vllm-safety-k8s.yaml.template | kubectl apply -f -
 envsubst < ./postgres-k8s.yaml.template | kubectl apply -f -
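Once the device plugin pods report Ready, the script applies the vLLM, vLLM safety, and Postgres manifests as before. A quick way to confirm that the plugin actually registered GPUs with the scheduler is to inspect each node's allocatable resources. This check is not part of the commit, only an illustrative sketch; the GPUS column name is arbitrary.

# Sanity check (not in this commit): list each node with its allocatable nvidia.com/gpu count.
kubectl get nodes -o custom-columns='NAME:.metadata.name,GPUS:.status.allocatable.nvidia\.com/gpu'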