docs: Simplify vLLM deployment in K8s deployment guide

Signed-off-by: Yuan Tang <terrytangyuan@gmail.com>
This commit is contained in:
Yuan Tang 2025-03-15 11:15:12 -04:00
parent 60ae7455f6
commit 556089cc3c
No known key found for this signature in database

View file

@ -8,7 +8,7 @@ First, create a local Kubernetes cluster via Kind:
kind create cluster --image kindest/node:v1.32.0 --name llama-stack-test kind create cluster --image kindest/node:v1.32.0 --name llama-stack-test
``` ```
Start vLLM server as a Kubernetes Pod and Service: First, create a Kubernetes PVC and Secret for downloading and storing the Hugging Face model:
```bash ```bash
cat <<EOF |kubectl apply -f - cat <<EOF |kubectl apply -f -
@ -31,7 +31,12 @@ metadata:
type: Opaque type: Opaque
data: data:
token: $(HF_TOKEN) token: $(HF_TOKEN)
--- ```
Next, start the vLLM server as a Kubernetes Deployment and Service:
```bash
cat <<EOF |kubectl apply -f -
apiVersion: apps/v1 apiVersion: apps/v1
kind: Deployment kind: Deployment
metadata: metadata:
@ -47,28 +52,23 @@ spec:
app.kubernetes.io/name: vllm app.kubernetes.io/name: vllm
spec: spec:
containers: containers:
- name: llama-stack - name: vllm
image: $(VLLM_IMAGE) image: vllm/vllm-openai:latest
command: command: ["/bin/sh", "-c"]
- bash args: [
- -c "vllm serve meta-llama/Llama-3.2-1B-Instruct"
- | ]
MODEL="meta-llama/Llama-3.2-1B-Instruct" env:
MODEL_PATH=/app/model/$(basename $MODEL) - name: HUGGING_FACE_HUB_TOKEN
huggingface-cli login --token $HUGGING_FACE_HUB_TOKEN valueFrom:
huggingface-cli download $MODEL --local-dir $MODEL_PATH --cache-dir $MODEL_PATH secretKeyRef:
python3 -m vllm.entrypoints.openai.api_server --model $MODEL_PATH --served-model-name $MODEL --port 8000 name: hf-token-secret
key: token
ports: ports:
- containerPort: 8000 - containerPort: 8000
volumeMounts: volumeMounts:
- name: llama-storage - name: llama-storage
mountPath: /app/model mountPath: /root/.cache/huggingface
env:
- name: HUGGING_FACE_HUB_TOKEN
valueFrom:
secretKeyRef:
name: hf-token-secret
key: token
volumes: volumes:
- name: llama-storage - name: llama-storage
persistentVolumeClaim: persistentVolumeClaim: