Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-08-07 02:58:21 +00:00)

Pod -> Deployment, NodePort -> ClusterIP

Signed-off-by: Yuan Tang <terrytangyuan@gmail.com>

Commit ddba43fada (parent abad607bf8): 1 changed file with 67 additions and 52 deletions
@@ -32,40 +32,47 @@ type: Opaque
 data:
   token: "<YOUR-HF-TOKEN>"
 ---
-apiVersion: v1
-kind: Pod
+apiVersion: apps/v1
+kind: Deployment
 metadata:
   name: vllm-server
-  labels:
-    app: vllm
 spec:
-  containers:
-  - name: llama-stack
-    image: <VLLM-IMAGE>
-    command:
-    - bash
-    - -c
-    - |
-      MODEL="meta-llama/Llama-3.2-1B-Instruct"
-      MODEL_PATH=/app/model/$(basename $MODEL)
-      huggingface-cli login --token $HUGGING_FACE_HUB_TOKEN
-      huggingface-cli download $MODEL --local-dir $MODEL_PATH --cache-dir $MODEL_PATH
-      python3 -m vllm.entrypoints.openai.api_server --model $MODEL_PATH --served-model-name $MODEL --port 8000
-    ports:
-    - containerPort: 8000
-    volumeMounts:
-    - name: llama-storage
-      mountPath: /app/model
-    env:
-    - name: HUGGING_FACE_HUB_TOKEN
-      valueFrom:
-        secretKeyRef:
-          name: hf-token-secret
-          key: token
-  volumes:
-  - name: llama-storage
-    persistentVolumeClaim:
-      claimName: vllm-models
+  replicas: 1
+  selector:
+    matchLabels:
+      app.kubernetes.io/name: vllm
+  template:
+    metadata:
+      labels:
+        app.kubernetes.io/name: vllm
+    spec:
+      containers:
+      - name: llama-stack
+        image: <VLLM-IMAGE>
+        command:
+        - bash
+        - -c
+        - |
+          MODEL="meta-llama/Llama-3.2-1B-Instruct"
+          MODEL_PATH=/app/model/$(basename $MODEL)
+          huggingface-cli login --token $HUGGING_FACE_HUB_TOKEN
+          huggingface-cli download $MODEL --local-dir $MODEL_PATH --cache-dir $MODEL_PATH
+          python3 -m vllm.entrypoints.openai.api_server --model $MODEL_PATH --served-model-name $MODEL --port 8000
+        ports:
+        - containerPort: 8000
+        volumeMounts:
+        - name: llama-storage
+          mountPath: /app/model
+        env:
+        - name: HUGGING_FACE_HUB_TOKEN
+          valueFrom:
+            secretKeyRef:
+              name: hf-token-secret
+              key: token
+      volumes:
+      - name: llama-storage
+        persistentVolumeClaim:
+          claimName: vllm-models
 ---
 apiVersion: v1
 kind: Service
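A practical consequence of this hunk: the vLLM server is now managed by a Deployment rather than a bare Pod, so the pod name is generated by the ReplicaSet and the rollout can be awaited explicitly. A minimal sketch of verifying it, assuming the manifest above is applied unchanged (Deployment named vllm-server, pods labeled app.kubernetes.io/name=vllm):

```
# Wait for the vllm-server Deployment to finish rolling out
kubectl rollout status deployment/vllm-server --timeout=10m

# Pod names are generated now, so select by label rather than by name
kubectl logs -l app.kubernetes.io/name=vllm --tail=20
```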
@@ -73,11 +80,12 @@ metadata:
   name: vllm-server
 spec:
   selector:
-    app: vllm
+    app.kubernetes.io/name: vllm
   ports:
-  - port: 8000
+  - protocol: TCP
+    port: 8000
     targetPort: 8000
-  type: NodePort
+  type: ClusterIP
 EOF
 ```
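With type: ClusterIP the vLLM Service is only reachable from inside the cluster, unlike the previous NodePort. For local testing, port-forwarding is one option; a sketch assuming the Service above (vllm-server on port 8000, fronting vLLM's OpenAI-compatible API):

```
# Forward a local port to the in-cluster Service
kubectl port-forward service/vllm-server 8000:8000

# In a second shell, confirm the served model is listed
curl http://localhost:8000/v1/models
```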
@@ -135,27 +143,34 @@ spec:
   requests:
     storage: 1Gi
 ---
-apiVersion: v1
-kind: Pod
+apiVersion: apps/v1
+kind: Deployment
 metadata:
-  name: llama-stack-pod
-  labels:
-    app: llama-stack
+  name: llama-stack-server
 spec:
-  containers:
-  - name: llama-stack
-    image: localhost/llama-stack-run-k8s:latest
-    imagePullPolicy: IfNotPresent
-    command: ["python", "-m", "llama_stack.distribution.server.server", "--yaml-config", "/app/config.yaml"]
-    ports:
-    - containerPort: 5000
-    volumeMounts:
-    - name: llama-storage
-      mountPath: /root/.llama
-  volumes:
-  - name: llama-storage
-    persistentVolumeClaim:
-      claimName: llama-pvc
+  replicas: 1
+  selector:
+    matchLabels:
+      app.kubernetes.io/name: llama-stack
+  template:
+    metadata:
+      labels:
+        app.kubernetes.io/name: llama-stack
+    spec:
+      containers:
+      - name: llama-stack
+        image: localhost/llama-stack-run-k8s:latest
+        imagePullPolicy: IfNotPresent
+        command: ["python", "-m", "llama_stack.distribution.server.server", "--yaml-config", "/app/config.yaml"]
+        ports:
+        - containerPort: 5000
+        volumeMounts:
+        - name: llama-storage
+          mountPath: /root/.llama
+      volumes:
+      - name: llama-storage
+        persistentVolumeClaim:
+          claimName: llama-pvc
 ---
 apiVersion: v1
 kind: Service
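One side effect of this hunk worth noting: the selector moved from the ad-hoc app: llama-stack label to the recommended app.kubernetes.io/name key, so any existing label-based queries need the new key. Illustrative only:

```
# The old label no longer matches the server pods
kubectl get pods -l app=llama-stack

# Query with the new recommended label instead
kubectl get pods -l app.kubernetes.io/name=llama-stack
```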
@@ -163,7 +178,7 @@ metadata:
   name: llama-stack-service
 spec:
   selector:
-    app: llama-stack
+    app.kubernetes.io/name: llama-stack
   ports:
   - protocol: TCP
     port: 5000
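The llama-stack Service likewise stays cluster-internal, so in-cluster clients can reach it at llama-stack-service:5000 directly, while a workstation can tunnel in the same way as for vLLM. A sketch, assuming the Service definition above:

```
# Expose the llama-stack API on localhost:5000
kubectl port-forward service/llama-stack-service 5000:5000
```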