more fixes, gah

Ashwin Bharambe 2025-06-01 17:07:18 -07:00
parent 6f4f51f8d9
commit 6cbb3366f2
3 changed files with 16 additions and 1 deletion

View file

@@ -31,6 +31,8 @@ spec:
env:
- name: ENABLE_CHROMADB
value: "true"
- name: CHROMADB_URL
value: http://chromadb.default.svc.cluster.local:6000
- name: VLLM_URL
value: http://vllm-server.default.svc.cluster.local:8000/v1
- name: VLLM_MAX_TOKENS
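
The new CHROMADB_URL points at http://chromadb.default.svc.cluster.local:6000, i.e. it assumes a Service named chromadb in the default namespace exposing port 6000. A minimal sketch of such a Service (the selector label and the backing targetPort are assumptions, not part of this commit):

apiVersion: v1
kind: Service
metadata:
  name: chromadb
spec:
  selector:
    app.kubernetes.io/name: chromadb   # assumed label on the ChromaDB pods
  ports:
  - protocol: TCP
    port: 6000         # matches the port in CHROMADB_URL
    targetPort: 6000   # assumed container port
  type: ClusterIP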

View file

@@ -57,3 +57,16 @@ spec:
- name: llama-storage
persistentVolumeClaim:
claimName: vllm-models
---
apiVersion: v1
kind: Service
metadata:
name: vllm-server
spec:
selector:
app.kubernetes.io/name: vllm
ports:
- protocol: TCP
port: 8000
targetPort: 8000
type: ClusterIP
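
This new ClusterIP Service is what makes the VLLM_URL from the first file (http://vllm-server.default.svc.cluster.local:8000/v1) resolvable inside the cluster. One way to check it end to end is a throwaway Pod that curls vLLM's OpenAI-compatible /v1/models endpoint; a sketch, with the Pod name and curl image chosen here as assumptions:

apiVersion: v1
kind: Pod
metadata:
  name: vllm-smoke-test
spec:
  restartPolicy: Never
  containers:
  - name: curl
    image: curlimages/curl   # any image with curl would do
    # arguments are passed straight to curl by the image's entrypoint
    args: ["-s", "http://vllm-server.default.svc.cluster.local:8000/v1/models"]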

View file

@@ -41,7 +41,7 @@ spec:
image: vllm/vllm-openai:latest
command: ["/bin/sh", "-c"]
args: [
"vllm serve ${SAFETY_MODEL} --dtype float16 --enforce-eager --max-model-len 4096 --gpu-memory-utilization 0.3"
"vllm serve ${SAFETY_MODEL} --dtype float16 --enforce-eager --max-model-len 4096 --port 8001 --gpu-memory-utilization 0.3"
]
env:
- name: HUGGING_FACE_HUB_TOKEN
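
With --port 8001, the safety model's server no longer listens on vLLM's default port 8000, so anything that talks to it (a Service and the corresponding safety URL in the stack config) has to target 8001. A sketch of a matching Service, where the name and selector label are assumptions since the safety Deployment's metadata is not shown in this diff:

apiVersion: v1
kind: Service
metadata:
  name: vllm-server-safety   # assumed name
spec:
  selector:
    app.kubernetes.io/name: vllm-safety   # assumed label on the safety pods
  ports:
  - protocol: TCP
    port: 8001
    targetPort: 8001   # matches --port 8001 above
  type: ClusterIP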