more fixes, gah

This commit is contained in:
Ashwin Bharambe 2025-06-01 17:07:18 -07:00
parent 6f4f51f8d9
commit 6cbb3366f2
3 changed files with 16 additions and 1 deletions

View file

@@ -41,7 +41,7 @@ spec:
image: vllm/vllm-openai:latest
command: ["/bin/sh", "-c"]
args: [
"vllm serve ${SAFETY_MODEL} --dtype float16 --enforce-eager --max-model-len 4096 --gpu-memory-utilization 0.3"
"vllm serve ${SAFETY_MODEL} --dtype float16 --enforce-eager --max-model-len 4096 --port 8001 --gpu-memory-utilization 0.3"
]
env:
- name: HUGGING_FACE_HUB_TOKEN