This commit is contained in:
Kai Wu 2025-08-03 14:01:27 -07:00
parent 4e19f15bca
commit dcc47c2008
6 changed files with 351 additions and 8927 deletions

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large Load diff

View file

@ -39,7 +39,7 @@ spec:
image: vllm/vllm-openai:latest
command: ["/bin/sh", "-c"]
args:
- "vllm serve ${INFERENCE_MODEL} --enforce-eager --max-model-len 8192 --gpu-memory-utilization 0.7 --enable-auto-tool-choice --tool-call-parser llama3_json --max-num-seqs 4 --port 8001"
- "vllm serve ${INFERENCE_MODEL} --enforce-eager --max-model-len 100000 --gpu-memory-utilization 0.9 --enable-auto-tool-choice --tool-call-parser llama3_json --max-num-seqs 2 --port 8001"
env:
- name: INFERENCE_MODEL
value: "${INFERENCE_MODEL}"