# Composite action: starts a vLLM server in a detached Docker container named
# "vllm", publishes it on localhost:8000, and blocks until its /health
# endpoint answers successfully (or fails the step after 900 seconds).
name: Setup VLLM
description: Start VLLM
runs:
  using: "composite"
  steps:
    - name: Start VLLM
      shell: bash
      run: |
        # Start vllm container (detached; the arguments after the image name
        # are passed through to the container's entrypoint).
        # NOTE(review): --privileged=true gives the container broad access to
        # the host; confirm the image actually requires it.
        docker run -d \
          --name vllm \
          -p 8000:8000 \
          --privileged=true \
          quay.io/higginsd/vllm-cpu:65393ee064-qwen3 \
          --host 0.0.0.0 \
          --port 8000 \
          --enable-auto-tool-choice \
          --tool-call-parser hermes \
          --model /root/.cache/Qwen3-0.6B \
          --served-model-name Qwen/Qwen3-0.6B \
          --max-model-len 8192

        # Wait for vllm to be ready: poll /health every 5 seconds for at most
        # 900 seconds. The loop aborts early if the container has stopped, so
        # a crashed server does not burn the whole timeout.
        # curl flags: -f fail on HTTP errors, -sS hide the progress meter but
        # still print error messages.
        echo "Waiting for vllm to be ready..."
        if ! timeout 900 bash -c '
          until curl -fsS http://localhost:8000/health; do
            if [ "$(docker inspect -f "{{.State.Running}}" vllm 2>/dev/null)" != "true" ]; then
              echo "vllm container is no longer running" >&2
              exit 1
            fi
            echo "Waiting for vllm..."
            sleep 5
          done
        '; then
          # Surface the server output so the failure can be diagnosed from
          # the job log; "|| true" keeps a logs error from masking the cause.
          echo "vllm did not become ready; container logs follow:" >&2
          docker logs vllm || true
          exit 1
        fi