# llama-stack-mirror/.github/actions/setup-vllm/action.yml

# Composite action: starts a vLLM server (vllm-cpu image) in a detached Docker
# container named "vllm", publishes it on host port 8000, and blocks until
# GET /health succeeds. The step fails if the server is not healthy within
# 900 seconds or if the container stops running while we wait.
name: Setup VLLM
description: Start VLLM
runs:
  using: "composite"
  steps:
    - name: Start VLLM
      shell: bash
      run: |
        # Start vllm container.
        # Everything after the image reference is passed to the container as
        # server arguments. The model path is inside the container; presumably
        # the weights are baked into the image (TODO confirm).
        docker run -d \
          --name vllm \
          -p 8000:8000 \
          --privileged=true \
          quay.io/higginsd/vllm-cpu:65393ee064-qwen3 \
          --host 0.0.0.0 \
          --port 8000 \
          --enable-auto-tool-choice \
          --tool-call-parser hermes \
          --model /root/.cache/Qwen3-0.6B \
          --served-model-name Qwen/Qwen3-0.6B \
          --max-model-len 8192

        # Wait for vllm to be ready.
        # Poll /health every 5 seconds for at most 900 seconds. The polling
        # script is single-quoted, so it must not contain apostrophes.
        echo "Waiting for vllm to be ready..."
        if ! timeout 900 bash -c '
          until curl -sf http://localhost:8000/health; do
            # Fail fast when the container has stopped instead of polling
            # until the timeout expires.
            if [ "$(docker inspect -f "{{.State.Running}}" vllm 2>/dev/null)" != "true" ]; then
              echo "vllm container is not running"
              exit 1
            fi
            echo "Waiting for vllm..."
            sleep 5
          done
        '; then
          # Surface the server output so a failed startup can be diagnosed
          # from the CI log.
          echo "vllm failed to become ready; container logs:"
          docker logs vllm || true
          exit 1
        fi