ci: integrate vLLM inference tests with GitHub Actions workflows

Add vLLM provider support to the integration test CI workflows alongside
the existing Ollama support. Configure provider-specific test execution so
that vLLM runs only the inference-specific tests (excluding vision tests),
while Ollama continues to run the full test suite.

This enables CI testing of both inference providers while keeping the
vLLM footprint small; coverage can be expanded later if it proves not to
be too disruptive.

Signed-off-by: Derek Higgins <derekh@redhat.com>
This commit is contained in:
Derek Higgins 2025-08-13 14:19:52 +01:00
parent 666d6a6fc0
commit 746e9c91a9
3 changed files with 27 additions and 7 deletions

View file

@ -68,7 +68,8 @@ runs:
echo "New recordings detected, committing and pushing" echo "New recordings detected, committing and pushing"
git add tests/integration/recordings/ git add tests/integration/recordings/
git commit -m "Recordings update from CI (suite: ${{ inputs.suite }})" git commit -m "Recordings update from CI (setup: ${{ inputs.setup }}, suite: ${{ inputs.suite }})"
git fetch origin ${{ github.ref_name }} git fetch origin ${{ github.ref_name }}
git rebase origin/${{ github.ref_name }} git rebase origin/${{ github.ref_name }}
echo "Rebased successfully" echo "Rebased successfully"
@ -82,7 +83,8 @@ runs:
if: ${{ always() }} if: ${{ always() }}
shell: bash shell: bash
run: | run: |
sudo docker logs ollama > ollama-${{ inputs.inference-mode }}.log || true sudo docker logs ollama > ollama-${{ inputs.inference-mode }}.log 2>&1 || true
sudo docker logs vllm > vllm-${{ inputs.inference-mode }}.log 2>&1 || true
- name: Upload logs - name: Upload logs
if: ${{ always() }} if: ${{ always() }}

View file

@ -21,7 +21,6 @@ on:
schedule: schedule:
# If changing the cron schedule, update the provider in the test-matrix job # If changing the cron schedule, update the provider in the test-matrix job
- cron: '0 0 * * *' # (test latest client) Daily at 12 AM UTC - cron: '0 0 * * *' # (test latest client) Daily at 12 AM UTC
- cron: '1 0 * * 0' # (test vllm) Weekly on Sunday at 1 AM UTC
workflow_dispatch: workflow_dispatch:
inputs: inputs:
test-all-client-versions: test-all-client-versions:
@ -48,24 +47,38 @@ jobs:
fail-fast: false fail-fast: false
matrix: matrix:
client-type: [library, server] client-type: [library, server]
# Use vllm on weekly schedule, otherwise use test-setup input (defaults to ollama)
setup: ${{ (github.event.schedule == '1 0 * * 0') && fromJSON('["vllm"]') || fromJSON(format('["{0}"]', github.event.inputs.test-setup || 'ollama')) }}
# Use Python 3.13 only on nightly schedule (daily latest client test), otherwise use 3.12 # Use Python 3.13 only on nightly schedule (daily latest client test), otherwise use 3.12
python-version: ${{ github.event.schedule == '0 0 * * *' && fromJSON('["3.12", "3.13"]') || fromJSON('["3.12"]') }} python-version: ${{ github.event.schedule == '0 0 * * *' && fromJSON('["3.12", "3.13"]') || fromJSON('["3.12"]') }}
client-version: ${{ (github.event.schedule == '0 0 * * *' || github.event.inputs.test-all-client-versions == 'true') && fromJSON('["published", "latest"]') || fromJSON('["latest"]') }} client-version: ${{ (github.event.schedule == '0 0 * * *' || github.event.inputs.test-all-client-versions == 'true') && fromJSON('["published", "latest"]') || fromJSON('["latest"]') }}
setup: [ollama, vllm]
suite: [base, vision] suite: [base, vision]
exclude:
- setup: vllm
suite: vision
steps: steps:
- name: Checkout repository - name: Checkout repository
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
# Rewrites the suite name for this matrix entry: when setup is "vllm" and
# suite is "base", the suite becomes "base-vllm-subset"; every other
# combination passes through unchanged. The result is exposed as the step
# output `suite` (steps.update-matrix.outputs.suite) for later steps.
# This could in theory be done in the matrix, but it was getting too complex.
- name: Update Matrix
  id: update-matrix
  env:
    # Pass matrix values through the environment instead of interpolating
    # the expressions directly into the script text.
    MATRIX_SETUP: ${{ matrix.setup }}
    MATRIX_SUITE: ${{ matrix.suite }}
  run: |
    REWRITTEN_SUITE="${MATRIX_SUITE}"
    if [[ "${MATRIX_SETUP}" == "vllm" && "${MATRIX_SUITE}" == "base" ]]; then
      REWRITTEN_SUITE="base-vllm-subset"
    fi
    # Quote the output file path so it is never word-split or globbed.
    echo "suite=${REWRITTEN_SUITE}" >> "$GITHUB_OUTPUT"
    echo "Rewritten suite: ${REWRITTEN_SUITE}"
- name: Setup test environment - name: Setup test environment
uses: ./.github/actions/setup-test-environment uses: ./.github/actions/setup-test-environment
with: with:
python-version: ${{ matrix.python-version }} python-version: ${{ matrix.python-version }}
client-version: ${{ matrix.client-version }} client-version: ${{ matrix.client-version }}
setup: ${{ matrix.setup }} setup: ${{ matrix.setup }}
suite: ${{ matrix.suite }} suite: ${{ steps.update-matrix.outputs.suite }}
inference-mode: 'replay' inference-mode: 'replay'
- name: Run tests - name: Run tests
@ -74,4 +87,4 @@ jobs:
stack-config: ${{ matrix.client-type == 'library' && 'ci-tests' || 'server:ci-tests' }} stack-config: ${{ matrix.client-type == 'library' && 'ci-tests' || 'server:ci-tests' }}
setup: ${{ matrix.setup }} setup: ${{ matrix.setup }}
inference-mode: 'replay' inference-mode: 'replay'
suite: ${{ matrix.suite }} suite: ${{ steps.update-matrix.outputs.suite }}

View file

@ -147,6 +147,11 @@ SUITE_DEFINITIONS: dict[str, Suite] = {
roots=base_roots, roots=base_roots,
default_setup="ollama", default_setup="ollama",
), ),
"base-vllm-subset": Suite(
name="base-vllm-subset",
roots=["tests/integration/inference"],
default_setup="vllm",
),
"responses": Suite( "responses": Suite(
name="responses", name="responses",
roots=["tests/integration/responses"], roots=["tests/integration/responses"],