ci: integrate vLLM inference tests with GitHub Actions workflows

Add vLLM provider support to the integration test CI workflows alongside the existing Ollama support. Configure provider-specific test execution so that vLLM runs only inference-specific tests (excluding vision tests) while Ollama continues to run the full test suite. This enables comprehensive CI testing of both inference providers while keeping the vLLM footprint small; the vLLM coverage can be expanded later if it proves not to be too disruptive. Signed-off-by: Derek Higgins <derekh@redhat.com>
2025-10-03 19:57:35 +00:00 · 2025-08-13 14:19:52 +01:00 · 2025-08-13 14:19:52 +01:00 · 746e9c91a9
commit 746e9c91a9
parent 666d6a6fc0
3 changed files with 27 additions and 7 deletions
--- a/.github/actions/run-and-record-tests/action.yml
+++ b/.github/actions/run-and-record-tests/action.yml
@ -68,7 +68,8 @@ runs:
          echo "New recordings detected, committing and pushing"
          git add tests/integration/recordings/
-          git commit -m "Recordings update from CI (suite: ${{ inputs.suite }})"
+          git commit -m "Recordings update from CI (setup: ${{ inputs.setup }}, suite: ${{ inputs.suite }})"
          git fetch origin ${{ github.ref_name }}
          git rebase origin/${{ github.ref_name }}
          echo "Rebased successfully"
@ -82,7 +83,8 @@ runs:
      if: ${{ always() }}
      shell: bash
      run: |
-        sudo docker logs ollama > ollama-${{ inputs.inference-mode }}.log || true
+        sudo docker logs ollama > ollama-${{ inputs.inference-mode }}.log 2>&1 || true
        sudo docker logs vllm > vllm-${{ inputs.inference-mode }}.log 2>&1 || true
    - name: Upload logs
      if: ${{ always() }}
--- a/.github/workflows/integration-tests.yml
+++ b/.github/workflows/integration-tests.yml
@ -21,7 +21,6 @@ on:
  schedule:
    # If changing the cron schedule, update the provider in the test-matrix job
    - cron: '0 0 * * *'  # (test latest client) Daily at 12 AM UTC
    - cron: '1 0 * * 0'  # (test vllm) Weekly on Sunday at 1 AM UTC
  workflow_dispatch:
    inputs:
      test-all-client-versions:
@ -48,24 +47,38 @@ jobs:
      fail-fast: false
      matrix:
        client-type: [library, server]
        # Use vllm on weekly schedule, otherwise use test-setup input (defaults to ollama)
        setup: ${{ (github.event.schedule == '1 0 * * 0') && fromJSON('["vllm"]') || fromJSON(format('["{0}"]', github.event.inputs.test-setup || 'ollama')) }}
        # Use Python 3.13 only on nightly schedule (daily latest client test), otherwise use 3.12
        python-version: ${{ github.event.schedule == '0 0 * * *' && fromJSON('["3.12", "3.13"]') || fromJSON('["3.12"]') }}
        client-version: ${{ (github.event.schedule == '0 0 * * *' || github.event.inputs.test-all-client-versions == 'true') && fromJSON('["published", "latest"]') || fromJSON('["latest"]') }}
        setup: [ollama, vllm]
        suite: [base, vision]
        exclude:
          - setup: vllm
            suite: vision
    steps:
      - name: Checkout repository
        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
      # Derive the suite name that is actually run from the (setup, suite) matrix
      # pair: the vllm/base combination is rewritten to "base-vllm-subset"; every
      # other combination keeps its matrix suite unchanged.
      # This could in theory be done in the matrix, but it was getting too complex
      - name: Update Matrix
        id: update-matrix
        # Pass matrix values through the environment instead of interpolating
        # the expressions directly into the script text.
        env:
          MATRIX_SETUP: ${{ matrix.setup }}
          MATRIX_SUITE: ${{ matrix.suite }}
        run: |
          REWRITTEN_SUITE="${MATRIX_SUITE}"
          if [[ "${MATRIX_SETUP}" == "vllm" && "${MATRIX_SUITE}" == "base" ]]; then
            REWRITTEN_SUITE="base-vllm-subset"
          fi
          # Later steps read this value as steps.update-matrix.outputs.suite
          echo "suite=${REWRITTEN_SUITE}" >> "$GITHUB_OUTPUT"
          echo "Rewritten suite: ${REWRITTEN_SUITE}"
      - name: Setup test environment
        uses: ./.github/actions/setup-test-environment
        with:
          python-version: ${{ matrix.python-version }}
          client-version: ${{ matrix.client-version }}
          setup: ${{ matrix.setup }}
-          suite: ${{ matrix.suite }}
+          suite: ${{ steps.update-matrix.outputs.suite }}
          inference-mode: 'replay'
      - name: Run tests
@ -74,4 +87,4 @@ jobs:
          stack-config: ${{ matrix.client-type == 'library' && 'ci-tests' || 'server:ci-tests' }}
          setup: ${{ matrix.setup }}
          inference-mode: 'replay'
-          suite: ${{ matrix.suite }}
+          suite: ${{ steps.update-matrix.outputs.suite }}
--- a/tests/integration/suites.py
+++ b/tests/integration/suites.py
@ -147,6 +147,11 @@ SUITE_DEFINITIONS: dict[str, Suite] = {
        roots=base_roots,
        default_setup="ollama",
    ),
    "base-vllm-subset": Suite(
        name="base-vllm-subset",
        roots=["tests/integration/inference"],
        default_setup="vllm",
    ),
    "responses": Suite(
        name="responses",
        roots=["tests/integration/responses"],