ci: integrate vLLM inference tests with GitHub Actions workflows

Add vLLM provider support to the integration test CI workflows alongside the existing Ollama support. Configure provider-specific test execution so that vLLM runs only inference-specific tests (excluding vision tests) while Ollama continues to run the full test suite. This enables comprehensive CI testing of both inference providers while keeping the vLLM footprint small; coverage can be expanded later if it proves not to be too disruptive. Signed-off-by: Derek Higgins <derekh@redhat.com>
2025-08-15 14:08:00 +00:00 · 2025-08-13 14:19:52 +01:00 · 2025-08-13 14:19:52 +01:00 · 1f29aaa2e1
commit 1f29aaa2e1
parent 91a010fb12
3 changed files with 46 additions and 42 deletions
--- a/.github/actions/run-and-record-tests/action.yml
+++ b/.github/actions/run-and-record-tests/action.yml
@ -52,9 +52,9 @@ runs:
          git add tests/integration/recordings/
          if [ "${{ inputs.run-vision-tests }}" == "true" ]; then
-            git commit -m "Recordings update from CI (vision)"
+            git commit -m "Recordings update from CI (vision) (${{ inputs.provider }})"
          else
-            git commit -m "Recordings update from CI"
+            git commit -m "Recordings update from CI (${{ inputs.provider }})"
          fi
          git fetch origin ${{ github.event.pull_request.head.ref }}
@ -70,7 +70,8 @@ runs:
      if: ${{ always() }}
      shell: bash
      run: |
-        sudo docker logs ollama > ollama-${{ inputs.inference-mode }}.log || true
+        sudo docker logs ollama > ollama-${{ inputs.inference-mode }}.log 2>&1 || true
        sudo docker logs vllm > vllm-${{ inputs.inference-mode }}.log 2>&1 || true
    - name: Upload logs
      if: ${{ always() }}
--- a/.github/workflows/integration-tests.yml
+++ b/.github/workflows/integration-tests.yml
@ -20,7 +20,6 @@ on:
  schedule:
    # If changing the cron schedule, update the provider in the test-matrix job
    - cron: '0 0 * * *'  # (test latest client) Daily at 12 AM UTC
    - cron: '1 0 * * 0'  # (test vllm) Weekly on Sunday at 1 AM UTC
  workflow_dispatch:
    inputs:
      test-all-client-versions:
@ -38,28 +37,7 @@ concurrency:
  cancel-in-progress: true
 jobs:
  discover-tests:
    runs-on: ubuntu-latest
    outputs:
      test-types: ${{ steps.generate-test-types.outputs.test-types }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
      - name: Generate test types
        id: generate-test-types
        run: |
          # Get test directories dynamically, excluding non-test directories
          # NOTE: we are excluding post_training since the tests take too long
          TEST_TYPES=$(find tests/integration -maxdepth 1 -mindepth 1 -type d |
            sed 's|tests/integration/||' |
            grep -Ev "^(__pycache__|fixtures|test_cases|recordings|non_ci|post_training)$" |
            sort | jq -R -s -c 'split("\n")[:-1]')
          echo "test-types=$TEST_TYPES" >> $GITHUB_OUTPUT
  run-replay-mode-tests:
    needs: discover-tests
    runs-on: ubuntu-latest
    name: ${{ format('Integration Tests ({0}, {1}, {2}, client={3}, vision={4})', matrix.client-type, matrix.provider, matrix.python-version, matrix.client-version, matrix.run-vision-tests) }}
@ -68,11 +46,14 @@ jobs:
      matrix:
        client-type: [library, server]
        # Run every job against both ollama and vllm (vllm vision runs are excluded below)
-        provider: ${{ (github.event.schedule == '1 0 * * 0') && fromJSON('["vllm"]') || fromJSON(format('["{0}"]', github.event.inputs.test-provider || 'ollama')) }}
+        provider: [ollama, vllm]
        # Use Python 3.13 only on nightly schedule (daily latest client test), otherwise use 3.12
        python-version: ${{ github.event.schedule == '0 0 * * *' && fromJSON('["3.12", "3.13"]') || fromJSON('["3.12"]') }}
        client-version: ${{ (github.event.schedule == '0 0 * * *' || github.event.inputs.test-all-client-versions == 'true') && fromJSON('["published", "latest"]') || fromJSON('["latest"]') }}
        run-vision-tests: [true, false]
        exclude:
          - provider: vllm
            run-vision-tests: true
    steps:
      - name: Checkout repository
@ -87,10 +68,27 @@ jobs:
          run-vision-tests: ${{ matrix.run-vision-tests }}
          inference-mode: 'replay'
      - name: Generate test types
        id: generate-test-types
        run: |
          # Emit the `test-types` step output: a JSON array of integration test
          # directory names that the "Run tests" step passes to the test action.
          #
          # Only run inference tests for vllm as these are more likely to exercise the vllm provider
          # TODO: Add agent tests for vllm
          # The expression is quoted so the comparison stays well-formed even if
          # the provider value is empty or contains whitespace.
          if [ "${{ matrix.provider }}" == "vllm" ]; then
            echo "test-types=[\"inference\"]" >> "$GITHUB_OUTPUT"
            exit 0
          fi

          # Get test directories dynamically, excluding non-test directories
          # NOTE: we are excluding post_training since the tests take too long
          TEST_TYPES=$(find tests/integration -maxdepth 1 -mindepth 1 -type d |
            sed 's|tests/integration/||' |
            grep -Ev "^(__pycache__|fixtures|test_cases|recordings|non_ci|post_training)$" |
            sort | jq -R -s -c 'split("\n")[:-1]')
          echo "test-types=$TEST_TYPES" >> "$GITHUB_OUTPUT"
      - name: Run tests
        uses: ./.github/actions/run-and-record-tests
        with:
-          test-types: ${{ needs.discover-tests.outputs.test-types }}
+          test-types: ${{ steps.generate-test-types.outputs.test-types }}
          stack-config: ${{ matrix.client-type == 'library' && 'ci-tests' || 'server:ci-tests' }}
          provider: ${{ matrix.provider }}
          inference-mode: 'replay'
--- a/.github/workflows/record-integration-tests.yml
+++ b/.github/workflows/record-integration-tests.yml
@ -15,12 +15,6 @@ on:
      - '.github/actions/setup-ollama/action.yml'
      - '.github/actions/setup-test-environment/action.yml'
      - '.github/actions/run-and-record-tests/action.yml'
  workflow_dispatch:
    inputs:
      test-provider:
        description: 'Test against a specific provider'
        type: string
        default: 'ollama'
 concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
@ -42,12 +36,6 @@ jobs:
      - name: Generate test types
        id: generate-test-types
        run: |
          # Get test directories dynamically, excluding non-test directories
          TEST_TYPES=$(find tests/integration -maxdepth 1 -mindepth 1 -type d -printf "%f\n" |
            grep -Ev "^(__pycache__|fixtures|test_cases|recordings|post_training)$" |
            sort | jq -R -s -c 'split("\n")[:-1]')
          echo "test-types=$TEST_TYPES" >> $GITHUB_OUTPUT
          labels=$(gh pr view ${{ github.event.pull_request.number }} --json labels --jq '.labels[].name')
          echo "labels=$labels"
@ -82,6 +70,10 @@ jobs:
      fail-fast: false
      matrix:
        # Recording modes are read from the discover-tests job output and parsed with fromJSON.
        mode: ${{ fromJSON(needs.discover-tests.outputs.matrix-modes) }}
        # Each mode is combined with each inference provider listed here.
        provider: [ollama, vllm]
        # Drop the vision + vllm combination so vllm is never run in vision mode.
        exclude:
          - mode: vision
            provider: vllm
    steps:
      - name: Checkout repository
@ -90,20 +82,33 @@ jobs:
          ref: ${{ github.event.pull_request.head.ref }}
          fetch-depth: 0
      - name: Generate test types
        id: generate-test-types
        run: |
          # Emit the `test-types` step output: a JSON array of integration test
          # directory names that the "Run and record tests" step passes on.
          # The provider expression is quoted so the comparison stays well-formed
          # even if the value is empty or contains whitespace.
          if [ "${{ matrix.provider }}" == "vllm" ]; then
            # vllm only records the inference tests.
            echo "test-types=[\"inference\"]" >> "$GITHUB_OUTPUT"
          else
            # Get test directories dynamically, excluding non-test directories
            TEST_TYPES=$(find tests/integration -maxdepth 1 -mindepth 1 -type d -printf "%f\n" |
              grep -Ev "^(__pycache__|fixtures|test_cases|recordings|non_ci|post_training)$" |
              sort | jq -R -s -c 'split("\n")[:-1]')
            echo "test-types=$TEST_TYPES" >> "$GITHUB_OUTPUT"
          fi
      - name: Setup test environment
        uses: ./.github/actions/setup-test-environment
        with:
          python-version: "3.12"  # Use single Python version for recording
          client-version: "latest"
-          provider: ${{ inputs.test-provider || 'ollama' }}
+          provider: ${{ matrix.provider }}
          run-vision-tests: ${{ matrix.mode == 'vision' && 'true' || 'false' }}
          inference-mode: 'record'
      - name: Run and record tests
        uses: ./.github/actions/run-and-record-tests
        with:
-          test-types: ${{ needs.discover-tests.outputs.test-types }}
+          test-types: ${{ steps.generate-test-types.outputs.test-types }}
          stack-config: 'server:ci-tests'  # recording must be done with server since more tests are run
-          provider: ${{ inputs.test-provider || 'ollama' }}
+          provider: ${{ matrix.provider }}
          inference-mode: 'record'
          run-vision-tests: ${{ matrix.mode == 'vision' && 'true' || 'false' }}