ci: integrate vLLM inference tests with GitHub Actions workflows

Add vLLM provider support to the integration test CI workflows alongside
the existing Ollama support. Configure provider-specific test execution so
that vLLM runs only inference-specific tests (excluding vision tests), while
Ollama continues to run the full test suite.

This enables comprehensive CI testing of both inference providers while
keeping the vLLM footprint small; vLLM coverage can be expanded later if it
proves not to be too disruptive.

Signed-off-by: Derek Higgins <derekh@redhat.com>
This commit is contained in:
Derek Higgins 2025-08-13 14:19:52 +01:00
parent 91a010fb12
commit 1f29aaa2e1
3 changed files with 46 additions and 42 deletions

View file

@ -52,9 +52,9 @@ runs:
git add tests/integration/recordings/ git add tests/integration/recordings/
if [ "${{ inputs.run-vision-tests }}" == "true" ]; then if [ "${{ inputs.run-vision-tests }}" == "true" ]; then
git commit -m "Recordings update from CI (vision)" git commit -m "Recordings update from CI (vision) (${{ inputs.provider }})"
else else
git commit -m "Recordings update from CI" git commit -m "Recordings update from CI (${{ inputs.provider }})"
fi fi
git fetch origin ${{ github.event.pull_request.head.ref }} git fetch origin ${{ github.event.pull_request.head.ref }}
@ -70,7 +70,8 @@ runs:
if: ${{ always() }} if: ${{ always() }}
shell: bash shell: bash
run: | run: |
sudo docker logs ollama > ollama-${{ inputs.inference-mode }}.log || true sudo docker logs ollama > ollama-${{ inputs.inference-mode }}.log 2>&1 || true
sudo docker logs vllm > vllm-${{ inputs.inference-mode }}.log 2>&1 || true
- name: Upload logs - name: Upload logs
if: ${{ always() }} if: ${{ always() }}

View file

@ -20,7 +20,6 @@ on:
schedule: schedule:
# If changing the cron schedule, update the provider in the test-matrix job # If changing the cron schedule, update the provider in the test-matrix job
- cron: '0 0 * * *' # (test latest client) Daily at 12 AM UTC - cron: '0 0 * * *' # (test latest client) Daily at 12 AM UTC
- cron: '1 0 * * 0' # (test vllm) Weekly on Sunday at 1 AM UTC
workflow_dispatch: workflow_dispatch:
inputs: inputs:
test-all-client-versions: test-all-client-versions:
@ -38,28 +37,7 @@ concurrency:
cancel-in-progress: true cancel-in-progress: true
jobs: jobs:
discover-tests:
runs-on: ubuntu-latest
outputs:
test-types: ${{ steps.generate-test-types.outputs.test-types }}
steps:
- name: Checkout repository
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- name: Generate test types
id: generate-test-types
run: |
# Get test directories dynamically, excluding non-test directories
# NOTE: we are excluding post_training since the tests take too long
TEST_TYPES=$(find tests/integration -maxdepth 1 -mindepth 1 -type d |
sed 's|tests/integration/||' |
grep -Ev "^(__pycache__|fixtures|test_cases|recordings|non_ci|post_training)$" |
sort | jq -R -s -c 'split("\n")[:-1]')
echo "test-types=$TEST_TYPES" >> $GITHUB_OUTPUT
run-replay-mode-tests: run-replay-mode-tests:
needs: discover-tests
runs-on: ubuntu-latest runs-on: ubuntu-latest
name: ${{ format('Integration Tests ({0}, {1}, {2}, client={3}, vision={4})', matrix.client-type, matrix.provider, matrix.python-version, matrix.client-version, matrix.run-vision-tests) }} name: ${{ format('Integration Tests ({0}, {1}, {2}, client={3}, vision={4})', matrix.client-type, matrix.provider, matrix.python-version, matrix.client-version, matrix.run-vision-tests) }}
@ -68,11 +46,14 @@ jobs:
matrix: matrix:
client-type: [library, server] client-type: [library, server]
# Use vllm on weekly schedule, otherwise use test-provider input (defaults to ollama) # Use vllm on weekly schedule, otherwise use test-provider input (defaults to ollama)
provider: ${{ (github.event.schedule == '1 0 * * 0') && fromJSON('["vllm"]') || fromJSON(format('["{0}"]', github.event.inputs.test-provider || 'ollama')) }} provider: [ollama, vllm]
# Use Python 3.13 only on nightly schedule (daily latest client test), otherwise use 3.12 # Use Python 3.13 only on nightly schedule (daily latest client test), otherwise use 3.12
python-version: ${{ github.event.schedule == '0 0 * * *' && fromJSON('["3.12", "3.13"]') || fromJSON('["3.12"]') }} python-version: ${{ github.event.schedule == '0 0 * * *' && fromJSON('["3.12", "3.13"]') || fromJSON('["3.12"]') }}
client-version: ${{ (github.event.schedule == '0 0 * * *' || github.event.inputs.test-all-client-versions == 'true') && fromJSON('["published", "latest"]') || fromJSON('["latest"]') }} client-version: ${{ (github.event.schedule == '0 0 * * *' || github.event.inputs.test-all-client-versions == 'true') && fromJSON('["published", "latest"]') || fromJSON('["latest"]') }}
run-vision-tests: [true, false] run-vision-tests: [true, false]
exclude:
- provider: vllm
run-vision-tests: true
steps: steps:
- name: Checkout repository - name: Checkout repository
@ -87,10 +68,27 @@ jobs:
run-vision-tests: ${{ matrix.run-vision-tests }} run-vision-tests: ${{ matrix.run-vision-tests }}
inference-mode: 'replay' inference-mode: 'replay'
- name: Generate test types
id: generate-test-types
run: |
# Only run inference tests for vllm as these are more likely to exercise the vllm provider
# TODO: Add agent tests for vllm
if [ ${{ matrix.provider }} == "vllm" ]; then
echo "test-types=[\"inference\"]" >> $GITHUB_OUTPUT
exit 0
fi
# Get test directories dynamically, excluding non-test directories
# NOTE: we are excluding post_training since the tests take too long
TEST_TYPES=$(find tests/integration -maxdepth 1 -mindepth 1 -type d |
sed 's|tests/integration/||' |
grep -Ev "^(__pycache__|fixtures|test_cases|recordings|non_ci|post_training)$" |
sort | jq -R -s -c 'split("\n")[:-1]')
echo "test-types=$TEST_TYPES" >> $GITHUB_OUTPUT
- name: Run tests - name: Run tests
uses: ./.github/actions/run-and-record-tests uses: ./.github/actions/run-and-record-tests
with: with:
test-types: ${{ needs.discover-tests.outputs.test-types }} test-types: ${{ steps.generate-test-types.outputs.test-types }}
stack-config: ${{ matrix.client-type == 'library' && 'ci-tests' || 'server:ci-tests' }} stack-config: ${{ matrix.client-type == 'library' && 'ci-tests' || 'server:ci-tests' }}
provider: ${{ matrix.provider }} provider: ${{ matrix.provider }}
inference-mode: 'replay' inference-mode: 'replay'

View file

@ -15,12 +15,6 @@ on:
- '.github/actions/setup-ollama/action.yml' - '.github/actions/setup-ollama/action.yml'
- '.github/actions/setup-test-environment/action.yml' - '.github/actions/setup-test-environment/action.yml'
- '.github/actions/run-and-record-tests/action.yml' - '.github/actions/run-and-record-tests/action.yml'
workflow_dispatch:
inputs:
test-provider:
description: 'Test against a specific provider'
type: string
default: 'ollama'
concurrency: concurrency:
group: ${{ github.workflow }}-${{ github.ref }} group: ${{ github.workflow }}-${{ github.ref }}
@ -42,12 +36,6 @@ jobs:
- name: Generate test types - name: Generate test types
id: generate-test-types id: generate-test-types
run: | run: |
# Get test directories dynamically, excluding non-test directories
TEST_TYPES=$(find tests/integration -maxdepth 1 -mindepth 1 -type d -printf "%f\n" |
grep -Ev "^(__pycache__|fixtures|test_cases|recordings|post_training)$" |
sort | jq -R -s -c 'split("\n")[:-1]')
echo "test-types=$TEST_TYPES" >> $GITHUB_OUTPUT
labels=$(gh pr view ${{ github.event.pull_request.number }} --json labels --jq '.labels[].name') labels=$(gh pr view ${{ github.event.pull_request.number }} --json labels --jq '.labels[].name')
echo "labels=$labels" echo "labels=$labels"
@ -82,6 +70,10 @@ jobs:
fail-fast: false fail-fast: false
matrix: matrix:
mode: ${{ fromJSON(needs.discover-tests.outputs.matrix-modes) }} mode: ${{ fromJSON(needs.discover-tests.outputs.matrix-modes) }}
provider: [ollama, vllm]
exclude:
- mode: vision
provider: vllm
steps: steps:
- name: Checkout repository - name: Checkout repository
@ -90,20 +82,33 @@ jobs:
ref: ${{ github.event.pull_request.head.ref }} ref: ${{ github.event.pull_request.head.ref }}
fetch-depth: 0 fetch-depth: 0
- name: Generate test types
id: generate-test-types
run: |
if [ ${{ matrix.provider }} == "vllm" ]; then
echo "test-types=[\"inference\"]" >> $GITHUB_OUTPUT
else
# Get test directories dynamically, excluding non-test directories
TEST_TYPES=$(find tests/integration -maxdepth 1 -mindepth 1 -type d -printf "%f\n" |
grep -Ev "^(__pycache__|fixtures|test_cases|recordings|non_ci|post_training)$" |
sort | jq -R -s -c 'split("\n")[:-1]')
echo "test-types=$TEST_TYPES" >> $GITHUB_OUTPUT
fi
- name: Setup test environment - name: Setup test environment
uses: ./.github/actions/setup-test-environment uses: ./.github/actions/setup-test-environment
with: with:
python-version: "3.12" # Use single Python version for recording python-version: "3.12" # Use single Python version for recording
client-version: "latest" client-version: "latest"
provider: ${{ inputs.test-provider || 'ollama' }} provider: ${{ matrix.provider }}
run-vision-tests: ${{ matrix.mode == 'vision' && 'true' || 'false' }} run-vision-tests: ${{ matrix.mode == 'vision' && 'true' || 'false' }}
inference-mode: 'record' inference-mode: 'record'
- name: Run and record tests - name: Run and record tests
uses: ./.github/actions/run-and-record-tests uses: ./.github/actions/run-and-record-tests
with: with:
test-types: ${{ needs.discover-tests.outputs.test-types }} test-types: ${{ steps.generate-test-types.outputs.test-types }}
stack-config: 'server:ci-tests' # recording must be done with server since more tests are run stack-config: 'server:ci-tests' # recording must be done with server since more tests are run
provider: ${{ inputs.test-provider || 'ollama' }} provider: ${{ matrix.provider }}
inference-mode: 'record' inference-mode: 'record'
run-vision-tests: ${{ matrix.mode == 'vision' && 'true' || 'false' }} run-vision-tests: ${{ matrix.mode == 'vision' && 'true' || 'false' }}