ci: integrate vLLM inference tests with GitHub Actions workflows

Add vLLM provider support to integration test CI workflows alongside
existing Ollama support. Configure provider-specific test execution
where vLLM runs only inference specific tests (excluding vision tests) while
Ollama continues to run the full test suite.

This enables comprehensive CI testing of both inference providers but
keeps the vLLM footprint small, this can be expanded later if it proves
to not be too disruptive.

Signed-off-by: Derek Higgins <derekh@redhat.com>
This commit is contained in:
Derek Higgins 2025-08-13 14:19:52 +01:00
parent 91a010fb12
commit 1f29aaa2e1
3 changed files with 46 additions and 42 deletions

View file

@ -52,9 +52,9 @@ runs:
git add tests/integration/recordings/
if [ "${{ inputs.run-vision-tests }}" == "true" ]; then
git commit -m "Recordings update from CI (vision)"
git commit -m "Recordings update from CI (vision) (${{ inputs.provider }})"
else
git commit -m "Recordings update from CI"
git commit -m "Recordings update from CI (${{ inputs.provider }})"
fi
git fetch origin ${{ github.event.pull_request.head.ref }}
@ -70,7 +70,8 @@ runs:
if: ${{ always() }}
shell: bash
run: |
sudo docker logs ollama > ollama-${{ inputs.inference-mode }}.log || true
sudo docker logs ollama > ollama-${{ inputs.inference-mode }}.log 2>&1 || true
sudo docker logs vllm > vllm-${{ inputs.inference-mode }}.log 2>&1 || true
- name: Upload logs
if: ${{ always() }}

View file

@ -20,7 +20,6 @@ on:
schedule:
# If changing the cron schedule, update the provider in the test-matrix job
- cron: '0 0 * * *' # (test latest client) Daily at 12 AM UTC
- cron: '1 0 * * 0' # (test vllm) Weekly on Sunday at 1 AM UTC
workflow_dispatch:
inputs:
test-all-client-versions:
@ -38,28 +37,7 @@ concurrency:
cancel-in-progress: true
jobs:
discover-tests:
runs-on: ubuntu-latest
outputs:
test-types: ${{ steps.generate-test-types.outputs.test-types }}
steps:
- name: Checkout repository
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- name: Generate test types
id: generate-test-types
run: |
# Get test directories dynamically, excluding non-test directories
# NOTE: we are excluding post_training since the tests take too long
TEST_TYPES=$(find tests/integration -maxdepth 1 -mindepth 1 -type d |
sed 's|tests/integration/||' |
grep -Ev "^(__pycache__|fixtures|test_cases|recordings|non_ci|post_training)$" |
sort | jq -R -s -c 'split("\n")[:-1]')
echo "test-types=$TEST_TYPES" >> $GITHUB_OUTPUT
run-replay-mode-tests:
needs: discover-tests
runs-on: ubuntu-latest
name: ${{ format('Integration Tests ({0}, {1}, {2}, client={3}, vision={4})', matrix.client-type, matrix.provider, matrix.python-version, matrix.client-version, matrix.run-vision-tests) }}
@ -68,11 +46,14 @@ jobs:
matrix:
client-type: [library, server]
# Use vllm on weekly schedule, otherwise use test-provider input (defaults to ollama)
provider: ${{ (github.event.schedule == '1 0 * * 0') && fromJSON('["vllm"]') || fromJSON(format('["{0}"]', github.event.inputs.test-provider || 'ollama')) }}
provider: [ollama, vllm]
# Use Python 3.13 only on nightly schedule (daily latest client test), otherwise use 3.12
python-version: ${{ github.event.schedule == '0 0 * * *' && fromJSON('["3.12", "3.13"]') || fromJSON('["3.12"]') }}
client-version: ${{ (github.event.schedule == '0 0 * * *' || github.event.inputs.test-all-client-versions == 'true') && fromJSON('["published", "latest"]') || fromJSON('["latest"]') }}
run-vision-tests: [true, false]
exclude:
- provider: vllm
run-vision-tests: true
steps:
- name: Checkout repository
@ -87,10 +68,27 @@ jobs:
run-vision-tests: ${{ matrix.run-vision-tests }}
inference-mode: 'replay'
- name: Generate test types
id: generate-test-types
run: |
# Only run inference tests for vllm as these are more likely to exercise the vllm provider
# TODO: Add agent tests for vllm
if [ ${{ matrix.provider }} == "vllm" ]; then
echo "test-types=[\"inference\"]" >> $GITHUB_OUTPUT
exit 0
fi
# Get test directories dynamically, excluding non-test directories
# NOTE: we are excluding post_training since the tests take too long
TEST_TYPES=$(find tests/integration -maxdepth 1 -mindepth 1 -type d |
sed 's|tests/integration/||' |
grep -Ev "^(__pycache__|fixtures|test_cases|recordings|non_ci|post_training)$" |
sort | jq -R -s -c 'split("\n")[:-1]')
echo "test-types=$TEST_TYPES" >> $GITHUB_OUTPUT
- name: Run tests
uses: ./.github/actions/run-and-record-tests
with:
test-types: ${{ needs.discover-tests.outputs.test-types }}
test-types: ${{ steps.generate-test-types.outputs.test-types }}
stack-config: ${{ matrix.client-type == 'library' && 'ci-tests' || 'server:ci-tests' }}
provider: ${{ matrix.provider }}
inference-mode: 'replay'

View file

@ -15,12 +15,6 @@ on:
- '.github/actions/setup-ollama/action.yml'
- '.github/actions/setup-test-environment/action.yml'
- '.github/actions/run-and-record-tests/action.yml'
workflow_dispatch:
inputs:
test-provider:
description: 'Test against a specific provider'
type: string
default: 'ollama'
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
@ -42,12 +36,6 @@ jobs:
- name: Generate test types
id: generate-test-types
run: |
# Get test directories dynamically, excluding non-test directories
TEST_TYPES=$(find tests/integration -maxdepth 1 -mindepth 1 -type d -printf "%f\n" |
grep -Ev "^(__pycache__|fixtures|test_cases|recordings|post_training)$" |
sort | jq -R -s -c 'split("\n")[:-1]')
echo "test-types=$TEST_TYPES" >> $GITHUB_OUTPUT
labels=$(gh pr view ${{ github.event.pull_request.number }} --json labels --jq '.labels[].name')
echo "labels=$labels"
@ -82,6 +70,10 @@ jobs:
fail-fast: false
matrix:
mode: ${{ fromJSON(needs.discover-tests.outputs.matrix-modes) }}
provider: [ollama, vllm]
exclude:
- mode: vision
provider: vllm
steps:
- name: Checkout repository
@ -90,20 +82,33 @@ jobs:
ref: ${{ github.event.pull_request.head.ref }}
fetch-depth: 0
- name: Generate test types
id: generate-test-types
run: |
if [ ${{ matrix.provider }} == "vllm" ]; then
echo "test-types=[\"inference\"]" >> $GITHUB_OUTPUT
else
# Get test directories dynamically, excluding non-test directories
TEST_TYPES=$(find tests/integration -maxdepth 1 -mindepth 1 -type d -printf "%f\n" |
grep -Ev "^(__pycache__|fixtures|test_cases|recordings|non_ci|post_training)$" |
sort | jq -R -s -c 'split("\n")[:-1]')
echo "test-types=$TEST_TYPES" >> $GITHUB_OUTPUT
fi
- name: Setup test environment
uses: ./.github/actions/setup-test-environment
with:
python-version: "3.12" # Use single Python version for recording
client-version: "latest"
provider: ${{ inputs.test-provider || 'ollama' }}
provider: ${{ matrix.provider }}
run-vision-tests: ${{ matrix.mode == 'vision' && 'true' || 'false' }}
inference-mode: 'record'
- name: Run and record tests
uses: ./.github/actions/run-and-record-tests
with:
test-types: ${{ needs.discover-tests.outputs.test-types }}
test-types: ${{ steps.generate-test-types.outputs.test-types }}
stack-config: 'server:ci-tests' # recording must be done with server since more tests are run
provider: ${{ inputs.test-provider || 'ollama' }}
provider: ${{ matrix.provider }}
inference-mode: 'record'
run-vision-tests: ${{ matrix.mode == 'vision' && 'true' || 'false' }}