From 1f29aaa2e1abe946a0ab9ef55ce4756d6b30f2a2 Mon Sep 17 00:00:00 2001 From: Derek Higgins Date: Wed, 13 Aug 2025 14:19:52 +0100 Subject: [PATCH] ci: integrate vLLM inference tests with GitHub Actions workflows Add vLLM provider support to integration test CI workflows alongside existing Ollama support. Configure provider-specific test execution where vLLM runs only inference-specific tests (excluding vision tests) while Ollama continues to run the full test suite. This enables comprehensive CI testing of both inference providers but keeps the vLLM footprint small; coverage can be expanded later if it proves not to be too disruptive. Signed-off-by: Derek Higgins --- .../actions/run-and-record-tests/action.yml | 7 +-- .github/workflows/integration-tests.yml | 46 +++++++++---------- .../workflows/record-integration-tests.yml | 35 ++++++++------ 3 files changed, 46 insertions(+), 42 deletions(-) diff --git a/.github/actions/run-and-record-tests/action.yml b/.github/actions/run-and-record-tests/action.yml index 573148e46..d4f7db7fe 100644 --- a/.github/actions/run-and-record-tests/action.yml +++ b/.github/actions/run-and-record-tests/action.yml @@ -52,9 +52,9 @@ runs: git add tests/integration/recordings/ if [ "${{ inputs.run-vision-tests }}" == "true" ]; then - git commit -m "Recordings update from CI (vision)" + git commit -m "Recordings update from CI (vision) (${{ inputs.provider }})" else - git commit -m "Recordings update from CI" + git commit -m "Recordings update from CI (${{ inputs.provider }})" fi git fetch origin ${{ github.event.pull_request.head.ref }} @@ -70,7 +70,8 @@ runs: if: ${{ always() }} shell: bash run: | - sudo docker logs ollama > ollama-${{ inputs.inference-mode }}.log || true + sudo docker logs ollama > ollama-${{ inputs.inference-mode }}.log 2>&1 || true + sudo docker logs vllm > vllm-${{ inputs.inference-mode }}.log 2>&1 || true - name: Upload logs if: ${{ always() }} diff --git a/.github/workflows/integration-tests.yml
b/.github/workflows/integration-tests.yml index 9ef49fba3..e3f2a8c8e 100644 --- a/.github/workflows/integration-tests.yml +++ b/.github/workflows/integration-tests.yml @@ -20,7 +20,6 @@ on: schedule: # If changing the cron schedule, update the provider in the test-matrix job - cron: '0 0 * * *' # (test latest client) Daily at 12 AM UTC - - cron: '1 0 * * 0' # (test vllm) Weekly on Sunday at 1 AM UTC workflow_dispatch: inputs: test-all-client-versions: @@ -38,28 +37,7 @@ concurrency: cancel-in-progress: true jobs: - discover-tests: - runs-on: ubuntu-latest - outputs: - test-types: ${{ steps.generate-test-types.outputs.test-types }} - - steps: - - name: Checkout repository - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - - - name: Generate test types - id: generate-test-types - run: | - # Get test directories dynamically, excluding non-test directories - # NOTE: we are excluding post_training since the tests take too long - TEST_TYPES=$(find tests/integration -maxdepth 1 -mindepth 1 -type d | - sed 's|tests/integration/||' | - grep -Ev "^(__pycache__|fixtures|test_cases|recordings|non_ci|post_training)$" | - sort | jq -R -s -c 'split("\n")[:-1]') - echo "test-types=$TEST_TYPES" >> $GITHUB_OUTPUT - run-replay-mode-tests: - needs: discover-tests runs-on: ubuntu-latest name: ${{ format('Integration Tests ({0}, {1}, {2}, client={3}, vision={4})', matrix.client-type, matrix.provider, matrix.python-version, matrix.client-version, matrix.run-vision-tests) }} @@ -68,11 +46,14 @@ jobs: matrix: client-type: [library, server] # Use vllm on weekly schedule, otherwise use test-provider input (defaults to ollama) - provider: ${{ (github.event.schedule == '1 0 * * 0') && fromJSON('["vllm"]') || fromJSON(format('["{0}"]', github.event.inputs.test-provider || 'ollama')) }} + provider: [ollama, vllm] # Use Python 3.13 only on nightly schedule (daily latest client test), otherwise use 3.12 python-version: ${{ github.event.schedule == '0 0 * * *' && 
fromJSON('["3.12", "3.13"]') || fromJSON('["3.12"]') }} client-version: ${{ (github.event.schedule == '0 0 * * *' || github.event.inputs.test-all-client-versions == 'true') && fromJSON('["published", "latest"]') || fromJSON('["latest"]') }} run-vision-tests: [true, false] + exclude: + - provider: vllm + run-vision-tests: true steps: - name: Checkout repository @@ -87,10 +68,27 @@ jobs: run-vision-tests: ${{ matrix.run-vision-tests }} inference-mode: 'replay' + - name: Generate test types + id: generate-test-types + run: | + # Only run inference tests for vllm as these are more likely to exercise the vllm provider + # TODO: Add agent tests for vllm + if [ ${{ matrix.provider }} == "vllm" ]; then + echo "test-types=[\"inference\"]" >> $GITHUB_OUTPUT + exit 0 + fi + # Get test directories dynamically, excluding non-test directories + # NOTE: we are excluding post_training since the tests take too long + TEST_TYPES=$(find tests/integration -maxdepth 1 -mindepth 1 -type d | + sed 's|tests/integration/||' | + grep -Ev "^(__pycache__|fixtures|test_cases|recordings|non_ci|post_training)$" | + sort | jq -R -s -c 'split("\n")[:-1]') + echo "test-types=$TEST_TYPES" >> $GITHUB_OUTPUT + - name: Run tests uses: ./.github/actions/run-and-record-tests with: - test-types: ${{ needs.discover-tests.outputs.test-types }} + test-types: ${{ steps.generate-test-types.outputs.test-types }} stack-config: ${{ matrix.client-type == 'library' && 'ci-tests' || 'server:ci-tests' }} provider: ${{ matrix.provider }} inference-mode: 'replay' diff --git a/.github/workflows/record-integration-tests.yml b/.github/workflows/record-integration-tests.yml index 12957db27..3b54b57bc 100644 --- a/.github/workflows/record-integration-tests.yml +++ b/.github/workflows/record-integration-tests.yml @@ -15,12 +15,6 @@ on: - '.github/actions/setup-ollama/action.yml' - '.github/actions/setup-test-environment/action.yml' - '.github/actions/run-and-record-tests/action.yml' - workflow_dispatch: - inputs: - 
test-provider: - description: 'Test against a specific provider' - type: string - default: 'ollama' concurrency: group: ${{ github.workflow }}-${{ github.ref }} @@ -42,12 +36,6 @@ jobs: - name: Generate test types id: generate-test-types run: | - # Get test directories dynamically, excluding non-test directories - TEST_TYPES=$(find tests/integration -maxdepth 1 -mindepth 1 -type d -printf "%f\n" | - grep -Ev "^(__pycache__|fixtures|test_cases|recordings|post_training)$" | - sort | jq -R -s -c 'split("\n")[:-1]') - echo "test-types=$TEST_TYPES" >> $GITHUB_OUTPUT - labels=$(gh pr view ${{ github.event.pull_request.number }} --json labels --jq '.labels[].name') echo "labels=$labels" @@ -82,6 +70,10 @@ jobs: fail-fast: false matrix: mode: ${{ fromJSON(needs.discover-tests.outputs.matrix-modes) }} + provider: [ollama, vllm] + exclude: + - mode: vision + provider: vllm steps: - name: Checkout repository @@ -90,20 +82,33 @@ jobs: ref: ${{ github.event.pull_request.head.ref }} fetch-depth: 0 + - name: Generate test types + id: generate-test-types + run: | + if [ ${{ matrix.provider }} == "vllm" ]; then + echo "test-types=[\"inference\"]" >> $GITHUB_OUTPUT + else + # Get test directories dynamically, excluding non-test directories + TEST_TYPES=$(find tests/integration -maxdepth 1 -mindepth 1 -type d -printf "%f\n" | + grep -Ev "^(__pycache__|fixtures|test_cases|recordings|non_ci|post_training)$" | + sort | jq -R -s -c 'split("\n")[:-1]') + echo "test-types=$TEST_TYPES" >> $GITHUB_OUTPUT + fi + - name: Setup test environment uses: ./.github/actions/setup-test-environment with: python-version: "3.12" # Use single Python version for recording client-version: "latest" - provider: ${{ inputs.test-provider || 'ollama' }} + provider: ${{ matrix.provider }} run-vision-tests: ${{ matrix.mode == 'vision' && 'true' || 'false' }} inference-mode: 'record' - name: Run and record tests uses: ./.github/actions/run-and-record-tests with: - test-types: ${{ 
needs.discover-tests.outputs.test-types }} + test-types: ${{ steps.generate-test-types.outputs.test-types }} stack-config: 'server:ci-tests' # recording must be done with server since more tests are run - provider: ${{ inputs.test-provider || 'ollama' }} + provider: ${{ matrix.provider }} inference-mode: 'record' run-vision-tests: ${{ matrix.mode == 'vision' && 'true' || 'false' }}