feat(ci): add support for running vision inference tests (#2972)

This PR significantly refactors the Integration Tests workflow. The main
goal was to enable recording of vision tests, which had never been run as
part of our CI. While debugging, I ended up making several other changes
that refactor the workflow and, hopefully, make it more robust.

After doing the experiments, I have updated the trigger event to be
`pull_request_target` so this workflow can get write permissions by
default but it will run with source code from the base (main) branch in
the source repository only. If you do change the workflow, you'd need to
experiment using the `workflow_dispatch` triggers. This should not be
news to anyone using GitHub Actions (except me!)

It is likely to be a little rocky though while I learn more about GitHub
Actions, etc. Please be patient :)

---------

Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
This commit is contained in:
Ashwin Bharambe 2025-07-31 11:50:42 -07:00 committed by GitHub
parent 709c974bd8
commit 27d866795c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
108 changed files with 13985 additions and 15254 deletions

View file

@ -0,0 +1,141 @@
name: Vision Inference Integration Tests

run-name: Run vision inference integration test suite from tests/integration/inference

# Triggers:
#   - push to main
#   - pull_request_target against main, limited to the listed paths
#   - manual dispatch with two optional inputs
on:
  push:
    branches:
      - main
  pull_request_target:
    branches:
      - main
    types:
      - opened
      - synchronize
      - labeled
    # Only PRs touching these paths trigger the workflow.
    paths:
      - 'llama_stack/**'
      - 'tests/**'
      - 'uv.lock'
      - 'pyproject.toml'
      - '.github/workflows/integration-vision-tests.yml'  # This workflow
      - '.github/actions/setup-ollama/action.yml'
      - '.github/actions/setup-test-environment/action.yml'
      - '.github/actions/run-and-record-tests/action.yml'
  workflow_dispatch:
    inputs:
      test-all-client-versions:
        description: 'Test against both the latest and published versions'
        type: boolean
        default: false
      # 'record' or 'replay' overrides the label-based mode detection;
      # the empty default leaves detection to the re-record-tests label.
      force-inference-mode:
        description: 'Force inference mode (record or replay)'
        type: string
        default: ''
concurrency:
  # Groups are keyed by workflow + PR number (falling back to the git ref for
  # events that have no pull request) + a mode suffix:
  #   ...-rerecord  'labeled' event while the re-record-tests label is present
  #   ...-replay    'opened' / 'synchronize' events
  #   ...-no-run    everything else (irrelevant label events, and events that
  #                 carry no action such as push / workflow_dispatch)
  # The "no-run" group ensures that irrelevant label events don't interfere
  # with the real workflows.
  #
  # The PR number is used instead of github.ref because, for
  # pull_request_target events, github.ref is the *base* branch
  # (refs/heads/main). Keying on it would put every PR in the same group and,
  # with cancel-in-progress, let unrelated PRs cancel each other's runs.
  group: >-
    ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}-${{
      ((github.event.action == 'opened' || github.event.action == 'synchronize') && 'replay') ||
      ((github.event.action == 'labeled' && contains(github.event.pull_request.labels.*.name, 're-record-tests')) && 'rerecord' ||
      'no-run')
    }}
  cancel-in-progress: true
jobs:
  # Gate job: decides whether this run should record new responses or replay
  # existing ones, and exposes the decision as the `rerecord-tests` output.
  discover-tests:
    # Run for:
    #   - push and workflow_dispatch events (these carry no
    #     github.event.action, so they must be matched by event name;
    #     otherwise both declared triggers would skip every job)
    #   - PR opened / synchronize
    #   - PR labeled, but only when the re-record-tests label is present
    if: |
      github.event_name == 'push' ||
      github.event_name == 'workflow_dispatch' ||
      github.event.action == 'opened' ||
      github.event.action == 'synchronize' ||
      (github.event.action == 'labeled' && contains(github.event.pull_request.labels.*.name, 're-record-tests'))
    runs-on: ubuntu-latest
    outputs:
      rerecord-tests: ${{ steps.check-rerecord-tests.outputs.rerecord-tests }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683  # v4.2.2

      - name: Check if re-record-tests label exists
        id: check-rerecord-tests
        # Context values are passed through the environment rather than being
        # interpolated into the script text, so a crafted input cannot inject
        # shell code.
        env:
          FORCE_INFERENCE_MODE: ${{ inputs.force-inference-mode }}
          HAS_RERECORD_LABEL: ${{ contains(github.event.pull_request.labels.*.name, 're-record-tests') }}
        run: |
          # An explicit force-inference-mode input wins over the PR label.
          if [[ "$FORCE_INFERENCE_MODE" == "record" ]]; then
            echo "rerecord-tests=true" >> "$GITHUB_OUTPUT"
          elif [[ "$FORCE_INFERENCE_MODE" == "replay" ]]; then
            echo "rerecord-tests=false" >> "$GITHUB_OUTPUT"
          else
            if [[ "$HAS_RERECORD_LABEL" == "true" ]]; then
              echo "rerecord-tests=true" >> "$GITHUB_OUTPUT"
            else
              echo "rerecord-tests=false" >> "$GITHUB_OUTPUT"
            fi
          fi

  record-tests:
    # Sequential job for recording to avoid SQLite conflicts
    if: ${{ needs.discover-tests.outputs.rerecord-tests == 'true' }}
    needs: discover-tests
    runs-on: ubuntu-latest
    # Write access is requested for this job only.
    permissions:
      contents: write
      pull-requests: write
    steps:
      - name: Checkout repository
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683  # v4.2.2
        with:
          # NOTE(review): head.ref is a bare branch name and no `repository`
          # is given, so for PRs opened from forks this branch may not exist
          # in this repository - confirm fork PRs are handled as intended.
          # NOTE(review): this checks out PR-controlled code in a job holding
          # a write-scoped token under pull_request_target; the
          # re-record-tests label appears to be the only gate - confirm that
          # only trusted maintainers can apply it.
          ref: ${{ github.event.pull_request.head.ref }}
          fetch-depth: 0

      - name: Setup test environment
        uses: ./.github/actions/setup-test-environment
        with:
          python-version: "3.12"  # Use single Python version for recording
          client-version: "latest"
          provider: 'ollama'
          run-vision-tests: 'true'

      - name: Run and record tests
        uses: ./.github/actions/run-and-record-tests
        with:
          test-types: '["vision"]'
          stack-config: 'server:ci-tests'  # re-recording must be done in server mode
          provider: 'ollama'
          inference-mode: 'record'
          run-vision-tests: 'true'

  run-replay-mode-tests:
    # Skip this job if we're in recording mode (handled by record-tests job)
    if: ${{ needs.discover-tests.outputs.rerecord-tests != 'true' }}
    needs: discover-tests
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      # Single-entry matrix for now; each axis can be widened later.
      matrix:
        client-type: [library]
        provider: [ollama]
        python-version: ["3.12"]
        client-version: ["latest"]
    steps:
      # NOTE(review): no `ref` is given, so under pull_request_target this
      # checks out the base branch rather than the PR's changes - confirm
      # that replaying against base-branch code is intended.
      - name: Checkout repository
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683  # v4.2.2

      - name: Setup test environment
        uses: ./.github/actions/setup-test-environment
        with:
          python-version: ${{ matrix.python-version }}
          client-version: ${{ matrix.client-version }}
          provider: ${{ matrix.provider }}
          run-vision-tests: 'true'

      - name: Run and record tests
        uses: ./.github/actions/run-and-record-tests
        with:
          test-types: '["vision"]'
          # Library client uses the config name directly; any other client
          # type goes through the server-mode config.
          stack-config: ${{ matrix.client-type == 'library' && 'ci-tests' || 'server:ci-tests' }}
          provider: ${{ matrix.provider }}
          inference-mode: 'replay'
          run-vision-tests: 'true'