mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-10-13 22:17:59 +00:00
This PR significantly refactors the Integration Tests workflow. The main goal behind the PR was to enable recording of vision tests, which had never been run as part of our CI before. During debugging, I ended up making several other changes, refactoring the workflow and hopefully increasing its robustness. After doing the experiments, I have updated the trigger event to be `pull_request_target` so this workflow can get write permissions by default, but it will run with source code from the base (main) branch in the source repository only. If you do change the workflow, you'd need to experiment using the `workflow_dispatch` triggers. This should not be news to anyone using GitHub Actions (except me!). It is likely to be a little rocky, though, while I learn more about GitHub Actions, etc. Please be patient :) --------- Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
156 lines
6.2 KiB
YAML
156 lines
6.2 KiB
YAML
name: Integration Tests

run-name: Run the integration test suite from tests/integration

# Trigger overview:
#   push                - commits landing on main
#   pull_request_target - PRs against main that touch the listed paths;
#                         `labeled` is listed so that adding the
#                         re-record-tests label can start a run
#   schedule            - two cron entries, see below
#   workflow_dispatch   - manual runs with optional overrides
on:
  push:
    branches:
      - main
  pull_request_target:
    branches:
      - main
    types:
      - opened
      - synchronize
      - labeled
    paths:
      - 'llama_stack/**'
      - 'tests/**'
      - 'uv.lock'
      - 'pyproject.toml'
      - '.github/workflows/integration-tests.yml'  # This workflow
      - '.github/actions/setup-ollama/action.yml'
      - '.github/actions/setup-test-environment/action.yml'
      - '.github/actions/run-and-record-tests/action.yml'
  schedule:
    # The run-replay-mode-tests matrix compares github.event.schedule against
    # literal cron strings; keep that matrix in sync when editing these entries.
    - cron: '0 0 * * *'  # (test latest client) Daily at 00:00 UTC
    - cron: '1 0 * * 0'  # (test vllm) Weekly on Sunday at 00:01 UTC
  workflow_dispatch:
    inputs:
      test-all-client-versions:
        description: 'Test against both the latest and published versions'
        type: boolean
        default: false
      test-provider:
        description: 'Test against a specific provider'
        type: string
        default: 'ollama'
      force-inference-mode:
        description: 'Force inference mode (record or replay)'
        type: string
        default: ''
|
|
|
|
concurrency:
  # Runs are grouped per pull request, or per ref for events that carry no pull
  # request. Under pull_request_target, github.ref is the PR *base* branch, so
  # keying on github.ref alone would place every PR that targets main in the
  # same group and let unrelated PRs cancel each other's runs.
  #
  # The last component splits each PR/ref into three groups:
  #   <workflow>-<pr-or-ref>-rerecord  labeled event while the re-record-tests label is present
  #   <workflow>-<pr-or-ref>-replay    opened / synchronize events
  #   <workflow>-<pr-or-ref>-no-run    every other event
  # The "no-run" group ensures that irrelevant label events don't cancel real runs.
  group: >-
    ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}-${{
    ((github.event.action == 'opened' || github.event.action == 'synchronize') && 'replay') ||
    ((github.event.action == 'labeled' && contains(github.event.pull_request.labels.*.name, 're-record-tests')) && 'rerecord') ||
    'no-run'
    }}
  cancel-in-progress: true
|
|
|
|
jobs:
|
|
discover-tests:
  # Gate for every other job in this workflow (they all `needs` this one).
  #
  # Pull request events run only for `opened` / `synchronize`, or for a
  # `labeled` event while the re-record-tests label is on the PR. Events that
  # are not pull request events (push, schedule, workflow_dispatch) carry no
  # github.event.action, so they are let through explicitly; without that
  # clause this job, and everything depending on it, is skipped for them.
  if: |
    github.event_name != 'pull_request_target' ||
    github.event.action == 'opened' ||
    github.event.action == 'synchronize' ||
    (github.event.action == 'labeled' && contains(github.event.pull_request.labels.*.name, 're-record-tests'))
  runs-on: ubuntu-latest
  outputs:
    # JSON array of directory names found under tests/integration.
    test-types: ${{ steps.generate-test-types.outputs.test-types }}
    # 'true' when tests should be re-recorded, 'false' otherwise.
    rerecord-tests: ${{ steps.check-rerecord-tests.outputs.rerecord-tests }}

  steps:
    - name: Checkout repository
      uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683  # v4.2.2

    - name: Generate test types
      id: generate-test-types
      run: |
        # Get test directories dynamically, excluding non-test directories
        TEST_TYPES=$(find tests/integration -maxdepth 1 -mindepth 1 -type d -printf "%f\n" |
                    grep -Ev "^(__pycache__|fixtures|test_cases|recordings)$" |
                    sort | jq -R -s -c 'split("\n")[:-1]')
        echo "test-types=$TEST_TYPES" >> "$GITHUB_OUTPUT"

    - name: Check if re-record-tests label exists
      id: check-rerecord-tests
      # Workflow inputs and expression results are passed through the
      # environment instead of being interpolated into the script text, so
      # their contents can never be parsed as shell code.
      env:
        FORCE_INFERENCE_MODE: ${{ inputs.force-inference-mode }}
        HAS_RERECORD_LABEL: ${{ contains(github.event.pull_request.labels.*.name, 're-record-tests') }}
      run: |
        # An explicit force-inference-mode input wins; otherwise the
        # re-record-tests label on the pull request decides.
        if [[ "$FORCE_INFERENCE_MODE" == "record" ]]; then
          rerecord=true
        elif [[ "$FORCE_INFERENCE_MODE" == "replay" ]]; then
          rerecord=false
        elif [[ "$HAS_RERECORD_LABEL" == "true" ]]; then
          rerecord=true
        else
          rerecord=false
        fi
        echo "rerecord-tests=$rerecord" >> "$GITHUB_OUTPUT"
|
|
|
|
record-tests:
  # Runs only when discover-tests reports rerecord-tests == 'true'.
  # Recording is kept in a single sequential job (no matrix) to avoid SQLite
  # conflicts.
  # NOTE(review): this job holds write permissions and checks out the pull
  # request's head ref; confirm that only trusted maintainers can apply the
  # re-record-tests label that enables it.
  needs: discover-tests
  if: ${{ needs.discover-tests.outputs.rerecord-tests == 'true' }}
  runs-on: ubuntu-latest

  permissions:
    contents: write
    pull-requests: write

  steps:
    - name: Checkout repository
      uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683  # v4.2.2
      with:
        fetch-depth: 0
        ref: ${{ github.event.pull_request.head.ref }}

    - name: Setup test environment
      uses: ./.github/actions/setup-test-environment
      with:
        provider: ${{ inputs.test-provider || 'ollama' }}
        client-version: 'latest'
        python-version: '3.12'  # Use single Python version for recording

    - name: Run and record tests
      uses: ./.github/actions/run-and-record-tests
      with:
        inference-mode: 'record'
        provider: ${{ inputs.test-provider || 'ollama' }}
        # recording must be done with server since more tests are run
        stack-config: 'server:ci-tests'
        test-types: ${{ needs.discover-tests.outputs.test-types }}
|
|
|
|
run-replay-mode-tests:
  # Replays recorded responses across a matrix of client type, provider,
  # Python version and client version.
  # Skip this job if we're in recording mode (handled by record-tests job)
  if: ${{ needs.discover-tests.outputs.rerecord-tests != 'true' }}
  needs: discover-tests
  runs-on: ubuntu-latest

  strategy:
    fail-fast: false
    matrix:
      client-type: [library, server]
      # Use vllm on weekly schedule, otherwise use test-provider input (defaults to ollama)
      provider: ${{ (github.event.schedule == '1 0 * * 0') && fromJSON('["vllm"]') || fromJSON(format('["{0}"]', github.event.inputs.test-provider || 'ollama')) }}
      python-version: ["3.12", "3.13"]
      # Test both the published and the latest client on the daily schedule or
      # when test-all-client-versions is set; otherwise only the latest client.
      # github.event.schedule holds the cron string that fired, so the literal
      # here must match the daily entry under `on.schedule` character for
      # character ('0 0 * * *').
      client-version: ${{ (github.event.schedule == '0 0 * * *' || github.event.inputs.test-all-client-versions == 'true') && fromJSON('["published", "latest"]') || fromJSON('["latest"]') }}

  steps:
    # NOTE(review): no `ref` is passed, so for pull_request_target events
    # actions/checkout uses the workflow's default ref, which GitHub documents
    # as the PR base branch. Confirm that this is the code meant to be tested.
    - name: Checkout repository
      uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683  # v4.2.2

    - name: Setup test environment
      uses: ./.github/actions/setup-test-environment
      with:
        python-version: ${{ matrix.python-version }}
        client-version: ${{ matrix.client-version }}
        provider: ${{ matrix.provider }}

    - name: Run and record tests
      uses: ./.github/actions/run-and-record-tests
      with:
        test-types: ${{ needs.discover-tests.outputs.test-types }}
        # library client -> 'ci-tests'; server client -> 'server:ci-tests'
        stack-config: ${{ matrix.client-type == 'library' && 'ci-tests' || 'server:ci-tests' }}
        provider: ${{ matrix.provider }}
        inference-mode: 'replay'
|