fix(ci): simplify integration tests replay mode (#2997)

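We are splitting the record and replay workflows completely to simplify
the concurrency key design. This change makes the integration tests
workflow replay-only.

Vision tests are now covered by adding a `run-vision-tests` dimension to the
test matrix, so the separate vision tests workflow is removed.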
This commit is contained in:
Ashwin Bharambe 2025-07-31 15:18:18 -07:00 committed by GitHub
parent 218c89fff1
commit f4489eeb83
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 19 additions and 220 deletions

View file

@ -192,7 +192,7 @@ runs:
if: ${{ always() }}
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
with:
name: ${{ inputs.inference-mode }}-logs-${{ github.run_id }}-${{ github.run_attempt || '' }}-${{ inputs.provider }}-${{ inputs.run-vision-tests }}-${{ inputs.stack-config }}
name: logs-${{ github.run_id }}-${{ github.run_attempt || '' }}-${{ strategy.job-index }}
path: |
*.log
retention-days: 1

View file

@ -16,6 +16,9 @@ inputs:
description: 'Whether to setup provider for vision tests'
required: false
default: 'false'
inference-mode:
description: 'Inference mode (record or replay)'
required: true
runs:
using: 'composite'
@ -27,13 +30,13 @@ runs:
client-version: ${{ inputs.client-version }}
- name: Setup ollama
if: ${{ inputs.provider == 'ollama' }}
if: ${{ inputs.provider == 'ollama' && inputs.inference-mode == 'record' }}
uses: ./.github/actions/setup-ollama
with:
run-vision-tests: ${{ inputs.run-vision-tests }}
- name: Setup vllm
if: ${{ inputs.provider == 'vllm' }}
if: ${{ inputs.provider == 'vllm' && inputs.inference-mode == 'record' }}
uses: ./.github/actions/setup-vllm
- name: Build Llama Stack

View file

@ -8,9 +8,8 @@ Llama Stack uses GitHub Actions for Continuous Integration (CI). Below is a tabl
| Installer CI | [install-script-ci.yml](install-script-ci.yml) | Test the installation script |
| Integration Auth Tests | [integration-auth-tests.yml](integration-auth-tests.yml) | Run the integration test suite with Kubernetes authentication |
| SqlStore Integration Tests | [integration-sql-store-tests.yml](integration-sql-store-tests.yml) | Run the integration test suite with SqlStore |
| Integration Tests | [integration-tests.yml](integration-tests.yml) | Run the integration test suite from tests/integration |
| Integration Tests (Replay) | [integration-tests.yml](integration-tests.yml) | Run the integration test suite from tests/integration in replay mode |
| Vector IO Integration Tests | [integration-vector-io-tests.yml](integration-vector-io-tests.yml) | Run the integration test suite with various VectorIO providers |
| Vision Inference Integration Tests | [integration-vision-tests.yml](integration-vision-tests.yml) | Run vision inference integration test suite from tests/integration/inference |
| Pre-commit | [pre-commit.yml](pre-commit.yml) | Run pre-commit checks |
| Test Llama Stack Build | [providers-build.yml](providers-build.yml) | Test llama stack build |
| Python Package Build Test | [python-build-test.yml](python-build-test.yml) | Test building the llama-stack PyPI project |

View file

@ -1,13 +1,13 @@
name: Integration Tests
name: Integration Tests (Replay)
run-name: Run the integration test suite from tests/integration
run-name: Run the integration test suite from tests/integration in replay mode
on:
push:
branches: [ main ]
pull_request_target:
pull_request:
branches: [ main ]
types: [opened, synchronize, labeled]
types: [opened, synchronize, reopened]
paths:
- 'llama_stack/**'
- 'tests/**'
@ -31,35 +31,17 @@ on:
description: 'Test against a specific provider'
type: string
default: 'ollama'
force-inference-mode:
description: 'Force inference mode (record or replay)'
type: string
default: ''
concurrency:
# Skip concurrency for pushes to main - each commit should be tested independently
# For other events, create concurrency groups:
# ${{ github.workflow }}-${{ github.ref }}-rerecord (for labeled events with re-record-tests label)
# ${{ github.workflow }}-${{ github.ref }}-replay (for all non-labeled events)
# ${{ github.workflow }}-${{ github.ref }}-no-run (for labeled events without re-record-tests label)
# The "no-run" group ensures that irrelevant label events don't interfere with the real workflows.
group: >-
${{ github.workflow }}-${{ github.ref == 'refs/heads/main' && github.run_id || github.ref }}-${{
github.event.action == 'labeled' && (
contains(github.event.pull_request.labels.*.name, 're-record-tests') && 'rerecord' || 'no-run'
) || 'replay'
}}
group: ${{ github.workflow }}-${{ github.ref == 'refs/heads/main' && github.run_id || github.ref }}
cancel-in-progress: true
jobs:
discover-tests:
if: |
github.event.action != 'labeled' ||
contains(github.event.pull_request.labels.*.name, 're-record-tests')
runs-on: ubuntu-latest
outputs:
test-types: ${{ steps.generate-test-types.outputs.test-types }}
rerecord-tests: ${{ steps.check-rerecord-tests.outputs.rerecord-tests }}
steps:
- name: Checkout repository
@ -69,61 +51,13 @@ jobs:
id: generate-test-types
run: |
# Get test directories dynamically, excluding non-test directories
# NOTE: we are excluding post_training since the tests take too long
TEST_TYPES=$(find tests/integration -maxdepth 1 -mindepth 1 -type d -printf "%f\n" |
grep -Ev "^(__pycache__|fixtures|test_cases|recordings)$" |
grep -Ev "^(__pycache__|fixtures|test_cases|recordings|post_training)$" |
sort | jq -R -s -c 'split("\n")[:-1]')
echo "test-types=$TEST_TYPES" >> $GITHUB_OUTPUT
- name: Check if re-record-tests label exists
id: check-rerecord-tests
run: |
if [[ "${{ inputs.force-inference-mode }}" == "record" ]]; then
echo "rerecord-tests=true" >> $GITHUB_OUTPUT
elif [[ "${{ inputs.force-inference-mode }}" == "replay" ]]; then
echo "rerecord-tests=false" >> $GITHUB_OUTPUT
else
if [[ "${{ contains(github.event.pull_request.labels.*.name, 're-record-tests') }}" == "true" ]]; then
echo "rerecord-tests=true" >> $GITHUB_OUTPUT
else
echo "rerecord-tests=false" >> $GITHUB_OUTPUT
fi
fi
record-tests:
# Sequential job for recording to avoid SQLite conflicts
if: ${{ needs.discover-tests.outputs.rerecord-tests == 'true' }}
needs: discover-tests
runs-on: ubuntu-latest
permissions:
contents: write
pull-requests: write
steps:
- name: Checkout repository
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
ref: ${{ github.event.pull_request.head.ref }}
fetch-depth: 0
- name: Setup test environment
uses: ./.github/actions/setup-test-environment
with:
python-version: "3.12" # Use single Python version for recording
client-version: "latest"
provider: ${{ inputs.test-provider || 'ollama' }}
- name: Run and record tests
uses: ./.github/actions/run-and-record-tests
with:
test-types: ${{ needs.discover-tests.outputs.test-types }}
stack-config: 'server:ci-tests' # recording must be done with server since more tests are run
provider: ${{ inputs.test-provider || 'ollama' }}
inference-mode: 'record'
run-replay-mode-tests:
# Skip this job if we're in recording mode (handled by record-tests job)
if: ${{ needs.discover-tests.outputs.rerecord-tests != 'true' }}
needs: discover-tests
runs-on: ubuntu-latest
@ -135,6 +69,7 @@ jobs:
provider: ${{ (github.event.schedule == '1 0 * * 0') && fromJSON('["vllm"]') || fromJSON(format('["{0}"]', github.event.inputs.test-provider || 'ollama')) }}
python-version: ["3.12", "3.13"]
client-version: ${{ (github.event.schedule == '0 0 * * 0' || github.event.inputs.test-all-client-versions == 'true') && fromJSON('["published", "latest"]') || fromJSON('["latest"]') }}
run-vision-tests: ['true', 'false']
steps:
- name: Checkout repository
@ -146,11 +81,14 @@ jobs:
python-version: ${{ matrix.python-version }}
client-version: ${{ matrix.client-version }}
provider: ${{ matrix.provider }}
run-vision-tests: ${{ matrix.run-vision-tests }}
inference-mode: 'replay'
- name: Run and record tests
- name: Run tests
uses: ./.github/actions/run-and-record-tests
with:
test-types: ${{ needs.discover-tests.outputs.test-types }}
stack-config: ${{ matrix.client-type == 'library' && 'ci-tests' || 'server:ci-tests' }}
provider: ${{ matrix.provider }}
inference-mode: 'replay'
run-vision-tests: ${{ matrix.run-vision-tests }}

View file

@ -1,141 +0,0 @@
name: Vision Inference Integration Tests
run-name: Run vision inference integration test suite from tests/integration/inference
on:
push:
branches: [ main ]
pull_request_target:
branches: [ main ]
types: [opened, synchronize, labeled]
paths:
- 'llama_stack/**'
- 'tests/**'
- 'uv.lock'
- 'pyproject.toml'
- '.github/workflows/integration-vision-tests.yml' # This workflow
- '.github/actions/setup-ollama/action.yml'
- '.github/actions/setup-test-environment/action.yml'
- '.github/actions/run-and-record-tests/action.yml'
workflow_dispatch:
inputs:
test-all-client-versions:
description: 'Test against both the latest and published versions'
type: boolean
default: false
force-inference-mode:
description: 'Force inference mode (record or replay)'
type: string
default: ''
concurrency:
# Skip concurrency for pushes to main - each commit should be tested independently
# For other events, create concurrency groups:
# ${{ github.workflow }}-${{ github.ref }}-rerecord (for labeled events with re-record-tests label)
# ${{ github.workflow }}-${{ github.ref }}-replay (for all non-labeled events)
# ${{ github.workflow }}-${{ github.ref }}-no-run (for labeled events without re-record-tests label)
# The "no-run" group ensures that irrelevant label events don't interfere with the real workflows.
group: >-
${{ github.workflow }}-${{ github.ref == 'refs/heads/main' && github.run_id || github.ref }}-${{
github.event.action == 'labeled' && (
contains(github.event.pull_request.labels.*.name, 're-record-tests') && 'rerecord' || 'no-run'
) || 'replay'
}}
cancel-in-progress: true
jobs:
discover-tests:
if: |
github.event.action != 'labeled' ||
contains(github.event.pull_request.labels.*.name, 're-record-tests')
runs-on: ubuntu-latest
outputs:
rerecord-tests: ${{ steps.check-rerecord-tests.outputs.rerecord-tests }}
steps:
- name: Checkout repository
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- name: Check if re-record-tests label exists
id: check-rerecord-tests
run: |
if [[ "${{ inputs.force-inference-mode }}" == "record" ]]; then
echo "rerecord-tests=true" >> $GITHUB_OUTPUT
elif [[ "${{ inputs.force-inference-mode }}" == "replay" ]]; then
echo "rerecord-tests=false" >> $GITHUB_OUTPUT
else
if [[ "${{ contains(github.event.pull_request.labels.*.name, 're-record-tests') }}" == "true" ]]; then
echo "rerecord-tests=true" >> $GITHUB_OUTPUT
else
echo "rerecord-tests=false" >> $GITHUB_OUTPUT
fi
fi
record-tests:
# Sequential job for recording to avoid SQLite conflicts
if: ${{ needs.discover-tests.outputs.rerecord-tests == 'true' }}
needs: discover-tests
runs-on: ubuntu-latest
permissions:
contents: write
pull-requests: write
steps:
- name: Checkout repository
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
ref: ${{ github.event.pull_request.head.ref }}
fetch-depth: 0
- name: Setup test environment
uses: ./.github/actions/setup-test-environment
with:
python-version: "3.12" # Use single Python version for recording
client-version: "latest"
provider: 'ollama'
run-vision-tests: 'true'
- name: Run and record tests
uses: ./.github/actions/run-and-record-tests
with:
test-types: '["vision"]'
stack-config: 'server:ci-tests' # re-recording must be done in server mode
provider: 'ollama'
inference-mode: 'record'
run-vision-tests: 'true'
run-replay-mode-tests:
# Skip this job if we're in recording mode (handled by record-tests job)
if: ${{ needs.discover-tests.outputs.rerecord-tests != 'true' }}
needs: discover-tests
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
client-type: [library]
provider: [ollama]
python-version: ["3.12"]
client-version: ["latest"]
steps:
- name: Checkout repository
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- name: Setup test environment
uses: ./.github/actions/setup-test-environment
with:
python-version: ${{ matrix.python-version }}
client-version: ${{ matrix.client-version }}
provider: ${{ matrix.provider }}
run-vision-tests: 'true'
- name: Run and record tests
uses: ./.github/actions/run-and-record-tests
with:
test-types: '["vision"]'
stack-config: ${{ matrix.client-type == 'library' && 'ci-tests' || 'server:ci-tests' }}
provider: ${{ matrix.provider }}
inference-mode: 'replay'
run-vision-tests: 'true'