mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-08-12 13:00:39 +00:00
fix(ci): simplify integration tests replay mode (#2997)
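This change splits the record and replay workflows completely, which simplifies the concurrency key design. Vision tests are now covered by adding a `run-vision-tests` dimension to the replay test matrix instead of maintaining a separate vision workflow.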
This commit is contained in:
parent
218c89fff1
commit
f4489eeb83
5 changed files with 19 additions and 220 deletions
|
@ -192,7 +192,7 @@ runs:
|
||||||
if: ${{ always() }}
|
if: ${{ always() }}
|
||||||
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
|
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
|
||||||
with:
|
with:
|
||||||
name: ${{ inputs.inference-mode }}-logs-${{ github.run_id }}-${{ github.run_attempt || '' }}-${{ inputs.provider }}-${{ inputs.run-vision-tests }}-${{ inputs.stack-config }}
|
name: logs-${{ github.run_id }}-${{ github.run_attempt || '' }}-${{ strategy.job-index }}
|
||||||
path: |
|
path: |
|
||||||
*.log
|
*.log
|
||||||
retention-days: 1
|
retention-days: 1
|
||||||
|
|
|
@ -16,6 +16,9 @@ inputs:
|
||||||
description: 'Whether to setup provider for vision tests'
|
description: 'Whether to setup provider for vision tests'
|
||||||
required: false
|
required: false
|
||||||
default: 'false'
|
default: 'false'
|
||||||
|
inference-mode:
|
||||||
|
description: 'Inference mode (record or replay)'
|
||||||
|
required: true
|
||||||
|
|
||||||
runs:
|
runs:
|
||||||
using: 'composite'
|
using: 'composite'
|
||||||
|
@ -27,13 +30,13 @@ runs:
|
||||||
client-version: ${{ inputs.client-version }}
|
client-version: ${{ inputs.client-version }}
|
||||||
|
|
||||||
- name: Setup ollama
|
- name: Setup ollama
|
||||||
if: ${{ inputs.provider == 'ollama' }}
|
if: ${{ inputs.provider == 'ollama' && inputs.inference-mode == 'record' }}
|
||||||
uses: ./.github/actions/setup-ollama
|
uses: ./.github/actions/setup-ollama
|
||||||
with:
|
with:
|
||||||
run-vision-tests: ${{ inputs.run-vision-tests }}
|
run-vision-tests: ${{ inputs.run-vision-tests }}
|
||||||
|
|
||||||
- name: Setup vllm
|
- name: Setup vllm
|
||||||
if: ${{ inputs.provider == 'vllm' }}
|
if: ${{ inputs.provider == 'vllm' && inputs.inference-mode == 'record' }}
|
||||||
uses: ./.github/actions/setup-vllm
|
uses: ./.github/actions/setup-vllm
|
||||||
|
|
||||||
- name: Build Llama Stack
|
- name: Build Llama Stack
|
||||||
|
|
3
.github/workflows/README.md
vendored
3
.github/workflows/README.md
vendored
|
@ -8,9 +8,8 @@ Llama Stack uses GitHub Actions for Continuous Integration (CI). Below is a tabl
|
||||||
| Installer CI | [install-script-ci.yml](install-script-ci.yml) | Test the installation script |
|
| Installer CI | [install-script-ci.yml](install-script-ci.yml) | Test the installation script |
|
||||||
| Integration Auth Tests | [integration-auth-tests.yml](integration-auth-tests.yml) | Run the integration test suite with Kubernetes authentication |
|
| Integration Auth Tests | [integration-auth-tests.yml](integration-auth-tests.yml) | Run the integration test suite with Kubernetes authentication |
|
||||||
| SqlStore Integration Tests | [integration-sql-store-tests.yml](integration-sql-store-tests.yml) | Run the integration test suite with SqlStore |
|
| SqlStore Integration Tests | [integration-sql-store-tests.yml](integration-sql-store-tests.yml) | Run the integration test suite with SqlStore |
|
||||||
| Integration Tests | [integration-tests.yml](integration-tests.yml) | Run the integration test suite from tests/integration |
|
| Integration Tests (Replay) | [integration-tests.yml](integration-tests.yml) | Run the integration test suite from tests/integration in replay mode |
|
||||||
| Vector IO Integration Tests | [integration-vector-io-tests.yml](integration-vector-io-tests.yml) | Run the integration test suite with various VectorIO providers |
|
| Vector IO Integration Tests | [integration-vector-io-tests.yml](integration-vector-io-tests.yml) | Run the integration test suite with various VectorIO providers |
|
||||||
| Vision Inference Integration Tests | [integration-vision-tests.yml](integration-vision-tests.yml) | Run vision inference integration test suite from tests/integration/inference |
|
|
||||||
| Pre-commit | [pre-commit.yml](pre-commit.yml) | Run pre-commit checks |
|
| Pre-commit | [pre-commit.yml](pre-commit.yml) | Run pre-commit checks |
|
||||||
| Test Llama Stack Build | [providers-build.yml](providers-build.yml) | Test llama stack build |
|
| Test Llama Stack Build | [providers-build.yml](providers-build.yml) | Test llama stack build |
|
||||||
| Python Package Build Test | [python-build-test.yml](python-build-test.yml) | Test building the llama-stack PyPI project |
|
| Python Package Build Test | [python-build-test.yml](python-build-test.yml) | Test building the llama-stack PyPI project |
|
||||||
|
|
86
.github/workflows/integration-tests.yml
vendored
86
.github/workflows/integration-tests.yml
vendored
|
@ -1,13 +1,13 @@
|
||||||
name: Integration Tests
|
name: Integration Tests (Replay)
|
||||||
|
|
||||||
run-name: Run the integration test suite from tests/integration
|
run-name: Run the integration test suite from tests/integration in replay mode
|
||||||
|
|
||||||
on:
|
on:
|
||||||
push:
|
push:
|
||||||
branches: [ main ]
|
branches: [ main ]
|
||||||
pull_request_target:
|
pull_request:
|
||||||
branches: [ main ]
|
branches: [ main ]
|
||||||
types: [opened, synchronize, labeled]
|
types: [opened, synchronize, reopened]
|
||||||
paths:
|
paths:
|
||||||
- 'llama_stack/**'
|
- 'llama_stack/**'
|
||||||
- 'tests/**'
|
- 'tests/**'
|
||||||
|
@ -31,35 +31,17 @@ on:
|
||||||
description: 'Test against a specific provider'
|
description: 'Test against a specific provider'
|
||||||
type: string
|
type: string
|
||||||
default: 'ollama'
|
default: 'ollama'
|
||||||
force-inference-mode:
|
|
||||||
description: 'Force inference mode (record or replay)'
|
|
||||||
type: string
|
|
||||||
default: ''
|
|
||||||
|
|
||||||
concurrency:
|
concurrency:
|
||||||
# Skip concurrency for pushes to main - each commit should be tested independently
|
# Skip concurrency for pushes to main - each commit should be tested independently
|
||||||
# For other events, create concurrency groups:
|
group: ${{ github.workflow }}-${{ github.ref == 'refs/heads/main' && github.run_id || github.ref }}
|
||||||
# ${{ github.workflow }}-${{ github.ref }}-rerecord (for labeled events with re-record-tests label)
|
|
||||||
# ${{ github.workflow }}-${{ github.ref }}-replay (for all non-labeled events)
|
|
||||||
# ${{ github.workflow }}-${{ github.ref }}-no-run (for labeled events without re-record-tests label)
|
|
||||||
# The "no-run" group ensures that irrelevant label events don't interfere with the real workflows.
|
|
||||||
group: >-
|
|
||||||
${{ github.workflow }}-${{ github.ref == 'refs/heads/main' && github.run_id || github.ref }}-${{
|
|
||||||
github.event.action == 'labeled' && (
|
|
||||||
contains(github.event.pull_request.labels.*.name, 're-record-tests') && 'rerecord' || 'no-run'
|
|
||||||
) || 'replay'
|
|
||||||
}}
|
|
||||||
cancel-in-progress: true
|
cancel-in-progress: true
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
discover-tests:
|
discover-tests:
|
||||||
if: |
|
|
||||||
github.event.action != 'labeled' ||
|
|
||||||
contains(github.event.pull_request.labels.*.name, 're-record-tests')
|
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
outputs:
|
outputs:
|
||||||
test-types: ${{ steps.generate-test-types.outputs.test-types }}
|
test-types: ${{ steps.generate-test-types.outputs.test-types }}
|
||||||
rerecord-tests: ${{ steps.check-rerecord-tests.outputs.rerecord-tests }}
|
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout repository
|
- name: Checkout repository
|
||||||
|
@ -69,61 +51,13 @@ jobs:
|
||||||
id: generate-test-types
|
id: generate-test-types
|
||||||
run: |
|
run: |
|
||||||
# Get test directories dynamically, excluding non-test directories
|
# Get test directories dynamically, excluding non-test directories
|
||||||
|
# NOTE: we are excluding post_training since the tests take too long
|
||||||
TEST_TYPES=$(find tests/integration -maxdepth 1 -mindepth 1 -type d -printf "%f\n" |
|
TEST_TYPES=$(find tests/integration -maxdepth 1 -mindepth 1 -type d -printf "%f\n" |
|
||||||
grep -Ev "^(__pycache__|fixtures|test_cases|recordings)$" |
|
grep -Ev "^(__pycache__|fixtures|test_cases|recordings|post_training)$" |
|
||||||
sort | jq -R -s -c 'split("\n")[:-1]')
|
sort | jq -R -s -c 'split("\n")[:-1]')
|
||||||
echo "test-types=$TEST_TYPES" >> $GITHUB_OUTPUT
|
echo "test-types=$TEST_TYPES" >> $GITHUB_OUTPUT
|
||||||
|
|
||||||
- name: Check if re-record-tests label exists
|
|
||||||
id: check-rerecord-tests
|
|
||||||
run: |
|
|
||||||
if [[ "${{ inputs.force-inference-mode }}" == "record" ]]; then
|
|
||||||
echo "rerecord-tests=true" >> $GITHUB_OUTPUT
|
|
||||||
elif [[ "${{ inputs.force-inference-mode }}" == "replay" ]]; then
|
|
||||||
echo "rerecord-tests=false" >> $GITHUB_OUTPUT
|
|
||||||
else
|
|
||||||
if [[ "${{ contains(github.event.pull_request.labels.*.name, 're-record-tests') }}" == "true" ]]; then
|
|
||||||
echo "rerecord-tests=true" >> $GITHUB_OUTPUT
|
|
||||||
else
|
|
||||||
echo "rerecord-tests=false" >> $GITHUB_OUTPUT
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
|
|
||||||
record-tests:
|
|
||||||
# Sequential job for recording to avoid SQLite conflicts
|
|
||||||
if: ${{ needs.discover-tests.outputs.rerecord-tests == 'true' }}
|
|
||||||
needs: discover-tests
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
|
|
||||||
permissions:
|
|
||||||
contents: write
|
|
||||||
pull-requests: write
|
|
||||||
|
|
||||||
steps:
|
|
||||||
- name: Checkout repository
|
|
||||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
|
||||||
with:
|
|
||||||
ref: ${{ github.event.pull_request.head.ref }}
|
|
||||||
fetch-depth: 0
|
|
||||||
|
|
||||||
- name: Setup test environment
|
|
||||||
uses: ./.github/actions/setup-test-environment
|
|
||||||
with:
|
|
||||||
python-version: "3.12" # Use single Python version for recording
|
|
||||||
client-version: "latest"
|
|
||||||
provider: ${{ inputs.test-provider || 'ollama' }}
|
|
||||||
|
|
||||||
- name: Run and record tests
|
|
||||||
uses: ./.github/actions/run-and-record-tests
|
|
||||||
with:
|
|
||||||
test-types: ${{ needs.discover-tests.outputs.test-types }}
|
|
||||||
stack-config: 'server:ci-tests' # recording must be done with server since more tests are run
|
|
||||||
provider: ${{ inputs.test-provider || 'ollama' }}
|
|
||||||
inference-mode: 'record'
|
|
||||||
|
|
||||||
run-replay-mode-tests:
|
run-replay-mode-tests:
|
||||||
# Skip this job if we're in recording mode (handled by record-tests job)
|
|
||||||
if: ${{ needs.discover-tests.outputs.rerecord-tests != 'true' }}
|
|
||||||
needs: discover-tests
|
needs: discover-tests
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
|
|
||||||
|
@ -135,6 +69,7 @@ jobs:
|
||||||
provider: ${{ (github.event.schedule == '1 0 * * 0') && fromJSON('["vllm"]') || fromJSON(format('["{0}"]', github.event.inputs.test-provider || 'ollama')) }}
|
provider: ${{ (github.event.schedule == '1 0 * * 0') && fromJSON('["vllm"]') || fromJSON(format('["{0}"]', github.event.inputs.test-provider || 'ollama')) }}
|
||||||
python-version: ["3.12", "3.13"]
|
python-version: ["3.12", "3.13"]
|
||||||
client-version: ${{ (github.event.schedule == '0 0 * * 0' || github.event.inputs.test-all-client-versions == 'true') && fromJSON('["published", "latest"]') || fromJSON('["latest"]') }}
|
client-version: ${{ (github.event.schedule == '0 0 * * 0' || github.event.inputs.test-all-client-versions == 'true') && fromJSON('["published", "latest"]') || fromJSON('["latest"]') }}
|
||||||
|
run-vision-tests: ['true', 'false']
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout repository
|
- name: Checkout repository
|
||||||
|
@ -146,11 +81,14 @@ jobs:
|
||||||
python-version: ${{ matrix.python-version }}
|
python-version: ${{ matrix.python-version }}
|
||||||
client-version: ${{ matrix.client-version }}
|
client-version: ${{ matrix.client-version }}
|
||||||
provider: ${{ matrix.provider }}
|
provider: ${{ matrix.provider }}
|
||||||
|
run-vision-tests: ${{ matrix.run-vision-tests }}
|
||||||
|
inference-mode: 'replay'
|
||||||
|
|
||||||
- name: Run and record tests
|
- name: Run tests
|
||||||
uses: ./.github/actions/run-and-record-tests
|
uses: ./.github/actions/run-and-record-tests
|
||||||
with:
|
with:
|
||||||
test-types: ${{ needs.discover-tests.outputs.test-types }}
|
test-types: ${{ needs.discover-tests.outputs.test-types }}
|
||||||
stack-config: ${{ matrix.client-type == 'library' && 'ci-tests' || 'server:ci-tests' }}
|
stack-config: ${{ matrix.client-type == 'library' && 'ci-tests' || 'server:ci-tests' }}
|
||||||
provider: ${{ matrix.provider }}
|
provider: ${{ matrix.provider }}
|
||||||
inference-mode: 'replay'
|
inference-mode: 'replay'
|
||||||
|
run-vision-tests: ${{ matrix.run-vision-tests }}
|
||||||
|
|
141
.github/workflows/integration-vision-tests.yml
vendored
141
.github/workflows/integration-vision-tests.yml
vendored
|
@ -1,141 +0,0 @@
|
||||||
name: Vision Inference Integration Tests
|
|
||||||
|
|
||||||
run-name: Run vision inference integration test suite from tests/integration/inference
|
|
||||||
|
|
||||||
on:
|
|
||||||
push:
|
|
||||||
branches: [ main ]
|
|
||||||
pull_request_target:
|
|
||||||
branches: [ main ]
|
|
||||||
types: [opened, synchronize, labeled]
|
|
||||||
paths:
|
|
||||||
- 'llama_stack/**'
|
|
||||||
- 'tests/**'
|
|
||||||
- 'uv.lock'
|
|
||||||
- 'pyproject.toml'
|
|
||||||
- '.github/workflows/integration-vision-tests.yml' # This workflow
|
|
||||||
- '.github/actions/setup-ollama/action.yml'
|
|
||||||
- '.github/actions/setup-test-environment/action.yml'
|
|
||||||
- '.github/actions/run-and-record-tests/action.yml'
|
|
||||||
workflow_dispatch:
|
|
||||||
inputs:
|
|
||||||
test-all-client-versions:
|
|
||||||
description: 'Test against both the latest and published versions'
|
|
||||||
type: boolean
|
|
||||||
default: false
|
|
||||||
force-inference-mode:
|
|
||||||
description: 'Force inference mode (record or replay)'
|
|
||||||
type: string
|
|
||||||
default: ''
|
|
||||||
|
|
||||||
concurrency:
|
|
||||||
# Skip concurrency for pushes to main - each commit should be tested independently
|
|
||||||
# For other events, create concurrency groups:
|
|
||||||
# ${{ github.workflow }}-${{ github.ref }}-rerecord (for labeled events with re-record-tests label)
|
|
||||||
# ${{ github.workflow }}-${{ github.ref }}-replay (for all non-labeled events)
|
|
||||||
# ${{ github.workflow }}-${{ github.ref }}-no-run (for labeled events without re-record-tests label)
|
|
||||||
# The "no-run" group ensures that irrelevant label events don't interfere with the real workflows.
|
|
||||||
group: >-
|
|
||||||
${{ github.workflow }}-${{ github.ref == 'refs/heads/main' && github.run_id || github.ref }}-${{
|
|
||||||
github.event.action == 'labeled' && (
|
|
||||||
contains(github.event.pull_request.labels.*.name, 're-record-tests') && 'rerecord' || 'no-run'
|
|
||||||
) || 'replay'
|
|
||||||
}}
|
|
||||||
cancel-in-progress: true
|
|
||||||
|
|
||||||
jobs:
|
|
||||||
discover-tests:
|
|
||||||
if: |
|
|
||||||
github.event.action != 'labeled' ||
|
|
||||||
contains(github.event.pull_request.labels.*.name, 're-record-tests')
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
outputs:
|
|
||||||
rerecord-tests: ${{ steps.check-rerecord-tests.outputs.rerecord-tests }}
|
|
||||||
|
|
||||||
steps:
|
|
||||||
- name: Checkout repository
|
|
||||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
|
||||||
|
|
||||||
- name: Check if re-record-tests label exists
|
|
||||||
id: check-rerecord-tests
|
|
||||||
run: |
|
|
||||||
if [[ "${{ inputs.force-inference-mode }}" == "record" ]]; then
|
|
||||||
echo "rerecord-tests=true" >> $GITHUB_OUTPUT
|
|
||||||
elif [[ "${{ inputs.force-inference-mode }}" == "replay" ]]; then
|
|
||||||
echo "rerecord-tests=false" >> $GITHUB_OUTPUT
|
|
||||||
else
|
|
||||||
if [[ "${{ contains(github.event.pull_request.labels.*.name, 're-record-tests') }}" == "true" ]]; then
|
|
||||||
echo "rerecord-tests=true" >> $GITHUB_OUTPUT
|
|
||||||
else
|
|
||||||
echo "rerecord-tests=false" >> $GITHUB_OUTPUT
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
|
|
||||||
record-tests:
|
|
||||||
# Sequential job for recording to avoid SQLite conflicts
|
|
||||||
if: ${{ needs.discover-tests.outputs.rerecord-tests == 'true' }}
|
|
||||||
needs: discover-tests
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
|
|
||||||
permissions:
|
|
||||||
contents: write
|
|
||||||
pull-requests: write
|
|
||||||
|
|
||||||
steps:
|
|
||||||
- name: Checkout repository
|
|
||||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
|
||||||
with:
|
|
||||||
ref: ${{ github.event.pull_request.head.ref }}
|
|
||||||
fetch-depth: 0
|
|
||||||
|
|
||||||
- name: Setup test environment
|
|
||||||
uses: ./.github/actions/setup-test-environment
|
|
||||||
with:
|
|
||||||
python-version: "3.12" # Use single Python version for recording
|
|
||||||
client-version: "latest"
|
|
||||||
provider: 'ollama'
|
|
||||||
run-vision-tests: 'true'
|
|
||||||
|
|
||||||
- name: Run and record tests
|
|
||||||
uses: ./.github/actions/run-and-record-tests
|
|
||||||
with:
|
|
||||||
test-types: '["vision"]'
|
|
||||||
stack-config: 'server:ci-tests' # re-recording must be done in server mode
|
|
||||||
provider: 'ollama'
|
|
||||||
inference-mode: 'record'
|
|
||||||
run-vision-tests: 'true'
|
|
||||||
|
|
||||||
run-replay-mode-tests:
|
|
||||||
# Skip this job if we're in recording mode (handled by record-tests job)
|
|
||||||
if: ${{ needs.discover-tests.outputs.rerecord-tests != 'true' }}
|
|
||||||
needs: discover-tests
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
|
|
||||||
strategy:
|
|
||||||
fail-fast: false
|
|
||||||
matrix:
|
|
||||||
client-type: [library]
|
|
||||||
provider: [ollama]
|
|
||||||
python-version: ["3.12"]
|
|
||||||
client-version: ["latest"]
|
|
||||||
|
|
||||||
steps:
|
|
||||||
- name: Checkout repository
|
|
||||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
|
||||||
|
|
||||||
- name: Setup test environment
|
|
||||||
uses: ./.github/actions/setup-test-environment
|
|
||||||
with:
|
|
||||||
python-version: ${{ matrix.python-version }}
|
|
||||||
client-version: ${{ matrix.client-version }}
|
|
||||||
provider: ${{ matrix.provider }}
|
|
||||||
run-vision-tests: 'true'
|
|
||||||
|
|
||||||
- name: Run and record tests
|
|
||||||
uses: ./.github/actions/run-and-record-tests
|
|
||||||
with:
|
|
||||||
test-types: '["vision"]'
|
|
||||||
stack-config: ${{ matrix.client-type == 'library' && 'ci-tests' || 'server:ci-tests' }}
|
|
||||||
provider: ${{ matrix.provider }}
|
|
||||||
inference-mode: 'replay'
|
|
||||||
run-vision-tests: 'true'
|
|
Loading…
Add table
Add a link
Reference in a new issue