feat(tests): migrate to global "setups" system for test configuration (#3390)

This PR refactors the integration test system to use global "setups"
which provides better separation of concerns:

**suites = what to test, setups = how to configure.**

NOTE: if you naming suggestions, please provide feedback

Changes:
- New `tests/integration/setups.py` with global, reusable configurations
(ollama, vllm, gpt, claude)
- Modified `scripts/integration-tests.sh` options to match with the
underlying pytest options
    - Updated documentation to reflect the new global setup system

The main benefit is that setups can be reused across multiple suites
(e.g., use "gpt" with any suite) even though sometimes they could
specifically tailored for a suite (vision <> ollama-vision). It is now
easier to add new configurations without modifying existing suites.

Usage examples:
    - `pytest tests/integration --suite=responses --setup=gpt`
- `pytest tests/integration --suite=vision` # auto-selects
"ollama-vision" setup
    - `pytest tests/integration --suite=base --setup=vllm`
This commit is contained in:
Ashwin Bharambe 2025-09-09 15:50:56 -07:00 committed by GitHub
parent 28696c3f30
commit a8aa815b6a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
11 changed files with 385 additions and 206 deletions

View file

@ -5,21 +5,22 @@ inputs:
stack-config:
description: 'Stack configuration to use'
required: true
provider:
description: 'Provider to use for tests'
required: true
setup:
description: 'Setup to use for tests (e.g., ollama, gpt, vllm)'
required: false
default: ''
inference-mode:
description: 'Inference mode (record or replay)'
required: true
test-suite:
suite:
description: 'Test suite to use: base, responses, vision, etc.'
required: false
default: ''
test-subdirs:
description: 'Comma-separated list of test subdirectories to run; overrides test-suite'
subdirs:
description: 'Comma-separated list of test subdirectories to run; overrides suite'
required: false
default: ''
test-pattern:
pattern:
description: 'Regex pattern to pass to pytest -k'
required: false
default: ''
@ -37,14 +38,23 @@ runs:
- name: Run Integration Tests
shell: bash
run: |
uv run --no-sync ./scripts/integration-tests.sh \
--stack-config '${{ inputs.stack-config }}' \
--provider '${{ inputs.provider }}' \
--test-subdirs '${{ inputs.test-subdirs }}' \
--test-pattern '${{ inputs.test-pattern }}' \
--inference-mode '${{ inputs.inference-mode }}' \
--test-suite '${{ inputs.test-suite }}' \
| tee pytest-${{ inputs.inference-mode }}.log
SCRIPT_ARGS="--stack-config ${{ inputs.stack-config }} --inference-mode ${{ inputs.inference-mode }}"
# Add optional arguments only if they are provided
if [ -n '${{ inputs.setup }}' ]; then
SCRIPT_ARGS="$SCRIPT_ARGS --setup ${{ inputs.setup }}"
fi
if [ -n '${{ inputs.suite }}' ]; then
SCRIPT_ARGS="$SCRIPT_ARGS --suite ${{ inputs.suite }}"
fi
if [ -n '${{ inputs.subdirs }}' ]; then
SCRIPT_ARGS="$SCRIPT_ARGS --subdirs ${{ inputs.subdirs }}"
fi
if [ -n '${{ inputs.pattern }}' ]; then
SCRIPT_ARGS="$SCRIPT_ARGS --pattern ${{ inputs.pattern }}"
fi
uv run --no-sync ./scripts/integration-tests.sh $SCRIPT_ARGS | tee pytest-${{ inputs.inference-mode }}.log
- name: Commit and push recordings
@ -58,7 +68,7 @@ runs:
echo "New recordings detected, committing and pushing"
git add tests/integration/recordings/
git commit -m "Recordings update from CI (test-suite: ${{ inputs.test-suite }})"
git commit -m "Recordings update from CI (suite: ${{ inputs.suite }})"
git fetch origin ${{ github.ref_name }}
git rebase origin/${{ github.ref_name }}
echo "Rebased successfully"

View file

@ -1,7 +1,7 @@
name: Setup Ollama
description: Start Ollama
inputs:
test-suite:
suite:
description: 'Test suite to use: base, responses, vision, etc.'
required: false
default: ''
@ -11,7 +11,7 @@ runs:
- name: Start Ollama
shell: bash
run: |
if [ "${{ inputs.test-suite }}" == "vision" ]; then
if [ "${{ inputs.suite }}" == "vision" ]; then
image="ollama-with-vision-model"
else
image="ollama-with-models"

View file

@ -8,11 +8,11 @@ inputs:
client-version:
description: 'Client version (latest or published)'
required: true
provider:
description: 'Provider to setup (ollama or vllm)'
required: true
setup:
description: 'Setup to configure (ollama, vllm, gpt, etc.)'
required: false
default: 'ollama'
test-suite:
suite:
description: 'Test suite to use: base, responses, vision, etc.'
required: false
default: ''
@ -30,13 +30,13 @@ runs:
client-version: ${{ inputs.client-version }}
- name: Setup ollama
if: ${{ inputs.provider == 'ollama' && inputs.inference-mode == 'record' }}
if: ${{ (inputs.setup == 'ollama' || inputs.setup == 'ollama-vision') && inputs.inference-mode == 'record' }}
uses: ./.github/actions/setup-ollama
with:
test-suite: ${{ inputs.test-suite }}
suite: ${{ inputs.suite }}
- name: Setup vllm
if: ${{ inputs.provider == 'vllm' && inputs.inference-mode == 'record' }}
if: ${{ inputs.setup == 'vllm' && inputs.inference-mode == 'record' }}
uses: ./.github/actions/setup-vllm
- name: Build Llama Stack

View file

@ -28,8 +28,8 @@ on:
description: 'Test against both the latest and published versions'
type: boolean
default: false
test-provider:
description: 'Test against a specific provider'
test-setup:
description: 'Test against a specific setup'
type: string
default: 'ollama'
@ -42,18 +42,18 @@ jobs:
run-replay-mode-tests:
runs-on: ubuntu-latest
name: ${{ format('Integration Tests ({0}, {1}, {2}, client={3}, {4})', matrix.client-type, matrix.provider, matrix.python-version, matrix.client-version, matrix.test-suite) }}
name: ${{ format('Integration Tests ({0}, {1}, {2}, client={3}, {4})', matrix.client-type, matrix.setup, matrix.python-version, matrix.client-version, matrix.suite) }}
strategy:
fail-fast: false
matrix:
client-type: [library, server]
# Use vllm on weekly schedule, otherwise use test-provider input (defaults to ollama)
provider: ${{ (github.event.schedule == '1 0 * * 0') && fromJSON('["vllm"]') || fromJSON(format('["{0}"]', github.event.inputs.test-provider || 'ollama')) }}
# Use vllm on weekly schedule, otherwise use test-setup input (defaults to ollama)
setup: ${{ (github.event.schedule == '1 0 * * 0') && fromJSON('["vllm"]') || fromJSON(format('["{0}"]', github.event.inputs.test-setup || 'ollama')) }}
# Use Python 3.13 only on nightly schedule (daily latest client test), otherwise use 3.12
python-version: ${{ github.event.schedule == '0 0 * * *' && fromJSON('["3.12", "3.13"]') || fromJSON('["3.12"]') }}
client-version: ${{ (github.event.schedule == '0 0 * * *' || github.event.inputs.test-all-client-versions == 'true') && fromJSON('["published", "latest"]') || fromJSON('["latest"]') }}
test-suite: [base, vision]
suite: [base, vision]
steps:
- name: Checkout repository
@ -64,14 +64,14 @@ jobs:
with:
python-version: ${{ matrix.python-version }}
client-version: ${{ matrix.client-version }}
provider: ${{ matrix.provider }}
test-suite: ${{ matrix.test-suite }}
setup: ${{ matrix.setup }}
suite: ${{ matrix.suite }}
inference-mode: 'replay'
- name: Run tests
uses: ./.github/actions/run-and-record-tests
with:
stack-config: ${{ matrix.client-type == 'library' && 'ci-tests' || 'server:ci-tests' }}
provider: ${{ matrix.provider }}
setup: ${{ matrix.setup }}
inference-mode: 'replay'
test-suite: ${{ matrix.test-suite }}
suite: ${{ matrix.suite }}

View file

@ -10,19 +10,19 @@ run-name: Run the integration test suite from tests/integration
on:
workflow_dispatch:
inputs:
test-provider:
description: 'Test against a specific provider'
test-setup:
description: 'Test against a specific setup'
type: string
default: 'ollama'
test-suite:
suite:
description: 'Test suite to use: base, responses, vision, etc.'
type: string
default: ''
test-subdirs:
description: 'Comma-separated list of test subdirectories to run; overrides test-suite'
subdirs:
description: 'Comma-separated list of test subdirectories to run; overrides suite'
type: string
default: ''
test-pattern:
pattern:
description: 'Regex pattern to pass to pytest -k'
type: string
default: ''
@ -39,10 +39,10 @@ jobs:
run: |
echo "::group::Workflow Inputs"
echo "branch: ${{ github.ref_name }}"
echo "test-provider: ${{ inputs.test-provider }}"
echo "test-suite: ${{ inputs.test-suite }}"
echo "test-subdirs: ${{ inputs.test-subdirs }}"
echo "test-pattern: ${{ inputs.test-pattern }}"
echo "test-setup: ${{ inputs.test-setup }}"
echo "suite: ${{ inputs.suite }}"
echo "subdirs: ${{ inputs.subdirs }}"
echo "pattern: ${{ inputs.pattern }}"
echo "::endgroup::"
- name: Checkout repository
@ -55,16 +55,16 @@ jobs:
with:
python-version: "3.12" # Use single Python version for recording
client-version: "latest"
provider: ${{ inputs.test-provider || 'ollama' }}
test-suite: ${{ inputs.test-suite }}
setup: ${{ inputs.test-setup || 'ollama' }}
suite: ${{ inputs.suite }}
inference-mode: 'record'
- name: Run and record tests
uses: ./.github/actions/run-and-record-tests
with:
stack-config: 'server:ci-tests' # recording must be done with server since more tests are run
provider: ${{ inputs.test-provider || 'ollama' }}
setup: ${{ inputs.test-setup || 'ollama' }}
inference-mode: 'record'
test-suite: ${{ inputs.test-suite }}
test-subdirs: ${{ inputs.test-subdirs }}
test-pattern: ${{ inputs.test-pattern }}
suite: ${{ inputs.suite }}
subdirs: ${{ inputs.subdirs }}
pattern: ${{ inputs.pattern }}