llama-stack-mirror/.github/workflows/integration-tests.yml
Ashwin Bharambe 045a0c1d57
feat(tests): implement test isolation for inference recordings (#3681)
Uses test_id in request hashes and test-scoped subdirectories to prevent
cross-test contamination. Model list endpoints exclude test_id to enable
merging recordings from different servers.

Additionally, this PR adds a very useful `record-if-missing` mode, which we
will use instead of `record` (which records everything).

🤖 Co-authored with [Claude Code](https://claude.com/claude-code)

---------

Co-authored-by: Claude <noreply@anthropic.com>
2025-10-04 11:34:18 -07:00

86 lines
3.5 KiB
YAML

name: Integration Tests (Replay)

run-name: Run the integration test suites from tests/integration in replay mode

# Triggers: pushes to main, pull requests against main that touch the listed
# paths, two cron schedules, and manual dispatch.
on:
  push:
    branches: [main]
  pull_request:
    branches: [main]
    types: [opened, synchronize, reopened]
    paths:
      - 'llama_stack/**'
      - '!llama_stack/ui/**'
      - 'tests/**'
      - 'uv.lock'
      - 'pyproject.toml'
      - '.github/workflows/integration-tests.yml'  # This workflow
      - '.github/actions/setup-ollama/action.yml'
      - '.github/actions/setup-test-environment/action.yml'
      - '.github/actions/run-and-record-tests/action.yml'
  schedule:
    # The job matrix compares github.event.schedule against these exact cron
    # strings. If you change a schedule here, update the matching comparison
    # in the run-replay-mode-tests matrix as well.
    - cron: '0 0 * * *'  # (test latest client) Daily at 00:00 UTC
    - cron: '1 0 * * 0'  # (test vllm) Weekly on Sunday at 00:01 UTC
  workflow_dispatch:
    inputs:
      test-all-client-versions:
        description: 'Test against both the latest and published versions'
        type: boolean
        default: false
      test-setup:
        description: 'Test against a specific setup'
        type: string
        default: 'ollama'
concurrency:
  # Runs on refs/heads/main get a group keyed by run_id, which is unique per
  # run, so they are never cancelled by a newer run - each commit is tested
  # independently. Runs on any other ref share one group per ref, so with
  # cancel-in-progress a newer run supersedes the older one.
  group: ${{ github.workflow }}-${{ github.ref == 'refs/heads/main' && github.run_id || github.ref }}
  cancel-in-progress: true
jobs:
  # Runs the integration suites in replay mode, once per matrix combination of
  # client type, Python version, client version and (setup, suite) pair.
  run-replay-mode-tests:
    runs-on: ubuntu-latest
    name: ${{ format('Integration Tests ({0}, {1}, {2}, client={3}, {4})', matrix.client-type, matrix.config.setup, matrix.python-version, matrix.client-version, matrix.config.suite) }}

    strategy:
      fail-fast: false
      matrix:
        # The expressions below use `cond && a || b` as a conditional: the
        # fromJSON(...) arrays are truthy, so `a` is picked when cond holds.
        client-type: [library, server]
        # Use Python 3.13 only on nightly schedule (daily latest client test), otherwise use 3.12
        python-version: ${{ github.event.schedule == '0 0 * * *' && fromJSON('["3.12", "3.13"]') || fromJSON('["3.12"]') }}
        # Test the published client too on the daily schedule or when requested via workflow_dispatch
        client-version: ${{ (github.event.schedule == '0 0 * * *' || github.event.inputs.test-all-client-versions == 'true') && fromJSON('["published", "latest"]') || fromJSON('["latest"]') }}
        # Define (setup, suite) pairs - they are always matched and cannot be independent
        # Weekly schedule ('1 0 * * 0', Sunday 00:01 UTC): vllm+base
        # Input test-setup=ollama-vision: ollama-vision+vision
        # Default (every other case, including test-setup=ollama): both ollama+base and ollama-vision+vision
        config: >-
          ${{
          github.event.schedule == '1 0 * * 0'
          && fromJSON('[{"setup": "vllm", "suite": "base"}]')
          || github.event.inputs.test-setup == 'ollama-vision'
          && fromJSON('[{"setup": "ollama-vision", "suite": "vision"}]')
          || fromJSON('[{"setup": "ollama", "suite": "base"}, {"setup": "ollama-vision", "suite": "vision"}]')
          }}

    steps:
      - name: Checkout repository
        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8  # v5.0.0

      - name: Setup test environment
        uses: ./.github/actions/setup-test-environment
        with:
          python-version: ${{ matrix.python-version }}
          client-version: ${{ matrix.client-version }}
          setup: ${{ matrix.config.setup }}
          suite: ${{ matrix.config.suite }}
          inference-mode: 'replay'

      - name: Run tests
        uses: ./.github/actions/run-and-record-tests
        with:
          # 'ci-tests' for the library client type, 'server:ci-tests' otherwise
          stack-config: ${{ matrix.client-type == 'library' && 'ci-tests' || 'server:ci-tests' }}
          setup: ${{ matrix.config.setup }}
          inference-mode: 'replay'
          suite: ${{ matrix.config.suite }}