# llama-stack-mirror/.github/workflows/integration-tests.yml

# Runs the integration test suites in replay inference mode across a
# generated matrix of clients, Python versions, client versions and suites.
name: Integration Tests (Replay)

# Title displayed for each individual run of this workflow.
run-name: Run the integration test suites from tests/integration in replay mode

on:
  # Commits landing on main or on a release branch.
  push:
    branches:
      - main
      - 'release-[0-9]+.[0-9]+.x'
  # Pull requests targeting main or a release branch, but only when at least
  # one of the listed paths is touched.
  pull_request:
    branches:
      - main
      - 'release-[0-9]+.[0-9]+.x'
    types:
      - opened
      - synchronize
      - reopened
    paths:
      - 'src/llama_stack/**'
      - '!src/llama_stack_ui/**'
      - 'tests/**'
      - 'uv.lock'
      - 'pyproject.toml'
      - '.github/workflows/integration-tests.yml' # This workflow
      - '.github/actions/setup-ollama/action.yml'
      - '.github/actions/setup-test-environment/action.yml'
      - '.github/actions/run-and-record-tests/action.yml'
      - 'scripts/integration-tests.sh'
      - 'scripts/generate_ci_matrix.py'
  schedule:
    # The cron string is compared verbatim in the run-replay-mode-tests matrix
    # expressions and is passed to scripts/generate_ci_matrix.py via --schedule.
    # If you change it here, update those places as well.
    - cron: '0 0 * * *'  # (test latest client) Daily at 12 AM UTC
  # Manual runs, with optional overrides for client versions and test setup.
  workflow_dispatch:
    inputs:
      test-all-client-versions:
        description: 'Test against both the latest and published versions'
        type: boolean
        default: false
      test-setup:
        description: 'Test against a specific setup'
        type: string
        default: 'ollama'

concurrency:
  # Skip concurrency for pushes to main - each commit should be tested independently.
  # When the ref is refs/heads/main the group is keyed by the run id, so every
  # run gets its own group and is never cancelled by a later one. For any other
  # ref the group is keyed by the ref itself, so a newer run on the same ref
  # cancels the one still in progress.
  group: ${{ github.workflow }}-${{ github.ref == 'refs/heads/main' && github.run_id || github.ref }}
  cancel-in-progress: true

jobs:
  # Produces the JSON test matrix consumed by the run-replay-mode-tests job.
  generate-matrix:
    runs-on: ubuntu-latest
    outputs:
      matrix: ${{ steps.set-matrix.outputs.matrix }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0

      - name: Generate test matrix
        id: set-matrix
        env:
          # Event data is handed to the script through environment variables
          # rather than being expanded into the script text, so a crafted
          # workflow_dispatch input cannot inject shell commands.
          MATRIX_SCHEDULE: ${{ github.event.schedule }}
          MATRIX_TEST_SETUP: ${{ github.event.inputs.test-setup }}
        run: |
          # Generate matrix from CI_MATRIX in tests/integration/suites.py
          # Supports schedule-based and manual input overrides
          MATRIX=$(PYTHONPATH=. python3 scripts/generate_ci_matrix.py \
            --schedule "$MATRIX_SCHEDULE" \
            --test-setup "$MATRIX_TEST_SETUP")
          echo "matrix=$MATRIX" >> "$GITHUB_OUTPUT"
          echo "Generated matrix: $MATRIX"

  # Runs every generated test configuration in replay mode, once per
  # client / Python version / client version combination.
  run-replay-mode-tests:
    needs: generate-matrix
    runs-on: ubuntu-latest
    name: ${{ format('Integration Tests ({0}, {1}, {2}, client={3}, {4})', matrix.client, matrix.config.setup, matrix.python-version, matrix.client-version, matrix.config.suite) }}

    strategy:
      fail-fast: false
      matrix:
        client: [library, docker, server]
        # On the nightly schedule test both Python 3.12 and 3.13; otherwise test 3.12 only
        python-version: ${{ github.event.schedule == '0 0 * * *' && fromJSON('["3.12", "3.13"]') || fromJSON('["3.12"]') }}
        # On the nightly schedule, or when requested via workflow_dispatch, test both the
        # published and latest client; otherwise test latest only
        client-version: ${{ (github.event.schedule == '0 0 * * *' || github.event.inputs.test-all-client-versions == 'true') && fromJSON('["published", "latest"]') || fromJSON('["latest"]') }}
        # Test configurations: Generated from CI_MATRIX in tests/integration/suites.py
        # See scripts/generate_ci_matrix.py for generation logic
        config: ${{ fromJSON(needs.generate-matrix.outputs.matrix).include }}

    # Every step after checkout is guarded by the same allowed_clients check:
    # a configuration that lists allowed_clients only runs for those clients,
    # and a configuration without the key runs for all of them.
    steps:
      - name: Checkout repository
        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0

      - name: Setup test environment
        if: ${{ matrix.config.allowed_clients == null || contains(matrix.config.allowed_clients, matrix.client) }}
        uses: ./.github/actions/setup-test-environment
        with:
          python-version: ${{ matrix.python-version }}
          client-version: ${{ matrix.client-version }}
          setup: ${{ matrix.config.setup }}
          suite: ${{ matrix.config.suite }}
          inference-mode: 'replay'

      - name: Setup Node.js for TypeScript client tests
        # Only needed for the server client, and only when this configuration
        # actually runs for it (same guard as the surrounding steps).
        if: ${{ matrix.client == 'server' && (matrix.config.allowed_clients == null || contains(matrix.config.allowed_clients, matrix.client)) }}
        # NOTE(review): referenced by tag, unlike the SHA-pinned checkout
        # action above - consider pinning to a commit SHA for consistency.
        uses: actions/setup-node@v4
        with:
          node-version: '20'
          cache: 'npm'
          cache-dependency-path: tests/integration/client-typescript/package-lock.json

      - name: Run tests
        if: ${{ matrix.config.allowed_clients == null || contains(matrix.config.allowed_clients, matrix.client) }}
        uses: ./.github/actions/run-and-record-tests
        env:
          OPENAI_API_KEY: dummy
          # '1' for the server client, '0' for every other client
          RUN_CLIENT_TS_TESTS: ${{ matrix.client == 'server' && '1' || '0' }}
        with:
          # An explicit stack_config from the matrix entry wins; otherwise the
          # value is derived from the client type, falling back to docker.
          stack-config: >-
            ${{ matrix.config.stack_config
                || (matrix.client == 'library' && 'ci-tests')
                || (matrix.client == 'server' && 'server:ci-tests')
                || 'docker:ci-tests' }}
          setup: ${{ matrix.config.setup }}
          inference-mode: 'replay'
          suite: ${{ matrix.config.suite }}