From 0b08d64ddbb4b4a91da0cd5f1a63e69085028d22 Mon Sep 17 00:00:00 2001
From: Ashwin Bharambe
Date: Thu, 31 Jul 2025 17:30:47 -0700
Subject: [PATCH] feat(ci): introduce workflow for re-recording inference outputs (#3002)

---
 .github/workflows/README.md                   |   1 +
 .../workflows/record-integration-tests.yml    | 115 ++++++++++
 .../recordings/vision/index.sqlite            | Bin 12288 -> 12288 bytes
 .../vision/responses/4a3a4447b16b.json        |   2 +-
 .../vision/responses/f1592dee71e5.json        |  10 +-
 .../vision/responses/ff7db0102b28.json        | 192 +++++++++---------
 6 files changed, 218 insertions(+), 102 deletions(-)
 create mode 100644 .github/workflows/record-integration-tests.yml

diff --git a/.github/workflows/README.md b/.github/workflows/README.md
index ef591a51d..3c3d93dc2 100644
--- a/.github/workflows/README.md
+++ b/.github/workflows/README.md
@@ -13,6 +13,7 @@ Llama Stack uses GitHub Actions for Continuous Integration (CI). Below is a tabl
 | Pre-commit | [pre-commit.yml](pre-commit.yml) | Run pre-commit checks |
 | Test Llama Stack Build | [providers-build.yml](providers-build.yml) | Test llama stack build |
 | Python Package Build Test | [python-build-test.yml](python-build-test.yml) | Test building the llama-stack PyPI project |
+| Integration Tests (Record) | [record-integration-tests.yml](record-integration-tests.yml) | Run the integration test suite from tests/integration |
 | Check semantic PR titles | [semantic-pr.yml](semantic-pr.yml) | Ensure that PR titles follow the conventional commit spec |
 | Close stale issues and PRs | [stale_bot.yml](stale_bot.yml) | Run the Stale Bot action |
 | Test External Providers Installed via Module | [test-external-provider-module.yml](test-external-provider-module.yml) | Test External Provider installation via Python module |
diff --git a/.github/workflows/record-integration-tests.yml b/.github/workflows/record-integration-tests.yml
new file mode 100644
index 000000000..12957db27
--- /dev/null
+++ b/.github/workflows/record-integration-tests.yml
@@ -0,0 +1,115 @@
+name: Integration Tests (Record)
+
+run-name: Run the integration test suite from tests/integration
+
+on:
+  pull_request:
+    branches: [ main ]
+    types: [opened, synchronize, labeled]
+    paths:
+      - 'llama_stack/**'
+      - 'tests/**'
+      - 'uv.lock'
+      - 'pyproject.toml'
+      - '.github/workflows/record-integration-tests.yml' # This workflow
+      - '.github/actions/setup-ollama/action.yml'
+      - '.github/actions/setup-test-environment/action.yml'
+      - '.github/actions/run-and-record-tests/action.yml'
+  # NOTE(review): discover-tests is gated on PR labels, so a manual dispatch
+  # currently skips both jobs; confirm whether dispatch should record too.
+  workflow_dispatch:
+    inputs:
+      test-provider:
+        description: 'Test against a specific provider'
+        type: string
+        default: 'ollama'
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  discover-tests:
+    # Run only for PRs that carry one of the re-record labels.
+    if: contains(github.event.pull_request.labels.*.name, 're-record-tests') ||
+      contains(github.event.pull_request.labels.*.name, 're-record-vision-tests')
+    runs-on: ubuntu-latest
+    outputs:
+      test-types: ${{ steps.generate-test-types.outputs.test-types }}
+      matrix-modes: ${{ steps.generate-test-types.outputs.matrix-modes }}
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+
+      - name: Generate test types
+        id: generate-test-types
+        run: |
+          # Get test directories dynamically, excluding non-test directories
+          TEST_TYPES=$(find tests/integration -maxdepth 1 -mindepth 1 -type d -printf "%f\n" |
+            grep -Ev "^(__pycache__|fixtures|test_cases|recordings|post_training)$" |
+            sort | jq -R -s -c 'split("\n")[:-1]')
+          echo "test-types=$TEST_TYPES" >> "$GITHUB_OUTPUT"
+
+          # Fetch the PR's label names (one per line) via the GitHub CLI
+          labels=$(gh pr view ${{ github.event.pull_request.number }} --json labels --jq '.labels[].name')
+          echo "labels=$labels"
+
+          # Each re-record label selects one recording mode for the job matrix
+          modes_array=()
+          if [[ $labels == *"re-record-vision-tests"* ]]; then
+            modes_array+=("vision")
+          fi
+          if [[ $labels == *"re-record-tests"* ]]; then
+            modes_array+=("non-vision")
+          fi
+
+          # Convert to JSON array
+          if [ ${#modes_array[@]} -eq 0 ]; then
+            matrix_modes="[]"
+          else
+            matrix_modes=$(printf '%s\n' "${modes_array[@]}" | jq -R -s -c 'split("\n")[:-1]')
+          fi
+          echo "matrix_modes=$matrix_modes"
+          echo "matrix-modes=$matrix_modes" >> "$GITHUB_OUTPUT"
+
+        env:
+          GH_TOKEN: ${{ github.token }}
+
+  record-tests:
+    needs: discover-tests
+    runs-on: ubuntu-latest
+
+    # NOTE(review): presumably required to push new recordings back - confirm
+    permissions:
+      contents: write
+
+    strategy:
+      fail-fast: false
+      matrix:
+        mode: ${{ fromJSON(needs.discover-tests.outputs.matrix-modes) }}
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+        with:
+          ref: ${{ github.event.pull_request.head.ref }}
+          fetch-depth: 0
+
+      - name: Setup test environment
+        uses: ./.github/actions/setup-test-environment
+        with:
+          python-version: "3.12" # Use single Python version for recording
+          client-version: "latest"
+          provider: ${{ inputs.test-provider || 'ollama' }}
+          run-vision-tests: ${{ matrix.mode == 'vision' && 'true' || 'false' }}
+          inference-mode: 'record'
+
+      - name: Run and record tests
+        uses: ./.github/actions/run-and-record-tests
+        with:
+          test-types: ${{ needs.discover-tests.outputs.test-types }}
+          stack-config: 'server:ci-tests' # recording must be done with server since more tests are run
+          provider: ${{ inputs.test-provider || 'ollama' }}
+          inference-mode: 'record'
+          run-vision-tests: ${{ matrix.mode == 'vision' && 'true' || 'false' }}
diff --git a/tests/integration/recordings/vision/index.sqlite b/tests/integration/recordings/vision/index.sqlite
index a2df92cfbe913ed93e0316b8598c48e91699b8c2..6ff587c4321e7260f5a0b22893014468fe04a0ae 100644
GIT binary patch
delta 169
zcmZojXh@hK%_u)n#+gxmV}hnEqwM4^*=hj`T?0b}0|P4qQ!690Mh>~jqH@*za6vOG
zL!(mh$@ArMxr~ghOiis!3`!*@i^(In#Zr@d<+G)rvc_Bt3=FJ%ix~Jz_;>N^@x9<%
yv{_NXhHqj+$mCc(S2kH!mPTI2$+B_|jB=A->bWvXP1e_UWfO-8%gH-1N&*1Uw