From bce479ee47222465b1aa979e07c5691b17007695 Mon Sep 17 00:00:00 2001
From: Ashwin Bharambe <ashwin.bharambe@gmail.com>
Date: Fri, 5 Sep 2025 13:40:52 -0700
Subject: [PATCH] ci: tidy integration-test inputs and document suites vs. subdirs

---
 .github/actions/run-and-record-tests/action.yml | 15 ++++++++-------
 .github/workflows/integration-tests.yml         | 12 +-----------
 .github/workflows/record-integration-tests.yml  | 16 ++++++++--------
 scripts/integration-tests.sh                    |  4 ++++
 tests/integration/README.md                     |  4 ++--
 5 files changed, 23 insertions(+), 28 deletions(-)

diff --git a/.github/actions/run-and-record-tests/action.yml b/.github/actions/run-and-record-tests/action.yml
index 9e381d8b9..7f028b104 100644
--- a/.github/actions/run-and-record-tests/action.yml
+++ b/.github/actions/run-and-record-tests/action.yml
@@ -2,13 +2,6 @@ name: 'Run and Record Tests'
 description: 'Run integration tests and handle recording/artifact upload'
 
 inputs:
-  test-subdirs:
-    description: 'Comma-separated list of test subdirectories to run'
-    required: true
-  test-pattern:
-    description: 'Regex pattern to pass to pytest -k'
-    required: false
-    default: ''
   stack-config:
     description: 'Stack configuration to use'
     required: true
@@ -22,6 +15,14 @@ inputs:
     description: 'Test suite to use: base, responses, vision, etc.'
     required: false
     default: ''
+  test-subdirs:
+    description: 'Comma-separated list of test subdirectories to run; overrides test-suite'
+    required: false
+    default: ''
+  test-pattern:
+    description: 'Regex pattern to pass to pytest -k'
+    required: false
+    default: ''
 
 runs:
   using: 'composite'
diff --git a/.github/workflows/integration-tests.yml b/.github/workflows/integration-tests.yml
index 4167393eb..bb53eea2f 100644
--- a/.github/workflows/integration-tests.yml
+++ b/.github/workflows/integration-tests.yml
@@ -1,6 +1,6 @@
 name: Integration Tests (Replay)
 
-run-name: Run the integration test suite from tests/integration in replay mode
+run-name: Run the integration test suites from tests/integration in replay mode
 
 on:
   push:
@@ -32,14 +32,6 @@ on:
         description: 'Test against a specific provider'
         type: string
         default: 'ollama'
-      test-subdirs:
-        description: 'Comma-separated list of test subdirectories to run'
-        type: string
-        default: ''
-      test-pattern:
-        description: 'Regex pattern to pass to pytest -k'
-        type: string
-        default: ''
 
 concurrency:
   # Skip concurrency for pushes to main - each commit should be tested independently
@@ -79,8 +71,6 @@ jobs:
       - name: Run tests
         uses: ./.github/actions/run-and-record-tests
         with:
-          test-subdirs: ${{ inputs.test-subdirs }}
-          test-pattern: ${{ inputs.test-pattern }}
           stack-config: ${{ matrix.client-type == 'library' && 'ci-tests' || 'server:ci-tests' }}
           provider: ${{ matrix.provider }}
           inference-mode: 'replay'
diff --git a/.github/workflows/record-integration-tests.yml b/.github/workflows/record-integration-tests.yml
index a35ac0e3a..01797a54b 100644
--- a/.github/workflows/record-integration-tests.yml
+++ b/.github/workflows/record-integration-tests.yml
@@ -10,10 +10,6 @@ run-name: Run the integration test suite from tests/integration
 on:
   workflow_dispatch:
     inputs:
-      test-subdirs:
-        description: 'Comma-separated list of test subdirectories to run'
-        type: string
-        default: ''
       test-provider:
         description: 'Test against a specific provider'
         type: string
@@ -22,6 +18,10 @@ on:
         description: 'Test suite to use: base, responses, vision, etc.'
         type: string
         default: ''
+      test-subdirs:
+        description: 'Comma-separated list of test subdirectories to run; overrides test-suite'
+        type: string
+        default: ''
       test-pattern:
         description: 'Regex pattern to pass to pytest -k'
         type: string
@@ -38,11 +38,11 @@ jobs:
       - name: Echo workflow inputs
         run: |
           echo "::group::Workflow Inputs"
-          echo "test-subdirs: ${{ inputs.test-subdirs }}"
+          echo "branch: ${{ github.ref_name }}"
           echo "test-provider: ${{ inputs.test-provider }}"
           echo "test-suite: ${{ inputs.test-suite }}"
+          echo "test-subdirs: ${{ inputs.test-subdirs }}"
           echo "test-pattern: ${{ inputs.test-pattern }}"
-          echo "branch: ${{ github.ref_name }}"
           echo "::endgroup::"
 
       - name: Checkout repository
@@ -62,9 +62,9 @@ jobs:
       - name: Run and record tests
         uses: ./.github/actions/run-and-record-tests
         with:
-          test-pattern: ${{ inputs.test-pattern }}
-          test-subdirs: ${{ inputs.test-subdirs }}
           stack-config: 'server:ci-tests'  # recording must be done with server since more tests are run
           provider: ${{ inputs.test-provider || 'ollama' }}
           inference-mode: 'record'
           test-suite: ${{ inputs.test-suite }}
+          test-subdirs: ${{ inputs.test-subdirs }}
+          test-pattern: ${{ inputs.test-pattern }}
diff --git a/scripts/integration-tests.sh b/scripts/integration-tests.sh
index 07a5be394..ab7e37579 100755
--- a/scripts/integration-tests.sh
+++ b/scripts/integration-tests.sh
@@ -34,6 +34,10 @@ Options:
     --test-pattern STRING    Regex pattern to pass to pytest -k
     --help                   Show this help message
 
+Suites are defined in tests/integration/suites.py. Each suite narrows which tests are collected and provides default model options.
+
+You can also specify subdirectories of tests/integration to run tests from; doing so overrides the suite.
+
 Examples:
     # Basic inference tests with ollama
     $0 --stack-config server:ci-tests --provider ollama
diff --git a/tests/integration/README.md b/tests/integration/README.md
index 912d0d438..b05beeb98 100644
--- a/tests/integration/README.md
+++ b/tests/integration/README.md
@@ -44,9 +44,9 @@ if no model is specified.
 
 ### Suites (fast selection + sane defaults)
 
-- `--suite`: comma-separated list of named suites that both narrow which tests are collected and prefill common model options (unless you pass them explicitly). This keeps runs fast and convenient.
+- `--suite`: comma-separated list of named suites that both narrow which tests are collected and prefill common model options (unless you pass them explicitly).
 - Available suites:
-  - `responses`: collects tests under `tests/integration/responses`; defaults `--text-model=ollama/llama3.2:3b-instruct-fp16`, `--embedding-model=sentence-transformers/all-MiniLM-L6-v2`.
+  - `responses`: collects tests under `tests/integration/responses`; this is a separate suite because it needs a strong tool-calling model.
   - `vision`: collects only `tests/integration/inference/test_vision_inference.py`; defaults `--vision-model=ollama/llama3.2-vision:11b`, `--embedding-model=sentence-transformers/all-MiniLM-L6-v2`.
 - Explicit flags always win. For example, `--suite=responses --text-model=<X>` overrides the suite’s text model.