clean up, add some docs

2025-12-17 09:49:46 +00:00 · 2025-09-05 13:40:52 -07:00 · 2025-09-05 13:40:52 -07:00 · bce479ee47
commit bce479ee47
parent b54f10150e
5 changed files with 23 additions and 28 deletions
--- a/.github/actions/run-and-record-tests/action.yml
+++ b/.github/actions/run-and-record-tests/action.yml
@@ -2,13 +2,6 @@ name: 'Run and Record Tests'
 description: 'Run integration tests and handle recording/artifact upload'
 inputs:
  test-subdirs:
    description: 'Comma-separated list of test subdirectories to run'
    required: true
  test-pattern:
    description: 'Regex pattern to pass to pytest -k'
    required: false
    default: ''
  stack-config:
    description: 'Stack configuration to use'
    required: true
@@ -22,6 +15,14 @@ inputs:
    description: 'Test suite to use: base, responses, vision, etc.'
    required: false
    default: ''
  test-subdirs:
    description: 'Comma-separated list of test subdirectories to run; overrides test-suite'
    required: false
    default: ''
  test-pattern:
    description: 'Regex pattern to pass to pytest -k'
    required: false
    default: ''
 runs:
  using: 'composite'
--- a/.github/workflows/integration-tests.yml
+++ b/.github/workflows/integration-tests.yml
@@ -1,6 +1,6 @@
 name: Integration Tests (Replay)
-run-name: Run the integration test suite from tests/integration in replay mode
+run-name: Run the integration test suites from tests/integration in replay mode
 on:
  push:
@@ -32,14 +32,6 @@ on:
        description: 'Test against a specific provider'
        type: string
        default: 'ollama'
      test-subdirs:
        description: 'Comma-separated list of test subdirectories to run'
        type: string
        default: ''
      test-pattern:
        description: 'Regex pattern to pass to pytest -k'
        type: string
        default: ''
 concurrency:
  # Skip concurrency for pushes to main - each commit should be tested independently
@@ -79,8 +71,6 @@ jobs:
      - name: Run tests
        uses: ./.github/actions/run-and-record-tests
        with:
          test-subdirs: ${{ inputs.test-subdirs }}
          test-pattern: ${{ inputs.test-pattern }}
          stack-config: ${{ matrix.client-type == 'library' && 'ci-tests' || 'server:ci-tests' }}
          provider: ${{ matrix.provider }}
          inference-mode: 'replay'
--- a/.github/workflows/record-integration-tests.yml
+++ b/.github/workflows/record-integration-tests.yml
@@ -10,10 +10,6 @@ run-name: Run the integration test suite from tests/integration
 on:
  workflow_dispatch:
    inputs:
      test-subdirs:
        description: 'Comma-separated list of test subdirectories to run'
        type: string
        default: ''
      test-provider:
        description: 'Test against a specific provider'
        type: string
@@ -22,6 +18,10 @@ on:
        description: 'Test suite to use: base, responses, vision, etc.'
        type: string
        default: ''
      test-subdirs:
        description: 'Comma-separated list of test subdirectories to run; overrides test-suite'
        type: string
        default: ''
      test-pattern:
        description: 'Regex pattern to pass to pytest -k'
        type: string
@@ -38,11 +38,11 @@ jobs:
      - name: Echo workflow inputs
        run: |
          echo "::group::Workflow Inputs"
-          echo "test-subdirs: ${{ inputs.test-subdirs }}"
+          echo "branch: ${{ github.ref_name }}"
          echo "test-provider: ${{ inputs.test-provider }}"
          echo "test-suite: ${{ inputs.test-suite }}"
          echo "test-subdirs: ${{ inputs.test-subdirs }}"
          echo "test-pattern: ${{ inputs.test-pattern }}"
          echo "branch: ${{ github.ref_name }}"
          echo "::endgroup::"
      - name: Checkout repository
@@ -62,9 +62,9 @@ jobs:
      - name: Run and record tests
        uses: ./.github/actions/run-and-record-tests
        with:
          test-pattern: ${{ inputs.test-pattern }}
          test-subdirs: ${{ inputs.test-subdirs }}
          stack-config: 'server:ci-tests'  # recording must be done with server since more tests are run
          provider: ${{ inputs.test-provider || 'ollama' }}
          inference-mode: 'record'
          test-suite: ${{ inputs.test-suite }}
          test-subdirs: ${{ inputs.test-subdirs }}
          test-pattern: ${{ inputs.test-pattern }}
--- a/scripts/integration-tests.sh
+++ b/scripts/integration-tests.sh
@@ -34,6 +34,10 @@ Options:
    --test-pattern STRING    Regex pattern to pass to pytest -k
    --help                   Show this help message
 Suites are defined in tests/integration/suites.py. They are used to narrow the collection of tests and provide default model options.
 You can also specify subdirectories (of tests/integration) to select tests from, which will override the suite.
 Examples:
    # Basic inference tests with ollama
    $0 --stack-config server:ci-tests --provider ollama
--- a/tests/integration/README.md
+++ b/tests/integration/README.md
@@ -44,9 +44,9 @@ if no model is specified.
 ### Suites (fast selection + sane defaults)
- `--suite`: comma-separated list of named suites that both narrow which tests are collected and prefill common model options (unless you pass them explicitly). This keeps runs fast and convenient.
+- `--suite`: comma-separated list of named suites that both narrow which tests are collected and prefill common model options (unless you pass them explicitly).
 - Available suites:
-  - `responses`: collects tests under `tests/integration/responses`; defaults `--text-model=ollama/llama3.2:3b-instruct-fp16`, `--embedding-model=sentence-transformers/all-MiniLM-L6-v2`.
+  - `responses`: collects tests under `tests/integration/responses`; this is a separate suite because it needs a strong tool-calling model.
  - `vision`: collects only `tests/integration/inference/test_vision_inference.py`; defaults `--vision-model=ollama/llama3.2-vision:11b`, `--embedding-model=sentence-transformers/all-MiniLM-L6-v2`.
 - Explicit flags always win. For example, `--suite=responses --text-model=<X>` overrides the suite’s text model.