mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-08-16 14:38:00 +00:00
feat(ci): make recording workflow simpler, more parameterizable (#3169)
Some checks failed
SqlStore Integration Tests / test-postgres (3.12) (push) Failing after 1s
Test External Providers Installed via Module / test-external-providers-from-module (venv) (push) Has been skipped
Python Package Build Test / build (3.13) (push) Failing after 4s
Integration Tests (Replay) / Integration Tests (, , , client=, vision=) (push) Failing after 7s
Python Package Build Test / build (3.12) (push) Failing after 12s
Integration Auth Tests / test-matrix (oauth2_token) (push) Failing after 14s
Update ReadTheDocs / update-readthedocs (push) Failing after 12s
SqlStore Integration Tests / test-postgres (3.13) (push) Failing after 17s
Test External API and Providers / test-external (venv) (push) Failing after 15s
Vector IO Integration Tests / test-matrix (push) Failing after 28s
Unit Tests / unit-tests (3.12) (push) Failing after 27s
Unit Tests / unit-tests (3.13) (push) Failing after 51s
Pre-commit / pre-commit (push) Successful in 2m6s
Some checks failed
SqlStore Integration Tests / test-postgres (3.12) (push) Failing after 1s
Test External Providers Installed via Module / test-external-providers-from-module (venv) (push) Has been skipped
Python Package Build Test / build (3.13) (push) Failing after 4s
Integration Tests (Replay) / Integration Tests (, , , client=, vision=) (push) Failing after 7s
Python Package Build Test / build (3.12) (push) Failing after 12s
Integration Auth Tests / test-matrix (oauth2_token) (push) Failing after 14s
Update ReadTheDocs / update-readthedocs (push) Failing after 12s
SqlStore Integration Tests / test-postgres (3.13) (push) Failing after 17s
Test External API and Providers / test-external (venv) (push) Failing after 15s
Vector IO Integration Tests / test-matrix (push) Failing after 28s
Unit Tests / unit-tests (3.12) (push) Failing after 27s
Unit Tests / unit-tests (3.13) (push) Failing after 51s
Pre-commit / pre-commit (push) Successful in 2m6s
# What does this PR do?

Recording tests has become a nightmare. This is the first part of making that process simpler by making it _less_ automatic. I tried to be too clever earlier.

It simplifies the record-integration-tests workflow to use workflow dispatch inputs instead of PR labels. No more opaque stuff. Just go to the GitHub UI and run the workflow with inputs. I will soon add a helper script for this also.

Other things to aid re-running just the small set of things you need to re-record:

- Replaces the `test-types` JSON array parameter with a more intuitive `test-subdirs` comma-separated list. The whole JSON array crap was for matrix.
- Adds a new `test-pattern` parameter to allow filtering tests using pytest's `-k` option

## Test Plan

Note that this PR is in a fork not the source repository.

- Replay tests on this PR are green
- Manually [ran](1699856292) the replay workflow with a test-subdir and test-pattern filter, worked
- Manually [ran](4819508034) the **record** workflow with a simple pattern, it has worked and updated _this_ PR.

---------

Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
This commit is contained in:
parent
a6e2c18909
commit
0e8bb94bf3
7 changed files with 119 additions and 221 deletions
20
.github/actions/run-and-record-tests/action.yml
vendored
20
.github/actions/run-and-record-tests/action.yml
vendored
|
@ -2,9 +2,13 @@ name: 'Run and Record Tests'
|
||||||
description: 'Run integration tests and handle recording/artifact upload'
|
description: 'Run integration tests and handle recording/artifact upload'
|
||||||
|
|
||||||
inputs:
|
inputs:
|
||||||
test-types:
|
test-subdirs:
|
||||||
description: 'JSON array of test types to run'
|
description: 'Comma-separated list of test subdirectories to run'
|
||||||
required: true
|
required: true
|
||||||
|
test-pattern:
|
||||||
|
description: 'Regex pattern to pass to pytest -k'
|
||||||
|
required: false
|
||||||
|
default: ''
|
||||||
stack-config:
|
stack-config:
|
||||||
description: 'Stack configuration to use'
|
description: 'Stack configuration to use'
|
||||||
required: true
|
required: true
|
||||||
|
@ -35,9 +39,11 @@ runs:
|
||||||
./scripts/integration-tests.sh \
|
./scripts/integration-tests.sh \
|
||||||
--stack-config '${{ inputs.stack-config }}' \
|
--stack-config '${{ inputs.stack-config }}' \
|
||||||
--provider '${{ inputs.provider }}' \
|
--provider '${{ inputs.provider }}' \
|
||||||
--test-types '${{ inputs.test-types }}' \
|
--test-subdirs '${{ inputs.test-subdirs }}' \
|
||||||
|
--test-pattern '${{ inputs.test-pattern }}' \
|
||||||
--inference-mode '${{ inputs.inference-mode }}' \
|
--inference-mode '${{ inputs.inference-mode }}' \
|
||||||
${{ inputs.run-vision-tests == 'true' && '--run-vision-tests' || '' }}
|
${{ inputs.run-vision-tests == 'true' && '--run-vision-tests' || '' }} \
|
||||||
|
| tee pytest-${{ inputs.inference-mode }}.log
|
||||||
|
|
||||||
|
|
||||||
- name: Commit and push recordings
|
- name: Commit and push recordings
|
||||||
|
@ -57,10 +63,10 @@ runs:
|
||||||
git commit -m "Recordings update from CI"
|
git commit -m "Recordings update from CI"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
git fetch origin ${{ github.event.pull_request.head.ref }}
|
git fetch origin ${{ github.ref_name }}
|
||||||
git rebase origin/${{ github.event.pull_request.head.ref }}
|
git rebase origin/${{ github.ref_name }}
|
||||||
echo "Rebased successfully"
|
echo "Rebased successfully"
|
||||||
git push origin HEAD:${{ github.event.pull_request.head.ref }}
|
git push origin HEAD:${{ github.ref_name }}
|
||||||
echo "Pushed successfully"
|
echo "Pushed successfully"
|
||||||
else
|
else
|
||||||
echo "No recording changes"
|
echo "No recording changes"
|
||||||
|
|
31
.github/workflows/integration-tests.yml
vendored
31
.github/workflows/integration-tests.yml
vendored
|
@ -31,6 +31,14 @@ on:
|
||||||
description: 'Test against a specific provider'
|
description: 'Test against a specific provider'
|
||||||
type: string
|
type: string
|
||||||
default: 'ollama'
|
default: 'ollama'
|
||||||
|
test-subdirs:
|
||||||
|
description: 'Comma-separated list of test subdirectories to run'
|
||||||
|
type: string
|
||||||
|
default: ''
|
||||||
|
test-pattern:
|
||||||
|
description: 'Regex pattern to pass to pytest -k'
|
||||||
|
type: string
|
||||||
|
default: ''
|
||||||
|
|
||||||
concurrency:
|
concurrency:
|
||||||
# Skip concurrency for pushes to main - each commit should be tested independently
|
# Skip concurrency for pushes to main - each commit should be tested independently
|
||||||
|
@ -38,28 +46,8 @@ concurrency:
|
||||||
cancel-in-progress: true
|
cancel-in-progress: true
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
discover-tests:
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
outputs:
|
|
||||||
test-types: ${{ steps.generate-test-types.outputs.test-types }}
|
|
||||||
|
|
||||||
steps:
|
|
||||||
- name: Checkout repository
|
|
||||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
|
||||||
|
|
||||||
- name: Generate test types
|
|
||||||
id: generate-test-types
|
|
||||||
run: |
|
|
||||||
# Get test directories dynamically, excluding non-test directories
|
|
||||||
# NOTE: we are excluding post_training since the tests take too long
|
|
||||||
TEST_TYPES=$(find tests/integration -maxdepth 1 -mindepth 1 -type d |
|
|
||||||
sed 's|tests/integration/||' |
|
|
||||||
grep -Ev "^(__pycache__|fixtures|test_cases|recordings|non_ci|post_training)$" |
|
|
||||||
sort | jq -R -s -c 'split("\n")[:-1]')
|
|
||||||
echo "test-types=$TEST_TYPES" >> $GITHUB_OUTPUT
|
|
||||||
|
|
||||||
run-replay-mode-tests:
|
run-replay-mode-tests:
|
||||||
needs: discover-tests
|
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
name: ${{ format('Integration Tests ({0}, {1}, {2}, client={3}, vision={4})', matrix.client-type, matrix.provider, matrix.python-version, matrix.client-version, matrix.run-vision-tests) }}
|
name: ${{ format('Integration Tests ({0}, {1}, {2}, client={3}, vision={4})', matrix.client-type, matrix.provider, matrix.python-version, matrix.client-version, matrix.run-vision-tests) }}
|
||||||
|
|
||||||
|
@ -90,7 +78,8 @@ jobs:
|
||||||
- name: Run tests
|
- name: Run tests
|
||||||
uses: ./.github/actions/run-and-record-tests
|
uses: ./.github/actions/run-and-record-tests
|
||||||
with:
|
with:
|
||||||
test-types: ${{ needs.discover-tests.outputs.test-types }}
|
test-subdirs: ${{ inputs.test-subdirs }}
|
||||||
|
test-pattern: ${{ inputs.test-pattern }}
|
||||||
stack-config: ${{ matrix.client-type == 'library' && 'ci-tests' || 'server:ci-tests' }}
|
stack-config: ${{ matrix.client-type == 'library' && 'ci-tests' || 'server:ci-tests' }}
|
||||||
provider: ${{ matrix.provider }}
|
provider: ${{ matrix.provider }}
|
||||||
inference-mode: 'replay'
|
inference-mode: 'replay'
|
||||||
|
|
91
.github/workflows/record-integration-tests.yml
vendored
91
.github/workflows/record-integration-tests.yml
vendored
|
@ -1,93 +1,43 @@
|
||||||
|
# This workflow should be run manually when needing to re-record tests. This happens when you have
|
||||||
|
# - added a new test
|
||||||
|
# - or changed an existing test such that a new inference call is made
|
||||||
|
# You should make a PR and then run this workflow on that PR branch. The workflow will re-record the
|
||||||
|
# tests and commit the recordings to the PR branch.
|
||||||
name: Integration Tests (Record)
|
name: Integration Tests (Record)
|
||||||
|
|
||||||
run-name: Run the integration test suite from tests/integration
|
run-name: Run the integration test suite from tests/integration
|
||||||
|
|
||||||
on:
|
on:
|
||||||
pull_request_target:
|
|
||||||
branches: [ main ]
|
|
||||||
types: [opened, synchronize, labeled]
|
|
||||||
paths:
|
|
||||||
- 'llama_stack/**'
|
|
||||||
- 'tests/**'
|
|
||||||
- 'uv.lock'
|
|
||||||
- 'pyproject.toml'
|
|
||||||
- '.github/workflows/record-integration-tests.yml' # This workflow
|
|
||||||
- '.github/actions/setup-ollama/action.yml'
|
|
||||||
- '.github/actions/setup-test-environment/action.yml'
|
|
||||||
- '.github/actions/run-and-record-tests/action.yml'
|
|
||||||
workflow_dispatch:
|
workflow_dispatch:
|
||||||
inputs:
|
inputs:
|
||||||
|
test-subdirs:
|
||||||
|
description: 'Comma-separated list of test subdirectories to run'
|
||||||
|
type: string
|
||||||
|
default: ''
|
||||||
test-provider:
|
test-provider:
|
||||||
description: 'Test against a specific provider'
|
description: 'Test against a specific provider'
|
||||||
type: string
|
type: string
|
||||||
default: 'ollama'
|
default: 'ollama'
|
||||||
|
run-vision-tests:
|
||||||
concurrency:
|
description: 'Whether to run vision tests'
|
||||||
group: ${{ github.workflow }}-${{ github.event.pull_request.number }}
|
type: boolean
|
||||||
cancel-in-progress: true
|
default: false
|
||||||
|
test-pattern:
|
||||||
|
description: 'Regex pattern to pass to pytest -k'
|
||||||
|
type: string
|
||||||
|
default: ''
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
discover-tests:
|
|
||||||
if: contains(github.event.pull_request.labels.*.name, 're-record-tests') ||
|
|
||||||
contains(github.event.pull_request.labels.*.name, 're-record-vision-tests')
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
outputs:
|
|
||||||
test-types: ${{ steps.generate-test-types.outputs.test-types }}
|
|
||||||
matrix-modes: ${{ steps.generate-test-types.outputs.matrix-modes }}
|
|
||||||
|
|
||||||
steps:
|
|
||||||
- name: Checkout repository
|
|
||||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
|
||||||
|
|
||||||
- name: Generate test types
|
|
||||||
id: generate-test-types
|
|
||||||
run: |
|
|
||||||
# Get test directories dynamically, excluding non-test directories
|
|
||||||
TEST_TYPES=$(find tests/integration -maxdepth 1 -mindepth 1 -type d -printf "%f\n" |
|
|
||||||
grep -Ev "^(__pycache__|fixtures|test_cases|recordings|post_training)$" |
|
|
||||||
sort | jq -R -s -c 'split("\n")[:-1]')
|
|
||||||
echo "test-types=$TEST_TYPES" >> $GITHUB_OUTPUT
|
|
||||||
|
|
||||||
labels=$(gh pr view ${{ github.event.pull_request.number }} --json labels --jq '.labels[].name')
|
|
||||||
echo "labels=$labels"
|
|
||||||
|
|
||||||
modes_array=()
|
|
||||||
if [[ $labels == *"re-record-vision-tests"* ]]; then
|
|
||||||
modes_array+=("vision")
|
|
||||||
fi
|
|
||||||
if [[ $labels == *"re-record-tests"* ]]; then
|
|
||||||
modes_array+=("non-vision")
|
|
||||||
fi
|
|
||||||
|
|
||||||
# Convert to JSON array
|
|
||||||
if [ ${#modes_array[@]} -eq 0 ]; then
|
|
||||||
matrix_modes="[]"
|
|
||||||
else
|
|
||||||
matrix_modes=$(printf '%s\n' "${modes_array[@]}" | jq -R -s -c 'split("\n")[:-1]')
|
|
||||||
fi
|
|
||||||
echo "matrix_modes=$matrix_modes"
|
|
||||||
echo "matrix-modes=$matrix_modes" >> $GITHUB_OUTPUT
|
|
||||||
|
|
||||||
env:
|
|
||||||
GH_TOKEN: ${{ github.token }}
|
|
||||||
|
|
||||||
record-tests:
|
record-tests:
|
||||||
needs: discover-tests
|
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
|
|
||||||
permissions:
|
permissions:
|
||||||
contents: write
|
contents: write
|
||||||
|
|
||||||
strategy:
|
|
||||||
fail-fast: false
|
|
||||||
matrix:
|
|
||||||
mode: ${{ fromJSON(needs.discover-tests.outputs.matrix-modes) }}
|
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout repository
|
- name: Checkout repository
|
||||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||||
with:
|
with:
|
||||||
ref: ${{ github.event.pull_request.head.ref }}
|
|
||||||
fetch-depth: 0
|
fetch-depth: 0
|
||||||
|
|
||||||
- name: Setup test environment
|
- name: Setup test environment
|
||||||
|
@ -96,14 +46,15 @@ jobs:
|
||||||
python-version: "3.12" # Use single Python version for recording
|
python-version: "3.12" # Use single Python version for recording
|
||||||
client-version: "latest"
|
client-version: "latest"
|
||||||
provider: ${{ inputs.test-provider || 'ollama' }}
|
provider: ${{ inputs.test-provider || 'ollama' }}
|
||||||
run-vision-tests: ${{ matrix.mode == 'vision' && 'true' || 'false' }}
|
run-vision-tests: ${{ inputs.run-vision-tests }}
|
||||||
inference-mode: 'record'
|
inference-mode: 'record'
|
||||||
|
|
||||||
- name: Run and record tests
|
- name: Run and record tests
|
||||||
uses: ./.github/actions/run-and-record-tests
|
uses: ./.github/actions/run-and-record-tests
|
||||||
with:
|
with:
|
||||||
test-types: ${{ needs.discover-tests.outputs.test-types }}
|
test-pattern: ${{ inputs.test-pattern }}
|
||||||
|
test-subdirs: ${{ inputs.test-subdirs }}
|
||||||
stack-config: 'server:ci-tests' # recording must be done with server since more tests are run
|
stack-config: 'server:ci-tests' # recording must be done with server since more tests are run
|
||||||
provider: ${{ inputs.test-provider || 'ollama' }}
|
provider: ${{ inputs.test-provider || 'ollama' }}
|
||||||
inference-mode: 'record'
|
inference-mode: 'record'
|
||||||
run-vision-tests: ${{ matrix.mode == 'vision' && 'true' || 'false' }}
|
run-vision-tests: ${{ inputs.run-vision-tests }}
|
||||||
|
|
|
@ -14,7 +14,8 @@ set -euo pipefail
|
||||||
# Default values
|
# Default values
|
||||||
STACK_CONFIG=""
|
STACK_CONFIG=""
|
||||||
PROVIDER=""
|
PROVIDER=""
|
||||||
TEST_TYPES='["inference"]'
|
TEST_SUBDIRS=""
|
||||||
|
TEST_PATTERN=""
|
||||||
RUN_VISION_TESTS="false"
|
RUN_VISION_TESTS="false"
|
||||||
INFERENCE_MODE="replay"
|
INFERENCE_MODE="replay"
|
||||||
EXTRA_PARAMS=""
|
EXTRA_PARAMS=""
|
||||||
|
@ -27,23 +28,24 @@ Usage: $0 [OPTIONS]
|
||||||
Options:
|
Options:
|
||||||
--stack-config STRING Stack configuration to use (required)
|
--stack-config STRING Stack configuration to use (required)
|
||||||
--provider STRING Provider to use (ollama, vllm, etc.) (required)
|
--provider STRING Provider to use (ollama, vllm, etc.) (required)
|
||||||
--test-types JSON JSON array of test types to run (default: '["inference"]')
|
--test-subdirs STRING Comma-separated list of test subdirectories to run (default: 'inference')
|
||||||
--run-vision-tests Run vision tests instead of regular tests
|
--run-vision-tests Run vision tests instead of regular tests
|
||||||
--inference-mode STRING Inference mode: record or replay (default: replay)
|
--inference-mode STRING Inference mode: record or replay (default: replay)
|
||||||
|
--test-pattern STRING Regex pattern to pass to pytest -k
|
||||||
--help Show this help message
|
--help Show this help message
|
||||||
|
|
||||||
Examples:
|
Examples:
|
||||||
# Basic inference tests with ollama
|
# Basic inference tests with ollama
|
||||||
$0 --stack-config server:ollama --provider ollama
|
$0 --stack-config server:ci-tests --provider ollama
|
||||||
|
|
||||||
# Multiple test types with vllm
|
# Multiple test directories with vllm
|
||||||
$0 --stack-config server:vllm --provider vllm --test-types '["inference", "agents"]'
|
$0 --stack-config server:ci-tests --provider vllm --test-subdirs 'inference,agents'
|
||||||
|
|
||||||
# Vision tests with ollama
|
# Vision tests with ollama
|
||||||
$0 --stack-config server:ollama --provider ollama --run-vision-tests
|
$0 --stack-config server:ci-tests --provider ollama --run-vision-tests
|
||||||
|
|
||||||
# Record mode for updating test recordings
|
# Record mode for updating test recordings
|
||||||
$0 --stack-config server:ollama --provider ollama --inference-mode record
|
$0 --stack-config server:ci-tests --provider ollama --inference-mode record
|
||||||
EOF
|
EOF
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -58,8 +60,8 @@ while [[ $# -gt 0 ]]; do
|
||||||
PROVIDER="$2"
|
PROVIDER="$2"
|
||||||
shift 2
|
shift 2
|
||||||
;;
|
;;
|
||||||
--test-types)
|
--test-subdirs)
|
||||||
TEST_TYPES="$2"
|
TEST_SUBDIRS="$2"
|
||||||
shift 2
|
shift 2
|
||||||
;;
|
;;
|
||||||
--run-vision-tests)
|
--run-vision-tests)
|
||||||
|
@ -70,6 +72,10 @@ while [[ $# -gt 0 ]]; do
|
||||||
INFERENCE_MODE="$2"
|
INFERENCE_MODE="$2"
|
||||||
shift 2
|
shift 2
|
||||||
;;
|
;;
|
||||||
|
--test-pattern)
|
||||||
|
TEST_PATTERN="$2"
|
||||||
|
shift 2
|
||||||
|
;;
|
||||||
--help)
|
--help)
|
||||||
usage
|
usage
|
||||||
exit 0
|
exit 0
|
||||||
|
@ -99,9 +105,10 @@ fi
|
||||||
echo "=== Llama Stack Integration Test Runner ==="
|
echo "=== Llama Stack Integration Test Runner ==="
|
||||||
echo "Stack Config: $STACK_CONFIG"
|
echo "Stack Config: $STACK_CONFIG"
|
||||||
echo "Provider: $PROVIDER"
|
echo "Provider: $PROVIDER"
|
||||||
echo "Test Types: $TEST_TYPES"
|
echo "Test Subdirs: $TEST_SUBDIRS"
|
||||||
echo "Vision Tests: $RUN_VISION_TESTS"
|
echo "Vision Tests: $RUN_VISION_TESTS"
|
||||||
echo "Inference Mode: $INFERENCE_MODE"
|
echo "Inference Mode: $INFERENCE_MODE"
|
||||||
|
echo "Test Pattern: $TEST_PATTERN"
|
||||||
echo ""
|
echo ""
|
||||||
|
|
||||||
# Check storage and memory before tests
|
# Check storage and memory before tests
|
||||||
|
@ -164,17 +171,29 @@ if [[ "$PROVIDER" == "vllm" ]]; then
|
||||||
EXCLUDE_TESTS="${EXCLUDE_TESTS} or test_inference_store_tool_calls"
|
EXCLUDE_TESTS="${EXCLUDE_TESTS} or test_inference_store_tool_calls"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
PYTEST_PATTERN="not( $EXCLUDE_TESTS )"
|
||||||
|
if [[ -n "$TEST_PATTERN" ]]; then
|
||||||
|
PYTEST_PATTERN="${PYTEST_PATTERN} and $TEST_PATTERN"
|
||||||
|
fi
|
||||||
|
|
||||||
# Run vision tests if specified
|
# Run vision tests if specified
|
||||||
if [[ "$RUN_VISION_TESTS" == "true" ]]; then
|
if [[ "$RUN_VISION_TESTS" == "true" ]]; then
|
||||||
echo "Running vision tests..."
|
echo "Running vision tests..."
|
||||||
if uv run pytest -s -v tests/integration/inference/test_vision_inference.py \
|
set +e
|
||||||
|
uv run pytest -s -v tests/integration/inference/test_vision_inference.py \
|
||||||
--stack-config="$STACK_CONFIG" \
|
--stack-config="$STACK_CONFIG" \
|
||||||
-k "not( $EXCLUDE_TESTS )" \
|
-k "$PYTEST_PATTERN" \
|
||||||
--vision-model=ollama/llama3.2-vision:11b \
|
--vision-model=ollama/llama3.2-vision:11b \
|
||||||
--embedding-model=sentence-transformers/all-MiniLM-L6-v2 \
|
--embedding-model=sentence-transformers/all-MiniLM-L6-v2 \
|
||||||
--color=yes $EXTRA_PARAMS \
|
--color=yes $EXTRA_PARAMS \
|
||||||
--capture=tee-sys | tee pytest-${INFERENCE_MODE}-vision.log; then
|
--capture=tee-sys
|
||||||
|
exit_code=$?
|
||||||
|
set -e
|
||||||
|
|
||||||
|
if [ $exit_code -eq 0 ]; then
|
||||||
echo "✅ Vision tests completed successfully"
|
echo "✅ Vision tests completed successfully"
|
||||||
|
elif [ $exit_code -eq 5 ]; then
|
||||||
|
echo "⚠️ No vision tests collected (pattern matched no tests)"
|
||||||
else
|
else
|
||||||
echo "❌ Vision tests failed"
|
echo "❌ Vision tests failed"
|
||||||
exit 1
|
exit 1
|
||||||
|
@ -183,28 +202,34 @@ if [[ "$RUN_VISION_TESTS" == "true" ]]; then
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Run regular tests
|
# Run regular tests
|
||||||
echo "Test types to run: $TEST_TYPES"
|
if [[ -z "$TEST_SUBDIRS" ]]; then
|
||||||
|
TEST_SUBDIRS=$(find tests/integration -maxdepth 1 -mindepth 1 -type d |
|
||||||
|
sed 's|tests/integration/||' |
|
||||||
|
grep -Ev "^(__pycache__|fixtures|test_cases|recordings|non_ci|post_training)$" |
|
||||||
|
sort)
|
||||||
|
fi
|
||||||
|
echo "Test subdirs to run: $TEST_SUBDIRS"
|
||||||
|
|
||||||
# Collect all test files for the specified test types
|
# Collect all test files for the specified test types
|
||||||
TEST_FILES=""
|
TEST_FILES=""
|
||||||
for test_type in $(echo "$TEST_TYPES" | jq -r '.[]'); do
|
for test_subdir in $(echo "$TEST_SUBDIRS" | tr ',' '\n'); do
|
||||||
# Skip certain test types for vllm provider
|
# Skip certain test types for vllm provider
|
||||||
if [[ "$PROVIDER" == "vllm" ]]; then
|
if [[ "$PROVIDER" == "vllm" ]]; then
|
||||||
if [[ "$test_type" == "safety" ]] || [[ "$test_type" == "post_training" ]] || [[ "$test_type" == "tool_runtime" ]]; then
|
if [[ "$test_subdir" == "safety" ]] || [[ "$test_subdir" == "post_training" ]] || [[ "$test_subdir" == "tool_runtime" ]]; then
|
||||||
echo "Skipping $test_type for vllm provider"
|
echo "Skipping $test_subdir for vllm provider"
|
||||||
continue
|
continue
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [[ -d "tests/integration/$test_type" ]]; then
|
if [[ -d "tests/integration/$test_subdir" ]]; then
|
||||||
# Find all Python test files in this directory
|
# Find all Python test files in this directory
|
||||||
test_files=$(find tests/integration/$test_type -name "test_*.py" -o -name "*_test.py")
|
test_files=$(find tests/integration/$test_subdir -name "test_*.py" -o -name "*_test.py")
|
||||||
if [[ -n "$test_files" ]]; then
|
if [[ -n "$test_files" ]]; then
|
||||||
TEST_FILES="$TEST_FILES $test_files"
|
TEST_FILES="$TEST_FILES $test_files"
|
||||||
echo "Added test files from $test_type: $(echo $test_files | wc -w) files"
|
echo "Added test files from $test_subdir: $(echo $test_files | wc -w) files"
|
||||||
fi
|
fi
|
||||||
else
|
else
|
||||||
echo "Warning: Directory tests/integration/$test_type does not exist"
|
echo "Warning: Directory tests/integration/$test_subdir does not exist"
|
||||||
fi
|
fi
|
||||||
done
|
done
|
||||||
|
|
||||||
|
@ -217,14 +242,21 @@ echo ""
|
||||||
echo "=== Running all collected tests in a single pytest command ==="
|
echo "=== Running all collected tests in a single pytest command ==="
|
||||||
echo "Total test files: $(echo $TEST_FILES | wc -w)"
|
echo "Total test files: $(echo $TEST_FILES | wc -w)"
|
||||||
|
|
||||||
if uv run pytest -s -v $TEST_FILES \
|
set +e
|
||||||
|
uv run pytest -s -v $TEST_FILES \
|
||||||
--stack-config="$STACK_CONFIG" \
|
--stack-config="$STACK_CONFIG" \
|
||||||
-k "not( $EXCLUDE_TESTS )" \
|
-k "$PYTEST_PATTERN" \
|
||||||
--text-model="$TEXT_MODEL" \
|
--text-model="$TEXT_MODEL" \
|
||||||
--embedding-model=sentence-transformers/all-MiniLM-L6-v2 \
|
--embedding-model=sentence-transformers/all-MiniLM-L6-v2 \
|
||||||
--color=yes $EXTRA_PARAMS \
|
--color=yes $EXTRA_PARAMS \
|
||||||
--capture=tee-sys | tee pytest-${INFERENCE_MODE}-all.log; then
|
--capture=tee-sys
|
||||||
|
exit_code=$?
|
||||||
|
set -e
|
||||||
|
|
||||||
|
if [ $exit_code -eq 0 ]; then
|
||||||
echo "✅ All tests completed successfully"
|
echo "✅ All tests completed successfully"
|
||||||
|
elif [ $exit_code -eq 5 ]; then
|
||||||
|
echo "⚠️ No tests collected (pattern matched no tests)"
|
||||||
else
|
else
|
||||||
echo "❌ Tests failed"
|
echo "❌ Tests failed"
|
||||||
exit 1
|
exit 1
|
||||||
|
|
Binary file not shown.
|
@ -14,7 +14,7 @@
|
||||||
"models": [
|
"models": [
|
||||||
{
|
{
|
||||||
"model": "nomic-embed-text:latest",
|
"model": "nomic-embed-text:latest",
|
||||||
"modified_at": "2025-08-14T20:26:10.795125-07:00",
|
"modified_at": "2025-08-15T20:24:13.254634Z",
|
||||||
"digest": "0a109f422b47e3a30ba2b10eca18548e944e8a23073ee3f3e947efcf3c45e59f",
|
"digest": "0a109f422b47e3a30ba2b10eca18548e944e8a23073ee3f3e947efcf3c45e59f",
|
||||||
"size": 274302450,
|
"size": 274302450,
|
||||||
"details": {
|
"details": {
|
||||||
|
@ -28,41 +28,9 @@
|
||||||
"quantization_level": "F16"
|
"quantization_level": "F16"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
{
|
|
||||||
"model": "llama3.2-vision:11b",
|
|
||||||
"modified_at": "2025-07-30T18:45:02.517873-07:00",
|
|
||||||
"digest": "6f2f9757ae97e8a3f8ea33d6adb2b11d93d9a35bef277cd2c0b1b5af8e8d0b1e",
|
|
||||||
"size": 7816589186,
|
|
||||||
"details": {
|
|
||||||
"parent_model": "",
|
|
||||||
"format": "gguf",
|
|
||||||
"family": "mllama",
|
|
||||||
"families": [
|
|
||||||
"mllama"
|
|
||||||
],
|
|
||||||
"parameter_size": "10.7B",
|
|
||||||
"quantization_level": "Q4_K_M"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"model": "llama3.2-vision:latest",
|
|
||||||
"modified_at": "2025-07-29T20:18:47.920468-07:00",
|
|
||||||
"digest": "6f2f9757ae97e8a3f8ea33d6adb2b11d93d9a35bef277cd2c0b1b5af8e8d0b1e",
|
|
||||||
"size": 7816589186,
|
|
||||||
"details": {
|
|
||||||
"parent_model": "",
|
|
||||||
"format": "gguf",
|
|
||||||
"family": "mllama",
|
|
||||||
"families": [
|
|
||||||
"mllama"
|
|
||||||
],
|
|
||||||
"parameter_size": "10.7B",
|
|
||||||
"quantization_level": "Q4_K_M"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"model": "llama-guard3:1b",
|
"model": "llama-guard3:1b",
|
||||||
"modified_at": "2025-07-25T14:39:44.978630-07:00",
|
"modified_at": "2025-07-31T04:44:58Z",
|
||||||
"digest": "494147e06bf99e10dbe67b63a07ac81c162f18ef3341aa3390007ac828571b3b",
|
"digest": "494147e06bf99e10dbe67b63a07ac81c162f18ef3341aa3390007ac828571b3b",
|
||||||
"size": 1600181919,
|
"size": 1600181919,
|
||||||
"details": {
|
"details": {
|
||||||
|
@ -78,7 +46,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"model": "all-minilm:l6-v2",
|
"model": "all-minilm:l6-v2",
|
||||||
"modified_at": "2025-07-24T15:15:11.129290-07:00",
|
"modified_at": "2025-07-31T04:42:15Z",
|
||||||
"digest": "1b226e2802dbb772b5fc32a58f103ca1804ef7501331012de126ab22f67475ef",
|
"digest": "1b226e2802dbb772b5fc32a58f103ca1804ef7501331012de126ab22f67475ef",
|
||||||
"size": 45960996,
|
"size": 45960996,
|
||||||
"details": {
|
"details": {
|
||||||
|
@ -92,57 +60,9 @@
|
||||||
"quantization_level": "F16"
|
"quantization_level": "F16"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
{
|
|
||||||
"model": "llama3.2:1b",
|
|
||||||
"modified_at": "2025-07-17T22:02:24.953208-07:00",
|
|
||||||
"digest": "baf6a787fdffd633537aa2eb51cfd54cb93ff08e28040095462bb63daf552878",
|
|
||||||
"size": 1321098329,
|
|
||||||
"details": {
|
|
||||||
"parent_model": "",
|
|
||||||
"format": "gguf",
|
|
||||||
"family": "llama",
|
|
||||||
"families": [
|
|
||||||
"llama"
|
|
||||||
],
|
|
||||||
"parameter_size": "1.2B",
|
|
||||||
"quantization_level": "Q8_0"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"model": "all-minilm:latest",
|
|
||||||
"modified_at": "2025-06-03T16:50:10.946583-07:00",
|
|
||||||
"digest": "1b226e2802dbb772b5fc32a58f103ca1804ef7501331012de126ab22f67475ef",
|
|
||||||
"size": 45960996,
|
|
||||||
"details": {
|
|
||||||
"parent_model": "",
|
|
||||||
"format": "gguf",
|
|
||||||
"family": "bert",
|
|
||||||
"families": [
|
|
||||||
"bert"
|
|
||||||
],
|
|
||||||
"parameter_size": "23M",
|
|
||||||
"quantization_level": "F16"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"model": "llama3.2:3b",
|
|
||||||
"modified_at": "2025-05-01T11:15:23.797447-07:00",
|
|
||||||
"digest": "a80c4f17acd55265feec403c7aef86be0c25983ab279d83f3bcd3abbcb5b8b72",
|
|
||||||
"size": 2019393189,
|
|
||||||
"details": {
|
|
||||||
"parent_model": "",
|
|
||||||
"format": "gguf",
|
|
||||||
"family": "llama",
|
|
||||||
"families": [
|
|
||||||
"llama"
|
|
||||||
],
|
|
||||||
"parameter_size": "3.2B",
|
|
||||||
"quantization_level": "Q4_K_M"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"modified_at": "2025-04-30T15:33:48.939665-07:00",
|
"modified_at": "2025-07-31T04:42:05Z",
|
||||||
"digest": "195a8c01d91ec3cb1e0aad4624a51f2602c51fa7d96110f8ab5a20c84081804d",
|
"digest": "195a8c01d91ec3cb1e0aad4624a51f2602c51fa7d96110f8ab5a20c84081804d",
|
||||||
"size": 6433703586,
|
"size": 6433703586,
|
||||||
"details": {
|
"details": {
|
||||||
|
|
|
@ -21,7 +21,7 @@
|
||||||
"__type__": "ollama._types.GenerateResponse",
|
"__type__": "ollama._types.GenerateResponse",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"created_at": "2025-08-04T22:55:14.141947Z",
|
"created_at": "2025-08-15T20:24:49.18651486Z",
|
||||||
"done": false,
|
"done": false,
|
||||||
"done_reason": null,
|
"done_reason": null,
|
||||||
"total_duration": null,
|
"total_duration": null,
|
||||||
|
@ -39,7 +39,7 @@
|
||||||
"__type__": "ollama._types.GenerateResponse",
|
"__type__": "ollama._types.GenerateResponse",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"created_at": "2025-08-04T22:55:14.194979Z",
|
"created_at": "2025-08-15T20:24:49.370611348Z",
|
||||||
"done": false,
|
"done": false,
|
||||||
"done_reason": null,
|
"done_reason": null,
|
||||||
"total_duration": null,
|
"total_duration": null,
|
||||||
|
@ -57,7 +57,7 @@
|
||||||
"__type__": "ollama._types.GenerateResponse",
|
"__type__": "ollama._types.GenerateResponse",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"created_at": "2025-08-04T22:55:14.248312Z",
|
"created_at": "2025-08-15T20:24:49.557000029Z",
|
||||||
"done": false,
|
"done": false,
|
||||||
"done_reason": null,
|
"done_reason": null,
|
||||||
"total_duration": null,
|
"total_duration": null,
|
||||||
|
@ -75,7 +75,7 @@
|
||||||
"__type__": "ollama._types.GenerateResponse",
|
"__type__": "ollama._types.GenerateResponse",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"created_at": "2025-08-04T22:55:14.301911Z",
|
"created_at": "2025-08-15T20:24:49.746777116Z",
|
||||||
"done": false,
|
"done": false,
|
||||||
"done_reason": null,
|
"done_reason": null,
|
||||||
"total_duration": null,
|
"total_duration": null,
|
||||||
|
@ -93,7 +93,7 @@
|
||||||
"__type__": "ollama._types.GenerateResponse",
|
"__type__": "ollama._types.GenerateResponse",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"created_at": "2025-08-04T22:55:14.354437Z",
|
"created_at": "2025-08-15T20:24:49.942233333Z",
|
||||||
"done": false,
|
"done": false,
|
||||||
"done_reason": null,
|
"done_reason": null,
|
||||||
"total_duration": null,
|
"total_duration": null,
|
||||||
|
@ -111,7 +111,7 @@
|
||||||
"__type__": "ollama._types.GenerateResponse",
|
"__type__": "ollama._types.GenerateResponse",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"created_at": "2025-08-04T22:55:14.406821Z",
|
"created_at": "2025-08-15T20:24:50.126788846Z",
|
||||||
"done": false,
|
"done": false,
|
||||||
"done_reason": null,
|
"done_reason": null,
|
||||||
"total_duration": null,
|
"total_duration": null,
|
||||||
|
@ -129,7 +129,7 @@
|
||||||
"__type__": "ollama._types.GenerateResponse",
|
"__type__": "ollama._types.GenerateResponse",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"created_at": "2025-08-04T22:55:14.457633Z",
|
"created_at": "2025-08-15T20:24:50.311346131Z",
|
||||||
"done": false,
|
"done": false,
|
||||||
"done_reason": null,
|
"done_reason": null,
|
||||||
"total_duration": null,
|
"total_duration": null,
|
||||||
|
@ -147,7 +147,7 @@
|
||||||
"__type__": "ollama._types.GenerateResponse",
|
"__type__": "ollama._types.GenerateResponse",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"created_at": "2025-08-04T22:55:14.507857Z",
|
"created_at": "2025-08-15T20:24:50.501507173Z",
|
||||||
"done": false,
|
"done": false,
|
||||||
"done_reason": null,
|
"done_reason": null,
|
||||||
"total_duration": null,
|
"total_duration": null,
|
||||||
|
@ -165,7 +165,7 @@
|
||||||
"__type__": "ollama._types.GenerateResponse",
|
"__type__": "ollama._types.GenerateResponse",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"created_at": "2025-08-04T22:55:14.558847Z",
|
"created_at": "2025-08-15T20:24:50.692296777Z",
|
||||||
"done": false,
|
"done": false,
|
||||||
"done_reason": null,
|
"done_reason": null,
|
||||||
"total_duration": null,
|
"total_duration": null,
|
||||||
|
@ -183,7 +183,7 @@
|
||||||
"__type__": "ollama._types.GenerateResponse",
|
"__type__": "ollama._types.GenerateResponse",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"created_at": "2025-08-04T22:55:14.609969Z",
|
"created_at": "2025-08-15T20:24:50.878846539Z",
|
||||||
"done": false,
|
"done": false,
|
||||||
"done_reason": null,
|
"done_reason": null,
|
||||||
"total_duration": null,
|
"total_duration": null,
|
||||||
|
@ -201,15 +201,15 @@
|
||||||
"__type__": "ollama._types.GenerateResponse",
|
"__type__": "ollama._types.GenerateResponse",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"model": "llama3.2:3b-instruct-fp16",
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
"created_at": "2025-08-04T22:55:14.660997Z",
|
"created_at": "2025-08-15T20:24:51.063200561Z",
|
||||||
"done": true,
|
"done": true,
|
||||||
"done_reason": "stop",
|
"done_reason": "stop",
|
||||||
"total_duration": 715356542,
|
"total_duration": 33982453650,
|
||||||
"load_duration": 59747500,
|
"load_duration": 2909001805,
|
||||||
"prompt_eval_count": 341,
|
"prompt_eval_count": 341,
|
||||||
"prompt_eval_duration": 128000000,
|
"prompt_eval_duration": 29194357307,
|
||||||
"eval_count": 11,
|
"eval_count": 11,
|
||||||
"eval_duration": 526000000,
|
"eval_duration": 1878247732,
|
||||||
"response": "",
|
"response": "",
|
||||||
"thinking": null,
|
"thinking": null,
|
||||||
"context": null
|
"context": null
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue