mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-08-15 14:08:00 +00:00
feat(ci): add support for running vision inference tests (#2972)
This PR significantly refactors the Integration Tests workflow. The main goal behind the PR was to enable recording of vision tests which were never run as part of our CI ever before. During debugging, I ended up making several other changes refactoring and hopefully increasing the robustness of the workflow. After doing the experiments, I have updated the trigger event to be `pull_request_target` so this workflow can get write permissions by default but it will run with source code from the base (main) branch in the source repository only. If you do change the workflow, you'd need to experiment using the `workflow_dispatch` triggers. This should not be news to anyone using Github Actions (except me!) It is likely to be a little rocky though while I learn more about GitHub Actions, etc. Please be patient :) --------- Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
This commit is contained in:
parent
709c974bd8
commit
27d866795c
108 changed files with 13985 additions and 15254 deletions
198
.github/actions/run-and-record-tests/action.yml
vendored
Normal file
198
.github/actions/run-and-record-tests/action.yml
vendored
Normal file
|
@ -0,0 +1,198 @@
|
|||
name: 'Run and Record Tests'
|
||||
description: 'Run integration tests and handle recording/artifact upload'
|
||||
|
||||
inputs:
|
||||
test-types:
|
||||
description: 'JSON array of test types to run'
|
||||
required: true
|
||||
stack-config:
|
||||
description: 'Stack configuration to use'
|
||||
required: true
|
||||
provider:
|
||||
description: 'Provider to use for tests'
|
||||
required: true
|
||||
inference-mode:
|
||||
description: 'Inference mode (record or replay)'
|
||||
required: true
|
||||
run-vision-tests:
|
||||
description: 'Whether to run vision tests'
|
||||
required: false
|
||||
default: 'false'
|
||||
|
||||
runs:
|
||||
using: 'composite'
|
||||
steps:
|
||||
- name: Check Storage and Memory Available Before Tests
|
||||
if: ${{ always() }}
|
||||
shell: bash
|
||||
run: |
|
||||
free -h
|
||||
df -h
|
||||
|
||||
- name: Set environment variables
|
||||
shell: bash
|
||||
run: |
|
||||
echo "LLAMA_STACK_CLIENT_TIMEOUT=300" >> $GITHUB_ENV
|
||||
echo "LLAMA_STACK_TEST_INFERENCE_MODE=${{ inputs.inference-mode }}" >> $GITHUB_ENV
|
||||
|
||||
# Configure provider-specific settings
|
||||
if [ "${{ inputs.provider }}" == "ollama" ]; then
|
||||
echo "OLLAMA_URL=http://0.0.0.0:11434" >> $GITHUB_ENV
|
||||
echo "TEXT_MODEL=ollama/llama3.2:3b-instruct-fp16" >> $GITHUB_ENV
|
||||
echo "SAFETY_MODEL=ollama/llama-guard3:1b" >> $GITHUB_ENV
|
||||
else
|
||||
echo "VLLM_URL=http://localhost:8000/v1" >> $GITHUB_ENV
|
||||
echo "TEXT_MODEL=vllm/meta-llama/Llama-3.2-1B-Instruct" >> $GITHUB_ENV
|
||||
fi
|
||||
|
||||
if [ "${{ inputs.run-vision-tests }}" == "true" ]; then
|
||||
echo "LLAMA_STACK_TEST_RECORDING_DIR=tests/integration/recordings/vision" >> $GITHUB_ENV
|
||||
else
|
||||
echo "LLAMA_STACK_TEST_RECORDING_DIR=tests/integration/recordings" >> $GITHUB_ENV
|
||||
fi
|
||||
|
||||
- name: Run Llama Stack Server
|
||||
if: ${{ contains(inputs.stack-config, 'server:') }}
|
||||
shell: bash
|
||||
run: |
|
||||
# Run this so pytest in a loop doesn't start-stop servers in a loop
|
||||
echo "Starting Llama Stack Server"
|
||||
nohup uv run llama stack run ci-tests --image-type venv > server.log 2>&1 &
|
||||
|
||||
echo "Waiting for Llama Stack Server to start"
|
||||
for i in {1..30}; do
|
||||
if curl -s http://localhost:8321/v1/health | grep -q "OK"; then
|
||||
echo "Llama Stack Server started"
|
||||
exit 0
|
||||
fi
|
||||
sleep 1
|
||||
done
|
||||
|
||||
echo "Llama Stack Server failed to start"
|
||||
cat server.log
|
||||
exit 1
|
||||
|
||||
- name: Run Integration Tests
|
||||
shell: bash
|
||||
run: |
|
||||
stack_config="${{ inputs.stack-config }}"
|
||||
EXCLUDE_TESTS="builtin_tool or safety_with_image or code_interpreter or test_rag"
|
||||
|
||||
# Configure provider-specific settings
|
||||
if [ "${{ inputs.provider }}" == "ollama" ]; then
|
||||
EXTRA_PARAMS="--safety-shield=llama-guard"
|
||||
else
|
||||
EXTRA_PARAMS=""
|
||||
EXCLUDE_TESTS="${EXCLUDE_TESTS} or test_inference_store_tool_calls"
|
||||
fi
|
||||
|
||||
if [ "${{ inputs.run-vision-tests }}" == "true" ]; then
|
||||
if uv run pytest -s -v tests/integration/inference/test_vision_inference.py --stack-config=${stack_config} \
|
||||
-k "not( ${EXCLUDE_TESTS} )" \
|
||||
--vision-model=ollama/llama3.2-vision:11b \
|
||||
--embedding-model=sentence-transformers/all-MiniLM-L6-v2 \
|
||||
--color=yes ${EXTRA_PARAMS} \
|
||||
--capture=tee-sys | tee pytest-${{ inputs.inference-mode }}-vision.log; then
|
||||
echo "✅ Tests completed for vision"
|
||||
else
|
||||
echo "❌ Tests failed for vision"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# Run non-vision tests
|
||||
TEST_TYPES='${{ inputs.test-types }}'
|
||||
echo "Test types to run: $TEST_TYPES"
|
||||
|
||||
# Collect all test files for the specified test types
|
||||
TEST_FILES=""
|
||||
for test_type in $(echo "$TEST_TYPES" | jq -r '.[]'); do
|
||||
# if provider is vllm, exclude the following tests: (safety, post_training, tool_runtime)
|
||||
if [ "${{ inputs.provider }}" == "vllm" ]; then
|
||||
if [ "$test_type" == "safety" ] || [ "$test_type" == "post_training" ] || [ "$test_type" == "tool_runtime" ]; then
|
||||
echo "Skipping $test_type for vllm provider"
|
||||
continue
|
||||
fi
|
||||
fi
|
||||
|
||||
if [ -d "tests/integration/$test_type" ]; then
|
||||
# Find all Python test files in this directory
|
||||
test_files=$(find tests/integration/$test_type -name "test_*.py" -o -name "*_test.py")
|
||||
if [ -n "$test_files" ]; then
|
||||
TEST_FILES="$TEST_FILES $test_files"
|
||||
echo "Added test files from $test_type: $(echo $test_files | wc -w) files"
|
||||
fi
|
||||
else
|
||||
echo "Warning: Directory tests/integration/$test_type does not exist"
|
||||
fi
|
||||
done
|
||||
|
||||
if [ -z "$TEST_FILES" ]; then
|
||||
echo "No test files found for the specified test types"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "=== Running all collected tests in a single pytest command ==="
|
||||
echo "Total test files: $(echo $TEST_FILES | wc -w)"
|
||||
|
||||
if uv run pytest -s -v $TEST_FILES --stack-config=${stack_config} \
|
||||
-k "not( ${EXCLUDE_TESTS} )" \
|
||||
--text-model=$TEXT_MODEL \
|
||||
--embedding-model=sentence-transformers/all-MiniLM-L6-v2 \
|
||||
--color=yes ${EXTRA_PARAMS} \
|
||||
--capture=tee-sys | tee pytest-${{ inputs.inference-mode }}-all.log; then
|
||||
echo "✅ All tests completed successfully"
|
||||
else
|
||||
echo "❌ Tests failed"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
- name: Check Storage and Memory Available After Tests
|
||||
if: ${{ always() }}
|
||||
shell: bash
|
||||
run: |
|
||||
free -h
|
||||
df -h
|
||||
|
||||
- name: Commit and push recordings
|
||||
if: ${{ inputs.inference-mode == 'record' }}
|
||||
shell: bash
|
||||
run: |
|
||||
echo "Checking for recording changes"
|
||||
git status --porcelain tests/integration/recordings/
|
||||
|
||||
if [[ -n $(git status --porcelain tests/integration/recordings/) ]]; then
|
||||
echo "New recordings detected, committing and pushing"
|
||||
git add tests/integration/recordings/
|
||||
|
||||
if [ "${{ inputs.run-vision-tests }}" == "true" ]; then
|
||||
git commit -m "Recordings update from CI (vision)"
|
||||
else
|
||||
git commit -m "Recordings update from CI"
|
||||
fi
|
||||
|
||||
git fetch origin ${{ github.event.pull_request.head.ref }}
|
||||
git rebase origin/${{ github.event.pull_request.head.ref }}
|
||||
echo "Rebased successfully"
|
||||
git push origin HEAD:${{ github.event.pull_request.head.ref }}
|
||||
echo "Pushed successfully"
|
||||
else
|
||||
echo "No recording changes"
|
||||
fi
|
||||
|
||||
- name: Write inference logs to file
|
||||
if: ${{ always() }}
|
||||
shell: bash
|
||||
run: |
|
||||
sudo docker logs ollama > ollama-${{ inputs.inference-mode }}.log || true
|
||||
|
||||
- name: Upload logs
|
||||
if: ${{ always() }}
|
||||
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
|
||||
with:
|
||||
name: ${{ inputs.inference-mode }}-logs-${{ github.run_id }}-${{ github.run_attempt || '' }}-${{ inputs.provider }}
|
||||
path: |
|
||||
*.log
|
||||
retention-days: 1
|
73
.github/actions/run-integration-tests/action.yml
vendored
73
.github/actions/run-integration-tests/action.yml
vendored
|
@ -1,73 +0,0 @@
|
|||
name: 'Run Integration Tests'
|
||||
description: 'Run integration tests with configurable execution mode and provider settings'
|
||||
|
||||
inputs:
|
||||
test-types:
|
||||
description: 'Test types to run (JSON array)'
|
||||
required: true
|
||||
stack-config:
|
||||
description: 'Stack configuration: "ci-tests" or "server:ci-tests"'
|
||||
required: true
|
||||
provider:
|
||||
description: 'Provider to use: "ollama" or "vllm"'
|
||||
required: true
|
||||
inference-mode:
|
||||
description: 'Inference mode: "record" or "replay"'
|
||||
required: true
|
||||
|
||||
outputs:
|
||||
logs-path:
|
||||
description: 'Path to generated log files'
|
||||
value: '*.log'
|
||||
|
||||
runs:
|
||||
using: 'composite'
|
||||
steps:
|
||||
- name: Run Integration Tests
|
||||
env:
|
||||
LLAMA_STACK_CLIENT_TIMEOUT: "300"
|
||||
LLAMA_STACK_TEST_RECORDING_DIR: "tests/integration/recordings"
|
||||
LLAMA_STACK_TEST_INFERENCE_MODE: ${{ inputs.inference-mode }}
|
||||
shell: bash
|
||||
run: |
|
||||
stack_config="${{ inputs.stack-config }}"
|
||||
EXCLUDE_TESTS="builtin_tool or safety_with_image or code_interpreter or test_rag"
|
||||
|
||||
# Configure provider-specific settings
|
||||
if [ "${{ inputs.provider }}" == "ollama" ]; then
|
||||
export OLLAMA_URL="http://0.0.0.0:11434"
|
||||
export TEXT_MODEL="ollama/llama3.2:3b-instruct-fp16"
|
||||
export SAFETY_MODEL="ollama/llama-guard3:1b"
|
||||
EXTRA_PARAMS="--safety-shield=llama-guard"
|
||||
else
|
||||
export VLLM_URL="http://localhost:8000/v1"
|
||||
export TEXT_MODEL="vllm/meta-llama/Llama-3.2-1B-Instruct"
|
||||
EXTRA_PARAMS=""
|
||||
EXCLUDE_TESTS="${EXCLUDE_TESTS} or test_inference_store_tool_calls"
|
||||
fi
|
||||
|
||||
TEST_TYPES='${{ inputs.test-types }}'
|
||||
echo "Test types to run: $TEST_TYPES"
|
||||
|
||||
for test_type in $(echo "$TEST_TYPES" | jq -r '.[]'); do
|
||||
# if provider is vllm, exclude the following tests: (safety, post_training, tool_runtime)
|
||||
if [ "${{ inputs.provider }}" == "vllm" ]; then
|
||||
if [ "$test_type" == "safety" ] || [ "$test_type" == "post_training" ] || [ "$test_type" == "tool_runtime" ]; then
|
||||
continue
|
||||
fi
|
||||
fi
|
||||
|
||||
echo "=== Running tests for: $test_type ==="
|
||||
|
||||
if uv run pytest -s -v tests/integration/$test_type --stack-config=${stack_config} \
|
||||
-k "not( ${EXCLUDE_TESTS} )" \
|
||||
--text-model=$TEXT_MODEL \
|
||||
--embedding-model=sentence-transformers/all-MiniLM-L6-v2 \
|
||||
--color=yes ${EXTRA_PARAMS} \
|
||||
--capture=tee-sys | tee pytest-${{ inputs.inference-mode }}-$test_type.log; then
|
||||
echo "✅ Tests completed for $test_type"
|
||||
else
|
||||
echo "❌ Tests failed for $test_type"
|
||||
exit 1
|
||||
fi
|
||||
done
|
14
.github/actions/setup-ollama/action.yml
vendored
14
.github/actions/setup-ollama/action.yml
vendored
|
@ -1,11 +1,23 @@
|
|||
name: Setup Ollama
|
||||
description: Start Ollama
|
||||
inputs:
|
||||
run-vision-tests:
|
||||
description: 'Run vision tests: "true" or "false"'
|
||||
required: false
|
||||
default: 'false'
|
||||
runs:
|
||||
using: "composite"
|
||||
steps:
|
||||
- name: Start Ollama
|
||||
shell: bash
|
||||
run: |
|
||||
docker run -d --name ollama -p 11434:11434 docker.io/leseb/ollama-with-models
|
||||
if [ "${{ inputs.run-vision-tests }}" == "true" ]; then
|
||||
image="ollama-with-vision-model"
|
||||
else
|
||||
image="ollama-with-models"
|
||||
fi
|
||||
|
||||
echo "Starting Ollama with image: $image"
|
||||
docker run -d --name ollama -p 11434:11434 docker.io/llamastack/$image
|
||||
echo "Verifying Ollama status..."
|
||||
timeout 30 bash -c 'while ! curl -s -L http://127.0.0.1:11434; do sleep 1 && echo "."; done'
|
||||
|
|
48
.github/actions/setup-test-environment/action.yml
vendored
Normal file
48
.github/actions/setup-test-environment/action.yml
vendored
Normal file
|
@ -0,0 +1,48 @@
|
|||
name: 'Setup Test Environment'
|
||||
description: 'Common setup steps for integration tests including dependencies, providers, and build'
|
||||
|
||||
inputs:
|
||||
python-version:
|
||||
description: 'Python version to use'
|
||||
required: true
|
||||
client-version:
|
||||
description: 'Client version (latest or published)'
|
||||
required: true
|
||||
provider:
|
||||
description: 'Provider to setup (ollama or vllm)'
|
||||
required: true
|
||||
default: 'ollama'
|
||||
run-vision-tests:
|
||||
description: 'Whether to setup provider for vision tests'
|
||||
required: false
|
||||
default: 'false'
|
||||
|
||||
runs:
|
||||
using: 'composite'
|
||||
steps:
|
||||
- name: Install dependencies
|
||||
uses: ./.github/actions/setup-runner
|
||||
with:
|
||||
python-version: ${{ inputs.python-version }}
|
||||
client-version: ${{ inputs.client-version }}
|
||||
|
||||
- name: Setup ollama
|
||||
if: ${{ inputs.provider == 'ollama' }}
|
||||
uses: ./.github/actions/setup-ollama
|
||||
with:
|
||||
run-vision-tests: ${{ inputs.run-vision-tests }}
|
||||
|
||||
- name: Setup vllm
|
||||
if: ${{ inputs.provider == 'vllm' }}
|
||||
uses: ./.github/actions/setup-vllm
|
||||
|
||||
- name: Build Llama Stack
|
||||
shell: bash
|
||||
run: |
|
||||
uv run llama stack build --template ci-tests --image-type venv
|
||||
|
||||
- name: Configure git for commits
|
||||
shell: bash
|
||||
run: |
|
||||
git config --local user.email "github-actions[bot]@users.noreply.github.com"
|
||||
git config --local user.name "github-actions[bot]"
|
1
.github/workflows/README.md
vendored
1
.github/workflows/README.md
vendored
|
@ -10,6 +10,7 @@ Llama Stack uses GitHub Actions for Continuous Integration (CI). Below is a tabl
|
|||
| SqlStore Integration Tests | [integration-sql-store-tests.yml](integration-sql-store-tests.yml) | Run the integration test suite with SqlStore |
|
||||
| Integration Tests | [integration-tests.yml](integration-tests.yml) | Run the integration test suite from tests/integration |
|
||||
| Vector IO Integration Tests | [integration-vector-io-tests.yml](integration-vector-io-tests.yml) | Run the integration test suite with various VectorIO providers |
|
||||
| Vision Inference Integration Tests | [integration-vision-tests.yml](integration-vision-tests.yml) | Run vision inference integration test suite from tests/integration/inference |
|
||||
| Pre-commit | [pre-commit.yml](pre-commit.yml) | Run pre-commit checks |
|
||||
| Test Llama Stack Build | [providers-build.yml](providers-build.yml) | Test llama stack build |
|
||||
| Python Package Build Test | [python-build-test.yml](python-build-test.yml) | Test building the llama-stack PyPI project |
|
||||
|
|
113
.github/workflows/integration-tests.yml
vendored
113
.github/workflows/integration-tests.yml
vendored
|
@ -5,7 +5,7 @@ run-name: Run the integration test suite from tests/integration
|
|||
on:
|
||||
push:
|
||||
branches: [ main ]
|
||||
pull_request:
|
||||
pull_request_target:
|
||||
branches: [ main ]
|
||||
types: [opened, synchronize, labeled]
|
||||
paths:
|
||||
|
@ -13,10 +13,10 @@ on:
|
|||
- 'tests/**'
|
||||
- 'uv.lock'
|
||||
- 'pyproject.toml'
|
||||
- 'requirements.txt'
|
||||
- '.github/workflows/integration-tests.yml' # This workflow
|
||||
- '.github/actions/setup-ollama/action.yml'
|
||||
- '.github/actions/run-integration-tests/action.yml'
|
||||
- '.github/actions/setup-test-environment/action.yml'
|
||||
- '.github/actions/run-and-record-tests/action.yml'
|
||||
schedule:
|
||||
# If changing the cron schedule, update the provider in the test-matrix job
|
||||
- cron: '0 0 * * *' # (test latest client) Daily at 12 AM UTC
|
||||
|
@ -31,6 +31,10 @@ on:
|
|||
description: 'Test against a specific provider'
|
||||
type: string
|
||||
default: 'ollama'
|
||||
force-inference-mode:
|
||||
description: 'Force inference mode (record or replay)'
|
||||
type: string
|
||||
default: ''
|
||||
|
||||
concurrency:
|
||||
# This creates three concurrency groups:
|
||||
|
@ -73,10 +77,16 @@ jobs:
|
|||
- name: Check if re-record-tests label exists
|
||||
id: check-rerecord-tests
|
||||
run: |
|
||||
if [[ "${{ contains(github.event.pull_request.labels.*.name, 're-record-tests') }}" == "true" ]]; then
|
||||
if [[ "${{ inputs.force-inference-mode }}" == "record" ]]; then
|
||||
echo "rerecord-tests=true" >> $GITHUB_OUTPUT
|
||||
else
|
||||
elif [[ "${{ inputs.force-inference-mode }}" == "replay" ]]; then
|
||||
echo "rerecord-tests=false" >> $GITHUB_OUTPUT
|
||||
else
|
||||
if [[ "${{ contains(github.event.pull_request.labels.*.name, 're-record-tests') }}" == "true" ]]; then
|
||||
echo "rerecord-tests=true" >> $GITHUB_OUTPUT
|
||||
else
|
||||
echo "rerecord-tests=false" >> $GITHUB_OUTPUT
|
||||
fi
|
||||
fi
|
||||
|
||||
record-tests:
|
||||
|
@ -92,65 +102,26 @@ jobs:
|
|||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
with:
|
||||
ref: ${{ github.event.pull_request.head.ref }}
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Install dependencies
|
||||
uses: ./.github/actions/setup-runner
|
||||
- name: Setup test environment
|
||||
uses: ./.github/actions/setup-test-environment
|
||||
with:
|
||||
python-version: "3.12" # Use single Python version for recording
|
||||
client-version: "latest"
|
||||
provider: ${{ inputs.test-provider || 'ollama' }}
|
||||
|
||||
- name: Setup ollama
|
||||
if: ${{ inputs.test-provider == 'ollama' }}
|
||||
uses: ./.github/actions/setup-ollama
|
||||
|
||||
- name: Setup vllm
|
||||
if: ${{ inputs.test-provider == 'vllm' }}
|
||||
uses: ./.github/actions/setup-vllm
|
||||
|
||||
- name: Build Llama Stack
|
||||
run: |
|
||||
uv run llama stack build --template ci-tests --image-type venv
|
||||
|
||||
- name: Configure git for commits
|
||||
run: |
|
||||
git config --local user.email "github-actions[bot]@users.noreply.github.com"
|
||||
git config --local user.name "github-actions[bot]"
|
||||
|
||||
- name: Run Integration Tests for All Types (Recording Mode)
|
||||
uses: ./.github/actions/run-integration-tests
|
||||
- name: Run and record tests
|
||||
uses: ./.github/actions/run-and-record-tests
|
||||
with:
|
||||
test-types: ${{ needs.discover-tests.outputs.test-types }}
|
||||
stack-config: 'server:ci-tests' # recording must be done with server since more tests are run
|
||||
provider: ${{ inputs.test-provider }}
|
||||
provider: ${{ inputs.test-provider || 'ollama' }}
|
||||
inference-mode: 'record'
|
||||
|
||||
- name: Commit and push recordings
|
||||
run: |
|
||||
if ! git diff --quiet tests/integration/recordings/; then
|
||||
echo "Committing recordings"
|
||||
git add tests/integration/recordings/
|
||||
git commit -m "Update recordings"
|
||||
echo "Pushing all recording commits to PR"
|
||||
git push origin HEAD:${{ github.head_ref }}
|
||||
else
|
||||
echo "No recording changes"
|
||||
fi
|
||||
|
||||
- name: Write inference logs to file
|
||||
if: ${{ always() }}
|
||||
run: |
|
||||
sudo docker logs ollama > ollama-recording.log || true
|
||||
|
||||
- name: Upload recording logs
|
||||
if: ${{ always() }}
|
||||
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
|
||||
with:
|
||||
name: recording-logs-${{ github.run_id }}
|
||||
path: |
|
||||
*.log
|
||||
retention-days: 1
|
||||
|
||||
run-tests:
|
||||
run-replay-mode-tests:
|
||||
# Skip this job if we're in recording mode (handled by record-tests job)
|
||||
if: ${{ needs.discover-tests.outputs.rerecord-tests != 'true' }}
|
||||
needs: discover-tests
|
||||
|
@ -169,41 +140,17 @@ jobs:
|
|||
- name: Checkout repository
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
|
||||
- name: Install dependencies
|
||||
uses: ./.github/actions/setup-runner
|
||||
- name: Setup test environment
|
||||
uses: ./.github/actions/setup-test-environment
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
client-version: ${{ matrix.client-version }}
|
||||
provider: ${{ matrix.provider }}
|
||||
|
||||
- name: Build Llama Stack
|
||||
run: |
|
||||
uv run llama stack build --template ci-tests --image-type venv
|
||||
|
||||
- name: Check Storage and Memory Available Before Tests
|
||||
if: ${{ always() }}
|
||||
run: |
|
||||
free -h
|
||||
df -h
|
||||
|
||||
- name: Run Integration Tests (Replay Mode)
|
||||
uses: ./.github/actions/run-integration-tests
|
||||
- name: Run and record tests
|
||||
uses: ./.github/actions/run-and-record-tests
|
||||
with:
|
||||
test-types: ${{ needs.discover-tests.outputs.test-types }}
|
||||
stack-config: ${{ matrix.client-type == 'library' && 'ci-tests' || 'server:ci-tests' }}
|
||||
provider: ${{ matrix.provider }}
|
||||
inference-mode: 'replay'
|
||||
|
||||
- name: Check Storage and Memory Available After Tests
|
||||
if: ${{ always() }}
|
||||
run: |
|
||||
free -h
|
||||
df -h
|
||||
|
||||
- name: Upload test logs on failure
|
||||
if: ${{ failure() }}
|
||||
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
|
||||
with:
|
||||
name: test-logs-${{ github.run_id }}-${{ github.run_attempt }}-${{ matrix.provider }}-${{ matrix.client-type }}-${{ matrix.python-version }}-${{ matrix.client-version }}
|
||||
path: |
|
||||
*.log
|
||||
retention-days: 1
|
||||
|
|
141
.github/workflows/integration-vision-tests.yml
vendored
Normal file
141
.github/workflows/integration-vision-tests.yml
vendored
Normal file
|
@ -0,0 +1,141 @@
|
|||
name: Vision Inference Integration Tests
|
||||
|
||||
run-name: Run vision inference integration test suite from tests/integration/inference
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [ main ]
|
||||
pull_request_target:
|
||||
branches: [ main ]
|
||||
types: [opened, synchronize, labeled]
|
||||
paths:
|
||||
- 'llama_stack/**'
|
||||
- 'tests/**'
|
||||
- 'uv.lock'
|
||||
- 'pyproject.toml'
|
||||
- '.github/workflows/integration-vision-tests.yml' # This workflow
|
||||
- '.github/actions/setup-ollama/action.yml'
|
||||
- '.github/actions/setup-test-environment/action.yml'
|
||||
- '.github/actions/run-and-record-tests/action.yml'
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
test-all-client-versions:
|
||||
description: 'Test against both the latest and published versions'
|
||||
type: boolean
|
||||
default: false
|
||||
force-inference-mode:
|
||||
description: 'Force inference mode (record or replay)'
|
||||
type: string
|
||||
default: ''
|
||||
|
||||
concurrency:
|
||||
# This creates three concurrency groups:
|
||||
# ${{ github.workflow }}-${{ github.ref }}-rerecord (for valid triggers with re-record-tests label)
|
||||
# ${{ github.workflow }}-${{ github.ref }}-replay (for valid triggers without re-record-tests label)
|
||||
# ${{ github.workflow }}-${{ github.ref }}-no-run (for invalid triggers that will be skipped)
|
||||
# The "no-run" group ensures that irrelevant label events don't interfere with the real workflows.
|
||||
group: >-
|
||||
${{ github.workflow }}-${{ github.ref }}-${{
|
||||
((github.event.action == 'opened' || github.event.action == 'synchronize') && 'replay') ||
|
||||
((github.event.action == 'labeled' && contains(github.event.pull_request.labels.*.name, 're-record-tests')) && 'rerecord' ||
|
||||
'no-run')
|
||||
}}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
discover-tests:
|
||||
if: |
|
||||
github.event.action == 'opened' ||
|
||||
github.event.action == 'synchronize' ||
|
||||
(github.event.action == 'labeled' && contains(github.event.pull_request.labels.*.name, 're-record-tests'))
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
rerecord-tests: ${{ steps.check-rerecord-tests.outputs.rerecord-tests }}
|
||||
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
|
||||
- name: Check if re-record-tests label exists
|
||||
id: check-rerecord-tests
|
||||
run: |
|
||||
if [[ "${{ inputs.force-inference-mode }}" == "record" ]]; then
|
||||
echo "rerecord-tests=true" >> $GITHUB_OUTPUT
|
||||
elif [[ "${{ inputs.force-inference-mode }}" == "replay" ]]; then
|
||||
echo "rerecord-tests=false" >> $GITHUB_OUTPUT
|
||||
else
|
||||
if [[ "${{ contains(github.event.pull_request.labels.*.name, 're-record-tests') }}" == "true" ]]; then
|
||||
echo "rerecord-tests=true" >> $GITHUB_OUTPUT
|
||||
else
|
||||
echo "rerecord-tests=false" >> $GITHUB_OUTPUT
|
||||
fi
|
||||
fi
|
||||
|
||||
record-tests:
|
||||
# Sequential job for recording to avoid SQLite conflicts
|
||||
if: ${{ needs.discover-tests.outputs.rerecord-tests == 'true' }}
|
||||
needs: discover-tests
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
permissions:
|
||||
contents: write
|
||||
pull-requests: write
|
||||
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
with:
|
||||
ref: ${{ github.event.pull_request.head.ref }}
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Setup test environment
|
||||
uses: ./.github/actions/setup-test-environment
|
||||
with:
|
||||
python-version: "3.12" # Use single Python version for recording
|
||||
client-version: "latest"
|
||||
provider: 'ollama'
|
||||
run-vision-tests: 'true'
|
||||
|
||||
- name: Run and record tests
|
||||
uses: ./.github/actions/run-and-record-tests
|
||||
with:
|
||||
test-types: '["vision"]'
|
||||
stack-config: 'server:ci-tests' # re-recording must be done in server mode
|
||||
provider: 'ollama'
|
||||
inference-mode: 'record'
|
||||
run-vision-tests: 'true'
|
||||
|
||||
run-replay-mode-tests:
|
||||
# Skip this job if we're in recording mode (handled by record-tests job)
|
||||
if: ${{ needs.discover-tests.outputs.rerecord-tests != 'true' }}
|
||||
needs: discover-tests
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
client-type: [library]
|
||||
provider: [ollama]
|
||||
python-version: ["3.12"]
|
||||
client-version: ["latest"]
|
||||
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
|
||||
- name: Setup test environment
|
||||
uses: ./.github/actions/setup-test-environment
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
client-version: ${{ matrix.client-version }}
|
||||
provider: ${{ matrix.provider }}
|
||||
run-vision-tests: 'true'
|
||||
|
||||
- name: Run and record tests
|
||||
uses: ./.github/actions/run-and-record-tests
|
||||
with:
|
||||
test-types: '["vision"]'
|
||||
stack-config: ${{ matrix.client-type == 'library' && 'ci-tests' || 'server:ci-tests' }}
|
||||
provider: ${{ matrix.provider }}
|
||||
inference-mode: 'replay'
|
||||
run-vision-tests: 'true'
|
Loading…
Add table
Add a link
Reference in a new issue