mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-08-15 06:00:48 +00:00
ci: integrate vLLM inference tests with GitHub Actions workflows
Add vLLM provider support to integration test CI workflows alongside existing Ollama support. Configure provider-specific test execution where vLLM runs only inference specific tests (excluding vision tests) while Ollama continues to run the full test suite. This enables comprehensive CI testing of both inference providers but keeps the vLLM footprint small, this can be expanded later if it proves to not be too disruptive. Signed-off-by: Derek Higgins <derekh@redhat.com>
This commit is contained in:
parent
91a010fb12
commit
1f29aaa2e1
3 changed files with 46 additions and 42 deletions
|
@ -52,9 +52,9 @@ runs:
|
||||||
git add tests/integration/recordings/
|
git add tests/integration/recordings/
|
||||||
|
|
||||||
if [ "${{ inputs.run-vision-tests }}" == "true" ]; then
|
if [ "${{ inputs.run-vision-tests }}" == "true" ]; then
|
||||||
git commit -m "Recordings update from CI (vision)"
|
git commit -m "Recordings update from CI (vision) (${{ inputs.provider }})"
|
||||||
else
|
else
|
||||||
git commit -m "Recordings update from CI"
|
git commit -m "Recordings update from CI (${{ inputs.provider }})"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
git fetch origin ${{ github.event.pull_request.head.ref }}
|
git fetch origin ${{ github.event.pull_request.head.ref }}
|
||||||
|
@ -70,7 +70,8 @@ runs:
|
||||||
if: ${{ always() }}
|
if: ${{ always() }}
|
||||||
shell: bash
|
shell: bash
|
||||||
run: |
|
run: |
|
||||||
sudo docker logs ollama > ollama-${{ inputs.inference-mode }}.log || true
|
sudo docker logs ollama > ollama-${{ inputs.inference-mode }}.log 2>&1 || true
|
||||||
|
sudo docker logs vllm > vllm-${{ inputs.inference-mode }}.log 2>&1 || true
|
||||||
|
|
||||||
- name: Upload logs
|
- name: Upload logs
|
||||||
if: ${{ always() }}
|
if: ${{ always() }}
|
||||||
|
|
46
.github/workflows/integration-tests.yml
vendored
46
.github/workflows/integration-tests.yml
vendored
|
@ -20,7 +20,6 @@ on:
|
||||||
schedule:
|
schedule:
|
||||||
# If changing the cron schedule, update the provider in the test-matrix job
|
# If changing the cron schedule, update the provider in the test-matrix job
|
||||||
- cron: '0 0 * * *' # (test latest client) Daily at 12 AM UTC
|
- cron: '0 0 * * *' # (test latest client) Daily at 12 AM UTC
|
||||||
- cron: '1 0 * * 0' # (test vllm) Weekly on Sunday at 1 AM UTC
|
|
||||||
workflow_dispatch:
|
workflow_dispatch:
|
||||||
inputs:
|
inputs:
|
||||||
test-all-client-versions:
|
test-all-client-versions:
|
||||||
|
@ -38,28 +37,7 @@ concurrency:
|
||||||
cancel-in-progress: true
|
cancel-in-progress: true
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
discover-tests:
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
outputs:
|
|
||||||
test-types: ${{ steps.generate-test-types.outputs.test-types }}
|
|
||||||
|
|
||||||
steps:
|
|
||||||
- name: Checkout repository
|
|
||||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
|
||||||
|
|
||||||
- name: Generate test types
|
|
||||||
id: generate-test-types
|
|
||||||
run: |
|
|
||||||
# Get test directories dynamically, excluding non-test directories
|
|
||||||
# NOTE: we are excluding post_training since the tests take too long
|
|
||||||
TEST_TYPES=$(find tests/integration -maxdepth 1 -mindepth 1 -type d |
|
|
||||||
sed 's|tests/integration/||' |
|
|
||||||
grep -Ev "^(__pycache__|fixtures|test_cases|recordings|non_ci|post_training)$" |
|
|
||||||
sort | jq -R -s -c 'split("\n")[:-1]')
|
|
||||||
echo "test-types=$TEST_TYPES" >> $GITHUB_OUTPUT
|
|
||||||
|
|
||||||
run-replay-mode-tests:
|
run-replay-mode-tests:
|
||||||
needs: discover-tests
|
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
name: ${{ format('Integration Tests ({0}, {1}, {2}, client={3}, vision={4})', matrix.client-type, matrix.provider, matrix.python-version, matrix.client-version, matrix.run-vision-tests) }}
|
name: ${{ format('Integration Tests ({0}, {1}, {2}, client={3}, vision={4})', matrix.client-type, matrix.provider, matrix.python-version, matrix.client-version, matrix.run-vision-tests) }}
|
||||||
|
|
||||||
|
@ -68,11 +46,14 @@ jobs:
|
||||||
matrix:
|
matrix:
|
||||||
client-type: [library, server]
|
client-type: [library, server]
|
||||||
# Use vllm on weekly schedule, otherwise use test-provider input (defaults to ollama)
|
# Use vllm on weekly schedule, otherwise use test-provider input (defaults to ollama)
|
||||||
provider: ${{ (github.event.schedule == '1 0 * * 0') && fromJSON('["vllm"]') || fromJSON(format('["{0}"]', github.event.inputs.test-provider || 'ollama')) }}
|
provider: [ollama, vllm]
|
||||||
# Use Python 3.13 only on nightly schedule (daily latest client test), otherwise use 3.12
|
# Use Python 3.13 only on nightly schedule (daily latest client test), otherwise use 3.12
|
||||||
python-version: ${{ github.event.schedule == '0 0 * * *' && fromJSON('["3.12", "3.13"]') || fromJSON('["3.12"]') }}
|
python-version: ${{ github.event.schedule == '0 0 * * *' && fromJSON('["3.12", "3.13"]') || fromJSON('["3.12"]') }}
|
||||||
client-version: ${{ (github.event.schedule == '0 0 * * *' || github.event.inputs.test-all-client-versions == 'true') && fromJSON('["published", "latest"]') || fromJSON('["latest"]') }}
|
client-version: ${{ (github.event.schedule == '0 0 * * *' || github.event.inputs.test-all-client-versions == 'true') && fromJSON('["published", "latest"]') || fromJSON('["latest"]') }}
|
||||||
run-vision-tests: [true, false]
|
run-vision-tests: [true, false]
|
||||||
|
exclude:
|
||||||
|
- provider: vllm
|
||||||
|
run-vision-tests: true
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout repository
|
- name: Checkout repository
|
||||||
|
@ -87,10 +68,27 @@ jobs:
|
||||||
run-vision-tests: ${{ matrix.run-vision-tests }}
|
run-vision-tests: ${{ matrix.run-vision-tests }}
|
||||||
inference-mode: 'replay'
|
inference-mode: 'replay'
|
||||||
|
|
||||||
|
- name: Generate test types
|
||||||
|
id: generate-test-types
|
||||||
|
run: |
|
||||||
|
# Only run inference tests for vllm as these are more likely to exercise the vllm provider
|
||||||
|
# TODO: Add agent tests for vllm
|
||||||
|
if [ ${{ matrix.provider }} == "vllm" ]; then
|
||||||
|
echo "test-types=[\"inference\"]" >> $GITHUB_OUTPUT
|
||||||
|
exit 0
|
||||||
|
fi
|
||||||
|
# Get test directories dynamically, excluding non-test directories
|
||||||
|
# NOTE: we are excluding post_training since the tests take too long
|
||||||
|
TEST_TYPES=$(find tests/integration -maxdepth 1 -mindepth 1 -type d |
|
||||||
|
sed 's|tests/integration/||' |
|
||||||
|
grep -Ev "^(__pycache__|fixtures|test_cases|recordings|non_ci|post_training)$" |
|
||||||
|
sort | jq -R -s -c 'split("\n")[:-1]')
|
||||||
|
echo "test-types=$TEST_TYPES" >> $GITHUB_OUTPUT
|
||||||
|
|
||||||
- name: Run tests
|
- name: Run tests
|
||||||
uses: ./.github/actions/run-and-record-tests
|
uses: ./.github/actions/run-and-record-tests
|
||||||
with:
|
with:
|
||||||
test-types: ${{ needs.discover-tests.outputs.test-types }}
|
test-types: ${{ steps.generate-test-types.outputs.test-types }}
|
||||||
stack-config: ${{ matrix.client-type == 'library' && 'ci-tests' || 'server:ci-tests' }}
|
stack-config: ${{ matrix.client-type == 'library' && 'ci-tests' || 'server:ci-tests' }}
|
||||||
provider: ${{ matrix.provider }}
|
provider: ${{ matrix.provider }}
|
||||||
inference-mode: 'replay'
|
inference-mode: 'replay'
|
||||||
|
|
35
.github/workflows/record-integration-tests.yml
vendored
35
.github/workflows/record-integration-tests.yml
vendored
|
@ -15,12 +15,6 @@ on:
|
||||||
- '.github/actions/setup-ollama/action.yml'
|
- '.github/actions/setup-ollama/action.yml'
|
||||||
- '.github/actions/setup-test-environment/action.yml'
|
- '.github/actions/setup-test-environment/action.yml'
|
||||||
- '.github/actions/run-and-record-tests/action.yml'
|
- '.github/actions/run-and-record-tests/action.yml'
|
||||||
workflow_dispatch:
|
|
||||||
inputs:
|
|
||||||
test-provider:
|
|
||||||
description: 'Test against a specific provider'
|
|
||||||
type: string
|
|
||||||
default: 'ollama'
|
|
||||||
|
|
||||||
concurrency:
|
concurrency:
|
||||||
group: ${{ github.workflow }}-${{ github.ref }}
|
group: ${{ github.workflow }}-${{ github.ref }}
|
||||||
|
@ -42,12 +36,6 @@ jobs:
|
||||||
- name: Generate test types
|
- name: Generate test types
|
||||||
id: generate-test-types
|
id: generate-test-types
|
||||||
run: |
|
run: |
|
||||||
# Get test directories dynamically, excluding non-test directories
|
|
||||||
TEST_TYPES=$(find tests/integration -maxdepth 1 -mindepth 1 -type d -printf "%f\n" |
|
|
||||||
grep -Ev "^(__pycache__|fixtures|test_cases|recordings|post_training)$" |
|
|
||||||
sort | jq -R -s -c 'split("\n")[:-1]')
|
|
||||||
echo "test-types=$TEST_TYPES" >> $GITHUB_OUTPUT
|
|
||||||
|
|
||||||
labels=$(gh pr view ${{ github.event.pull_request.number }} --json labels --jq '.labels[].name')
|
labels=$(gh pr view ${{ github.event.pull_request.number }} --json labels --jq '.labels[].name')
|
||||||
echo "labels=$labels"
|
echo "labels=$labels"
|
||||||
|
|
||||||
|
@ -82,6 +70,10 @@ jobs:
|
||||||
fail-fast: false
|
fail-fast: false
|
||||||
matrix:
|
matrix:
|
||||||
mode: ${{ fromJSON(needs.discover-tests.outputs.matrix-modes) }}
|
mode: ${{ fromJSON(needs.discover-tests.outputs.matrix-modes) }}
|
||||||
|
provider: [ollama, vllm]
|
||||||
|
exclude:
|
||||||
|
- mode: vision
|
||||||
|
provider: vllm
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout repository
|
- name: Checkout repository
|
||||||
|
@ -90,20 +82,33 @@ jobs:
|
||||||
ref: ${{ github.event.pull_request.head.ref }}
|
ref: ${{ github.event.pull_request.head.ref }}
|
||||||
fetch-depth: 0
|
fetch-depth: 0
|
||||||
|
|
||||||
|
- name: Generate test types
|
||||||
|
id: generate-test-types
|
||||||
|
run: |
|
||||||
|
if [ ${{ matrix.provider }} == "vllm" ]; then
|
||||||
|
echo "test-types=[\"inference\"]" >> $GITHUB_OUTPUT
|
||||||
|
else
|
||||||
|
# Get test directories dynamically, excluding non-test directories
|
||||||
|
TEST_TYPES=$(find tests/integration -maxdepth 1 -mindepth 1 -type d -printf "%f\n" |
|
||||||
|
grep -Ev "^(__pycache__|fixtures|test_cases|recordings|non_ci|post_training)$" |
|
||||||
|
sort | jq -R -s -c 'split("\n")[:-1]')
|
||||||
|
echo "test-types=$TEST_TYPES" >> $GITHUB_OUTPUT
|
||||||
|
fi
|
||||||
|
|
||||||
- name: Setup test environment
|
- name: Setup test environment
|
||||||
uses: ./.github/actions/setup-test-environment
|
uses: ./.github/actions/setup-test-environment
|
||||||
with:
|
with:
|
||||||
python-version: "3.12" # Use single Python version for recording
|
python-version: "3.12" # Use single Python version for recording
|
||||||
client-version: "latest"
|
client-version: "latest"
|
||||||
provider: ${{ inputs.test-provider || 'ollama' }}
|
provider: ${{ matrix.provider }}
|
||||||
run-vision-tests: ${{ matrix.mode == 'vision' && 'true' || 'false' }}
|
run-vision-tests: ${{ matrix.mode == 'vision' && 'true' || 'false' }}
|
||||||
inference-mode: 'record'
|
inference-mode: 'record'
|
||||||
|
|
||||||
- name: Run and record tests
|
- name: Run and record tests
|
||||||
uses: ./.github/actions/run-and-record-tests
|
uses: ./.github/actions/run-and-record-tests
|
||||||
with:
|
with:
|
||||||
test-types: ${{ needs.discover-tests.outputs.test-types }}
|
test-types: ${{ steps.generate-test-types.outputs.test-types }}
|
||||||
stack-config: 'server:ci-tests' # recording must be done with server since more tests are run
|
stack-config: 'server:ci-tests' # recording must be done with server since more tests are run
|
||||||
provider: ${{ inputs.test-provider || 'ollama' }}
|
provider: ${{ matrix.provider }}
|
||||||
inference-mode: 'record'
|
inference-mode: 'record'
|
||||||
run-vision-tests: ${{ matrix.mode == 'vision' && 'true' || 'false' }}
|
run-vision-tests: ${{ matrix.mode == 'vision' && 'true' || 'false' }}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue