Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-07-27 06:28:50 +00:00)

test: Add VLLM provider support to integration tests

- Add setup-vllm GitHub action to start a VLLM container
- Extend the integration test matrix to support both ollama and vllm providers
- Make test setup conditional based on provider type
- Add provider-specific environment variables and configurations

TODO: investigate failing tests for the vllm provider (safety, post_training and tool_runtime).
A proper fix is also needed for #2713 (a temporary fix for it is in the first commit of this PR).

Closes: #1648
Signed-off-by: Derek Higgins <derekh@redhat.com>

Parent: 3e7ea4dd14
Commit: 7420c1db11
2 changed files with 76 additions and 13 deletions
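For local debugging, the vllm branch of the new test step can be reproduced roughly as follows — a minimal sketch assuming a vllm server is already listening on localhost:8000 (for example the container started by the setup-vllm action below), with `inference` standing in for one of the matrix test-types:

    export ENABLE_VLLM="vllm"
    export VLLM_URL="http://localhost:8000/v1"
    export VLLM_INFERENCE_MODEL="meta-llama/Llama-3.2-1B-Instruct"
    export TEXT_MODEL=vllm/$VLLM_INFERENCE_MODEL

    # Same build and pytest invocation as the workflow, with the vllm-path filters inlined
    uv run llama stack build --template ci-tests --image-type venv
    uv run pytest -s -v tests/integration/inference --stack-config=server:ci-tests \
      -k "not( builtin_tool or safety_with_image or code_interpreter or test_rag or test_inference_store_tool_calls )" \
      --text-model=$TEXT_MODEL \
      --embedding-model=sentence-transformers/all-MiniLM-L6-v2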
.github/actions/setup-vllm/action.yml (vendored, new file, +27)

@@ -0,0 +1,27 @@
+name: Setup VLLM
+description: Start VLLM
+runs:
+  using: "composite"
+  steps:
+    - name: Start VLLM
+      shell: bash
+      run: |
+        # Start vllm container
+        docker run -d \
+            --name vllm \
+            -p 8000:8000 \
+            --privileged=true \
+            quay.io/higginsd/vllm-cpu:65393ee064 \
+            --host 0.0.0.0 \
+            --port 8000 \
+            --enable-auto-tool-choice \
+            --tool-call-parser llama3_json \
+            --model /root/.cache/Llama-3.2-1B-Instruct \
+            --served-model-name meta-llama/Llama-3.2-1B-Instruct
+
+        # Wait for vllm to be ready
+        echo "Waiting for vllm to be ready..."
+        timeout 900 bash -c 'until curl -f http://localhost:8000/health; do
+          echo "Waiting for vllm..."
+          sleep 5
+        done'
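Outside CI, the same readiness check can be run by hand. A hedged sketch — /health is the probe the action polls, and /v1/models is vLLM's standard OpenAI-compatible model listing, assumed to be exposed by this image:

    curl -f http://localhost:8000/health     # readiness probe, same as the action uses
    curl -s http://localhost:8000/v1/models  # should list meta-llama/Llama-3.2-1B-Instruct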
.github/workflows/integration-tests.yml (vendored, 62 lines changed: +49 -13)

@@ -14,13 +14,19 @@ on:
       - '.github/workflows/integration-tests.yml' # This workflow
       - '.github/actions/setup-ollama/action.yml'
   schedule:
-    - cron: '0 0 * * *' # Daily at 12 AM UTC
+    # If changing the cron schedule, update the provider in the test-matrix job
+    - cron: '0 0 * * *' # (test latest client) Daily at 12 AM UTC
+    - cron: '1 0 * * 0' # (test vllm) Weekly on Sunday at 1 AM UTC
   workflow_dispatch:
     inputs:
       test-all-client-versions:
         description: 'Test against both the latest and published versions'
         type: boolean
         default: false
+      test-provider:
+        description: 'Test against a specific provider'
+        type: string
+        default: 'ollama'

 concurrency:
   group: ${{ github.workflow }}-${{ github.ref }}
@@ -53,8 +59,17 @@ jobs:
       matrix:
         test-type: ${{ fromJson(needs.discover-tests.outputs.test-type) }}
         client-type: [library, server]
+        # Use vllm on weekly schedule, otherwise use test-provider input (defaults to ollama)
+        provider: ${{ (github.event.schedule == '1 0 * * 0') && fromJSON('["vllm"]') || fromJSON(format('["{0}"]', github.event.inputs.test-provider || 'ollama')) }}
         python-version: ["3.12", "3.13"]
-        client-version: ${{ (github.event_name == 'schedule' || github.event.inputs.test-all-client-versions == 'true') && fromJSON('["published", "latest"]') || fromJSON('["latest"]') }}
+        client-version: ${{ (github.event.schedule == '0 0 * * 0' || github.event.inputs.test-all-client-versions == 'true') && fromJSON('["published", "latest"]') || fromJSON('["latest"]') }}
+        exclude: # TODO: look into why these tests are failing and fix them
+          - provider: vllm
+            test-type: safety
+          - provider: vllm
+            test-type: post_training
+          - provider: vllm
+            test-type: tool_runtime

     steps:
       - name: Checkout repository
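The new `provider` entry relies on the `cond && A || B` ternary idiom of GitHub Actions expressions. In shell terms it behaves roughly like this sketch, where `GITHUB_EVENT_SCHEDULE` and `TEST_PROVIDER_INPUT` are illustrative stand-ins for `github.event.schedule` and `github.event.inputs.test-provider`:

    # Weekly Sunday cron -> vllm; any other trigger -> the test-provider input, defaulting to ollama
    if [ "$GITHUB_EVENT_SCHEDULE" == "1 0 * * 0" ]; then
      provider='["vllm"]'
    else
      provider="[\"${TEST_PROVIDER_INPUT:-ollama}\"]"
    fi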
@@ -67,8 +82,13 @@ jobs:
           client-version: ${{ matrix.client-version }}

       - name: Setup ollama
+        if: ${{ matrix.provider == 'ollama' }}
         uses: ./.github/actions/setup-ollama
+
+      - name: Setup vllm
+        if: ${{ matrix.provider == 'vllm' }}
+        uses: ./.github/actions/setup-vllm

       - name: Build Llama Stack
         run: |
           uv run llama stack build --template ci-tests --image-type venv
@@ -81,10 +101,6 @@ jobs:

       - name: Run Integration Tests
         env:
-          OLLAMA_INFERENCE_MODEL: "llama3.2:3b-instruct-fp16" # for server tests
-          ENABLE_OLLAMA: "ollama" # for server tests
-          OLLAMA_URL: "http://0.0.0.0:11434"
-          SAFETY_MODEL: "llama-guard3:1b"
           LLAMA_STACK_CLIENT_TIMEOUT: "300" # Increased timeout for eval operations
         # Use 'shell' to get pipefail behavior
         # https://docs.github.com/en/actions/reference/workflow-syntax-for-github-actions#exit-codes-and-error-action-preference
@@ -96,12 +112,31 @@
           else
             stack_config="server:ci-tests"
           fi

+          EXCLUDE_TESTS="builtin_tool or safety_with_image or code_interpreter or test_rag"
+          if [ "${{ matrix.provider }}" == "ollama" ]; then
+            export ENABLE_OLLAMA="ollama"
+            export OLLAMA_URL="http://0.0.0.0:11434"
+            export OLLAMA_INFERENCE_MODEL="llama3.2:3b-instruct-fp16"
+            export TEXT_MODEL=ollama/$OLLAMA_INFERENCE_MODEL
+            export SAFETY_MODEL="llama-guard3:1b"
+            EXTRA_PARAMS="--safety-shield=$SAFETY_MODEL"
+          else
+            export ENABLE_VLLM="vllm"
+            export VLLM_URL="http://localhost:8000/v1"
+            export VLLM_INFERENCE_MODEL="meta-llama/Llama-3.2-1B-Instruct"
+            export TEXT_MODEL=vllm/$VLLM_INFERENCE_MODEL
+            # TODO: remove the not(test_inference_store_tool_calls) once we can get the tool called consistently
+            EXTRA_PARAMS=
+            EXCLUDE_TESTS="${EXCLUDE_TESTS} or test_inference_store_tool_calls"
+          fi
+
           uv run pytest -s -v tests/integration/${{ matrix.test-type }} --stack-config=${stack_config} \
-            -k "not(builtin_tool or safety_with_image or code_interpreter or test_rag)" \
-            --text-model="ollama/llama3.2:3b-instruct-fp16" \
+            -k "not( ${EXCLUDE_TESTS} )" \
+            --text-model=$TEXT_MODEL \
             --embedding-model=sentence-transformers/all-MiniLM-L6-v2 \
-            --safety-shield=$SAFETY_MODEL \
-            --color=yes \
+            --color=yes ${EXTRA_PARAMS} \
             --capture=tee-sys | tee pytest-${{ matrix.test-type }}.log

       - name: Check Storage and Memory Available After Tests
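Purely for illustration, this is what the pytest -k filter expands to on the vllm path once the two assignments above compose:

    EXCLUDE_TESTS="builtin_tool or safety_with_image or code_interpreter or test_rag"
    EXCLUDE_TESTS="${EXCLUDE_TESTS} or test_inference_store_tool_calls"
    echo "not( ${EXCLUDE_TESTS} )"
    # -> not( builtin_tool or safety_with_image or code_interpreter or test_rag or test_inference_store_tool_calls )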
@@ -110,16 +145,17 @@
           free -h
           df -h

-      - name: Write ollama logs to file
+      - name: Write inference logs to file
         if: ${{ always() }}
         run: |
-          sudo docker logs ollama > ollama.log
+          sudo docker logs ollama > ollama.log || true
+          sudo docker logs vllm > vllm.log || true

       - name: Upload all logs to artifacts
         if: ${{ always() }}
         uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
         with:
-          name: logs-${{ github.run_id }}-${{ github.run_attempt }}-${{ matrix.client-type }}-${{ matrix.test-type }}-${{ matrix.python-version }}-${{ matrix.client-version }}
+          name: logs-${{ github.run_id }}-${{ github.run_attempt }}-${{ matrix.provider }}-${{ matrix.client-type }}-${{ matrix.test-type }}-${{ matrix.python-version }}-${{ matrix.client-version }}
           path: |
             *.log
           retention-days: 1
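With the new test-provider input in place, a vllm run can also be dispatched manually — a sketch assuming the GitHub CLI is installed and authenticated against the repo:

    gh workflow run integration-tests.yml -f test-provider=vllm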