Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-07-27 06:28:50 +00:00)

test: Add VLLM provider support to integration tests

- Add setup-vllm GitHub action to start a VLLM container
- Extend the integration test matrix to support both ollama and vllm providers
- Make test setup conditional based on provider type
- Add provider-specific environment variables and configurations

TODO: investigate failing tests for the vllm provider (safety, post_training and tool_runtime).
A proper fix is also needed for #2713 (a temporary fix for it is in the first commit of this PR).

Closes: #1648
Signed-off-by: Derek Higgins <derekh@redhat.com>

Parent: 3e7ea4dd14
Commit: 7420c1db11
2 changed files with 76 additions and 13 deletions
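For local debugging, the vllm branch of the new test step can be reproduced roughly as follows — a minimal sketch assuming a vllm server is already listening on localhost:8000 (for example the container started by the setup-vllm action below), with `inference` standing in for one of the matrix test-types:

    export ENABLE_VLLM="vllm"
    export VLLM_URL="http://localhost:8000/v1"
    export VLLM_INFERENCE_MODEL="meta-llama/Llama-3.2-1B-Instruct"
    export TEXT_MODEL=vllm/$VLLM_INFERENCE_MODEL

    # Same build and pytest invocation as the workflow, with the vllm-path filters inlined
    uv run llama stack build --template ci-tests --image-type venv
    uv run pytest -s -v tests/integration/inference --stack-config=server:ci-tests \
      -k "not( builtin_tool or safety_with_image or code_interpreter or test_rag or test_inference_store_tool_calls )" \
      --text-model=$TEXT_MODEL \
      --embedding-model=sentence-transformers/all-MiniLM-L6-v2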
.github/actions/setup-vllm/action.yml (vendored, new file, +27)

@@ -0,0 +1,27 @@
+name: Setup VLLM
+description: Start VLLM
+runs:
+  using: "composite"
+  steps:
+    - name: Start VLLM
+      shell: bash
+      run: |
+        # Start vllm container
+        docker run -d \
+            --name vllm \
+            -p 8000:8000 \
+            --privileged=true \
+            quay.io/higginsd/vllm-cpu:65393ee064 \
+            --host 0.0.0.0 \
+            --port 8000 \
+            --enable-auto-tool-choice \
+            --tool-call-parser llama3_json \
+            --model /root/.cache/Llama-3.2-1B-Instruct \
+            --served-model-name meta-llama/Llama-3.2-1B-Instruct
+
+        # Wait for vllm to be ready
+        echo "Waiting for vllm to be ready..."
+        timeout 900 bash -c 'until curl -f http://localhost:8000/health; do
+          echo "Waiting for vllm..."
+          sleep 5
+        done'
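Outside CI, the same readiness check can be run by hand. A hedged sketch — /health is the probe the action polls, and /v1/models is vLLM's standard OpenAI-compatible model listing, assumed to be exposed by this image:

    curl -f http://localhost:8000/health     # readiness probe, same as the action uses
    curl -s http://localhost:8000/v1/models  # should list meta-llama/Llama-3.2-1B-Instruct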
.github/workflows/integration-tests.yml (vendored, 62 lines changed: +49 -13)

@@ -14,13 +14,19 @@ on:
       - '.github/workflows/integration-tests.yml' # This workflow
       - '.github/actions/setup-ollama/action.yml'
   schedule:
-    - cron: '0 0 * * *' # Daily at 12 AM UTC
+    # If changing the cron schedule, update the provider in the test-matrix job
+    - cron: '0 0 * * *' # (test latest client) Daily at 12 AM UTC
+    - cron: '1 0 * * 0' # (test vllm) Weekly on Sunday at 1 AM UTC
   workflow_dispatch:
     inputs:
       test-all-client-versions:
         description: 'Test against both the latest and published versions'
         type: boolean
         default: false
+      test-provider:
+        description: 'Test against a specific provider'
+        type: string
+        default: 'ollama'

 concurrency:
   group: ${{ github.workflow }}-${{ github.ref }}
@@ -53,8 +59,17 @@ jobs:
       matrix:
         test-type: ${{ fromJson(needs.discover-tests.outputs.test-type) }}
         client-type: [library, server]
+        # Use vllm on weekly schedule, otherwise use test-provider input (defaults to ollama)
+        provider: ${{ (github.event.schedule == '1 0 * * 0') && fromJSON('["vllm"]') || fromJSON(format('["{0}"]', github.event.inputs.test-provider || 'ollama')) }}
         python-version: ["3.12", "3.13"]
-        client-version: ${{ (github.event_name == 'schedule' || github.event.inputs.test-all-client-versions == 'true') && fromJSON('["published", "latest"]') || fromJSON('["latest"]') }}
+        client-version: ${{ (github.event.schedule == '0 0 * * 0' || github.event.inputs.test-all-client-versions == 'true') && fromJSON('["published", "latest"]') || fromJSON('["latest"]') }}
+        exclude: # TODO: look into why these tests are failing and fix them
+          - provider: vllm
+            test-type: safety
+          - provider: vllm
+            test-type: post_training
+          - provider: vllm
+            test-type: tool_runtime

     steps:
       - name: Checkout repository
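The new `provider` entry relies on the `cond && A || B` ternary idiom of GitHub Actions expressions. In shell terms it behaves roughly like this sketch, where `GITHUB_EVENT_SCHEDULE` and `TEST_PROVIDER_INPUT` are illustrative stand-ins for `github.event.schedule` and `github.event.inputs.test-provider`:

    # Weekly Sunday cron -> vllm; any other trigger -> the test-provider input, defaulting to ollama
    if [ "$GITHUB_EVENT_SCHEDULE" == "1 0 * * 0" ]; then
      provider='["vllm"]'
    else
      provider="[\"${TEST_PROVIDER_INPUT:-ollama}\"]"
    fi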
@@ -67,8 +82,13 @@ jobs:
           client-version: ${{ matrix.client-version }}

       - name: Setup ollama
+        if: ${{ matrix.provider == 'ollama' }}
         uses: ./.github/actions/setup-ollama
+
+      - name: Setup vllm
+        if: ${{ matrix.provider == 'vllm' }}
+        uses: ./.github/actions/setup-vllm

       - name: Build Llama Stack
         run: |
           uv run llama stack build --template ci-tests --image-type venv
@@ -81,10 +101,6 @@ jobs:

       - name: Run Integration Tests
         env:
-          OLLAMA_INFERENCE_MODEL: "llama3.2:3b-instruct-fp16" # for server tests
-          ENABLE_OLLAMA: "ollama" # for server tests
-          OLLAMA_URL: "http://0.0.0.0:11434"
-          SAFETY_MODEL: "llama-guard3:1b"
           LLAMA_STACK_CLIENT_TIMEOUT: "300" # Increased timeout for eval operations
         # Use 'shell' to get pipefail behavior
         # https://docs.github.com/en/actions/reference/workflow-syntax-for-github-actions#exit-codes-and-error-action-preference
@@ -96,12 +112,31 @@
           else
             stack_config="server:ci-tests"
           fi

+          EXCLUDE_TESTS="builtin_tool or safety_with_image or code_interpreter or test_rag"
+          if [ "${{ matrix.provider }}" == "ollama" ]; then
+            export ENABLE_OLLAMA="ollama"
+            export OLLAMA_URL="http://0.0.0.0:11434"
+            export OLLAMA_INFERENCE_MODEL="llama3.2:3b-instruct-fp16"
+            export TEXT_MODEL=ollama/$OLLAMA_INFERENCE_MODEL
+            export SAFETY_MODEL="llama-guard3:1b"
+            EXTRA_PARAMS="--safety-shield=$SAFETY_MODEL"
+          else
+            export ENABLE_VLLM="vllm"
+            export VLLM_URL="http://localhost:8000/v1"
+            export VLLM_INFERENCE_MODEL="meta-llama/Llama-3.2-1B-Instruct"
+            export TEXT_MODEL=vllm/$VLLM_INFERENCE_MODEL
+            # TODO: remove the not(test_inference_store_tool_calls) once we can get the tool called consistently
+            EXTRA_PARAMS=
+            EXCLUDE_TESTS="${EXCLUDE_TESTS} or test_inference_store_tool_calls"
+          fi
+
           uv run pytest -s -v tests/integration/${{ matrix.test-type }} --stack-config=${stack_config} \
-            -k "not(builtin_tool or safety_with_image or code_interpreter or test_rag)" \
-            --text-model="ollama/llama3.2:3b-instruct-fp16" \
+            -k "not( ${EXCLUDE_TESTS} )" \
+            --text-model=$TEXT_MODEL \
             --embedding-model=sentence-transformers/all-MiniLM-L6-v2 \
-            --safety-shield=$SAFETY_MODEL \
-            --color=yes \
+            --color=yes ${EXTRA_PARAMS} \
             --capture=tee-sys | tee pytest-${{ matrix.test-type }}.log

       - name: Check Storage and Memory Available After Tests
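Purely for illustration, this is what the pytest -k filter expands to on the vllm path once the two assignments above compose:

    EXCLUDE_TESTS="builtin_tool or safety_with_image or code_interpreter or test_rag"
    EXCLUDE_TESTS="${EXCLUDE_TESTS} or test_inference_store_tool_calls"
    echo "not( ${EXCLUDE_TESTS} )"
    # -> not( builtin_tool or safety_with_image or code_interpreter or test_rag or test_inference_store_tool_calls )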
@@ -110,16 +145,17 @@
           free -h
           df -h

-      - name: Write ollama logs to file
+      - name: Write inference logs to file
         if: ${{ always() }}
         run: |
-          sudo docker logs ollama > ollama.log
+          sudo docker logs ollama > ollama.log || true
+          sudo docker logs vllm > vllm.log || true

       - name: Upload all logs to artifacts
         if: ${{ always() }}
         uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
         with:
-          name: logs-${{ github.run_id }}-${{ github.run_attempt }}-${{ matrix.client-type }}-${{ matrix.test-type }}-${{ matrix.python-version }}-${{ matrix.client-version }}
+          name: logs-${{ github.run_id }}-${{ github.run_attempt }}-${{ matrix.provider }}-${{ matrix.client-type }}-${{ matrix.test-type }}-${{ matrix.python-version }}-${{ matrix.client-version }}
           path: |
             *.log
           retention-days: 1
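With the new test-provider input in place, a vllm run can also be dispatched manually — a sketch assuming the GitHub CLI is installed and authenticated against the repo:

    gh workflow run integration-tests.yml -f test-provider=vllm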