From 7d9004f19923c3ba7734cc1f6c31308a8a8e29fc Mon Sep 17 00:00:00 2001
From: Derek Higgins
Date: Wed, 24 Sep 2025 14:53:36 +0100
Subject: [PATCH 1/3] ci: test adjustments for Qwen3-0.6B

Signed-off-by: Derek Higgins
---
 tests/integration/test_cases/inference/chat_completion.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/integration/test_cases/inference/chat_completion.json b/tests/integration/test_cases/inference/chat_completion.json
index 99add7b66..03c0a8dc9 100644
--- a/tests/integration/test_cases/inference/chat_completion.json
+++ b/tests/integration/test_cases/inference/chat_completion.json
@@ -259,7 +259,7 @@
       "messages": [
         {
           "role": "system",
-          "content": "You are a helpful assistant. Michael Jordan was born in 1963. He played basketball for the Chicago Bulls for 15 seasons."
+          "content": "You are a helpful assistant. Michael Jordan was born in 1963. His first name is \"Michael\", He played basketball for the Chicago Bulls for 15 seasons and was drafted in 1984"
         },
         {
           "role": "user",

From 666d6a6fc05ffe14c9a9960155f98c3b199b5f08 Mon Sep 17 00:00:00 2001
From: Derek Higgins
Date: Wed, 24 Sep 2025 17:29:51 +0100
Subject: [PATCH 2/3] ci: Switch vllm config to qwen3

It performs better in tool calling and structured output tests.

Signed-off-by: Derek Higgins
---
 .github/actions/setup-vllm/action.yml | 9 +++++----
 scripts/integration-tests.sh          | 5 -----
 tests/integration/suites.py           | 2 +-
 3 files changed, 6 insertions(+), 10 deletions(-)

diff --git a/.github/actions/setup-vllm/action.yml b/.github/actions/setup-vllm/action.yml
index 17ebd42f2..34ced0998 100644
--- a/.github/actions/setup-vllm/action.yml
+++ b/.github/actions/setup-vllm/action.yml
@@ -11,13 +11,14 @@ runs:
           --name vllm \
           -p 8000:8000 \
           --privileged=true \
-          quay.io/higginsd/vllm-cpu:65393ee064 \
+          quay.io/higginsd/vllm-cpu:65393ee064-qwen3 \
           --host 0.0.0.0 \
           --port 8000 \
           --enable-auto-tool-choice \
-          --tool-call-parser llama3_json \
-          --model /root/.cache/Llama-3.2-1B-Instruct \
-          --served-model-name meta-llama/Llama-3.2-1B-Instruct
+          --tool-call-parser hermes \
+          --model /root/.cache/Qwen3-0.6B \
+          --served-model-name Qwen/Qwen3-0.6B \
+          --max-model-len 8192

         # Wait for vllm to be ready
         echo "Waiting for vllm to be ready..."
diff --git a/scripts/integration-tests.sh b/scripts/integration-tests.sh
index eee60951d..971eddf5a 100755
--- a/scripts/integration-tests.sh
+++ b/scripts/integration-tests.sh
@@ -212,11 +212,6 @@ fi
 echo "=== Running Integration Tests ==="
 EXCLUDE_TESTS="builtin_tool or safety_with_image or code_interpreter or test_rag"

-# Additional exclusions for vllm setup
-if [[ "$TEST_SETUP" == "vllm" ]]; then
-    EXCLUDE_TESTS="${EXCLUDE_TESTS} or test_inference_store_tool_calls"
-fi
-
 PYTEST_PATTERN="not( $EXCLUDE_TESTS )"
 if [[ -n "$TEST_PATTERN" ]]; then
     PYTEST_PATTERN="${PYTEST_PATTERN} and $TEST_PATTERN"
diff --git a/tests/integration/suites.py b/tests/integration/suites.py
index d8c283a0a..02d0a032a 100644
--- a/tests/integration/suites.py
+++ b/tests/integration/suites.py
@@ -78,7 +78,7 @@ SETUP_DEFINITIONS: dict[str, Setup] = {
             "VLLM_URL": "http://localhost:8000/v1",
         },
         defaults={
-            "text_model": "vllm/meta-llama/Llama-3.2-1B-Instruct",
+            "text_model": "vllm/Qwen/Qwen3-0.6B",
             "embedding_model": "sentence-transformers/all-MiniLM-L6-v2",
         },
     ),

From 746e9c91a94e2a68706bfc10d7c0faf68975aa66 Mon Sep 17 00:00:00 2001
From: Derek Higgins
Date: Wed, 13 Aug 2025 14:19:52 +0100
Subject: [PATCH 3/3] ci: integrate vLLM inference tests with GitHub Actions workflows

Add vLLM provider support to integration test CI workflows alongside
existing Ollama support. Configure provider-specific test execution
where vLLM runs only inference-specific tests (excluding vision tests),
while Ollama continues to run the full test suite.

This enables comprehensive CI testing of both inference providers while
keeping the vLLM footprint small; the vLLM coverage can be expanded
later if it proves not to be too disruptive.

Signed-off-by: Derek Higgins
---
 .../actions/run-and-record-tests/action.yml |  6 +++--
 .github/workflows/integration-tests.yml     | 23 +++++++++++++++----
 tests/integration/suites.py                 |  5 ++++
 3 files changed, 27 insertions(+), 7 deletions(-)

diff --git a/.github/actions/run-and-record-tests/action.yml b/.github/actions/run-and-record-tests/action.yml
index a3eb31d9f..cec4adbb5 100644
--- a/.github/actions/run-and-record-tests/action.yml
+++ b/.github/actions/run-and-record-tests/action.yml
@@ -68,7 +68,8 @@ runs:
           echo "New recordings detected, committing and pushing"
           git add tests/integration/recordings/

-          git commit -m "Recordings update from CI (suite: ${{ inputs.suite }})"
+          git commit -m "Recordings update from CI (setup: ${{ inputs.setup }}, suite: ${{ inputs.suite }})"
+
           git fetch origin ${{ github.ref_name }}
           git rebase origin/${{ github.ref_name }}
           echo "Rebased successfully"
@@ -82,7 +83,8 @@ runs:
       if: ${{ always() }}
       shell: bash
       run: |
-        sudo docker logs ollama > ollama-${{ inputs.inference-mode }}.log || true
+        sudo docker logs ollama > ollama-${{ inputs.inference-mode }}.log 2>&1 || true
+        sudo docker logs vllm > vllm-${{ inputs.inference-mode }}.log 2>&1 || true

     - name: Upload logs
       if: ${{ always() }}
diff --git a/.github/workflows/integration-tests.yml b/.github/workflows/integration-tests.yml
index 711eccd9e..fe23dea8e 100644
--- a/.github/workflows/integration-tests.yml
+++ b/.github/workflows/integration-tests.yml
@@ -21,7 +21,6 @@ on:
   schedule:
     # If changing the cron schedule, update the provider in the test-matrix job
     - cron: '0 0 * * *'  # (test latest client) Daily at 12 AM UTC
-    - cron: '1 0 * * 0'  # (test vllm) Weekly on Sunday at 1 AM UTC
   workflow_dispatch:
     inputs:
       test-all-client-versions:
@@ -48,24 +47,38 @@ jobs:
       fail-fast: false
       matrix:
         client-type: [library, server]
-        # Use vllm on weekly schedule, otherwise use test-setup input (defaults to ollama)
-        setup: ${{ (github.event.schedule == '1 0 * * 0') && fromJSON('["vllm"]') || fromJSON(format('["{0}"]', github.event.inputs.test-setup || 'ollama')) }}
         # Use Python 3.13 only on nightly schedule (daily latest client test), otherwise use 3.12
         python-version: ${{ github.event.schedule == '0 0 * * *' && fromJSON('["3.12", "3.13"]') || fromJSON('["3.12"]') }}
         client-version: ${{ (github.event.schedule == '0 0 * * *' || github.event.inputs.test-all-client-versions == 'true') && fromJSON('["published", "latest"]') || fromJSON('["latest"]') }}
+        setup: [ollama, vllm]
         suite: [base, vision]
+        exclude:
+          - setup: vllm
+            suite: vision

     steps:
       - name: Checkout repository
         uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0

+      # This could in theory be done in the matrix, but it was getting too complex
+      - name: Update Matrix
+        id: update-matrix
+        run: |
+          REWRITTEN_SUITE="${{ matrix.suite }}"
+          if [[ "${{ matrix.setup }}" == "vllm" && "${{ matrix.suite }}" == "base" ]]; then
+            REWRITTEN_SUITE="base-vllm-subset"
+          fi
+          echo "suite=${REWRITTEN_SUITE}" >> $GITHUB_OUTPUT
+          echo "Rewritten suite: ${REWRITTEN_SUITE}"
+
       - name: Setup test environment
         uses: ./.github/actions/setup-test-environment
         with:
           python-version: ${{ matrix.python-version }}
           client-version: ${{ matrix.client-version }}
           setup: ${{ matrix.setup }}
-          suite: ${{ matrix.suite }}
+          suite: ${{ steps.update-matrix.outputs.suite }}
           inference-mode: 'replay'

       - name: Run tests
@@ -74,4 +87,4 @@ jobs:
           stack-config: ${{ matrix.client-type == 'library' && 'ci-tests' || 'server:ci-tests' }}
           setup: ${{ matrix.setup }}
           inference-mode: 'replay'
-          suite: ${{ matrix.suite }}
+          suite: ${{ steps.update-matrix.outputs.suite }}
diff --git a/tests/integration/suites.py b/tests/integration/suites.py
index 02d0a032a..081c03851 100644
--- a/tests/integration/suites.py
+++ b/tests/integration/suites.py
@@ -147,6 +147,11 @@ SUITE_DEFINITIONS: dict[str, Suite] = {
         roots=base_roots,
         default_setup="ollama",
     ),
+    "base-vllm-subset": Suite(
+        name="base-vllm-subset",
+        roots=["tests/integration/inference"],
+        default_setup="vllm",
+    ),
     "responses": Suite(
         name="responses",
         roots=["tests/integration/responses"],
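
Note (reviewer sketch, not part of the patch series): to debug this setup outside CI, the
Qwen3 vLLM container can be started locally with the same flags the setup-vllm action uses
above. The curl smoke test assumes vLLM's standard OpenAI-compatible /v1/chat/completions
endpoint (the same API the VLLM_URL setting points at) and is illustrative only.

  # Start the CPU vLLM image used by CI, serving Qwen3-0.6B on port 8000
  docker run -d \
    --name vllm \
    -p 8000:8000 \
    --privileged=true \
    quay.io/higginsd/vllm-cpu:65393ee064-qwen3 \
    --host 0.0.0.0 \
    --port 8000 \
    --enable-auto-tool-choice \
    --tool-call-parser hermes \
    --model /root/.cache/Qwen3-0.6B \
    --served-model-name Qwen/Qwen3-0.6B \
    --max-model-len 8192

  # Once the server reports ready, issue a minimal chat completion as a smoke test
  curl -sf http://localhost:8000/v1/chat/completions \
    -H 'Content-Type: application/json' \
    -d '{"model": "Qwen/Qwen3-0.6B", "messages": [{"role": "user", "content": "Say hi"}]}'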