diff --git a/.github/actions/run-and-record-tests/action.yml b/.github/actions/run-and-record-tests/action.yml
index a3eb31d9f..cec4adbb5 100644
--- a/.github/actions/run-and-record-tests/action.yml
+++ b/.github/actions/run-and-record-tests/action.yml
@@ -68,7 +68,8 @@ runs:
           echo "New recordings detected, committing and pushing"
           git add tests/integration/recordings/
 
-          git commit -m "Recordings update from CI (suite: ${{ inputs.suite }})"
+          git commit -m "Recordings update from CI (setup: ${{ inputs.setup }}, suite: ${{ inputs.suite }})"
+
           git fetch origin ${{ github.ref_name }}
           git rebase origin/${{ github.ref_name }}
           echo "Rebased successfully"
@@ -82,7 +83,8 @@ runs:
       if: ${{ always() }}
       shell: bash
       run: |
-        sudo docker logs ollama > ollama-${{ inputs.inference-mode }}.log || true
+        sudo docker logs ollama > ollama-${{ inputs.inference-mode }}.log 2>&1 || true
+        sudo docker logs vllm > vllm-${{ inputs.inference-mode }}.log 2>&1 || true
 
     - name: Upload logs
       if: ${{ always() }}
diff --git a/.github/actions/setup-vllm/action.yml b/.github/actions/setup-vllm/action.yml
index 17ebd42f2..34ced0998 100644
--- a/.github/actions/setup-vllm/action.yml
+++ b/.github/actions/setup-vllm/action.yml
@@ -11,13 +11,14 @@ runs:
           --name vllm \
           -p 8000:8000 \
           --privileged=true \
-          quay.io/higginsd/vllm-cpu:65393ee064 \
+          quay.io/higginsd/vllm-cpu:65393ee064-qwen3 \
           --host 0.0.0.0 \
           --port 8000 \
           --enable-auto-tool-choice \
-          --tool-call-parser llama3_json \
-          --model /root/.cache/Llama-3.2-1B-Instruct \
-          --served-model-name meta-llama/Llama-3.2-1B-Instruct
+          --tool-call-parser hermes \
+          --model /root/.cache/Qwen3-0.6B \
+          --served-model-name Qwen/Qwen3-0.6B \
+          --max-model-len 8192
 
           # Wait for vllm to be ready
           echo "Waiting for vllm to be ready..."
diff --git a/.github/workflows/integration-tests.yml b/.github/workflows/integration-tests.yml
index 711eccd9e..fe23dea8e 100644
--- a/.github/workflows/integration-tests.yml
+++ b/.github/workflows/integration-tests.yml
@@ -21,7 +21,6 @@ on:
   schedule:
     # If changing the cron schedule, update the provider in the test-matrix job
     - cron: '0 0 * * *'  # (test latest client) Daily at 12 AM UTC
-    - cron: '1 0 * * 0'  # (test vllm) Weekly on Sunday at 1 AM UTC
   workflow_dispatch:
     inputs:
       test-all-client-versions:
@@ -48,24 +47,38 @@ jobs:
       fail-fast: false
       matrix:
         client-type: [library, server]
-        # Use vllm on weekly schedule, otherwise use test-setup input (defaults to ollama)
-        setup: ${{ (github.event.schedule == '1 0 * * 0') && fromJSON('["vllm"]') || fromJSON(format('["{0}"]', github.event.inputs.test-setup || 'ollama')) }}
         # Use Python 3.13 only on nightly schedule (daily latest client test), otherwise use 3.12
         python-version: ${{ github.event.schedule == '0 0 * * *' && fromJSON('["3.12", "3.13"]') || fromJSON('["3.12"]') }}
         client-version: ${{ (github.event.schedule == '0 0 * * *' || github.event.inputs.test-all-client-versions == 'true') && fromJSON('["published", "latest"]') || fromJSON('["latest"]') }}
+        setup: [ollama, vllm]
         suite: [base, vision]
+        exclude:
+          - setup: vllm
+            suite: vision
+
 
     steps:
       - name: Checkout repository
         uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
 
+      # Pick the suite to run (vllm: "base" -> "base-vllm-subset"); done in a step because the matrix got too complex
+      - name: Update Matrix
+        id: update-matrix
+        run: |
+          REWRITTEN_SUITE="${{ matrix.suite }}"  # default: run the matrix suite as-is
+          if [[ "${{ matrix.setup }}" == "vllm" && "${{ matrix.suite }}" == "base" ]]; then
+            REWRITTEN_SUITE="base-vllm-subset"
+          fi
+          echo "suite=${REWRITTEN_SUITE}" >> "$GITHUB_OUTPUT"  # read later as steps.update-matrix.outputs.suite
+          echo "Rewritten suite: ${REWRITTEN_SUITE}"
+
       - name: Setup test environment
         uses: ./.github/actions/setup-test-environment
         with:
           python-version: ${{ matrix.python-version }}
           client-version: ${{ matrix.client-version }}
           setup: ${{ matrix.setup }}
-          suite: ${{ matrix.suite }}
+          suite: ${{ steps.update-matrix.outputs.suite }}
           inference-mode: 'replay'
 
       - name: Run tests
@@ -74,4 +87,4 @@ jobs:
           stack-config: ${{ matrix.client-type == 'library' && 'ci-tests' || 'server:ci-tests' }}
           setup: ${{ matrix.setup }}
           inference-mode: 'replay'
-          suite: ${{ matrix.suite }}
+          suite: ${{ steps.update-matrix.outputs.suite }}
diff --git a/scripts/integration-tests.sh b/scripts/integration-tests.sh
index eee60951d..971eddf5a 100755
--- a/scripts/integration-tests.sh
+++ b/scripts/integration-tests.sh
@@ -212,11 +212,6 @@ fi
 echo "=== Running Integration Tests ==="
 EXCLUDE_TESTS="builtin_tool or safety_with_image or code_interpreter or test_rag"
 
-# Additional exclusions for vllm setup
-if [[ "$TEST_SETUP" == "vllm" ]]; then
-    EXCLUDE_TESTS="${EXCLUDE_TESTS} or test_inference_store_tool_calls"
-fi
-
 PYTEST_PATTERN="not( $EXCLUDE_TESTS )"
 if [[ -n "$TEST_PATTERN" ]]; then
     PYTEST_PATTERN="${PYTEST_PATTERN} and $TEST_PATTERN"
diff --git a/tests/integration/suites.py b/tests/integration/suites.py
index d8c283a0a..081c03851 100644
--- a/tests/integration/suites.py
+++ b/tests/integration/suites.py
@@ -78,7 +78,7 @@ SETUP_DEFINITIONS: dict[str, Setup] = {
             "VLLM_URL": "http://localhost:8000/v1",
         },
         defaults={
-            "text_model": "vllm/meta-llama/Llama-3.2-1B-Instruct",
+            "text_model": "vllm/Qwen/Qwen3-0.6B",
             "embedding_model": "sentence-transformers/all-MiniLM-L6-v2",
         },
     ),
@@ -147,6 +147,11 @@ SUITE_DEFINITIONS: dict[str, Suite] = {
         roots=base_roots,
         default_setup="ollama",
     ),
+    "base-vllm-subset": Suite(
+        name="base-vllm-subset",
+        roots=["tests/integration/inference"],
+        default_setup="vllm",
+    ),
     "responses": Suite(
         name="responses",
         roots=["tests/integration/responses"],
diff --git a/tests/integration/test_cases/inference/chat_completion.json b/tests/integration/test_cases/inference/chat_completion.json
index 99add7b66..03c0a8dc9 100644
--- a/tests/integration/test_cases/inference/chat_completion.json
+++ b/tests/integration/test_cases/inference/chat_completion.json
@@ -259,7 +259,7 @@
       "messages": [
         {
           "role": "system",
-          "content": "You are a helpful assistant. Michael Jordan was born in 1963. He played basketball for the Chicago Bulls for 15 seasons."
+          "content": "You are a helpful assistant. Michael Jordan was born in 1963. His first name is \"Michael\", He played basketball for the Chicago Bulls for 15 seasons and was drafted in 1984"
         },
         {
           "role": "user",