ci: Switch vllm config to qwen3

It performs better in tool calling and structured tests.

Signed-off-by: Derek Higgins <derekh@redhat.com>
2025-10-04 04:04:14 +00:00 · 2025-09-24 17:29:51 +01:00 · 2025-09-24 17:29:51 +01:00 · 666d6a6fc0
commit 666d6a6fc0
parent 7d9004f199
3 changed files with 6 additions and 10 deletions
--- a/.github/actions/setup-vllm/action.yml
+++ b/.github/actions/setup-vllm/action.yml
@@ -11,13 +11,14 @@ runs:
          --name vllm \
          -p 8000:8000 \
          --privileged=true \
-          quay.io/higginsd/vllm-cpu:65393ee064 \
+          quay.io/higginsd/vllm-cpu:65393ee064-qwen3 \
          --host 0.0.0.0 \
          --port 8000 \
          --enable-auto-tool-choice \
-          --tool-call-parser llama3_json \
+          --tool-call-parser hermes \
-          --model /root/.cache/Llama-3.2-1B-Instruct \
+          --model /root/.cache/Qwen3-0.6B \
-          --served-model-name meta-llama/Llama-3.2-1B-Instruct
+          --served-model-name Qwen/Qwen3-0.6B \
+          --max-model-len 8192
          # Wait for vllm to be ready
          echo "Waiting for vllm to be ready..."
--- a/scripts/integration-tests.sh
+++ b/scripts/integration-tests.sh
@@ -212,11 +212,6 @@ fi
 echo "=== Running Integration Tests ==="
 EXCLUDE_TESTS="builtin_tool or safety_with_image or code_interpreter or test_rag"
-# Additional exclusions for vllm setup
-if [[ "$TEST_SETUP" == "vllm" ]]; then
-    EXCLUDE_TESTS="${EXCLUDE_TESTS} or test_inference_store_tool_calls"
-fi
 PYTEST_PATTERN="not( $EXCLUDE_TESTS )"
 if [[ -n "$TEST_PATTERN" ]]; then
    PYTEST_PATTERN="${PYTEST_PATTERN} and $TEST_PATTERN"
--- a/tests/integration/suites.py
+++ b/tests/integration/suites.py
@@ -78,7 +78,7 @@ SETUP_DEFINITIONS: dict[str, Setup] = {
            "VLLM_URL": "http://localhost:8000/v1",
        },
        defaults={
-            "text_model": "vllm/meta-llama/Llama-3.2-1B-Instruct",
+            "text_model": "vllm/Qwen/Qwen3-0.6B",
            "embedding_model": "sentence-transformers/all-MiniLM-L6-v2",
        },
    ),