diff --git a/.github/actions/setup-vllm/action.yml b/.github/actions/setup-vllm/action.yml index 17ebd42f2..34ced0998 100644 --- a/.github/actions/setup-vllm/action.yml +++ b/.github/actions/setup-vllm/action.yml @@ -11,13 +11,14 @@ runs: --name vllm \ -p 8000:8000 \ --privileged=true \ - quay.io/higginsd/vllm-cpu:65393ee064 \ + quay.io/higginsd/vllm-cpu:65393ee064-qwen3 \ --host 0.0.0.0 \ --port 8000 \ --enable-auto-tool-choice \ - --tool-call-parser llama3_json \ - --model /root/.cache/Llama-3.2-1B-Instruct \ - --served-model-name meta-llama/Llama-3.2-1B-Instruct + --tool-call-parser hermes \ + --model /root/.cache/Qwen3-0.6B \ + --served-model-name Qwen/Qwen3-0.6B \ + --max-model-len 8192 # Wait for vllm to be ready echo "Waiting for vllm to be ready..." diff --git a/scripts/integration-tests.sh b/scripts/integration-tests.sh index eee60951d..971eddf5a 100755 --- a/scripts/integration-tests.sh +++ b/scripts/integration-tests.sh @@ -212,11 +212,6 @@ fi echo "=== Running Integration Tests ===" EXCLUDE_TESTS="builtin_tool or safety_with_image or code_interpreter or test_rag" -# Additional exclusions for vllm setup -if [[ "$TEST_SETUP" == "vllm" ]]; then - EXCLUDE_TESTS="${EXCLUDE_TESTS} or test_inference_store_tool_calls" -fi - PYTEST_PATTERN="not( $EXCLUDE_TESTS )" if [[ -n "$TEST_PATTERN" ]]; then PYTEST_PATTERN="${PYTEST_PATTERN} and $TEST_PATTERN" diff --git a/tests/integration/suites.py b/tests/integration/suites.py index d8c283a0a..02d0a032a 100644 --- a/tests/integration/suites.py +++ b/tests/integration/suites.py @@ -78,7 +78,7 @@ SETUP_DEFINITIONS: dict[str, Setup] = { "VLLM_URL": "http://localhost:8000/v1", }, defaults={ - "text_model": "vllm/meta-llama/Llama-3.2-1B-Instruct", + "text_model": "vllm/Qwen/Qwen3-0.6B", "embedding_model": "sentence-transformers/all-MiniLM-L6-v2", }, ),