From 89517655848ccef022b5c136fb61b80854bb70ea Mon Sep 17 00:00:00 2001
From: Derek Higgins <derekh@redhat.com>
Date: Thu, 11 Sep 2025 16:17:22 +0100
Subject: [PATCH] test: improve test reliability and model compatibility

- Make the earth question more specific by using a multiple-choice format,
  to prevent Llama-3.2-1B-Instruct from rambling about other planets
- Skip test_text_chat_completion_structured_output, as it is once again
  intermittently timing out in CI with Llama-3.2-1B-Instruct on vllm

Signed-off-by: Derek Higgins <derekh@redhat.com>
---
 scripts/integration-tests.sh                                | 2 +-
 tests/integration/responses/fixtures/test_cases.py          | 4 ++--
 tests/integration/test_cases/inference/chat_completion.json | 2 +-
 tests/integration/test_cases/openai/responses.json          | 2 +-
 4 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/scripts/integration-tests.sh b/scripts/integration-tests.sh
index eee60951d..38a5531d9 100755
--- a/scripts/integration-tests.sh
+++ b/scripts/integration-tests.sh
@@ -214,7 +214,7 @@ EXCLUDE_TESTS="builtin_tool or safety_with_image or code_interpreter or test_rag
 
 # Additional exclusions for vllm setup
 if [[ "$TEST_SETUP" == "vllm" ]]; then
-    EXCLUDE_TESTS="${EXCLUDE_TESTS} or test_inference_store_tool_calls"
+    EXCLUDE_TESTS="${EXCLUDE_TESTS} or test_inference_store_tool_calls or test_text_chat_completion_structured_output"
 fi
 
 PYTEST_PATTERN="not( $EXCLUDE_TESTS )"
diff --git a/tests/integration/responses/fixtures/test_cases.py b/tests/integration/responses/fixtures/test_cases.py
index bdd1a5d81..06129a675 100644
--- a/tests/integration/responses/fixtures/test_cases.py
+++ b/tests/integration/responses/fixtures/test_cases.py
@@ -29,7 +29,7 @@ class ResponsesTestCase(BaseModel):
 basic_test_cases = [
     pytest.param(
         ResponsesTestCase(
-            input="Which planet do humans live on?",
+            input="Humans live on which planet: Mars, Venus, or Earth?",
             expected="earth",
         ),
         id="earth",
@@ -76,7 +76,7 @@ multi_turn_test_cases = [
             input="",  # Not used for multi-turn
             expected="",  # Not used for multi-turn
             turns=[
-                ("Which planet do humans live on?", "earth"),
+                ("Humans live on which planet: Mars, Venus, or Earth?", "earth"),
                 ("What is the name of the planet from your previous response?", "earth"),
             ],
         ),
diff --git a/tests/integration/test_cases/inference/chat_completion.json b/tests/integration/test_cases/inference/chat_completion.json
index 203fc51a5..dcf83630c 100644
--- a/tests/integration/test_cases/inference/chat_completion.json
+++ b/tests/integration/test_cases/inference/chat_completion.json
@@ -1,7 +1,7 @@
 {
   "non_streaming_01": {
     "data": {
-      "question": "Which planet do humans live on?",
+      "question": "Humans live on which planet: Mars, Venus, or Earth?",
       "expected": "Earth"
     }
   },
diff --git a/tests/integration/test_cases/openai/responses.json b/tests/integration/test_cases/openai/responses.json
index d17d0cd4f..75b9f51be 100644
--- a/tests/integration/test_cases/openai/responses.json
+++ b/tests/integration/test_cases/openai/responses.json
@@ -1,7 +1,7 @@
 {
   "non_streaming_01": {
     "data": {
-      "question": "Which planet do humans live on?",
+      "question": "Humans live on which planet: Mars, Venus, or Earth?",
       "expected": "Earth"
     }
   },