test: improve test reliability and model compatibility

- Update the earth question to a more specific multiple-choice format to
  prevent Llama-3.2-1B-Instruct from rambling about other planets.
- Skip test_text_chat_completion_structured_output, as it is again
  sometimes timing out during CI execution with Llama-3.2-1B-Instruct on vLLM.

Signed-off-by: Derek Higgins <derekh@redhat.com>
2025-12-08 03:00:56 +00:00 · 2025-09-11 16:17:22 +01:00 · 2025-09-11 16:17:22 +01:00 · 8951765584
commit 8951765584
parent 2f58d87c22
4 changed files with 5 additions and 5 deletions
--- a/tests/integration/responses/fixtures/test_cases.py
+++ b/tests/integration/responses/fixtures/test_cases.py
@@ -29,7 +29,7 @@ class ResponsesTestCase(BaseModel):
 basic_test_cases = [
    pytest.param(
        ResponsesTestCase(
-            input="Which planet do humans live on?",
+            input="Humans live on which planet: Mars, Venus, or Earth?",
            expected="earth",
        ),
        id="earth",
@@ -76,7 +76,7 @@ multi_turn_test_cases = [
            input="",  # Not used for multi-turn
            expected="",  # Not used for multi-turn
            turns=[
-                ("Which planet do humans live on?", "earth"),
+                ("Humans live on which planet: Mars, Venus, or Earth?", "earth"),
                ("What is the name of the planet from your previous response?", "earth"),
            ],
        ),
--- a/tests/integration/test_cases/inference/chat_completion.json
+++ b/tests/integration/test_cases/inference/chat_completion.json
@@ -1,7 +1,7 @@
 {
  "non_streaming_01": {
    "data": {
-      "question": "Which planet do humans live on?",
+      "question": "Humans live on which planet: Mars, Venus, or Earth?",
      "expected": "Earth"
    }
  },
--- a/tests/integration/test_cases/openai/responses.json
+++ b/tests/integration/test_cases/openai/responses.json
@@ -1,7 +1,7 @@
 {
  "non_streaming_01": {
    "data": {
-      "question": "Which planet do humans live on?",
+      "question": "Humans live on which planet: Mars, Venus, or Earth?",
      "expected": "Earth"
    }
  },