test: improve test reliability and model compatibility

- Update the earth question to a more specific multiple-choice format to prevent Llama-3.2-1B-Instruct from rambling about other planets
- Skip test_text_chat_completion_structured_output as it sometimes times out during CI execution, again with Llama-3.2-1B-Instruct on vllm

Signed-off-by: Derek Higgins <derekh@redhat.com>
2025-10-04 04:04:14 +00:00 · 2025-09-11 16:17:22 +01:00 · 2025-09-11 16:17:22 +01:00 · 8951765584
commit 8951765584
parent 2f58d87c22
4 changed files with 5 additions and 5 deletions
--- a/tests/integration/responses/fixtures/test_cases.py
+++ b/tests/integration/responses/fixtures/test_cases.py
@@ -29,7 +29,7 @@ class ResponsesTestCase(BaseModel):
 basic_test_cases = [
    pytest.param(
        ResponsesTestCase(
-            input="Which planet do humans live on?",
+            input="Humans live on which planet: Mars, Venus, or Earth?",
            expected="earth",
        ),
        id="earth",
@@ -76,7 +76,7 @@ multi_turn_test_cases = [
            input="",  # Not used for multi-turn
            expected="",  # Not used for multi-turn
            turns=[
-                ("Which planet do humans live on?", "earth"),
+                ("Humans live on which planet: Mars, Venus, or Earth?", "earth"),
                ("What is the name of the planet from your previous response?", "earth"),
            ],
        ),