From 89517655848ccef022b5c136fb61b80854bb70ea Mon Sep 17 00:00:00 2001 From: Derek Higgins Date: Thu, 11 Sep 2025 16:17:22 +0100 Subject: [PATCH] test: improve test reliability and model compatibility - Update earth question to be more specific with a multiple-choice format to prevent Llama-3.2-1B-Instruct from rambling about other planets - Skip test_text_chat_completion_structured_output as it sometimes times out during CI execution, again with Llama-3.2-1B-Instruct on vllm Signed-off-by: Derek Higgins --- scripts/integration-tests.sh | 2 +- tests/integration/responses/fixtures/test_cases.py | 4 ++-- tests/integration/test_cases/inference/chat_completion.json | 2 +- tests/integration/test_cases/openai/responses.json | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/scripts/integration-tests.sh b/scripts/integration-tests.sh index eee60951d..38a5531d9 100755 --- a/scripts/integration-tests.sh +++ b/scripts/integration-tests.sh @@ -214,7 +214,7 @@ EXCLUDE_TESTS="builtin_tool or safety_with_image or code_interpreter or test_rag # Additional exclusions for vllm setup if [[ "$TEST_SETUP" == "vllm" ]]; then - EXCLUDE_TESTS="${EXCLUDE_TESTS} or test_inference_store_tool_calls" + EXCLUDE_TESTS="${EXCLUDE_TESTS} or test_inference_store_tool_calls or test_text_chat_completion_structured_output" fi PYTEST_PATTERN="not( $EXCLUDE_TESTS )" diff --git a/tests/integration/responses/fixtures/test_cases.py b/tests/integration/responses/fixtures/test_cases.py index bdd1a5d81..06129a675 100644 --- a/tests/integration/responses/fixtures/test_cases.py +++ b/tests/integration/responses/fixtures/test_cases.py @@ -29,7 +29,7 @@ class ResponsesTestCase(BaseModel): basic_test_cases = [ pytest.param( ResponsesTestCase( - input="Which planet do humans live on?", + input="Humans live on which planet: Mars, Venus, or Earth?", expected="earth", ), id="earth", @@ -76,7 +76,7 @@ multi_turn_test_cases = [ input="", # Not used for multi-turn expected="", # Not used for multi-turn 
turns=[ - ("Which planet do humans live on?", "earth"), + ("Humans live on which planet: Mars, Venus, or Earth?", "earth"), ("What is the name of the planet from your previous response?", "earth"), ], ), diff --git a/tests/integration/test_cases/inference/chat_completion.json b/tests/integration/test_cases/inference/chat_completion.json index 203fc51a5..dcf83630c 100644 --- a/tests/integration/test_cases/inference/chat_completion.json +++ b/tests/integration/test_cases/inference/chat_completion.json @@ -1,7 +1,7 @@ { "non_streaming_01": { "data": { - "question": "Which planet do humans live on?", + "question": "Humans live on which planet: Mars, Venus, or Earth?", "expected": "Earth" } }, diff --git a/tests/integration/test_cases/openai/responses.json b/tests/integration/test_cases/openai/responses.json index d17d0cd4f..75b9f51be 100644 --- a/tests/integration/test_cases/openai/responses.json +++ b/tests/integration/test_cases/openai/responses.json @@ -1,7 +1,7 @@ { "non_streaming_01": { "data": { - "question": "Which planet do humans live on?", + "question": "Humans live on which planet: Mars, Venus, or Earth?", "expected": "Earth" } },