chore: default to pytest asyncio-mode=auto (#2730)

# What does this PR do? Previously, developers who ran `./scripts/unit-tests.sh` would get `asyncio-mode=auto`, which meant `@pytest.mark.asyncio` and `@pytest_asyncio.fixture` were redundant. Developers who ran `pytest` directly would get pytest's default (strict mode) and would run into errors, leading them to add `@pytest.mark.asyncio` / `@pytest_asyncio.fixture` to their code. With this change: - `asyncio_mode=auto` is included in `pyproject.toml`, making behavior consistent for all invocations of pytest - removes all redundant `@pytest_asyncio.fixture` and `@pytest.mark.asyncio` - for good measure, requires `pytest>=8.4` and `pytest-asyncio>=1.0` ## Test Plan - `./scripts/unit-tests.sh` - `uv run pytest tests/unit`
2025-07-15 01:26:10 +00:00 · 2025-07-11 16:00:24 -04:00 · 2025-07-11 16:00:24 -04:00 · 30b2e6a495
commit 30b2e6a495
parent 2ebc172f33
35 changed files with 29 additions and 239 deletions
--- a/tests/unit/providers/inference/test_remote_vllm.py
+++ b/tests/unit/providers/inference/test_remote_vllm.py
@ -14,7 +14,6 @@ from typing import Any
 from unittest.mock import AsyncMock, MagicMock, patch

 import pytest
-import pytest_asyncio
 from openai.types.chat.chat_completion_chunk import (
    ChatCompletionChunk as OpenAIChatCompletionChunk,
 )
@ -103,7 +102,7 @@ def mock_openai_models_list():
        yield mock_list


-@pytest_asyncio.fixture(scope="module")
+@pytest.fixture(scope="module")
 async def vllm_inference_adapter():
    config = VLLMInferenceAdapterConfig(url="http://mocked.localhost:12345")
    inference_adapter = VLLMInferenceAdapter(config)
@ -112,7 +111,6 @@ async def vllm_inference_adapter():
    return inference_adapter


-@pytest.mark.asyncio
 async def test_register_model_checks_vllm(mock_openai_models_list, vllm_inference_adapter):
    async def mock_openai_models():
        yield OpenAIModel(id="foo", created=1, object="model", owned_by="test")
@ -125,7 +123,6 @@ async def test_register_model_checks_vllm(mock_openai_models_list, vllm_inferenc
    mock_openai_models_list.assert_called()


-@pytest.mark.asyncio
 async def test_old_vllm_tool_choice(vllm_inference_adapter):
    """
    Test that we set tool_choice to none when no tools are in use
@ -149,7 +146,6 @@ async def test_old_vllm_tool_choice(vllm_inference_adapter):
        assert request.tool_config.tool_choice == ToolChoice.none


-@pytest.mark.asyncio
 async def test_tool_call_response(vllm_inference_adapter):
    """Verify that tool call arguments from a CompletionMessage are correctly converted
    into the expected JSON format."""
@ -192,7 +188,6 @@ async def test_tool_call_response(vllm_inference_adapter):
        ]


-@pytest.mark.asyncio
 async def test_tool_call_delta_empty_tool_call_buf():
    """
    Test that we don't generate extra chunks when processing a
@ -222,7 +217,6 @@ async def test_tool_call_delta_empty_tool_call_buf():
    assert chunks[1].event.stop_reason == StopReason.end_of_turn


-@pytest.mark.asyncio
 async def test_tool_call_delta_streaming_arguments_dict():
    async def mock_stream():
        mock_chunk_1 = OpenAIChatCompletionChunk(
@ -297,7 +291,6 @@ async def test_tool_call_delta_streaming_arguments_dict():
    assert chunks[2].event.event_type.value == "complete"


-@pytest.mark.asyncio
 async def test_multiple_tool_calls():
    async def mock_stream():
        mock_chunk_1 = OpenAIChatCompletionChunk(
@ -376,7 +369,6 @@ async def test_multiple_tool_calls():
    assert chunks[3].event.event_type.value == "complete"


-@pytest.mark.asyncio
 async def test_process_vllm_chat_completion_stream_response_no_choices():
    """
    Test that we don't error out when vLLM returns no choices for a
@ -453,7 +445,6 @@ def test_chat_completion_doesnt_block_event_loop(caplog):
    assert not asyncio_warnings


-@pytest.mark.asyncio
 async def test_get_params_empty_tools(vllm_inference_adapter):
    request = ChatCompletionRequest(
        tools=[],
@ -464,7 +455,6 @@ async def test_get_params_empty_tools(vllm_inference_adapter):
    assert "tools" not in params


-@pytest.mark.asyncio
 async def test_process_vllm_chat_completion_stream_response_tool_call_args_last_chunk():
    """
    Tests the edge case where the model returns the arguments for the tool call in the same chunk that
@ -543,7 +533,6 @@ async def test_process_vllm_chat_completion_stream_response_tool_call_args_last_
    assert chunks[-2].event.delta.tool_call.arguments == mock_tool_arguments


-@pytest.mark.asyncio
 async def test_process_vllm_chat_completion_stream_response_no_finish_reason():
    """
    Tests the edge case where the model requests a tool call and stays idle without explicitly providing the
@ -596,7 +585,6 @@ async def test_process_vllm_chat_completion_stream_response_no_finish_reason():
    assert chunks[-2].event.delta.tool_call.arguments == mock_tool_arguments


-@pytest.mark.asyncio
 async def test_process_vllm_chat_completion_stream_response_tool_without_args():
    """
    Tests the edge case where no arguments are provided for the tool call.
@ -645,7 +633,6 @@ async def test_process_vllm_chat_completion_stream_response_tool_without_args():
    assert chunks[-2].event.delta.tool_call.arguments == {}


-@pytest.mark.asyncio
 async def test_health_status_success(vllm_inference_adapter):
    """
    Test the health method of VLLM InferenceAdapter when the connection is successful.
@ -679,7 +666,6 @@ async def test_health_status_success(vllm_inference_adapter):
        mock_models.list.assert_called_once()


-@pytest.mark.asyncio
 async def test_health_status_failure(vllm_inference_adapter):
    """
    Test the health method of VLLM InferenceAdapter when the connection fails.