chore: default to pytest asyncio-mode=auto (#2730)

# What does this PR do?

previously, developers who ran `./scripts/unit-tests.sh` would get
`asyncio-mode=auto`, which meant `@pytest.mark.asyncio` and
`@pytest_asyncio.fixture` were redundent. developers who ran `pytest`
directly would get pytest's default (strict mode), would run into errors
leading them to add `@pytest.mark.asyncio` / `@pytest_asyncio.fixture`
to their code.

with this change -
- `asyncio_mode=auto` is included in `pyproject.toml` making behavior
consistent for all invocations of pytest
- removes all redundant `@pytest_asyncio.fixture` and
`@pytest.mark.asyncio`
 - for good measure, requires `pytest>=8.4` and `pytest-asyncio>=1.0`

## Test Plan

- `./scripts/unit-tests.sh`
- `uv run pytest tests/unit`
This commit is contained in:
Matthew Farrellee 2025-07-11 16:00:24 -04:00 committed by GitHub
parent 2ebc172f33
commit 30b2e6a495
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
35 changed files with 29 additions and 239 deletions

View file

@ -14,7 +14,6 @@ from typing import Any
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
import pytest_asyncio
from openai.types.chat.chat_completion_chunk import (
ChatCompletionChunk as OpenAIChatCompletionChunk,
)
@ -103,7 +102,7 @@ def mock_openai_models_list():
yield mock_list
@pytest_asyncio.fixture(scope="module")
@pytest.fixture(scope="module")
async def vllm_inference_adapter():
config = VLLMInferenceAdapterConfig(url="http://mocked.localhost:12345")
inference_adapter = VLLMInferenceAdapter(config)
@ -112,7 +111,6 @@ async def vllm_inference_adapter():
return inference_adapter
@pytest.mark.asyncio
async def test_register_model_checks_vllm(mock_openai_models_list, vllm_inference_adapter):
async def mock_openai_models():
yield OpenAIModel(id="foo", created=1, object="model", owned_by="test")
@ -125,7 +123,6 @@ async def test_register_model_checks_vllm(mock_openai_models_list, vllm_inferenc
mock_openai_models_list.assert_called()
@pytest.mark.asyncio
async def test_old_vllm_tool_choice(vllm_inference_adapter):
"""
Test that we set tool_choice to none when no tools are in use
@ -149,7 +146,6 @@ async def test_old_vllm_tool_choice(vllm_inference_adapter):
assert request.tool_config.tool_choice == ToolChoice.none
@pytest.mark.asyncio
async def test_tool_call_response(vllm_inference_adapter):
"""Verify that tool call arguments from a CompletionMessage are correctly converted
into the expected JSON format."""
@ -192,7 +188,6 @@ async def test_tool_call_response(vllm_inference_adapter):
]
@pytest.mark.asyncio
async def test_tool_call_delta_empty_tool_call_buf():
"""
Test that we don't generate extra chunks when processing a
@ -222,7 +217,6 @@ async def test_tool_call_delta_empty_tool_call_buf():
assert chunks[1].event.stop_reason == StopReason.end_of_turn
@pytest.mark.asyncio
async def test_tool_call_delta_streaming_arguments_dict():
async def mock_stream():
mock_chunk_1 = OpenAIChatCompletionChunk(
@ -297,7 +291,6 @@ async def test_tool_call_delta_streaming_arguments_dict():
assert chunks[2].event.event_type.value == "complete"
@pytest.mark.asyncio
async def test_multiple_tool_calls():
async def mock_stream():
mock_chunk_1 = OpenAIChatCompletionChunk(
@ -376,7 +369,6 @@ async def test_multiple_tool_calls():
assert chunks[3].event.event_type.value == "complete"
@pytest.mark.asyncio
async def test_process_vllm_chat_completion_stream_response_no_choices():
"""
Test that we don't error out when vLLM returns no choices for a
@ -453,7 +445,6 @@ def test_chat_completion_doesnt_block_event_loop(caplog):
assert not asyncio_warnings
@pytest.mark.asyncio
async def test_get_params_empty_tools(vllm_inference_adapter):
request = ChatCompletionRequest(
tools=[],
@ -464,7 +455,6 @@ async def test_get_params_empty_tools(vllm_inference_adapter):
assert "tools" not in params
@pytest.mark.asyncio
async def test_process_vllm_chat_completion_stream_response_tool_call_args_last_chunk():
"""
Tests the edge case where the model returns the arguments for the tool call in the same chunk that
@ -543,7 +533,6 @@ async def test_process_vllm_chat_completion_stream_response_tool_call_args_last_
assert chunks[-2].event.delta.tool_call.arguments == mock_tool_arguments
@pytest.mark.asyncio
async def test_process_vllm_chat_completion_stream_response_no_finish_reason():
"""
Tests the edge case where the model requests a tool call and stays idle without explicitly providing the
@ -596,7 +585,6 @@ async def test_process_vllm_chat_completion_stream_response_no_finish_reason():
assert chunks[-2].event.delta.tool_call.arguments == mock_tool_arguments
@pytest.mark.asyncio
async def test_process_vllm_chat_completion_stream_response_tool_without_args():
"""
Tests the edge case where no arguments are provided for the tool call.
@ -645,7 +633,6 @@ async def test_process_vllm_chat_completion_stream_response_tool_without_args():
assert chunks[-2].event.delta.tool_call.arguments == {}
@pytest.mark.asyncio
async def test_health_status_success(vllm_inference_adapter):
"""
Test the health method of VLLM InferenceAdapter when the connection is successful.
@ -679,7 +666,6 @@ async def test_health_status_success(vllm_inference_adapter):
mock_models.list.assert_called_once()
@pytest.mark.asyncio
async def test_health_status_failure(vllm_inference_adapter):
"""
Test the health method of VLLM InferenceAdapter when the connection fails.