mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-08-07 02:58:21 +00:00
test: use a more robust mechanism for detecting event loop blocking
This commit is contained in:
parent
0bdfc71f8d
commit
8fa2fb4f33
1 changed file with 38 additions and 17 deletions
|
@ -5,6 +5,7 @@
|
||||||
# the root directory of this source tree.
|
# the root directory of this source tree.
|
||||||
|
|
||||||
import asyncio
|
import asyncio
|
||||||
|
import contextlib
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
import threading
|
import threading
|
||||||
|
@ -182,13 +183,41 @@ async def test_process_vllm_chat_completion_stream_response_no_choices():
|
||||||
assert len(chunks) == 0
|
assert len(chunks) == 0
|
||||||
|
|
||||||
|
|
||||||
def test_chat_completion_doesnt_block_event_loop(caplog):
|
@contextlib.asynccontextmanager
|
||||||
loop = asyncio.new_event_loop()
|
async def detect_blocking(threshold=0.1):
|
||||||
loop.set_debug(True)
|
"""Context manager to detect blocking in an event loop."""
|
||||||
caplog.set_level(logging.WARNING)
|
block_detected = False
|
||||||
|
watchdog_active = True
|
||||||
|
|
||||||
# Log when event loop is blocked for more than 200ms
|
last_check = time.monotonic()
|
||||||
loop.slow_callback_duration = 0.2
|
|
||||||
|
async def watchdog():
|
||||||
|
nonlocal block_detected, last_check
|
||||||
|
while watchdog_active:
|
||||||
|
now = time.monotonic()
|
||||||
|
# If our check is significantly delayed, we might be blocked
|
||||||
|
if now - last_check > threshold:
|
||||||
|
block_detected = True
|
||||||
|
last_check = now
|
||||||
|
await asyncio.sleep(threshold / 3)
|
||||||
|
|
||||||
|
watchdog_task = asyncio.create_task(watchdog())
|
||||||
|
|
||||||
|
try:
|
||||||
|
yield
|
||||||
|
finally:
|
||||||
|
watchdog_active = False
|
||||||
|
watchdog_task.cancel()
|
||||||
|
try:
|
||||||
|
await watchdog_task
|
||||||
|
except asyncio.CancelledError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
assert not block_detected, f"Event loop was blocked for more than {threshold}s"
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_chat_completion_doesnt_block_event_loop():
|
||||||
# Sleep for 500ms in our delayed http response
|
# Sleep for 500ms in our delayed http response
|
||||||
sleep_time = 0.5
|
sleep_time = 0.5
|
||||||
|
|
||||||
|
@ -220,15 +249,7 @@ def test_chat_completion_doesnt_block_event_loop(caplog):
|
||||||
with MockInferenceAdapterWithSleep(sleep_time, mock_response) as inference_adapter:
|
with MockInferenceAdapterWithSleep(sleep_time, mock_response) as inference_adapter:
|
||||||
inference_adapter.model_store = AsyncMock()
|
inference_adapter.model_store = AsyncMock()
|
||||||
inference_adapter.model_store.get_model.return_value = mock_model
|
inference_adapter.model_store.get_model.return_value = mock_model
|
||||||
loop.run_until_complete(inference_adapter.initialize())
|
await inference_adapter.initialize()
|
||||||
|
|
||||||
# Clear the logs so far and run the actual chat completion we care about
|
async with detect_blocking(0.1):
|
||||||
caplog.clear()
|
await do_chat_completion()
|
||||||
loop.run_until_complete(do_chat_completion())
|
|
||||||
|
|
||||||
# Ensure we don't have any asyncio warnings in the captured log
|
|
||||||
# records from our chat completion call. A message gets logged
|
|
||||||
# here any time we exceed the slow_callback_duration configured
|
|
||||||
# above.
|
|
||||||
asyncio_warnings = [record.message for record in caplog.records if record.name == "asyncio"]
|
|
||||||
assert not asyncio_warnings
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue