use task and async generator instead of a thread

This commit is contained in:
Matthew Farrellee 2025-09-12 16:41:48 -04:00
parent a673484e21
commit c6403706b4

View file

@@ -378,17 +378,17 @@ def patch_inference_clients():
def patched_models_list(self, *args, **kwargs): def patched_models_list(self, *args, **kwargs):
import asyncio import asyncio
import concurrent.futures
with concurrent.futures.ThreadPoolExecutor() as executor: task = asyncio.create_task(
future = executor.submit( _patched_inference_method(_original_methods["models_list"], self, "openai", "/v1/models", *args, **kwargs)
lambda: asyncio.run(
_patched_inference_method(
_original_methods["models_list"], self, "openai", "/v1/models", *args, **kwargs
) )
)
) async def _iter():
return future.result() result = await task
async for item in result:
yield item
return _iter()
# Apply OpenAI patches # Apply OpenAI patches
AsyncChatCompletions.create = patched_chat_completions_create AsyncChatCompletions.create = patched_chat_completions_create