use task and async generator instead of a thread

2025-12-18 18:49:48 +00:00 · 2025-09-12 16:41:48 -04:00 · 2025-09-12 16:41:48 -04:00 · c6403706b4
commit c6403706b4
parent a673484e21
1 changed file with 10 additions and 10 deletions
--- a/llama_stack/testing/inference_recorder.py
+++ b/llama_stack/testing/inference_recorder.py
@@ -378,17 +378,17 @@ def patch_inference_clients():
    def patched_models_list(self, *args, **kwargs):
        import asyncio
        import concurrent.futures
-        with concurrent.futures.ThreadPoolExecutor() as executor:
+        task = asyncio.create_task(
-            future = executor.submit(
+            _patched_inference_method(_original_methods["models_list"], self, "openai", "/v1/models", *args, **kwargs)
                lambda: asyncio.run(
                    _patched_inference_method(
                        _original_methods["models_list"], self, "openai", "/v1/models", *args, **kwargs
        )
-                )
+
-            )
+        async def _iter():
-            return future.result()
+            result = await task
            async for item in result:
                yield item
        return _iter()
    # Apply OpenAI patches
    AsyncChatCompletions.create = patched_chat_completions_create