Use an asyncio task and async generator instead of a thread pool in patched_models_list

2025-10-04 20:14:13 +00:00 · 2025-09-12 16:41:48 -04:00 · 2025-09-12 16:41:48 -04:00 · c6403706b4
commit c6403706b4
parent a673484e21
1 changed files with 10 additions and 10 deletions
--- a/llama_stack/testing/inference_recorder.py
+++ b/llama_stack/testing/inference_recorder.py
@@ -378,17 +378,17 @@ def patch_inference_clients():

    def patched_models_list(self, *args, **kwargs):
        import asyncio
-        import concurrent.futures

-        with concurrent.futures.ThreadPoolExecutor() as executor:
-            future = executor.submit(
-                lambda: asyncio.run(
-                    _patched_inference_method(
-                        _original_methods["models_list"], self, "openai", "/v1/models", *args, **kwargs
+        task = asyncio.create_task(
+            _patched_inference_method(_original_methods["models_list"], self, "openai", "/v1/models", *args, **kwargs)
        )
-                )
-            )
-            return future.result()
+
+        async def _iter():
+            result = await task
+            async for item in result:
+                yield item
+
+        return _iter()

    # Apply OpenAI patches
    AsyncChatCompletions.create = patched_chat_completions_create