diff --git a/llama_stack/testing/inference_recorder.py b/llama_stack/testing/inference_recorder.py
index be1961ecc..e09513e4d 100644
--- a/llama_stack/testing/inference_recorder.py
+++ b/llama_stack/testing/inference_recorder.py
@@ -378,17 +378,25 @@ def patch_inference_clients():
 
     def patched_models_list(self, *args, **kwargs):
-        import asyncio
-        import concurrent.futures
-
-        with concurrent.futures.ThreadPoolExecutor() as executor:
-            future = executor.submit(
-                lambda: asyncio.run(
-                    _patched_inference_method(
-                        _original_methods["models_list"], self, "openai", "/v1/models", *args, **kwargs
-                    )
-                )
-            )
-            return future.result()
+        """Return an async iterator over the patched ``/v1/models`` result.
+
+        The call is routed through ``_patched_inference_method`` with the
+        original ``models_list`` method. This function is synchronous, so the
+        coroutine is awaited lazily inside the returned async generator: no
+        request is started until the caller iterates, and no running event
+        loop is needed at call time. The result supports ``async for`` only.
+        """
+
+        async def _iter():
+            # Await in the consumer's own task rather than asyncio.create_task():
+            # an eager task raises RuntimeError without a running loop and is
+            # left orphaned if the caller never iterates the generator.
+            result = await _patched_inference_method(
+                _original_methods["models_list"], self, "openai", "/v1/models", *args, **kwargs
+            )
+            async for item in result:
+                yield item
+
+        return _iter()
 
     # Apply OpenAI patches
     AsyncChatCompletions.create = patched_chat_completions_create