mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-10-04 12:07:34 +00:00
use task and async generator instead of a thread
This commit is contained in:
parent
a673484e21
commit
c6403706b4
1 changed file with 10 additions and 10 deletions
|
@@ -378,17 +378,17 @@ def patch_inference_clients():
|
||||||
|
|
||||||
def patched_models_list(self, *args, **kwargs):
|
def patched_models_list(self, *args, **kwargs):
|
||||||
import asyncio
|
import asyncio
|
||||||
import concurrent.futures
|
|
||||||
|
|
||||||
with concurrent.futures.ThreadPoolExecutor() as executor:
|
task = asyncio.create_task(
|
||||||
future = executor.submit(
|
_patched_inference_method(_original_methods["models_list"], self, "openai", "/v1/models", *args, **kwargs)
|
||||||
lambda: asyncio.run(
|
|
||||||
_patched_inference_method(
|
|
||||||
_original_methods["models_list"], self, "openai", "/v1/models", *args, **kwargs
|
|
||||||
)
|
)
|
||||||
)
|
|
||||||
)
|
async def _iter():
|
||||||
return future.result()
|
result = await task
|
||||||
|
async for item in result:
|
||||||
|
yield item
|
||||||
|
|
||||||
|
return _iter()
|
||||||
|
|
||||||
# Apply OpenAI patches
|
# Apply OpenAI patches
|
||||||
AsyncChatCompletions.create = patched_chat_completions_create
|
AsyncChatCompletions.create = patched_chat_completions_create
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue