fix(router.py): support batch completions fastest response streaming

This commit is contained in:
Krrish Dholakia 2024-05-28 21:51:09 -07:00
parent f168e35629
commit e3000504f9
2 changed files with 39 additions and 3 deletions

View file

@@ -771,13 +771,13 @@ class Router:
models = [m.strip() for m in model.split(",")]
async def _async_completion_no_exceptions(
model: str, messages: List[Dict[str, str]], **kwargs: Any
model: str, messages: List[Dict[str, str]], stream: bool, **kwargs: Any
) -> Union[ModelResponse, CustomStreamWrapper, Exception]:
"""
Wrapper around self.acompletion that catches exceptions and returns them as a result
"""
try:
return await self.acompletion(model=model, messages=messages, **kwargs)
return await self.acompletion(model=model, messages=messages, stream=stream, **kwargs) # type: ignore
except asyncio.CancelledError:
verbose_router_logger.debug(
"Received 'task.cancel'. Cancelling call w/ model={}.".format(model)
@@ -813,7 +813,7 @@ class Router:
for model in models:
task = asyncio.create_task(
_async_completion_no_exceptions(
model=model, messages=messages, **kwargs
model=model, messages=messages, stream=stream, **kwargs
)
)
pending_tasks.append(task)