fix(router.py): support batch completions fastest response streaming

2025-04-25 18:54:30 +00:00 · 2024-05-28 21:51:09 -07:00 · 2024-05-28 21:51:09 -07:00 · e3000504f9
commit e3000504f9
parent f168e35629
2 changed files with 39 additions and 3 deletions
--- a/litellm/router.py
+++ b/litellm/router.py
@@ -771,13 +771,13 @@ class Router:
        models = [m.strip() for m in model.split(",")]

        async def _async_completion_no_exceptions(
-            model: str, messages: List[Dict[str, str]], **kwargs: Any
+            model: str, messages: List[Dict[str, str]], stream: bool, **kwargs: Any
        ) -> Union[ModelResponse, CustomStreamWrapper, Exception]:
            """
            Wrapper around self.acompletion that catches exceptions and returns them as a result
            """
            try:
-                return await self.acompletion(model=model, messages=messages, **kwargs)
+                return await self.acompletion(model=model, messages=messages, stream=stream, **kwargs)  # type: ignore
            except asyncio.CancelledError:
                verbose_router_logger.debug(
                    "Received 'task.cancel'. Cancelling call w/ model={}.".format(model)
@@ -813,7 +813,7 @@ class Router:
        for model in models:
            task = asyncio.create_task(
                _async_completion_no_exceptions(
-                    model=model, messages=messages, **kwargs
+                    model=model, messages=messages, stream=stream, **kwargs
                )
            )
            pending_tasks.append(task)