feat(proxy_server.py): enable batch completion fastest response calls on proxy
Introduces a new `fastest_response` flag for enabling the call.
Parent: ecd182eb6a
Commit: 20106715d5
3 changed files with 32 additions and 3 deletions
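
Before the diff, here is a hedged sketch of how a client could exercise the new behaviour once the proxy is running. The base URL, API key, and model names are illustrative assumptions, not values taken from this commit, and the comma-separated model list plus the `extra_body` field are assumed conventions for routing a batched call; the only part this commit itself introduces is the `fastest_response` flag.

import openai

# Point the standard OpenAI client at a locally running LiteLLM proxy.
# The URL and key below are placeholders for illustration.
client = openai.OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")

response = client.chat.completions.create(
    # Assumed comma-separated model list: the proxy fans the request out to each one.
    model="gpt-4o, claude-3-5-sonnet",
    messages=[{"role": "user", "content": "ping"}],
    # The flag introduced by this commit, passed as an extra request-body field.
    extra_body={"fastest_response": True},
)
print(response)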
@@ -804,9 +804,16 @@ class Router:
             pending_tasks.append(task)
 
         responses = await asyncio.gather(*_tasks, return_exceptions=True)
-        if isinstance(responses[0], Exception):
+        if isinstance(responses[0], Exception) or isinstance(
+            responses[0], BaseException
+        ):
             raise responses[0]
-        return responses[0]  # return first value from list
+        _response: Union[ModelResponse, CustomStreamWrapper] = responses[
+            0
+        ]  # return first value from list
+
+        _response._hidden_params["fastest_response_batch_completion"] = True
+        return _response
 
     def image_generation(self, prompt: str, model: str, **kwargs):
         try:
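
The hunk above makes the fastest-response path tolerate `BaseException` results from `asyncio.gather` and tags the winning response via `_hidden_params["fastest_response_batch_completion"] = True`, so callers can tell it came from a batched race. As a rough, self-contained sketch of the general fan-out-and-take-the-first pattern (not the actual Router implementation; helper names such as `call_deployment` are made up for illustration):

import asyncio
from typing import List, Tuple


async def call_deployment(name: str, delay: float) -> str:
    # Stand-in for a real completion call against one deployment.
    await asyncio.sleep(delay)
    return f"response from {name}"


async def fastest_response(deployments: List[Tuple[str, float]]) -> str:
    # Launch the same request against every deployment concurrently.
    tasks = [asyncio.create_task(call_deployment(n, d)) for n, d in deployments]
    # Wait only until the first task finishes, then drop the slower ones.
    done, pending = await asyncio.wait(tasks, return_when=asyncio.FIRST_COMPLETED)
    for task in pending:
        task.cancel()
    winner = done.pop()
    exc = winner.exception()
    if exc is not None:
        raise exc
    return winner.result()


if __name__ == "__main__":
    print(asyncio.run(fastest_response([("a", 0.3), ("b", 0.1), ("c", 0.2)])))
    # -> "response from b": the quickest deployment wins the race.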