perf(router.py): don't use asyncio.wait_for - just pass the timeout to the completion call

2025-04-25 18:54:30 +00:00 · 2024-01-06 17:05:55 +05:30 · 2024-01-06 17:05:55 +05:30 · 2d8d7e3569
commit 2d8d7e3569
parent 712f89b4f1
1 changed files with 6 additions and 10 deletions
--- a/litellm/router.py
+++ b/litellm/router.py
@@ -352,18 +352,16 @@ class Router:
            else:
                model_client = potential_model_client
            self.total_calls[model_name] += 1
-            response = await asyncio.wait_for(
+            response = await litellm.acompletion(
                litellm.acompletion(
                    **{
                        **data,
                        "messages": messages,
                        "caching": self.cache_responses,
                        "client": model_client,
                        "timeout": self.timeout,
                        **kwargs,
                    }
-                ),
+                )
                timeout=self.timeout,
            )
            self.success_calls[model_name] += 1
            return response
        except Exception as e:
@@ -614,18 +612,16 @@ class Router:
            else:
                model_client = potential_model_client
            self.total_calls[model_name] += 1
-            response = await asyncio.wait_for(
+            response = await litellm.atext_completion(
                litellm.atext_completion(
                    **{
                        **data,
                        "prompt": prompt,
                        "caching": self.cache_responses,
                        "client": model_client,
                        "timeout": self.timeout,
                        **kwargs,
                    }
-                ),
+                )
                timeout=self.timeout,
            )
            self.success_calls[model_name] += 1
            return response
        except Exception as e: