From 909e8470a70d5ded82e3368cec8f0f52f5a5a010 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Sat, 4 May 2024 19:11:57 -0700
Subject: [PATCH] fix - undo local dev changes

---
 litellm/router.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/litellm/router.py b/litellm/router.py
index 40671fdb9d..c4e407a250 100644
--- a/litellm/router.py
+++ b/litellm/router.py
@@ -1707,7 +1707,7 @@ class Router:
                     response = original_function(*args, **kwargs)
                     return response
 
-                except Exception as most_recent_exception:
+                except Exception as e:
                     ## LOGGING
                     kwargs = self.log_retry(kwargs=kwargs, e=e)
                     remaining_retries = num_retries - current_attempt
@@ -1717,7 +1717,7 @@ class Router:
                         num_retries=num_retries,
                     )
                     time.sleep(_timeout)
-            raise most_recent_exception
+            raise original_exception
 
     ### HELPER FUNCTIONS
 
@@ -1852,7 +1852,7 @@ class Router:
                 exception_status = 500
         _should_retry = litellm._should_retry(status_code=exception_status)
 
-        if updated_fails > self.allowed_fails:
+        if updated_fails > self.allowed_fails or _should_retry == False:
             # get the current cooldown list for that minute
             cooldown_key = f"{current_minute}:cooldown_models"  # group cooldown models by minute to reduce number of redis calls
             cached_value = self.cache.get_cache(key=cooldown_key)