forked from phoenix/litellm-mirror
Fix: return the number of retries in exception messages
This commit is contained in:
parent
5be8c95c6e
commit
6b59aeb603
1 changed file with 7 additions and 3 deletions
|
@@ -1544,6 +1544,10 @@ class Router:
|
||||||
num_retries=num_retries,
|
num_retries=num_retries,
|
||||||
)
|
)
|
||||||
await asyncio.sleep(_timeout)
|
await asyncio.sleep(_timeout)
|
||||||
|
try:
|
||||||
|
original_exception.message += f"\nNumber Retries = {current_attempt}"
|
||||||
|
except:
|
||||||
|
pass
|
||||||
raise original_exception
|
raise original_exception
|
||||||
|
|
||||||
def function_with_fallbacks(self, *args, **kwargs):
|
def function_with_fallbacks(self, *args, **kwargs):
|
||||||
|
@@ -1703,7 +1707,7 @@ class Router:
|
||||||
response = original_function(*args, **kwargs)
|
response = original_function(*args, **kwargs)
|
||||||
return response
|
return response
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as most_recent_exception:
|
||||||
## LOGGING
|
## LOGGING
|
||||||
kwargs = self.log_retry(kwargs=kwargs, e=e)
|
kwargs = self.log_retry(kwargs=kwargs, e=e)
|
||||||
remaining_retries = num_retries - current_attempt
|
remaining_retries = num_retries - current_attempt
|
||||||
|
@@ -1713,7 +1717,7 @@ class Router:
|
||||||
num_retries=num_retries,
|
num_retries=num_retries,
|
||||||
)
|
)
|
||||||
time.sleep(_timeout)
|
time.sleep(_timeout)
|
||||||
raise original_exception
|
raise most_recent_exception
|
||||||
|
|
||||||
### HELPER FUNCTIONS
|
### HELPER FUNCTIONS
|
||||||
|
|
||||||
|
@@ -1848,7 +1852,7 @@ class Router:
|
||||||
exception_status = 500
|
exception_status = 500
|
||||||
_should_retry = litellm._should_retry(status_code=exception_status)
|
_should_retry = litellm._should_retry(status_code=exception_status)
|
||||||
|
|
||||||
if updated_fails > self.allowed_fails or _should_retry == False:
|
if updated_fails > self.allowed_fails:
|
||||||
# get the current cooldown list for that minute
|
# get the current cooldown list for that minute
|
||||||
cooldown_key = f"{current_minute}:cooldown_models" # group cooldown models by minute to reduce number of redis calls
|
cooldown_key = f"{current_minute}:cooldown_models" # group cooldown models by minute to reduce number of redis calls
|
||||||
cached_value = self.cache.get_cache(key=cooldown_key)
|
cached_value = self.cache.get_cache(key=cooldown_key)
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue