Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-26 11:14:04 +00:00)

fix(router.py): fix default retry logic

Parent: f84e0f4a24
Commit: f1b2405fe0

6 changed files with 63 additions and 57 deletions
@@ -50,7 +50,7 @@ class Router:
     model_names: List = []
     cache_responses: Optional[bool] = False
     default_cache_time_seconds: int = 1 * 60 * 60  # 1 hour
-    num_retries: int = 0
+    num_retries: int = openai.DEFAULT_MAX_RETRIES
     tenacity = None
     leastbusy_logger: Optional[LeastBusyLoggingHandler] = None
     lowesttpm_logger: Optional[LowestTPMLoggingHandler] = None
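The class-level default now defers to the OpenAI SDK's own constant instead of hard-coding 0. A minimal check, assuming the v1 openai-python SDK, which exports DEFAULT_MAX_RETRIES (2 at the time of writing):

    import openai

    # The Router class default now tracks the SDK constant rather than 0.
    # In the v1 openai-python SDK this value is 2 (subject to SDK version).
    print(openai.DEFAULT_MAX_RETRIES)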
@@ -70,7 +70,7 @@ class Router:
         ] = None,  # if you want to cache across model groups
         client_ttl: int = 3600,  # ttl for cached clients - will re-initialize after this time in seconds
         ## RELIABILITY ##
-        num_retries: int = 0,
+        num_retries: Optional[int] = None,
         timeout: Optional[float] = None,
         default_litellm_params={},  # default params for Router.chat.completion.create
         default_max_parallel_requests: Optional[int] = None,
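The constructor parameter moves from `int = 0` to `Optional[int] = None` because, with 0 as the default, an explicit request for zero retries is indistinguishable from passing nothing at all. A small standalone illustration of the sentinel pattern (not litellm code):

    from typing import Optional

    def old_style(num_retries: int = 0) -> str:
        # "explicit 0" and "not provided" collapse into the same value
        return f"value={num_retries}"

    def new_style(num_retries: Optional[int] = None) -> str:
        # None acts as a sentinel for "not provided"
        return "unset" if num_retries is None else f"set to {num_retries}"

    print(old_style(0), old_style())  # value=0 value=0
    print(new_style(0), new_style())  # set to 0 unset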
@@ -229,7 +229,12 @@ class Router:
         self.failed_calls = (
             InMemoryCache()
         )  # cache to track failed call per deployment, if num failed calls within 1 minute > allowed fails, then add it to cooldown
-        self.num_retries = num_retries or litellm.num_retries or 0
+
+        if num_retries is not None:
+            self.num_retries = num_retries
+        elif litellm.num_retries is not None:
+            self.num_retries = litellm.num_retries
+
         self.timeout = timeout or litellm.request_timeout

         self.retry_after = retry_after
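This hunk is the heart of the fix. The old or-chain treated any falsy value as absent, so `Router(num_retries=0)` silently fell through to the global setting; the new `is not None` checks respect an explicit 0, and when both the argument and `litellm.num_retries` are unset, the class default (`openai.DEFAULT_MAX_RETRIES`) simply remains in effect. A standalone demonstration of the truthiness bug:

    # Stand-in for litellm.num_retries being configured globally:
    global_num_retries = 3

    # Old resolution: an explicit 0 is falsy, so it was ignored.
    old = 0 or global_num_retries or 0
    # New resolution: `is not None` keeps the caller's 0.
    num_retries = 0
    new = num_retries if num_retries is not None else global_num_retries

    print(old, new)  # 3 0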
@@ -428,6 +433,7 @@ class Router:
         kwargs["messages"] = messages
         kwargs["original_function"] = self._acompletion
+        kwargs["num_retries"] = kwargs.get("num_retries", self.num_retries)

         timeout = kwargs.get("request_timeout", self.timeout)
         kwargs.setdefault("metadata", {}).update({"model_group": model})
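With this added line, the retry budget travels with each call: a caller-supplied `num_retries` in kwargs wins, otherwise the router-level value is filled in. A toy illustration with stand-in values, not the real call path:

    router_default = 2                 # stand-in for self.num_retries

    per_call = {"num_retries": 5}      # caller override survives
    per_call["num_retries"] = per_call.get("num_retries", router_default)

    plain = {}                         # no override: default is filled in
    plain["num_retries"] = plain.get("num_retries", router_default)

    print(per_call["num_retries"], plain["num_retries"])  # 5 2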
@@ -1415,10 +1421,12 @@ class Router:
         context_window_fallbacks = kwargs.pop(
             "context_window_fallbacks", self.context_window_fallbacks
         )

-        verbose_router_logger.debug(
-            f"async function w/ retries: original_function - {original_function}"
-        )
+        num_retries = kwargs.pop("num_retries")
+
+        verbose_router_logger.debug(
+            f"async function w/ retries: original_function - {original_function}, num_retries - {num_retries}"
+        )
         try:
             # if the function call is successful, no exception will be raised and we'll break out of the loop
             response = await original_function(*args, **kwargs)
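On the receiving side, the retry wrapper now pops the budget out of kwargs, both to use it and so it is not forwarded to the underlying completion call. A minimal sketch of the hand-off, assuming a simple retry loop (the real wrapper also handles fallbacks and backoff):

    import asyncio

    async def function_with_retries(original_function, *args, **kwargs):
        # Budget was stashed in kwargs by the caller-side hunk above.
        num_retries = kwargs.pop("num_retries")
        for attempt in range(num_retries + 1):
            try:
                return await original_function(*args, **kwargs)
            except Exception:
                if attempt == num_retries:
                    raise

    async def flaky(**kwargs):
        raise RuntimeError("upstream error")

    # Raises after 1 initial attempt + 2 retries:
    # asyncio.run(function_with_retries(flaky, num_retries=2))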
@@ -1986,7 +1994,9 @@ class Router:
             stream_timeout = litellm.get_secret(stream_timeout_env_name)
             litellm_params["stream_timeout"] = stream_timeout

-            max_retries = litellm_params.pop("max_retries", 2)
+            max_retries = litellm_params.pop(
+                "max_retries", 0
+            )  # router handles retry logic
             if isinstance(max_retries, str) and max_retries.startswith("os.environ/"):
                 max_retries_env_name = max_retries.replace("os.environ/", "")
                 max_retries = litellm.get_secret(max_retries_env_name)
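Finally, the per-client default drops from 2 to 0 so the underlying OpenAI client no longer retries on its own; with both layers retrying, the attempt counts multiply. Illustrative worst-case arithmetic, assuming each router attempt triggers a full client retry cycle:

    client_retries, router_retries = 2, 2  # old defaults on both layers
    # Each of the (router_retries + 1) router attempts can itself be
    # retried client_retries more times by the client:
    print((client_retries + 1) * (router_retries + 1))  # 9 attempts per call
    # With the client default at 0, the router alone owns retry policy.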