mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-26 19:24:27 +00:00
fix(parallel_request_limiter.py): use redis cache, if available for rate limiting across instances
Fixes https://github.com/BerriAI/litellm/issues/4148
This commit is contained in:
parent
408a18d433
commit
77328e4a28
4 changed files with 75 additions and 56 deletions
|
@ -162,8 +162,12 @@ class ProxyLogging:
|
|||
## INITIALIZE LITELLM CALLBACKS ##
|
||||
self.call_details: dict = {}
|
||||
self.call_details["user_api_key_cache"] = user_api_key_cache
|
||||
self.internal_usage_cache = DualCache()
|
||||
self.max_parallel_request_limiter = _PROXY_MaxParallelRequestsHandler()
|
||||
self.internal_usage_cache = DualCache(
|
||||
default_in_memory_ttl=1
|
||||
) # ping redis cache every 1s
|
||||
self.max_parallel_request_limiter = _PROXY_MaxParallelRequestsHandler(
|
||||
self.internal_usage_cache
|
||||
)
|
||||
self.max_budget_limiter = _PROXY_MaxBudgetLimiter()
|
||||
self.cache_control_check = _PROXY_CacheControlCheck()
|
||||
self.alerting: Optional[List] = None
|
||||
|
@ -189,39 +193,45 @@ class ProxyLogging:
|
|||
|
||||
def update_values(
|
||||
self,
|
||||
alerting: Optional[List],
|
||||
alerting_threshold: Optional[float],
|
||||
redis_cache: Optional[RedisCache],
|
||||
alerting: Optional[List] = None,
|
||||
alerting_threshold: Optional[float] = None,
|
||||
redis_cache: Optional[RedisCache] = None,
|
||||
alert_types: Optional[List[AlertType]] = None,
|
||||
alerting_args: Optional[dict] = None,
|
||||
):
|
||||
self.alerting = alerting
|
||||
updated_slack_alerting: bool = False
|
||||
if self.alerting is not None:
|
||||
self.alerting = alerting
|
||||
updated_slack_alerting = True
|
||||
if alerting_threshold is not None:
|
||||
self.alerting_threshold = alerting_threshold
|
||||
updated_slack_alerting = True
|
||||
if alert_types is not None:
|
||||
self.alert_types = alert_types
|
||||
updated_slack_alerting = True
|
||||
|
||||
self.slack_alerting_instance.update_values(
|
||||
alerting=self.alerting,
|
||||
alerting_threshold=self.alerting_threshold,
|
||||
alert_types=self.alert_types,
|
||||
alerting_args=alerting_args,
|
||||
)
|
||||
if updated_slack_alerting is True:
|
||||
self.slack_alerting_instance.update_values(
|
||||
alerting=self.alerting,
|
||||
alerting_threshold=self.alerting_threshold,
|
||||
alert_types=self.alert_types,
|
||||
alerting_args=alerting_args,
|
||||
)
|
||||
|
||||
if (
|
||||
self.alerting is not None
|
||||
and "slack" in self.alerting
|
||||
and "daily_reports" in self.alert_types
|
||||
):
|
||||
# NOTE: ENSURE we only add callbacks when alerting is on
|
||||
# We should NOT add callbacks when alerting is off
|
||||
litellm.callbacks.append(self.slack_alerting_instance) # type: ignore
|
||||
if (
|
||||
self.alerting is not None
|
||||
and "slack" in self.alerting
|
||||
and "daily_reports" in self.alert_types
|
||||
):
|
||||
# NOTE: ENSURE we only add callbacks when alerting is on
|
||||
# We should NOT add callbacks when alerting is off
|
||||
litellm.callbacks.append(self.slack_alerting_instance) # type: ignore
|
||||
|
||||
if redis_cache is not None:
|
||||
self.internal_usage_cache.redis_cache = redis_cache
|
||||
|
||||
def _init_litellm_callbacks(self):
|
||||
print_verbose(f"INITIALIZING LITELLM CALLBACKS!")
|
||||
print_verbose("INITIALIZING LITELLM CALLBACKS!")
|
||||
self.service_logging_obj = ServiceLogging()
|
||||
litellm.callbacks.append(self.max_parallel_request_limiter)
|
||||
litellm.callbacks.append(self.max_budget_limiter)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue