fix(parallel_request_limiter.py): use redis cache, if available, for rate limiting across instances

Fixes https://github.com/BerriAI/litellm/issues/4148
Krrish Dholakia 2024-06-12 10:35:48 -07:00
parent c059352908
commit 76c9b715f2
4 changed files with 75 additions and 56 deletions
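
The idea behind the fix: rate-limit counters kept only in each instance's in-memory cache drift apart when the proxy runs as multiple replicas, so the limiter should write its counters to Redis whenever a Redis cache is configured. A minimal sketch of that pattern, using hypothetical class names rather than LiteLLM's actual implementation:

from typing import Any, Optional


class InMemoryCache:
    """Per-instance fallback store; counters are not shared."""

    def __init__(self) -> None:
        self._counters: dict[str, int] = {}

    async def async_increment(self, key: str, value: int = 1) -> int:
        self._counters[key] = self._counters.get(key, 0) + value
        return self._counters[key]


class DualRateLimitCache:
    """Use Redis when available (shared across instances), else local memory."""

    def __init__(self, redis_cache: Optional[Any] = None) -> None:
        # assumed: an async Redis wrapper exposing async_increment, if configured
        self.redis_cache = redis_cache
        self.in_memory_cache = InMemoryCache()

    async def async_increment(self, key: str, value: int = 1) -> int:
        if self.redis_cache is not None:
            # every proxy instance increments the same Redis key,
            # so the count reflects traffic across the whole fleet
            return await self.redis_cache.async_increment(key, value)
        # single-instance fallback: counts are local only
        return await self.in_memory_cache.async_increment(key, value)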

View file

@@ -2852,6 +2852,7 @@ class ProxyConfig:
         use_azure_key_vault = general_settings.get("use_azure_key_vault", False)
         load_from_azure_key_vault(use_azure_key_vault=use_azure_key_vault)
         ### ALERTING ###
         proxy_logging_obj.update_values(
             alerting=general_settings.get("alerting", None),
             alerting_threshold=general_settings.get("alerting_threshold", 600),
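
The hunks for parallel_request_limiter.py itself are not excerpted in this view. As a hedged sketch of how a limiter might use the shared cache above (a hypothetical helper building on DualRateLimitCache, not the file's actual code), it can increment a per-key counter before each call and reject once the configured maximum is reached:

async def check_parallel_request_limit(
    cache: DualRateLimitCache, api_key: str, max_parallel_requests: int
) -> None:
    # with Redis behind the cache, this count is shared across instances
    request_count_key = f"{api_key}::request_count"
    current = await cache.async_increment(request_count_key, 1)
    if current > max_parallel_requests:
        # roll back our increment and reject the request
        await cache.async_increment(request_count_key, -1)
        raise RuntimeError("Max parallel requests reached for this key")
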
@@ -3963,6 +3964,11 @@ async def startup_event():
     db_writer_client = HTTPHandler()
+    ## UPDATE INTERNAL USAGE CACHE ##
+    proxy_logging_obj.update_values(
+        redis_cache=redis_usage_cache
+    )  # used by parallel request limiter for rate limiting keys across instances
     proxy_logging_obj._init_litellm_callbacks()  # INITIALIZE LITELLM CALLBACKS ON SERVER STARTUP <- do this to catch any logging errors on startup, not when calls are being made
     if "daily_reports" in proxy_logging_obj.slack_alerting_instance.alert_types: