fix(parallel_request_limiter.py): use redis cache, if available, for rate limiting across instances

Fixes https://github.com/BerriAI/litellm/issues/4148
Krrish Dholakia 2024-06-12 10:35:48 -07:00
parent c059352908
commit 76c9b715f2
4 changed files with 75 additions and 56 deletions
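
The idea behind the fix: rate-limit counters kept only in each instance's in-memory cache drift apart when the proxy runs as multiple replicas, so the limiter should write its counters to Redis whenever a Redis cache is configured. A minimal sketch of that pattern, using hypothetical class names rather than LiteLLM's actual implementation:

from typing import Any, Optional


class InMemoryCache:
    """Per-instance fallback store; counters are not shared."""

    def __init__(self) -> None:
        self._counters: dict[str, int] = {}

    async def async_increment(self, key: str, value: int = 1) -> int:
        self._counters[key] = self._counters.get(key, 0) + value
        return self._counters[key]


class DualRateLimitCache:
    """Use Redis when available (shared across instances), else local memory."""

    def __init__(self, redis_cache: Optional[Any] = None) -> None:
        # assumed: an async Redis wrapper exposing async_increment, if configured
        self.redis_cache = redis_cache
        self.in_memory_cache = InMemoryCache()

    async def async_increment(self, key: str, value: int = 1) -> int:
        if self.redis_cache is not None:
            # every proxy instance increments the same Redis key,
            # so the count reflects traffic across the whole fleet
            return await self.redis_cache.async_increment(key, value)
        # single-instance fallback: counts are local only
        return await self.in_memory_cache.async_increment(key, value)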

View file

@@ -2852,6 +2852,7 @@ class ProxyConfig:
         use_azure_key_vault = general_settings.get("use_azure_key_vault", False)
         load_from_azure_key_vault(use_azure_key_vault=use_azure_key_vault)
         ### ALERTING ###
         proxy_logging_obj.update_values(
             alerting=general_settings.get("alerting", None),
             alerting_threshold=general_settings.get("alerting_threshold", 600),
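
The hunks for parallel_request_limiter.py itself are not excerpted in this view. As a hedged sketch of how a limiter might use the shared cache above (a hypothetical helper building on DualRateLimitCache, not the file's actual code), it can increment a per-key counter before each call and reject once the configured maximum is reached:

async def check_parallel_request_limit(
    cache: DualRateLimitCache, api_key: str, max_parallel_requests: int
) -> None:
    # with Redis behind the cache, this count is shared across instances
    request_count_key = f"{api_key}::request_count"
    current = await cache.async_increment(request_count_key, 1)
    if current > max_parallel_requests:
        # roll back our increment and reject the request
        await cache.async_increment(request_count_key, -1)
        raise RuntimeError("Max parallel requests reached for this key")
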
@@ -3963,6 +3964,11 @@ async def startup_event():
     db_writer_client = HTTPHandler()
+    ## UPDATE INTERNAL USAGE CACHE ##
+    proxy_logging_obj.update_values(
+        redis_cache=redis_usage_cache
+    )  # used by parallel request limiter for rate limiting keys across instances
     proxy_logging_obj._init_litellm_callbacks()  # INITIALIZE LITELLM CALLBACKS ON SERVER STARTUP <- do this to catch any logging errors on startup, not when calls are being made
     if "daily_reports" in proxy_logging_obj.slack_alerting_instance.alert_types: