From a4baa61888af1c00dab309b51d2cc76596b34a2c Mon Sep 17 00:00:00 2001
From: Krrish Dholakia <krrishdholakia@gmail.com>
Date: Tue, 15 Apr 2025 17:04:09 -0700
Subject: [PATCH] refactor(parallel_request_limiter.py): initial commit moving
 max parallel request handler to use base routing strategy

This allows better use of the Redis and internal in-memory caches.
---
 litellm/proxy/_new_secret_config.yaml           | 3 +++
 litellm/proxy/hooks/parallel_request_limiter.py | 3 ++-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/litellm/proxy/_new_secret_config.yaml b/litellm/proxy/_new_secret_config.yaml
index e166133cec..75e7823822 100644
--- a/litellm/proxy/_new_secret_config.yaml
+++ b/litellm/proxy/_new_secret_config.yaml
@@ -34,6 +34,9 @@ litellm_settings:
   num_retries: 0
   callbacks: ["prometheus"]
   check_provider_endpoint: true
+  cache: true
+  cache_params:
+    type: redis
 
 files_settings:
   - custom_llm_provider: gemini
diff --git a/litellm/proxy/hooks/parallel_request_limiter.py b/litellm/proxy/hooks/parallel_request_limiter.py
index 242c013d67..878b1788a4 100644
--- a/litellm/proxy/hooks/parallel_request_limiter.py
+++ b/litellm/proxy/hooks/parallel_request_limiter.py
@@ -16,6 +16,7 @@ from litellm.proxy.auth.auth_utils import (
     get_key_model_rpm_limit,
     get_key_model_tpm_limit,
 )
+from litellm.router_strategy.base_routing_strategy import BaseRoutingStrategy
 
 if TYPE_CHECKING:
     from opentelemetry.trace import Span as _Span
@@ -38,7 +39,7 @@ class CacheObject(TypedDict):
     request_count_end_user_id: Optional[dict]
 
 
-class _PROXY_MaxParallelRequestsHandler(CustomLogger):
+class _PROXY_MaxParallelRequestsHandler(BaseRoutingStrategy, CustomLogger):
     # Class variables or attributes
     def __init__(self, internal_usage_cache: InternalUsageCache):
         self.internal_usage_cache = internal_usage_cache