Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-25 18:54:30 +00:00)
refactor(parallel_request_limiter.py): initial commit moving the max parallel request handler to use the base routing strategy

Allows for better Redis / internal memory cache usage.
This commit is contained in:
parent ef80d25f16
commit a4baa61888

2 changed files with 5 additions and 1 deletion
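The commit message's claim is about cache locality: by inheriting from BaseRoutingStrategy, the rate-limit handler can reuse the strategy layer's counter handling, keeping hot-path increments in the internal memory cache and flushing them to Redis less often than once per request. Below is a minimal, self-contained sketch of that pattern; the toy classes and method names (`increment_value`, `sync_to_redis`, `InMemoryCache`) are illustrative assumptions, not litellm's actual API:

```python
import asyncio
from typing import Callable, Dict


class InMemoryCache:
    """Toy stand-in for the proxy's internal memory cache (hypothetical)."""

    def __init__(self) -> None:
        self._store: Dict[str, int] = {}

    def increment(self, key: str, value: int = 1) -> int:
        self._store[key] = self._store.get(key, 0) + value
        return self._store[key]

    def snapshot(self) -> Dict[str, int]:
        return dict(self._store)


class BaseRoutingStrategy:
    """Sketch of a shared strategy base: count in process-local memory,
    flush to Redis in batches instead of paying a network round-trip
    on every request. Not litellm's real class."""

    def __init__(self) -> None:
        self.local_cache = InMemoryCache()

    async def increment_value(self, key: str) -> int:
        # Cheap in-process increment; Redis is only touched on sync.
        return self.local_cache.increment(key)

    async def sync_to_redis(self, flush: Callable[[Dict[str, int]], None]) -> None:
        # Stand-in for a periodic background flush of accumulated counters.
        flush(self.local_cache.snapshot())


class CustomLogger:
    """Toy stand-in for litellm's CustomLogger hook base."""

    async def async_pre_call_hook(self, key: str) -> None:
        pass


class MaxParallelRequestsHandler(BaseRoutingStrategy, CustomLogger):
    """The handler inherits the strategy's cache logic instead of
    re-implementing its own counter bookkeeping."""

    async def async_pre_call_hook(self, key: str) -> None:
        count = await self.increment_value(f"parallel_requests:{key}")
        print(f"{key}: {count} request(s) in flight")


async def main() -> None:
    handler = MaxParallelRequestsHandler()
    await handler.async_pre_call_hook("api-key-123")
    await handler.async_pre_call_hook("api-key-123")
    await handler.sync_to_redis(lambda counters: print("flushed to redis:", counters))


asyncio.run(main())
```

The likely design win is a single cache-access code path shared by routing strategies and rate-limit hooks, rather than each maintaining its own Redis logic.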
@@ -16,6 +16,7 @@ from litellm.proxy.auth.auth_utils import (
     get_key_model_rpm_limit,
     get_key_model_tpm_limit,
 )
+from litellm.router_strategy.base_routing_strategy import BaseRoutingStrategy

 if TYPE_CHECKING:
     from opentelemetry.trace import Span as _Span

@@ -38,7 +39,7 @@ class CacheObject(TypedDict):
     request_count_end_user_id: Optional[dict]


-class _PROXY_MaxParallelRequestsHandler(CustomLogger):
+class _PROXY_MaxParallelRequestsHandler(BaseRoutingStrategy, CustomLogger):
     # Class variables or attributes
     def __init__(self, internal_usage_cache: InternalUsageCache):
         self.internal_usage_cache = internal_usage_cache
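One detail in the hunk above: with the bases declared as `(BaseRoutingStrategy, CustomLogger)`, Python's method resolution order searches BaseRoutingStrategy before CustomLogger, so any attribute both bases define resolves to the routing strategy's version. Note also that the handler's `__init__` as shown does not call `super().__init__()`, so any state BaseRoutingStrategy needs must be set up elsewhere. A standalone illustration of the MRO using empty stand-in classes (not litellm code):

```python
class BaseRoutingStrategy:
    pass


class CustomLogger:
    pass


class _PROXY_MaxParallelRequestsHandler(BaseRoutingStrategy, CustomLogger):
    pass


# Bases are searched left to right before falling back to object:
print([c.__name__ for c in _PROXY_MaxParallelRequestsHandler.__mro__])
# ['_PROXY_MaxParallelRequestsHandler', 'BaseRoutingStrategy',
#  'CustomLogger', 'object']
```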