From a4baa61888af1c00dab309b51d2cc76596b34a2c Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Tue, 15 Apr 2025 17:04:09 -0700 Subject: [PATCH] refactor(parallel_request_limiter.py): initial commit moving max parallel request handler to use base routing strategy. Allows for better Redis / internal memory cache usage. --- litellm/proxy/_new_secret_config.yaml | 3 +++ litellm/proxy/hooks/parallel_request_limiter.py | 3 ++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/litellm/proxy/_new_secret_config.yaml b/litellm/proxy/_new_secret_config.yaml index e166133cec..75e7823822 100644 --- a/litellm/proxy/_new_secret_config.yaml +++ b/litellm/proxy/_new_secret_config.yaml @@ -34,6 +34,9 @@ litellm_settings: num_retries: 0 callbacks: ["prometheus"] check_provider_endpoint: true + cache: true + cache_params: + type: redis files_settings: - custom_llm_provider: gemini diff --git a/litellm/proxy/hooks/parallel_request_limiter.py b/litellm/proxy/hooks/parallel_request_limiter.py index 242c013d67..878b1788a4 100644 --- a/litellm/proxy/hooks/parallel_request_limiter.py +++ b/litellm/proxy/hooks/parallel_request_limiter.py @@ -16,6 +16,7 @@ from litellm.proxy.auth.auth_utils import ( get_key_model_rpm_limit, get_key_model_tpm_limit, ) +from litellm.router_strategy.base_routing_strategy import BaseRoutingStrategy if TYPE_CHECKING: from opentelemetry.trace import Span as _Span @@ -38,7 +39,7 @@ class CacheObject(TypedDict): request_count_end_user_id: Optional[dict] -class _PROXY_MaxParallelRequestsHandler(CustomLogger): +class _PROXY_MaxParallelRequestsHandler(BaseRoutingStrategy, CustomLogger): # Class variables or attributes def __init__(self, internal_usage_cache: InternalUsageCache): self.internal_usage_cache = internal_usage_cache