refactor(parallel_request_limiter.py): initial commit moving max parallel request handler to use base routing strategy

Allows for better use of the Redis and internal in-memory caches.
2025-04-28 04:04:31 +00:00 · 2025-04-15 17:04:09 -07:00 · 2025-04-15 17:04:09 -07:00 · a4baa61888
commit a4baa61888
parent ef80d25f16
2 changed files with 5 additions and 1 deletion
--- a/litellm/proxy/_new_secret_config.yaml
+++ b/litellm/proxy/_new_secret_config.yaml
@@ -34,6 +34,9 @@ litellm_settings:
  num_retries: 0
  callbacks: ["prometheus"]
  check_provider_endpoint: true
+  cache: true
+  cache_params:
+    type: redis
 files_settings:
  - custom_llm_provider: gemini
--- a/litellm/proxy/hooks/parallel_request_limiter.py
+++ b/litellm/proxy/hooks/parallel_request_limiter.py
@@ -16,6 +16,7 @@ from litellm.proxy.auth.auth_utils import (
    get_key_model_rpm_limit,
    get_key_model_tpm_limit,
 )
+from litellm.router_strategy.base_routing_strategy import BaseRoutingStrategy
 if TYPE_CHECKING:
    from opentelemetry.trace import Span as _Span
@@ -38,7 +39,7 @@ class CacheObject(TypedDict):
    request_count_end_user_id: Optional[dict]
-class _PROXY_MaxParallelRequestsHandler(CustomLogger):
+class _PROXY_MaxParallelRequestsHandler(BaseRoutingStrategy, CustomLogger):
    # Class variables or attributes
    def __init__(self, internal_usage_cache: InternalUsageCache):
        self.internal_usage_cache = internal_usage_cache