mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-28 04:04:31 +00:00
refactor(parallel_request_limiter.py): initial commit moving the max parallel request handler to use the base routing strategy
This allows for better Redis / in-memory cache usage.
This commit is contained in:
parent
ef80d25f16
commit
a4baa61888
2 changed files with 5 additions and 1 deletion
|
@ -34,6 +34,9 @@ litellm_settings:
|
||||||
num_retries: 0
|
num_retries: 0
|
||||||
callbacks: ["prometheus"]
|
callbacks: ["prometheus"]
|
||||||
check_provider_endpoint: true
|
check_provider_endpoint: true
|
||||||
|
cache: true
|
||||||
|
cache_params:
|
||||||
|
type: redis
|
||||||
|
|
||||||
files_settings:
|
files_settings:
|
||||||
- custom_llm_provider: gemini
|
- custom_llm_provider: gemini
|
||||||
|
|
|
@ -16,6 +16,7 @@ from litellm.proxy.auth.auth_utils import (
|
||||||
get_key_model_rpm_limit,
|
get_key_model_rpm_limit,
|
||||||
get_key_model_tpm_limit,
|
get_key_model_tpm_limit,
|
||||||
)
|
)
|
||||||
|
from litellm.router_strategy.base_routing_strategy import BaseRoutingStrategy
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
from opentelemetry.trace import Span as _Span
|
from opentelemetry.trace import Span as _Span
|
||||||
|
@ -38,7 +39,7 @@ class CacheObject(TypedDict):
|
||||||
request_count_end_user_id: Optional[dict]
|
request_count_end_user_id: Optional[dict]
|
||||||
|
|
||||||
|
|
||||||
class _PROXY_MaxParallelRequestsHandler(CustomLogger):
|
class _PROXY_MaxParallelRequestsHandler(BaseRoutingStrategy, CustomLogger):
|
||||||
# Class variables or attributes
|
# Class variables or attributes
|
||||||
def __init__(self, internal_usage_cache: InternalUsageCache):
|
def __init__(self, internal_usage_cache: InternalUsageCache):
|
||||||
self.internal_usage_cache = internal_usage_cache
|
self.internal_usage_cache = internal_usage_cache
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue