Mirror of https://github.com/BerriAI/litellm.git, synced 2025-04-25 18:54:30 +00:00
[Perf Proxy] parallel request limiter - use one cache update call (#5932)
* fix parallel request limiter - use one cache update call
* ci/cd run again
* run ci/cd again
* use docker username password
* fix config.yml
* fix config
* fix config
* fix config.yml
* ci/cd run again
* use correct typing for batch set cache
* fix async_set_cache_pipeline
* fix only check user id tpm / rpm limits when limits set
* fix test_openai_azure_embedding_with_oidc_and_cf
parent 71f68ac185
commit f4613a100d
7 changed files with 56 additions and 36 deletions
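The gist of the perf change: in the limiter's success handler, each rate-limit scope (API key, model group + API key, user, team, end user) previously awaited its own internal_usage_cache.async_set_cache() write; the handler now appends (key, value) tuples to values_to_update_in_cache and flushes them with a single async_batch_set_cache() call. A minimal, self-contained sketch of that pattern, assuming a hypothetical ToyCache that only mimics the method names visible in the diff below:

import asyncio
from typing import Any, List, Tuple

class ToyCache:
    """Hypothetical stand-in for litellm's internal usage cache."""

    def __init__(self) -> None:
        self._store: dict = {}

    async def async_set_cache(self, key: str, value: Any, ttl: int = 60) -> None:
        # Old path: one awaited call per key (one round trip if Redis-backed).
        self._store[key] = value

    async def async_batch_set_cache(
        self, cache_list: List[Tuple[str, Any]], ttl: int = 60
    ) -> None:
        # New path: all pending key/value pairs land in a single call.
        for key, value in cache_list:
            self._store[key] = value

async def main() -> None:
    cache = ToyCache()
    new_val = 3  # request count for the current minute (illustrative)
    values_to_update_in_cache: List[Tuple[str, Any]] = [
        ("api-key::19-45::request_count", new_val),
        ("model::api-key::19-45::request_count", new_val),
        ("user-id::19-45::request_count", new_val),
    ]
    await cache.async_batch_set_cache(values_to_update_in_cache, ttl=60)

asyncio.run(main())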
litellm/proxy/hooks/parallel_request_limiter.py

@@ -327,8 +327,13 @@ class _PROXY_MaxParallelRequestsHandler(CustomLogger):
                 user_api_key_dict=user_api_key_dict,
             )
             # get user tpm/rpm limits
-            if _user_id_rate_limits is not None and isinstance(
-                _user_id_rate_limits, dict
+            if (
+                _user_id_rate_limits is not None
+                and isinstance(_user_id_rate_limits, dict)
+                and (
+                    _user_id_rate_limits.get("tpm_limit", None) is not None
+                    or _user_id_rate_limits.get("rpm_limit", None) is not None
+                )
             ):
                 user_tpm_limit = _user_id_rate_limits.get("tpm_limit", None)
                 user_rpm_limit = _user_id_rate_limits.get("rpm_limit", None)
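The squashed fix "only check user id tpm / rpm limits when limits set" is this tightened condition: the per-user branch now runs only when the user object actually carries a tpm_limit or rpm_limit, instead of for every dict-shaped user object. A hypothetical standalone restatement (has_user_limits is not a litellm function):

from typing import Optional

def has_user_limits(limits: Optional[dict]) -> bool:
    # Mirrors the new guard: a dict with at least one concrete limit set.
    return (
        limits is not None
        and isinstance(limits, dict)
        and (
            limits.get("tpm_limit") is not None
            or limits.get("rpm_limit") is not None
        )
    )

assert has_user_limits({"tpm_limit": 1000}) is True
assert has_user_limits({"tpm_limit": None, "rpm_limit": None}) is False  # old guard entered this branch
assert has_user_limits(None) is False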
@@ -472,6 +477,8 @@ class _PROXY_MaxParallelRequestsHandler(CustomLogger):
             # Update usage - API Key
             # ------------

+            values_to_update_in_cache = []
+
             if user_api_key is not None:
                 request_count_api_key = (
                     f"{user_api_key}::{precise_minute}::request_count"
@@ -495,12 +502,7 @@ class _PROXY_MaxParallelRequestsHandler(CustomLogger):
                 self.print_verbose(
                     f"updated_value in success call: {new_val}, precise_minute: {precise_minute}"
                 )
-                await self.internal_usage_cache.async_set_cache(
-                    request_count_api_key,
-                    new_val,
-                    ttl=60,
-                    litellm_parent_otel_span=litellm_parent_otel_span,
-                )  # store in cache for 1 min.
+                values_to_update_in_cache.append((request_count_api_key, new_val))

             # ------------
             # Update usage - model group + API Key
@@ -536,12 +538,7 @@ class _PROXY_MaxParallelRequestsHandler(CustomLogger):
                 self.print_verbose(
                     f"updated_value in success call: {new_val}, precise_minute: {precise_minute}"
                 )
-                await self.internal_usage_cache.async_set_cache(
-                    request_count_api_key,
-                    new_val,
-                    ttl=60,
-                    litellm_parent_otel_span=litellm_parent_otel_span,
-                )
+                values_to_update_in_cache.append((request_count_api_key, new_val))

             # ------------
             # Update usage - User
@@ -574,12 +571,7 @@ class _PROXY_MaxParallelRequestsHandler(CustomLogger):
                 self.print_verbose(
                     f"updated_value in success call: {new_val}, precise_minute: {precise_minute}"
                 )
-                await self.internal_usage_cache.async_set_cache(
-                    request_count_api_key,
-                    new_val,
-                    ttl=60,
-                    litellm_parent_otel_span=litellm_parent_otel_span,
-                )  # store in cache for 1 min.
+                values_to_update_in_cache.append((request_count_api_key, new_val))

             # ------------
             # Update usage - Team
@@ -612,12 +604,7 @@ class _PROXY_MaxParallelRequestsHandler(CustomLogger):
                 self.print_verbose(
                     f"updated_value in success call: {new_val}, precise_minute: {precise_minute}"
                 )
-                await self.internal_usage_cache.async_set_cache(
-                    request_count_api_key,
-                    new_val,
-                    ttl=60,
-                    litellm_parent_otel_span=litellm_parent_otel_span,
-                )  # store in cache for 1 min.
+                values_to_update_in_cache.append((request_count_api_key, new_val))

             # ------------
             # Update usage - End User
@@ -650,13 +637,13 @@ class _PROXY_MaxParallelRequestsHandler(CustomLogger):
                 self.print_verbose(
                     f"updated_value in success call: {new_val}, precise_minute: {precise_minute}"
                 )
-                await self.internal_usage_cache.async_set_cache(
-                    request_count_api_key,
-                    new_val,
-                    ttl=60,
-                    litellm_parent_otel_span=litellm_parent_otel_span,
-                )  # store in cache for 1 min.
+                values_to_update_in_cache.append((request_count_api_key, new_val))

+            await self.internal_usage_cache.async_batch_set_cache(
+                cache_list=values_to_update_in_cache,
+                ttl=60,
+                litellm_parent_otel_span=litellm_parent_otel_span,
+            )
         except Exception as e:
             self.print_verbose(e)  # noqa
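The commit message also mentions fixing async_set_cache_pipeline; for a Redis-backed cache, a batched write like the one above maps naturally onto a pipeline, so every queued SET goes out in one round trip. A sketch under the assumption of a plain redis-py asyncio client (not litellm's actual RedisCache; requires a reachable Redis server):

import asyncio
from typing import List, Tuple

import redis.asyncio as redis

async def batch_set(
    client: redis.Redis, cache_list: List[Tuple[str, str]], ttl: int = 60
) -> None:
    pipe = client.pipeline()
    for key, value in cache_list:
        pipe.set(key, value, ex=ttl)  # queued client-side, not yet sent
    await pipe.execute()  # one round trip for all queued SETs

async def main() -> None:
    client = redis.Redis()
    await batch_set(
        client,
        [
            ("api-key::19-45::request_count", "3"),
            ("user-id::19-45::request_count", "3"),
        ],
    )
    await client.aclose()

asyncio.run(main())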