[Perf Proxy] parallel request limiter - use one cache update call (#5932)

* fix parallel request limiter - use one cache update call

* ci/cd run again

* run ci/cd again

* use docker username and password

* fix config.yml

* fix config

* fix config

* fix config.yml

* ci/cd run again

* use correct typing for batch set cache

* fix async_set_cache_pipeline

* fix: only check user id tpm / rpm limits when limits are set

* fix test_openai_azure_embedding_with_oidc_and_cf
This commit is contained in:
Ishaan Jaff 2024-09-27 10:26:15 -07:00
parent 71f68ac185
commit f4613a100d
7 changed files with 56 additions and 36 deletions

View file

@@ -327,8 +327,13 @@ class _PROXY_MaxParallelRequestsHandler(CustomLogger):
user_api_key_dict=user_api_key_dict,
)
# get user tpm/rpm limits
if _user_id_rate_limits is not None and isinstance(
_user_id_rate_limits, dict
if (
_user_id_rate_limits is not None
and isinstance(_user_id_rate_limits, dict)
and (
_user_id_rate_limits.get("tpm_limit", None) is not None
or _user_id_rate_limits.get("rpm_limit", None) is not None
)
):
user_tpm_limit = _user_id_rate_limits.get("tpm_limit", None)
user_rpm_limit = _user_id_rate_limits.get("rpm_limit", None)
@@ -472,6 +477,8 @@ class _PROXY_MaxParallelRequestsHandler(CustomLogger):
# Update usage - API Key
# ------------
values_to_update_in_cache = []
if user_api_key is not None:
request_count_api_key = (
f"{user_api_key}::{precise_minute}::request_count"
@@ -495,12 +502,7 @@ class _PROXY_MaxParallelRequestsHandler(CustomLogger):
self.print_verbose(
f"updated_value in success call: {new_val}, precise_minute: {precise_minute}"
)
await self.internal_usage_cache.async_set_cache(
request_count_api_key,
new_val,
ttl=60,
litellm_parent_otel_span=litellm_parent_otel_span,
) # store in cache for 1 min.
values_to_update_in_cache.append((request_count_api_key, new_val))
# ------------
# Update usage - model group + API Key
@@ -536,12 +538,7 @@ class _PROXY_MaxParallelRequestsHandler(CustomLogger):
self.print_verbose(
f"updated_value in success call: {new_val}, precise_minute: {precise_minute}"
)
await self.internal_usage_cache.async_set_cache(
request_count_api_key,
new_val,
ttl=60,
litellm_parent_otel_span=litellm_parent_otel_span,
)
values_to_update_in_cache.append((request_count_api_key, new_val))
# ------------
# Update usage - User
@@ -574,12 +571,7 @@ class _PROXY_MaxParallelRequestsHandler(CustomLogger):
self.print_verbose(
f"updated_value in success call: {new_val}, precise_minute: {precise_minute}"
)
await self.internal_usage_cache.async_set_cache(
request_count_api_key,
new_val,
ttl=60,
litellm_parent_otel_span=litellm_parent_otel_span,
) # store in cache for 1 min.
values_to_update_in_cache.append((request_count_api_key, new_val))
# ------------
# Update usage - Team
@@ -612,12 +604,7 @@ class _PROXY_MaxParallelRequestsHandler(CustomLogger):
self.print_verbose(
f"updated_value in success call: {new_val}, precise_minute: {precise_minute}"
)
await self.internal_usage_cache.async_set_cache(
request_count_api_key,
new_val,
ttl=60,
litellm_parent_otel_span=litellm_parent_otel_span,
) # store in cache for 1 min.
values_to_update_in_cache.append((request_count_api_key, new_val))
# ------------
# Update usage - End User
@@ -650,13 +637,13 @@ class _PROXY_MaxParallelRequestsHandler(CustomLogger):
self.print_verbose(
f"updated_value in success call: {new_val}, precise_minute: {precise_minute}"
)
await self.internal_usage_cache.async_set_cache(
request_count_api_key,
new_val,
ttl=60,
litellm_parent_otel_span=litellm_parent_otel_span,
) # store in cache for 1 min.
values_to_update_in_cache.append((request_count_api_key, new_val))
await self.internal_usage_cache.async_batch_set_cache(
cache_list=values_to_update_in_cache,
ttl=60,
litellm_parent_otel_span=litellm_parent_otel_span,
)
except Exception as e:
self.print_verbose(e) # noqa