From 6c7d1d5c96800cefb31dadbe3850c94b31b13021 Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Sat, 28 Sep 2024 19:41:35 -0700 Subject: [PATCH] fix(parallel_request_limiter.py): only update hidden params, don't set new (can lead to errors for responses where attribute can't be set) --- .../proxy/hooks/parallel_request_limiter.py | 56 +++++++++++-------- 1 file changed, 32 insertions(+), 24 deletions(-) diff --git a/litellm/proxy/hooks/parallel_request_limiter.py b/litellm/proxy/hooks/parallel_request_limiter.py index 55ea06e74..786e98bfd 100644 --- a/litellm/proxy/hooks/parallel_request_limiter.py +++ b/litellm/proxy/hooks/parallel_request_limiter.py @@ -791,29 +791,37 @@ class _PROXY_MaxParallelRequestsHandler(CustomLogger): ) key_tpm_limit = user_api_key_dict.tpm_limit - _hidden_params = getattr(response, "_hidden_params", {}) or {} - if isinstance(_hidden_params, BaseModel): - _hidden_params = _hidden_params.model_dump() - _additional_headers = _hidden_params.get("additional_headers", {}) or {} - if key_remaining_rpm_limit is not None: - _additional_headers["x-ratelimit-remaining-requests"] = ( - key_remaining_rpm_limit - ) - if key_rpm_limit is not None: - _additional_headers["x-ratelimit-limit-requests"] = key_rpm_limit - if key_remaining_tpm_limit is not None: - _additional_headers["x-ratelimit-remaining-tokens"] = ( - key_remaining_tpm_limit - ) - if key_tpm_limit is not None: - _additional_headers["x-ratelimit-limit-tokens"] = key_tpm_limit + if hasattr(response, "_hidden_params"): + _hidden_params = getattr(response, "_hidden_params") + else: + _hidden_params = None + if _hidden_params is not None and ( + isinstance(_hidden_params, BaseModel) or isinstance(_hidden_params, dict) + ): + if isinstance(_hidden_params, BaseModel): + _hidden_params = _hidden_params.model_dump() - setattr( - response, - "_hidden_params", - {**_hidden_params, "additional_headers": _additional_headers}, - ) + _additional_headers = _hidden_params.get("additional_headers", {}) or {} - return await super().async_post_call_success_hook( - data, user_api_key_dict, response - ) + if key_remaining_rpm_limit is not None: + _additional_headers["x-ratelimit-remaining-requests"] = ( + key_remaining_rpm_limit + ) + if key_rpm_limit is not None: + _additional_headers["x-ratelimit-limit-requests"] = key_rpm_limit + if key_remaining_tpm_limit is not None: + _additional_headers["x-ratelimit-remaining-tokens"] = ( + key_remaining_tpm_limit + ) + if key_tpm_limit is not None: + _additional_headers["x-ratelimit-limit-tokens"] = key_tpm_limit + + setattr( + response, + "_hidden_params", + {**_hidden_params, "additional_headers": _additional_headers}, + ) + + return await super().async_post_call_success_hook( + data, user_api_key_dict, response + )