forked from phoenix/litellm-mirror
fix(parallel_request_limiter.py): only update hidden params, don't set new (can lead to errors for responses where attribute can't be set)
This commit is contained in:
parent
fa64b6ca24
commit
6c7d1d5c96
1 changed file with 32 additions and 24 deletions
|
@@ -791,29 +791,37 @@ class _PROXY_MaxParallelRequestsHandler(CustomLogger):
|
||||||
)
|
)
|
||||||
key_tpm_limit = user_api_key_dict.tpm_limit
|
key_tpm_limit = user_api_key_dict.tpm_limit
|
||||||
|
|
||||||
_hidden_params = getattr(response, "_hidden_params", {}) or {}
|
if hasattr(response, "_hidden_params"):
|
||||||
if isinstance(_hidden_params, BaseModel):
|
_hidden_params = getattr(response, "_hidden_params")
|
||||||
_hidden_params = _hidden_params.model_dump()
|
else:
|
||||||
_additional_headers = _hidden_params.get("additional_headers", {}) or {}
|
_hidden_params = None
|
||||||
if key_remaining_rpm_limit is not None:
|
if _hidden_params is not None and (
|
||||||
_additional_headers["x-ratelimit-remaining-requests"] = (
|
isinstance(_hidden_params, BaseModel) or isinstance(_hidden_params, dict)
|
||||||
key_remaining_rpm_limit
|
):
|
||||||
)
|
if isinstance(_hidden_params, BaseModel):
|
||||||
if key_rpm_limit is not None:
|
_hidden_params = _hidden_params.model_dump()
|
||||||
_additional_headers["x-ratelimit-limit-requests"] = key_rpm_limit
|
|
||||||
if key_remaining_tpm_limit is not None:
|
|
||||||
_additional_headers["x-ratelimit-remaining-tokens"] = (
|
|
||||||
key_remaining_tpm_limit
|
|
||||||
)
|
|
||||||
if key_tpm_limit is not None:
|
|
||||||
_additional_headers["x-ratelimit-limit-tokens"] = key_tpm_limit
|
|
||||||
|
|
||||||
setattr(
|
_additional_headers = _hidden_params.get("additional_headers", {}) or {}
|
||||||
response,
|
|
||||||
"_hidden_params",
|
|
||||||
{**_hidden_params, "additional_headers": _additional_headers},
|
|
||||||
)
|
|
||||||
|
|
||||||
return await super().async_post_call_success_hook(
|
if key_remaining_rpm_limit is not None:
|
||||||
data, user_api_key_dict, response
|
_additional_headers["x-ratelimit-remaining-requests"] = (
|
||||||
)
|
key_remaining_rpm_limit
|
||||||
|
)
|
||||||
|
if key_rpm_limit is not None:
|
||||||
|
_additional_headers["x-ratelimit-limit-requests"] = key_rpm_limit
|
||||||
|
if key_remaining_tpm_limit is not None:
|
||||||
|
_additional_headers["x-ratelimit-remaining-tokens"] = (
|
||||||
|
key_remaining_tpm_limit
|
||||||
|
)
|
||||||
|
if key_tpm_limit is not None:
|
||||||
|
_additional_headers["x-ratelimit-limit-tokens"] = key_tpm_limit
|
||||||
|
|
||||||
|
setattr(
|
||||||
|
response,
|
||||||
|
"_hidden_params",
|
||||||
|
{**_hidden_params, "additional_headers": _additional_headers},
|
||||||
|
)
|
||||||
|
|
||||||
|
return await super().async_post_call_success_hook(
|
||||||
|
data, user_api_key_dict, response
|
||||||
|
)
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue