mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-25 10:44:24 +00:00
(minor fix proxy) Clarify that Proxy rate limit errors show the hash of the LiteLLM virtual key (#7210)
* fix: clarify that rate limit errors show the hash of the litellm virtual key
* fix constants.py
* update test
* fix test parallel limiter
This commit is contained in:
parent
36862d0a98
commit
3de32f4106
3 changed files with 10 additions and 6 deletions
|
@ -72,3 +72,6 @@ LITELLM_CHAT_PROVIDERS = [
|
||||||
MAX_SPENDLOG_ROWS_TO_QUERY = (
|
MAX_SPENDLOG_ROWS_TO_QUERY = (
|
||||||
1_000_000 # if spendLogs has more than 1M rows, do not query the DB
|
1_000_000 # if spendLogs has more than 1M rows, do not query the DB
|
||||||
)
|
)
|
||||||
|
# makes it clear this is a rate limit error for a litellm virtual key
|
||||||
|
RATE_LIMIT_ERROR_MESSAGE_FOR_VIRTUAL_KEY = "LiteLLM Virtual Key user_api_key_hash"
|
||||||
|
|
||||||
|
|
|
@ -10,6 +10,7 @@ from pydantic import BaseModel
|
||||||
import litellm
|
import litellm
|
||||||
from litellm import DualCache, ModelResponse
|
from litellm import DualCache, ModelResponse
|
||||||
from litellm._logging import verbose_proxy_logger
|
from litellm._logging import verbose_proxy_logger
|
||||||
|
from litellm.constants import RATE_LIMIT_ERROR_MESSAGE_FOR_VIRTUAL_KEY
|
||||||
from litellm.integrations.custom_logger import CustomLogger
|
from litellm.integrations.custom_logger import CustomLogger
|
||||||
from litellm.litellm_core_utils.core_helpers import _get_parent_otel_span_from_kwargs
|
from litellm.litellm_core_utils.core_helpers import _get_parent_otel_span_from_kwargs
|
||||||
from litellm.proxy._types import CurrentItemRateLimit, UserAPIKeyAuth
|
from litellm.proxy._types import CurrentItemRateLimit, UserAPIKeyAuth
|
||||||
|
@ -271,7 +272,7 @@ class _PROXY_MaxParallelRequestsHandler(CustomLogger):
|
||||||
pass
|
pass
|
||||||
elif max_parallel_requests == 0 or tpm_limit == 0 or rpm_limit == 0:
|
elif max_parallel_requests == 0 or tpm_limit == 0 or rpm_limit == 0:
|
||||||
return self.raise_rate_limit_error(
|
return self.raise_rate_limit_error(
|
||||||
additional_details=f"Hit limit for api_key: {api_key}. max_parallel_requests: {max_parallel_requests}, tpm_limit: {tpm_limit}, rpm_limit: {rpm_limit}"
|
additional_details=f"Hit limit for {RATE_LIMIT_ERROR_MESSAGE_FOR_VIRTUAL_KEY}: {api_key}. max_parallel_requests: {max_parallel_requests}, tpm_limit: {tpm_limit}, rpm_limit: {rpm_limit}"
|
||||||
)
|
)
|
||||||
elif current is None:
|
elif current is None:
|
||||||
new_val = {
|
new_val = {
|
||||||
|
@ -294,7 +295,7 @@ class _PROXY_MaxParallelRequestsHandler(CustomLogger):
|
||||||
values_to_update_in_cache.append((request_count_api_key, new_val))
|
values_to_update_in_cache.append((request_count_api_key, new_val))
|
||||||
else:
|
else:
|
||||||
return self.raise_rate_limit_error(
|
return self.raise_rate_limit_error(
|
||||||
additional_details=f"Hit limit for api_key: {api_key}. tpm_limit: {tpm_limit}, current_tpm {current['current_tpm']} , rpm_limit: {rpm_limit} current rpm {current['current_rpm']} "
|
additional_details=f"Hit limit for {RATE_LIMIT_ERROR_MESSAGE_FOR_VIRTUAL_KEY}: {api_key}. tpm_limit: {tpm_limit}, current_tpm {current['current_tpm']} , rpm_limit: {rpm_limit} current rpm {current['current_rpm']} "
|
||||||
)
|
)
|
||||||
|
|
||||||
# Check if request under RPM/TPM per model for a given API Key
|
# Check if request under RPM/TPM per model for a given API Key
|
||||||
|
@ -344,14 +345,14 @@ class _PROXY_MaxParallelRequestsHandler(CustomLogger):
|
||||||
and current["current_tpm"] >= tpm_limit_for_model
|
and current["current_tpm"] >= tpm_limit_for_model
|
||||||
):
|
):
|
||||||
return self.raise_rate_limit_error(
|
return self.raise_rate_limit_error(
|
||||||
additional_details=f"Hit TPM limit for model: {_model} on api_key: {api_key}. tpm_limit: {tpm_limit_for_model}, current_tpm {current['current_tpm']} "
|
additional_details=f"Hit TPM limit for model: {_model} on {RATE_LIMIT_ERROR_MESSAGE_FOR_VIRTUAL_KEY}: {api_key}. tpm_limit: {tpm_limit_for_model}, current_tpm {current['current_tpm']} "
|
||||||
)
|
)
|
||||||
elif (
|
elif (
|
||||||
rpm_limit_for_model is not None
|
rpm_limit_for_model is not None
|
||||||
and current["current_rpm"] >= rpm_limit_for_model
|
and current["current_rpm"] >= rpm_limit_for_model
|
||||||
):
|
):
|
||||||
return self.raise_rate_limit_error(
|
return self.raise_rate_limit_error(
|
||||||
additional_details=f"Hit RPM limit for model: {_model} on api_key: {api_key}. rpm_limit: {rpm_limit_for_model}, current_rpm {current['current_rpm']} "
|
additional_details=f"Hit RPM limit for model: {_model} on {RATE_LIMIT_ERROR_MESSAGE_FOR_VIRTUAL_KEY}: {api_key}. rpm_limit: {rpm_limit_for_model}, current_rpm {current['current_rpm']} "
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
values_to_update_in_cache.append((request_count_api_key, new_val))
|
values_to_update_in_cache.append((request_count_api_key, new_val))
|
||||||
|
|
|
@ -1096,7 +1096,7 @@ async def test_pre_call_hook_rpm_limits_per_model():
|
||||||
assert e.status_code == 429
|
assert e.status_code == 429
|
||||||
print("got error=", e)
|
print("got error=", e)
|
||||||
assert (
|
assert (
|
||||||
"limit reached Hit RPM limit for model: azure-model on api_key: c11e7177eb60c80cf983ddf8ca98f2dc1272d4c612204ce9bedd2460b18939cc"
|
"limit reached Hit RPM limit for model: azure-model on LiteLLM Virtual Key user_api_key_hash: c11e7177eb60c80cf983ddf8ca98f2dc1272d4c612204ce9bedd2460b18939cc"
|
||||||
in str(e)
|
in str(e)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -1205,7 +1205,7 @@ async def test_pre_call_hook_tpm_limits_per_model():
|
||||||
assert e.status_code == 429
|
assert e.status_code == 429
|
||||||
print("got error=", e)
|
print("got error=", e)
|
||||||
assert (
|
assert (
|
||||||
"request limit reached Hit TPM limit for model: azure-model on api_key"
|
"request limit reached Hit TPM limit for model: azure-model on LiteLLM Virtual Key user_api_key_hash"
|
||||||
in str(e)
|
in str(e)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue