mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-27 19:54:13 +00:00
fix(proxy/utils.py): check cache before alerting user
This commit is contained in:
parent
265d32679e
commit
7fe02405e0
2 changed files with 22 additions and 9 deletions
|
@ -298,6 +298,7 @@ class ProxyLogging:
|
||||||
return
|
return
|
||||||
else:
|
else:
|
||||||
user_info = str(user_info)
|
user_info = str(user_info)
|
||||||
|
|
||||||
# percent of max_budget left to spend
|
# percent of max_budget left to spend
|
||||||
if user_max_budget > 0:
|
if user_max_budget > 0:
|
||||||
percent_left = (user_max_budget - user_current_spend) / user_max_budget
|
percent_left = (user_max_budget - user_current_spend) / user_max_budget
|
||||||
|
@ -317,22 +318,35 @@ class ProxyLogging:
|
||||||
)
|
)
|
||||||
return
|
return
|
||||||
|
|
||||||
|
## PREVENTIVE ALERTING ## - https://github.com/BerriAI/litellm/issues/2727
|
||||||
|
# - Alert once within 28d period
|
||||||
|
# - Cache this information
|
||||||
|
# - Don't re-alert, if alert already sent
|
||||||
|
_cache: DualCache = self.call_details["user_api_key_cache"]
|
||||||
|
|
||||||
# check if 5% of max budget is left
|
# check if 5% of max budget is left
|
||||||
if percent_left <= 0.05:
|
if percent_left <= 0.05:
|
||||||
message = "5% budget left for" + user_info
|
message = "5% budget left for" + user_info
|
||||||
|
result = await _cache.async_get_cache(key=message)
|
||||||
|
if result is None:
|
||||||
await self.alerting_handler(
|
await self.alerting_handler(
|
||||||
message=message,
|
message=message,
|
||||||
level="Medium",
|
level="Medium",
|
||||||
)
|
)
|
||||||
|
await _cache.async_set_cache(key=message, value="SENT", ttl=2419200)
|
||||||
|
|
||||||
return
|
return
|
||||||
|
|
||||||
# check if 15% of max budget is left
|
# check if 15% of max budget is left
|
||||||
if percent_left <= 0.15:
|
if percent_left <= 0.15:
|
||||||
message = "15% budget left for" + user_info
|
message = "15% budget left for" + user_info
|
||||||
|
result = await _cache.async_get_cache(key=message)
|
||||||
|
if result is None:
|
||||||
await self.alerting_handler(
|
await self.alerting_handler(
|
||||||
message=message,
|
message=message,
|
||||||
level="Low",
|
level="Low",
|
||||||
)
|
)
|
||||||
|
await _cache.async_set_cache(key=message, value="SENT", ttl=2419200)
|
||||||
return
|
return
|
||||||
|
|
||||||
return
|
return
|
||||||
|
|
|
@ -2775,7 +2775,6 @@ def client(original_function):
|
||||||
or isinstance(e, openai.Timeout)
|
or isinstance(e, openai.Timeout)
|
||||||
or isinstance(e, openai.APIConnectionError)
|
or isinstance(e, openai.APIConnectionError)
|
||||||
):
|
):
|
||||||
print_verbose(f"RETRY TRIGGERED!")
|
|
||||||
kwargs["num_retries"] = num_retries
|
kwargs["num_retries"] = num_retries
|
||||||
return litellm.completion_with_retries(*args, **kwargs)
|
return litellm.completion_with_retries(*args, **kwargs)
|
||||||
elif (
|
elif (
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue