fix(proxy/utils.py): check cache before alerting user

2025-04-27 19:54:13 +00:00 · 2024-03-27 20:09:15 -07:00 · 2024-03-27 20:09:15 -07:00 · 7fe02405e0
commit 7fe02405e0
parent 265d32679e
2 changed files with 22 additions and 9 deletions
--- a/litellm/proxy/utils.py
+++ b/litellm/proxy/utils.py
@ -298,6 +298,7 @@ class ProxyLogging:
            return
        else:
            user_info = str(user_info)
        # percent of max_budget left to spend
        if user_max_budget > 0:
            percent_left = (user_max_budget - user_current_spend) / user_max_budget
@ -317,22 +318,35 @@ class ProxyLogging:
            )
            return
        ## PREVENTIVE ALERTING ## - https://github.com/BerriAI/litellm/issues/2727
        # - Alert once within 28d period
        # - Cache this information
        # - Don't re-alert, if alert already sent
        _cache: DualCache = self.call_details["user_api_key_cache"]
        # check if 5% of max budget is left
        if percent_left <= 0.05:
            message = "5% budget left for" + user_info
            result = await _cache.async_get_cache(key=message)
            if result is None:
                await self.alerting_handler(
                    message=message,
                    level="Medium",
                )
                await _cache.async_set_cache(key=message, value="SENT", ttl=2419200)
            return
        # check if 15% of max budget is left
        if percent_left <= 0.15:
            message = "15% budget left for" + user_info
            result = await _cache.async_get_cache(key=message)
            if result is None:
                await self.alerting_handler(
                    message=message,
                    level="Low",
                )
                await _cache.async_set_cache(key=message, value="SENT", ttl=2419200)
            return
        return
--- a/litellm/utils.py
+++ b/litellm/utils.py
@ -2775,7 +2775,6 @@ def client(original_function):
                        or isinstance(e, openai.Timeout)
                        or isinstance(e, openai.APIConnectionError)
                    ):
                        print_verbose(f"RETRY TRIGGERED!")
                        kwargs["num_retries"] = num_retries
                        return litellm.completion_with_retries(*args, **kwargs)
                elif (