diff --git a/litellm/exceptions.py b/litellm/exceptions.py
index d189b7ebe..abddb108a 100644
--- a/litellm/exceptions.py
+++ b/litellm/exceptions.py
@@ -314,6 +314,7 @@ class BudgetExceededError(Exception):
         self.current_cost = current_cost
         self.max_budget = max_budget
         message = f"Budget has been exceeded! Current cost: {current_cost}, Max budget: {max_budget}"
+        self.message = message
         super().__init__(message)
 
 
diff --git a/litellm/integrations/slack_alerting.py b/litellm/integrations/slack_alerting.py
index c1fdc553b..08af88562 100644
--- a/litellm/integrations/slack_alerting.py
+++ b/litellm/integrations/slack_alerting.py
@@ -1443,7 +1443,9 @@ Model Info:
         if response.status_code == 200:
             pass
         else:
-            print("Error sending slack alert. Error=", response.text)  # noqa
+            verbose_proxy_logger.debug(
+                "Error sending slack alert. Error=%s", response.text
+            )
 
     async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
         """Log deployment latency"""
diff --git a/litellm/proxy/auth/auth_checks.py b/litellm/proxy/auth/auth_checks.py
index d06469b71..a6e97960e 100644
--- a/litellm/proxy/auth/auth_checks.py
+++ b/litellm/proxy/auth/auth_checks.py
@@ -193,13 +193,28 @@ async def get_end_user_object(
     if end_user_id is None:
         return None
     _key = "end_user_id:{}".format(end_user_id)
+
+    def check_in_budget(end_user_obj: LiteLLM_EndUserTable):
+        """Raise BudgetExceededError when the end user's spend exceeds their max budget."""
+        if end_user_obj.litellm_budget_table is None:
+            return
+        end_user_budget = end_user_obj.litellm_budget_table.max_budget
+        if end_user_budget is not None and end_user_obj.spend > end_user_budget:
+            raise litellm.BudgetExceededError(
+                current_cost=end_user_obj.spend, max_budget=end_user_budget
+            )
+
     # check if in cache
     cached_user_obj = await user_api_key_cache.async_get_cache(key=_key)
     if cached_user_obj is not None:
         if isinstance(cached_user_obj, dict):
-            return LiteLLM_EndUserTable(**cached_user_obj)
+            return_obj = LiteLLM_EndUserTable(**cached_user_obj)
+            check_in_budget(end_user_obj=return_obj)
+            return return_obj
         elif isinstance(cached_user_obj, LiteLLM_EndUserTable):
-            return cached_user_obj
+            return_obj = cached_user_obj
+            check_in_budget(end_user_obj=return_obj)
+            return return_obj
     # else, check db
     try:
         response = await prisma_client.db.litellm_endusertable.find_unique(
@@ -217,8 +232,12 @@ async def get_end_user_object(
 
         _response = LiteLLM_EndUserTable(**response.dict())
 
+        check_in_budget(end_user_obj=_response)
+
         return _response
     except Exception as e:  # if end-user not in db
+        if isinstance(e, litellm.BudgetExceededError):
+            raise e
         return None
 
 
diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py
index 2e9c25665..f75a1e9d1 100644
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@@ -722,6 +722,8 @@ async def user_api_key_auth(
                                     budget_info.max_budget
                                 )
             except Exception as e:
+                if isinstance(e, litellm.BudgetExceededError):
+                    raise e
                 verbose_proxy_logger.debug(
                     "Unable to find user in db. Error - {}".format(str(e))
                 )
@@ -1410,6 +1412,10 @@ async def user_api_key_auth(
             raise Exception()
     except Exception as e:
         traceback.print_exc()
+        if isinstance(e, litellm.BudgetExceededError):
+            raise ProxyException(
+                message=e.message, type="auth_error", param=None, code=400
+            )
         if isinstance(e, HTTPException):
             raise ProxyException(
                 message=getattr(e, "detail", f"Authentication Error({str(e)})"),