diff --git a/litellm/proxy/proxy_config.yaml b/litellm/proxy/proxy_config.yaml index b06faac328..7cb2714f42 100644 --- a/litellm/proxy/proxy_config.yaml +++ b/litellm/proxy/proxy_config.yaml @@ -61,6 +61,8 @@ model_list: litellm_settings: fallbacks: [{"openai-gpt-3.5": ["azure-gpt-3.5"]}] success_callback: ['langfuse'] + max_budget: 0.025 # global budget for proxy + budget_duration: 30d # global budget duration, will reset after 30d # cache: True # setting callback class # callbacks: custom_callbacks.proxy_handler_instance # sets litellm.callbacks = [proxy_handler_instance] diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py index b53484b868..1de047cf9a 100644 --- a/litellm/proxy/proxy_server.py +++ b/litellm/proxy/proxy_server.py @@ -407,9 +407,19 @@ async def user_api_key_auth( user_max_budget is not None and user_current_spend is not None ): + asyncio.create_task( + proxy_logging_obj.budget_alerts( + user_max_budget=user_max_budget, + user_current_spend=user_current_spend, + type="user_and_proxy_budget", + user_info=_user, + ) + ) + + _user_id = _user.get("user_id", None) if user_current_spend > user_max_budget: raise Exception( - f"ExceededBudget: User {valid_token.user_id} has exceeded their budget. Current spend: {user_current_spend}; Max Budget: {user_max_budget}" + f"ExceededBudget: User {_user_id} has exceeded their budget. Current spend: {user_current_spend}; Max Budget: {user_max_budget}" ) else: # Token exists, not expired now check if its in budget for the user @@ -422,6 +432,15 @@ async def user_api_key_auth( user_max_budget is not None and user_current_spend is not None ): + asyncio.create_task( + proxy_logging_obj.budget_alerts( + user_max_budget=user_max_budget, + user_current_spend=user_current_spend, + type="user_budget", + user_info=user_id_information, + ) + ) + if user_current_spend > user_max_budget: raise Exception( f"ExceededBudget: User {valid_token.user_id} has exceeded their budget. 
async def budget_alerts(
    self,
    type: Literal["token_budget", "user_budget", "user_and_proxy_budget"],
    user_max_budget: float,
    user_current_spend: float,
    user_info=None,
):
    """
    Send an alert when a budget is crossed or is close to being crossed.

    Alert levels:
    - High: spend >= max budget
    - Medium: 5% (or less) of the max budget is left
    - Low: 15% (or less) of the max budget is left

    Parameters:
        type: which budget is being checked (token, user, or user + proxy).
        user_max_budget: the budget ceiling being enforced.
        user_current_spend: the spend accumulated so far against that budget.
        user_info: object describing the budget owner; for
            "user_and_proxy_budget" it is converted with dict() and its
            user_id / max_budget / spend / user_email entries are reported,
            otherwise str(user_info) is used as-is.

    Does nothing when alerting is not switched on (self.alerting is None).
    """
    if self.alerting is None:
        # do nothing if alerting is not switched on
        return

    if type == "user_and_proxy_budget":
        # Use .get() so a missing field (e.g. no email on the user row)
        # degrades to "None" in the message instead of killing the alert.
        user_info = dict(user_info or {})
        user_id = user_info.get("user_id")
        max_budget = user_info.get("max_budget")
        spend = user_info.get("spend")
        user_email = user_info.get("user_email")
        user_info = f"""\nUser ID: {user_id}\nMax Budget: {max_budget}\nSpend: {spend}\nUser Email: {user_email}"""
    else:
        user_info = str(user_info)

    # percent of max_budget left to spend
    # A max budget of 0 would raise ZeroDivisionError here; this coroutine is
    # launched via asyncio.create_task, so the error would go unnoticed and
    # the "Budget Crossed" alert below would never be sent.
    if user_max_budget > 0:
        percent_left = (user_max_budget - user_current_spend) / user_max_budget
    else:
        percent_left = 0.0
    verbose_proxy_logger.debug(
        f"Budget Alerts: Percent left: {percent_left} for {user_info}"
    )

    # check if crossed budget
    if user_current_spend >= user_max_budget:
        verbose_proxy_logger.debug(f"Budget Crossed for {user_info}")
        await self.alerting_handler(
            message=f"Budget Crossed for {user_info}",
            level="High",
        )
        return

    # check if 5% of max budget is left
    if percent_left <= 0.05:
        await self.alerting_handler(
            message=f"5% budget left for {user_info}",
            level="Medium",
        )
        return

    # check if 15% of max budget is left
    if percent_left <= 0.15:
        await self.alerting_handler(
            message=f"15% budget left for {user_info}",
            level="Low",
        )
        return

    return