From d328a4bad0bd68acf7404e1ffba5793929104f1f Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Thu, 25 Jan 2024 09:58:43 -0800 Subject: [PATCH 01/10] v0 basic structure --- litellm/proxy/proxy_server.py | 26 ++++++++++++++++++++++++++ litellm/proxy/utils.py | 10 ++++++++++ 2 files changed, 36 insertions(+) diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py index c13849e2a..8b42b94a9 100644 --- a/litellm/proxy/proxy_server.py +++ b/litellm/proxy/proxy_server.py @@ -407,6 +407,14 @@ async def user_api_key_auth( user_max_budget is not None and user_current_spend is not None ): + asyncio.create_task( + proxy_logging_obj.budget_alerts( + user_max_budget=user_max_budget, + user_current_spend=user_current_spend, + type="user_and_proxy_budget", + user_info=_user, + ) + ) if user_current_spend > user_max_budget: raise Exception( f"ExceededBudget: User {valid_token.user_id} has exceeded their budget. Current spend: {user_current_spend}; Max Budget: {user_max_budget}" @@ -422,6 +430,15 @@ async def user_api_key_auth( user_max_budget is not None and user_current_spend is not None ): + asyncio.create_task( + proxy_logging_obj.budget_alerts( + user_max_budget=user_max_budget, + user_current_spend=user_current_spend, + type="user_budget", + user_info=user_id_information, + ) + ) + if user_current_spend > user_max_budget: raise Exception( f"ExceededBudget: User {valid_token.user_id} has exceeded their budget. Current spend: {user_current_spend}; Max Budget: {user_max_budget}" @@ -448,6 +465,15 @@ async def user_api_key_auth( # Check 4. 
Token Spend is under budget if valid_token.spend is not None and valid_token.max_budget is not None: + asyncio.create_task( + proxy_logging_obj.budget_alerts( + user_max_budget=valid_token.max_budget, + user_current_spend=valid_token.spend, + type="token_budget", + user_info=valid_token, + ) + ) + if valid_token.spend > valid_token.max_budget: raise Exception( f"ExceededTokenBudget: Current spend for token: {valid_token.spend}; Max Budget for Token: {valid_token.max_budget}" diff --git a/litellm/proxy/utils.py b/litellm/proxy/utils.py index faa73d70b..998540578 100644 --- a/litellm/proxy/utils.py +++ b/litellm/proxy/utils.py @@ -181,6 +181,14 @@ class ProxyLogging: level="Low", ) + async def budget_alerts( + self, + type: Literal["token_budget", "user_budget", "user_and_proxy_budget"], + user_max_budget: float, + user_current_spend: float, + ): + pass + async def alerting_handler( self, message: str, level: Literal["Low", "Medium", "High"] ): @@ -191,6 +199,8 @@ class ProxyLogging: - Requests are hanging - Calls are failing - DB Read/Writes are failing + - Proxy Close to max budget + - Key Close to max budget Parameters: level: str - Low|Medium|High - if calls might fail (Medium) or are failing (High); Currently, no alerts would be 'Low'. 
From 1ab713c76c2d2692896b4b6d2abad604b4398e09 Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Thu, 25 Jan 2024 10:01:32 -0800 Subject: [PATCH 02/10] (feat) alerts proxy budgets --- litellm/proxy/utils.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/litellm/proxy/utils.py b/litellm/proxy/utils.py index 998540578..2130d0e01 100644 --- a/litellm/proxy/utils.py +++ b/litellm/proxy/utils.py @@ -186,7 +186,23 @@ class ProxyLogging: type: Literal["token_budget", "user_budget", "user_and_proxy_budget"], user_max_budget: float, user_current_spend: float, + user_info=None, ): + # percent of max_budget left to spend + percent_left = (user_max_budget - user_current_spend) / user_max_budget + + # check if 15% of max budget is left + if percent_left <= 0.15: + pass + + # check if 5% of max budget is left + if percent_left <= 0.05: + pass + + # check if crossed budget + if user_current_spend >= user_max_budget: + pass + pass async def alerting_handler( From 3ef2afb0e4b8caeecaa2d22ccfc1ef50796c5ff1 Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Thu, 25 Jan 2024 11:18:06 -0800 Subject: [PATCH 03/10] (feat) slack alerting budgets --- litellm/proxy/utils.py | 45 +++++++++++++++++++++++++++++++++--------- 1 file changed, 36 insertions(+), 9 deletions(-) diff --git a/litellm/proxy/utils.py b/litellm/proxy/utils.py index 2130d0e01..e955fb4a0 100644 --- a/litellm/proxy/utils.py +++ b/litellm/proxy/utils.py @@ -188,20 +188,47 @@ class ProxyLogging: user_current_spend: float, user_info=None, ): + if type == "user_and_proxy_budget": + user_info = dict(user_info) + user_id = user_info["user_id"] + max_budget = user_info["max_budget"] + spend = user_info["spend"] + user_email = user_info["user_email"] + user_info = f"""\nUser ID: {user_id}\nMax Budget: {max_budget}\nSpend: {spend}\nUser Email: {user_email}""" + else: + user_info = str(user_info) # percent of max_budget left to spend percent_left = (user_max_budget - user_current_spend) / user_max_budget - - # check 
if 15% of max budget is left - if percent_left <= 0.15: - pass - - # check if 5% of max budget is left - if percent_left <= 0.05: - pass + verbose_proxy_logger.debug( + f"Bduget Alerts: Percent left: {percent_left} for {user_info}" + ) # check if crossed budget if user_current_spend >= user_max_budget: - pass + message = "Budget Crossed for" + user_info + await self.alerting_handler( + message=message, + level="High", + ) + return + + # check if 5% of max budget is left + if percent_left <= 0.05: + message = "5 Percent budget left for" + user_info + await self.alerting_handler( + message=message, + level="Medium", + ) + return + + # check if 15% of max budget is left + if percent_left <= 0.15: + message = "15 Percent budget left for" + user_info + await self.alerting_handler( + message=message, + level="Low", + ) + return pass From 126b87e3fa786e4cdc15092e7616b4eb7fa7410c Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Thu, 25 Jan 2024 11:32:05 -0800 Subject: [PATCH 04/10] (fix) raise exception if max_budget is set and budget_duration is not --- litellm/proxy/proxy_server.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py index 8b42b94a9..6004fd836 100644 --- a/litellm/proxy/proxy_server.py +++ b/litellm/proxy/proxy_server.py @@ -1651,11 +1651,12 @@ async def startup_event(): user_id="default_user_id", ) - if ( - prisma_client is not None - and litellm.max_budget > 0 - and litellm.budget_duration is not None - ): + if prisma_client is not None and litellm.max_budget > 0: + if litellm.budget_duration is None: + raise Exception( + "budget_duration not set on Proxy. budget_duration is required to use max_budget." 
+ ) + # add proxy budget to db in the user table await generate_key_helper_fn( user_id=litellm_proxy_budget_name, From 450b0a0ad17adbe9887ed6c41b3dd81037056124 Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Thu, 25 Jan 2024 11:39:57 -0800 Subject: [PATCH 05/10] (fix) raise correct error when proxy crossed budget --- litellm/proxy/proxy_server.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py index 6004fd836..a10cda723 100644 --- a/litellm/proxy/proxy_server.py +++ b/litellm/proxy/proxy_server.py @@ -415,9 +415,11 @@ async def user_api_key_auth( user_info=_user, ) ) + + _user_id = _user.get("user_id", None) if user_current_spend > user_max_budget: raise Exception( - f"ExceededBudget: User {valid_token.user_id} has exceeded their budget. Current spend: {user_current_spend}; Max Budget: {user_max_budget}" + f"ExceededBudget: User {_user_id} has exceeded their budget. Current spend: {user_current_spend}; Max Budget: {user_max_budget}" ) else: # Token exists, not expired now check if its in budget for the user From b3f91844cb85f52d053d30566fbdae913a46b604 Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Thu, 25 Jan 2024 11:40:20 -0800 Subject: [PATCH 06/10] (fix) better alert message on budgets --- litellm/proxy/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/litellm/proxy/utils.py b/litellm/proxy/utils.py index e955fb4a0..a60041d81 100644 --- a/litellm/proxy/utils.py +++ b/litellm/proxy/utils.py @@ -214,7 +214,7 @@ class ProxyLogging: # check if 5% of max budget is left if percent_left <= 0.05: - message = "5 Percent budget left for" + user_info + message = "5% budget left for" + user_info await self.alerting_handler( message=message, level="Medium", @@ -223,7 +223,7 @@ class ProxyLogging: # check if 15% of max budget is left if percent_left <= 0.15: - message = "15 Percent budget left for" + user_info + message = "15% budget left for" + user_info await 
self.alerting_handler( message=message, level="Low", From 6fb3f8f239b212dd4f0471023a7a10bcd092adaf Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Thu, 25 Jan 2024 11:40:56 -0800 Subject: [PATCH 07/10] (docs) track max_budget on proxy config.yaml --- litellm/proxy/proxy_config.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/litellm/proxy/proxy_config.yaml b/litellm/proxy/proxy_config.yaml index b06faac32..65aa21d04 100644 --- a/litellm/proxy/proxy_config.yaml +++ b/litellm/proxy/proxy_config.yaml @@ -61,6 +61,8 @@ model_list: litellm_settings: fallbacks: [{"openai-gpt-3.5": ["azure-gpt-3.5"]}] success_callback: ['langfuse'] + max_budget: 0.025 + budget_duration: 30d # cache: True # setting callback class # callbacks: custom_callbacks.proxy_handler_instance # sets litellm.callbacks = [proxy_handler_instance] From 6dc9be4d43218616f193ef4b4fae5123e8427bd0 Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Thu, 25 Jan 2024 11:41:35 -0800 Subject: [PATCH 08/10] (docs) config.yaml --- litellm/proxy/proxy_config.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/litellm/proxy/proxy_config.yaml b/litellm/proxy/proxy_config.yaml index 65aa21d04..7cb2714f4 100644 --- a/litellm/proxy/proxy_config.yaml +++ b/litellm/proxy/proxy_config.yaml @@ -61,8 +61,8 @@ model_list: litellm_settings: fallbacks: [{"openai-gpt-3.5": ["azure-gpt-3.5"]}] success_callback: ['langfuse'] - max_budget: 0.025 - budget_duration: 30d + max_budget: 0.025 # global budget for proxy + budget_duration: 30d # global budget duration, will reset after 30d # cache: True # setting callback class # callbacks: custom_callbacks.proxy_handler_instance # sets litellm.callbacks = [proxy_handler_instance] From e80d32dcdd9b2590a139b9b3e623353f13d941ad Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Thu, 25 Jan 2024 11:56:52 -0800 Subject: [PATCH 09/10] (fix) alerting debug statements --- litellm/proxy/utils.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git 
a/litellm/proxy/utils.py b/litellm/proxy/utils.py index a60041d81..e361a82d3 100644 --- a/litellm/proxy/utils.py +++ b/litellm/proxy/utils.py @@ -200,11 +200,12 @@ class ProxyLogging: # percent of max_budget left to spend percent_left = (user_max_budget - user_current_spend) / user_max_budget verbose_proxy_logger.debug( - f"Bduget Alerts: Percent left: {percent_left} for {user_info}" + f"Budget Alerts: Percent left: {percent_left} for {user_info}" ) # check if crossed budget if user_current_spend >= user_max_budget: + verbose_proxy_logger.debug(f"Budget Crossed for {user_info}") message = "Budget Crossed for" + user_info await self.alerting_handler( message=message, @@ -230,7 +231,7 @@ class ProxyLogging: ) return - pass + return async def alerting_handler( self, message: str, level: Literal["Low", "Medium", "High"] From ca12e703690237d05b9f448e3196302e5b2c340b Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Thu, 25 Jan 2024 11:58:55 -0800 Subject: [PATCH 10/10] (fix) do nothing if alerting is not switched on --- litellm/proxy/utils.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/litellm/proxy/utils.py b/litellm/proxy/utils.py index e361a82d3..43c61e397 100644 --- a/litellm/proxy/utils.py +++ b/litellm/proxy/utils.py @@ -188,6 +188,10 @@ class ProxyLogging: user_current_spend: float, user_info=None, ): + if self.alerting is None: + # do nothing if alerting is not switched on + return + if type == "user_and_proxy_budget": user_info = dict(user_info) user_id = user_info["user_id"]