forked from phoenix/litellm-mirror
Merge pull request #1615 from BerriAI/litellm_alerts_budget_tracking
[Feat] Alerts for Proxy Budgets
This commit is contained in:
commit
06a5dbfb5e
3 changed files with 95 additions and 6 deletions
|
@ -61,6 +61,8 @@ model_list:
|
||||||
litellm_settings:
|
litellm_settings:
|
||||||
fallbacks: [{"openai-gpt-3.5": ["azure-gpt-3.5"]}]
|
fallbacks: [{"openai-gpt-3.5": ["azure-gpt-3.5"]}]
|
||||||
success_callback: ['langfuse']
|
success_callback: ['langfuse']
|
||||||
|
max_budget: 0.025 # global budget for proxy
|
||||||
|
budget_duration: 30d # global budget duration, will reset after 30d
|
||||||
# cache: True
|
# cache: True
|
||||||
# setting callback class
|
# setting callback class
|
||||||
# callbacks: custom_callbacks.proxy_handler_instance # sets litellm.callbacks = [proxy_handler_instance]
|
# callbacks: custom_callbacks.proxy_handler_instance # sets litellm.callbacks = [proxy_handler_instance]
|
||||||
|
|
|
@ -407,9 +407,19 @@ async def user_api_key_auth(
|
||||||
user_max_budget is not None
|
user_max_budget is not None
|
||||||
and user_current_spend is not None
|
and user_current_spend is not None
|
||||||
):
|
):
|
||||||
|
asyncio.create_task(
|
||||||
|
proxy_logging_obj.budget_alerts(
|
||||||
|
user_max_budget=user_max_budget,
|
||||||
|
user_current_spend=user_current_spend,
|
||||||
|
type="user_and_proxy_budget",
|
||||||
|
user_info=_user,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
_user_id = _user.get("user_id", None)
|
||||||
if user_current_spend > user_max_budget:
|
if user_current_spend > user_max_budget:
|
||||||
raise Exception(
|
raise Exception(
|
||||||
f"ExceededBudget: User {valid_token.user_id} has exceeded their budget. Current spend: {user_current_spend}; Max Budget: {user_max_budget}"
|
f"ExceededBudget: User {_user_id} has exceeded their budget. Current spend: {user_current_spend}; Max Budget: {user_max_budget}"
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
# Token exists, not expired now check if its in budget for the user
|
# Token exists, not expired now check if its in budget for the user
|
||||||
|
@ -422,6 +432,15 @@ async def user_api_key_auth(
|
||||||
user_max_budget is not None
|
user_max_budget is not None
|
||||||
and user_current_spend is not None
|
and user_current_spend is not None
|
||||||
):
|
):
|
||||||
|
asyncio.create_task(
|
||||||
|
proxy_logging_obj.budget_alerts(
|
||||||
|
user_max_budget=user_max_budget,
|
||||||
|
user_current_spend=user_current_spend,
|
||||||
|
type="user_budget",
|
||||||
|
user_info=user_id_information,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
if user_current_spend > user_max_budget:
|
if user_current_spend > user_max_budget:
|
||||||
raise Exception(
|
raise Exception(
|
||||||
f"ExceededBudget: User {valid_token.user_id} has exceeded their budget. Current spend: {user_current_spend}; Max Budget: {user_max_budget}"
|
f"ExceededBudget: User {valid_token.user_id} has exceeded their budget. Current spend: {user_current_spend}; Max Budget: {user_max_budget}"
|
||||||
|
@ -448,6 +467,15 @@ async def user_api_key_auth(
|
||||||
|
|
||||||
# Check 4. Token Spend is under budget
|
# Check 4. Token Spend is under budget
|
||||||
if valid_token.spend is not None and valid_token.max_budget is not None:
|
if valid_token.spend is not None and valid_token.max_budget is not None:
|
||||||
|
asyncio.create_task(
|
||||||
|
proxy_logging_obj.budget_alerts(
|
||||||
|
user_max_budget=valid_token.max_budget,
|
||||||
|
user_current_spend=valid_token.spend,
|
||||||
|
type="token_budget",
|
||||||
|
user_info=valid_token,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
if valid_token.spend > valid_token.max_budget:
|
if valid_token.spend > valid_token.max_budget:
|
||||||
raise Exception(
|
raise Exception(
|
||||||
f"ExceededTokenBudget: Current spend for token: {valid_token.spend}; Max Budget for Token: {valid_token.max_budget}"
|
f"ExceededTokenBudget: Current spend for token: {valid_token.spend}; Max Budget for Token: {valid_token.max_budget}"
|
||||||
|
@ -1625,11 +1653,12 @@ async def startup_event():
|
||||||
user_id="default_user_id",
|
user_id="default_user_id",
|
||||||
)
|
)
|
||||||
|
|
||||||
if (
|
if prisma_client is not None and litellm.max_budget > 0:
|
||||||
prisma_client is not None
|
if litellm.budget_duration is None:
|
||||||
and litellm.max_budget > 0
|
raise Exception(
|
||||||
and litellm.budget_duration is not None
|
"budget_duration not set on Proxy. budget_duration is required to use max_budget."
|
||||||
):
|
)
|
||||||
|
|
||||||
# add proxy budget to db in the user table
|
# add proxy budget to db in the user table
|
||||||
await generate_key_helper_fn(
|
await generate_key_helper_fn(
|
||||||
user_id=litellm_proxy_budget_name,
|
user_id=litellm_proxy_budget_name,
|
||||||
|
|
|
@ -181,6 +181,62 @@ class ProxyLogging:
|
||||||
level="Low",
|
level="Low",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
async def budget_alerts(
    self,
    type: Literal["token_budget", "user_budget", "user_and_proxy_budget"],
    user_max_budget: float,
    user_current_spend: float,
    user_info=None,
):
    """
    Send an alert when a budget has been crossed or is close to being crossed.

    At most one alert is sent per call, checked in this order:
    - "High": spend has reached or exceeded the max budget
    - "Medium": 5% or less of the max budget is left
    - "Low": 15% or less of the max budget is left

    Nothing is sent when `self.alerting` is None.

    Parameters:
        type: which budget is being checked. Only "user_and_proxy_budget" changes
            behaviour here: `user_info` is then read as a mapping and summarised
            as user id / max budget / spend / email. For every other value
            `user_info` is rendered with str(). (The name shadows the builtin;
            it is kept because callers pass it as a keyword.)
        user_max_budget: float - the limit being checked against
        user_current_spend: float - the spend so far against that limit
        user_info: description of who the budget belongs to; only used to build
            the log / alert text
    """
    if self.alerting is None:
        # do nothing if alerting is not switched on
        return

    if type == "user_and_proxy_budget":
        # .get() so an incomplete record (e.g. no email) still produces an alert
        # rather than a KeyError
        details = dict(user_info or {})
        user_id = details.get("user_id")
        max_budget = details.get("max_budget")
        spend = details.get("spend")
        user_email = details.get("user_email")
        user_info = f"""\nUser ID: {user_id}\nMax Budget: {max_budget}\nSpend: {spend}\nUser Email: {user_email}"""
    else:
        user_info = str(user_info)

    # percent of max_budget left to spend
    # a budget of 0 (or less) has nothing left - avoid dividing by zero
    if user_max_budget > 0:
        percent_left = (user_max_budget - user_current_spend) / user_max_budget
    else:
        percent_left = 0.0
    verbose_proxy_logger.debug(
        f"Budget Alerts: Percent left: {percent_left} for {user_info}"
    )

    # check if crossed budget
    if user_current_spend >= user_max_budget:
        verbose_proxy_logger.debug(f"Budget Crossed for {user_info}")
        await self.alerting_handler(
            message=f"Budget Crossed for {user_info}",
            level="High",
        )
        return

    # check the "running low" thresholds, tightest first, so that only the
    # most severe matching alert is sent
    low_budget_alerts = (
        (0.05, "5% budget left for", "Medium"),
        (0.15, "15% budget left for", "Low"),
    )
    for threshold, prefix, level in low_budget_alerts:
        if percent_left <= threshold:
            await self.alerting_handler(
                message=f"{prefix} {user_info}",
                level=level,
            )
            return

    return
|
||||||
|
|
||||||
async def alerting_handler(
|
async def alerting_handler(
|
||||||
self, message: str, level: Literal["Low", "Medium", "High"]
|
self, message: str, level: Literal["Low", "Medium", "High"]
|
||||||
):
|
):
|
||||||
|
@ -191,6 +247,8 @@ class ProxyLogging:
|
||||||
- Requests are hanging
|
- Requests are hanging
|
||||||
- Calls are failing
|
- Calls are failing
|
||||||
- DB Read/Writes are failing
|
- DB Read/Writes are failing
|
||||||
|
- Proxy Close to max budget
|
||||||
|
- Key Close to max budget
|
||||||
|
|
||||||
Parameters:
|
Parameters:
|
||||||
level: str - Low|Medium|High - if calls might fail (Medium) or are failing (High); Currently, no alerts would be 'Low'.
|
level: str - Low|Medium|High - if calls might fail (Medium) or are failing (High); 'Low' is used for early warnings, e.g. 15% of a budget left.
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue