mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-25 18:54:30 +00:00
v0 basic structure
This commit is contained in:
parent
72275ad8cb
commit
565531fe9e
2 changed files with 36 additions and 0 deletions
|
@ -407,6 +407,14 @@ async def user_api_key_auth(
|
||||||
user_max_budget is not None
|
user_max_budget is not None
|
||||||
and user_current_spend is not None
|
and user_current_spend is not None
|
||||||
):
|
):
|
||||||
|
asyncio.create_task(
|
||||||
|
proxy_logging_obj.budget_alerts(
|
||||||
|
user_max_budget=user_max_budget,
|
||||||
|
user_current_spend=user_current_spend,
|
||||||
|
type="user_and_proxy_budget",
|
||||||
|
user_info=_user,
|
||||||
|
)
|
||||||
|
)
|
||||||
if user_current_spend > user_max_budget:
|
if user_current_spend > user_max_budget:
|
||||||
raise Exception(
|
raise Exception(
|
||||||
f"ExceededBudget: User {valid_token.user_id} has exceeded their budget. Current spend: {user_current_spend}; Max Budget: {user_max_budget}"
|
f"ExceededBudget: User {valid_token.user_id} has exceeded their budget. Current spend: {user_current_spend}; Max Budget: {user_max_budget}"
|
||||||
|
@ -422,6 +430,15 @@ async def user_api_key_auth(
|
||||||
user_max_budget is not None
|
user_max_budget is not None
|
||||||
and user_current_spend is not None
|
and user_current_spend is not None
|
||||||
):
|
):
|
||||||
|
asyncio.create_task(
|
||||||
|
proxy_logging_obj.budget_alerts(
|
||||||
|
user_max_budget=user_max_budget,
|
||||||
|
user_current_spend=user_current_spend,
|
||||||
|
type="user_budget",
|
||||||
|
user_info=user_id_information,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
if user_current_spend > user_max_budget:
|
if user_current_spend > user_max_budget:
|
||||||
raise Exception(
|
raise Exception(
|
||||||
f"ExceededBudget: User {valid_token.user_id} has exceeded their budget. Current spend: {user_current_spend}; Max Budget: {user_max_budget}"
|
f"ExceededBudget: User {valid_token.user_id} has exceeded their budget. Current spend: {user_current_spend}; Max Budget: {user_max_budget}"
|
||||||
|
@ -448,6 +465,15 @@ async def user_api_key_auth(
|
||||||
|
|
||||||
# Check 4. Token Spend is under budget
|
# Check 4. Token Spend is under budget
|
||||||
if valid_token.spend is not None and valid_token.max_budget is not None:
|
if valid_token.spend is not None and valid_token.max_budget is not None:
|
||||||
|
asyncio.create_task(
|
||||||
|
proxy_logging_obj.budget_alerts(
|
||||||
|
user_max_budget=valid_token.max_budget,
|
||||||
|
user_current_spend=valid_token.spend,
|
||||||
|
type="token_budget",
|
||||||
|
user_info=valid_token,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
if valid_token.spend > valid_token.max_budget:
|
if valid_token.spend > valid_token.max_budget:
|
||||||
raise Exception(
|
raise Exception(
|
||||||
f"ExceededTokenBudget: Current spend for token: {valid_token.spend}; Max Budget for Token: {valid_token.max_budget}"
|
f"ExceededTokenBudget: Current spend for token: {valid_token.spend}; Max Budget for Token: {valid_token.max_budget}"
|
||||||
|
|
|
@ -181,6 +181,14 @@ class ProxyLogging:
|
||||||
level="Low",
|
level="Low",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
async def budget_alerts(
    self,
    type: Literal["token_budget", "user_budget", "user_and_proxy_budget"],
    user_max_budget: float,
    user_current_spend: float,
    user_info=None,
):
    """
    Placeholder hook for budget alerting (v0: intentionally does nothing yet).

    Callers schedule this coroutine via ``asyncio.create_task`` right before
    enforcing a budget check, passing the budget, the current spend and the
    object the budget belongs to.

    Parameters:
        type: which budget is being checked - "token_budget", "user_budget"
            or "user_and_proxy_budget".
        user_max_budget: the configured budget ceiling.
        user_current_spend: the spend accumulated so far.
        user_info: optional context about the budget owner. Every caller
            passes it as a keyword argument, so it must be accepted here;
            without it the call raises TypeError before the task is even
            created. Defaults to None so calls that omit it keep working.

    Returns:
        None
    """
    # No alerting logic yet - accepting the arguments is the whole contract.
    pass
|
||||||
|
|
||||||
async def alerting_handler(
|
async def alerting_handler(
|
||||||
self, message: str, level: Literal["Low", "Medium", "High"]
|
self, message: str, level: Literal["Low", "Medium", "High"]
|
||||||
):
|
):
|
||||||
|
@ -191,6 +199,8 @@ class ProxyLogging:
|
||||||
- Requests are hanging
|
- Requests are hanging
|
||||||
- Calls are failing
|
- Calls are failing
|
||||||
- DB Read/Writes are failing
|
- DB Read/Writes are failing
|
||||||
|
- Proxy Close to max budget
|
||||||
|
- Key Close to max budget
|
||||||
|
|
||||||
Parameters:
|
Parameters:
|
||||||
level: str - Low|Medium|High - if calls might fail (Medium) or are failing (High); Currently, no alerts would be 'Low'.
|
level: str - Low|Medium|High - if calls might fail (Medium) or are failing (High); Currently, no alerts would be 'Low'.
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue