forked from phoenix/litellm-mirror
fix(proxy_server.py): enforce end user budgets with 'litellm.max_end_user_budget' param
parent 786116783f
commit 5280fc809f
5 changed files with 22 additions and 14 deletions

@@ -174,6 +174,7 @@ upperbound_key_generate_params: Optional[Dict] = None
 default_user_params: Optional[Dict] = None
 default_team_settings: Optional[List] = None
 max_user_budget: Optional[float] = None
+max_end_user_budget: Optional[float] = None
 #### RELIABILITY ####
 request_timeout: Optional[float] = 6000
 num_retries: Optional[int] = None # per model endpoint

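This hunk registers `max_end_user_budget` as a new module-level litellm setting next to the existing `max_user_budget`. A minimal sketch of how an operator might set it in Python follows; the dollar amounts are illustrative, not values from this commit (on the proxy these module-level settings are normally supplied via the config file rather than set directly):

```python
import litellm

# Illustrative defaults: cap any end user without an explicit budget row at $0.10,
# and keep a separate default budget for internal users.
litellm.max_end_user_budget = 0.10
litellm.max_user_budget = 1.00
```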
@@ -69,7 +69,7 @@ def common_checks(
         end_user_budget = end_user_object.litellm_budget_table.max_budget
         if end_user_budget is not None and end_user_object.spend > end_user_budget:
             raise Exception(
-                f"End User={end_user_object.user_id} over budget. Spend={end_user_object.spend}, Budget={end_user_budget}"
+                f"ExceededBudget: End User={end_user_object.user_id} over budget. Spend={end_user_object.spend}, Budget={end_user_budget}"
             )
     # 5. [OPTIONAL] If 'enforce_user_param' enabled - did developer pass in 'user' param for openai endpoints
     if (

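To make the enforcement path in `common_checks` concrete, here is a small self-contained sketch of the same check. `EndUser` and `check_end_user_budget` are illustrative stand-ins for this example only; the real code reads the budget off `end_user_object.litellm_budget_table.max_budget`:

```python
from dataclasses import dataclass
from typing import Optional


@dataclass
class EndUser:
    # stand-in for LiteLLM_EndUserTable plus its linked budget row
    user_id: str
    spend: float
    max_budget: Optional[float] = None


def check_end_user_budget(end_user: Optional[EndUser]) -> None:
    """Raise once an end user with a budget has spent past it."""
    if end_user is None or end_user.max_budget is None:
        return
    if end_user.spend > end_user.max_budget:
        raise Exception(
            f"ExceededBudget: End User={end_user.user_id} over budget. "
            f"Spend={end_user.spend}, Budget={end_user.max_budget}"
        )


try:
    check_end_user_budget(EndUser(user_id="end-user-1", spend=0.02, max_budget=0.01))
except Exception as e:
    print(e)  # ExceededBudget: End User=end-user-1 over budget. Spend=0.02, Budget=0.01
```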
@@ -692,7 +692,6 @@ async def user_api_key_auth(
                 )

             # Check 2. If user_id for this token is in budget
-            ## Check 2.2 [OPTIONAL - checked only if litellm.max_user_budget is not None] If 'user' passed in /chat/completions is in budget
             if valid_token.user_id is not None:
                 user_id_list = [valid_token.user_id]
                 for id in user_id_list:

@@ -909,6 +908,13 @@ async def user_api_key_auth(
                     models=valid_token.team_models,
                 )

+            _end_user_object = None
+            if "user" in request_data:
+                _id = "end_user_id:{}".format(request_data["user"])
+                _end_user_object = await user_api_key_cache.async_get_cache(key=_id)
+                if _end_user_object is not None:
+                    _end_user_object = LiteLLM_EndUserTable(**_end_user_object)
+
             global_proxy_spend = None
             if litellm.max_budget > 0: # user set proxy max budget
                 # check cache

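The lookup added above only consults the in-memory `user_api_key_cache`; if the `user` value in the request has never been written there, `_end_user_object` stays `None` and the end-user budget check is skipped. A rough sketch of that read path, with a plain dict standing in for the cache (the names here are illustrative, not litellm APIs):

```python
from typing import Any, Dict, Optional

# plain dict standing in for the proxy's user_api_key_cache
_cache: Dict[str, Any] = {}


async def get_end_user_from_cache(request_data: dict) -> Optional[dict]:
    """Return the cached end-user record for request_data['user'], if any."""
    if "user" not in request_data:
        return None
    key = "end_user_id:{}".format(request_data["user"])
    return _cache.get(key)
```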
@@ -947,7 +953,7 @@ async def user_api_key_auth(
             _ = common_checks(
                 request_body=request_data,
                 team_object=_team_obj,
-                end_user_object=None,
+                end_user_object=_end_user_object,
                 general_settings=general_settings,
                 global_proxy_spend=global_proxy_spend,
                 route=route,

@@ -1617,7 +1623,7 @@ async def update_cache(

     async def _update_user_cache():
         ## UPDATE CACHE FOR USER ID + GLOBAL PROXY
-        user_ids = [user_id, litellm_proxy_budget_name, end_user_id]
+        user_ids = [user_id, litellm_proxy_budget_name]
         try:
             for _id in user_ids:
                 # Fetch the existing cost for the given user

@@ -1664,8 +1670,7 @@ async def update_cache(
             )

     async def _update_end_user_cache():
         ## UPDATE CACHE FOR USER ID + GLOBAL PROXY
-        _id = end_user_id
+        _id = "end_user_id:{}".format(end_user_id)
         try:
             # Fetch the existing cost for the given user
             existing_spend_obj = await user_api_key_cache.async_get_cache(key=_id)

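This change switches the write side of the end-user cache from the bare `end_user_id` to the `"end_user_id:{}"` prefixed key, matching the read added in `user_api_key_auth`. One way to keep the two sides from drifting apart is a tiny shared helper; this is only a suggestion, not something the commit introduces:

```python
def end_user_cache_key(end_user_id: str) -> str:
    """Single place that defines the cache-key format for end-user records."""
    return "end_user_id:{}".format(end_user_id)


assert end_user_cache_key("user-42") == "end_user_id:user-42"
```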
@@ -1673,14 +1678,14 @@ async def update_cache(
                 # if user does not exist in LiteLLM_UserTable, create a new user
                 existing_spend = 0
                 max_user_budget = None
-                if litellm.max_user_budget is not None:
-                    max_user_budget = litellm.max_user_budget
+                if litellm.max_end_user_budget is not None:
+                    max_end_user_budget = litellm.max_end_user_budget
                 existing_spend_obj = LiteLLM_EndUserTable(
                     user_id=_id,
                     spend=0,
                     blocked=False,
                     litellm_budget_table=LiteLLM_BudgetTable(
-                        max_budget=max_user_budget
+                        max_budget=max_end_user_budget
                     ),
                 )
                 verbose_proxy_logger.debug(

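When an end user is seen for the first time, the cache entry is now seeded with zero spend and a budget table whose `max_budget` comes from `litellm.max_end_user_budget`. A simplified sketch of that default-record construction, using plain dicts in place of `LiteLLM_EndUserTable` / `LiteLLM_BudgetTable` (illustrative only):

```python
from typing import Optional


def default_end_user_record(end_user_id: str, max_end_user_budget: Optional[float]) -> dict:
    """Build the record cached for an end user that has no database row yet."""
    return {
        "user_id": "end_user_id:{}".format(end_user_id),
        "spend": 0,
        "blocked": False,
        "litellm_budget_table": {"max_budget": max_end_user_budget},
    }


record = default_end_user_record("user-42", max_end_user_budget=0.10)
```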
@@ -1941,9 +1941,9 @@ async def update_spend(
                 end_user_id,
                 response_cost,
             ) in prisma_client.end_user_list_transactons.items():
-                max_user_budget = None
-                if litellm.max_user_budget is not None:
-                    max_user_budget = litellm.max_user_budget
+                max_end_user_budget = None
+                if litellm.max_end_user_budget is not None:
+                    max_end_user_budget = litellm.max_end_user_budget
                 new_user_obj = LiteLLM_EndUserTable(
                     user_id=end_user_id, spend=response_cost, blocked=False
                 )

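`update_spend` drains `prisma_client.end_user_list_transactons`, an in-memory map of end user id to cost accumulated since the last database flush, and the hunk above switches the default budget applied while upserting those rows to `litellm.max_end_user_budget`. A sketch of the accumulation side of that pattern; the dict and function names here are illustrative, not the proxy's actual attributes:

```python
from collections import defaultdict
from typing import DefaultDict, Dict

# end_user_id -> cost accumulated since the last database flush
end_user_spend_transactions: DefaultDict[str, float] = defaultdict(float)


def record_end_user_spend(end_user_id: str, response_cost: float) -> None:
    end_user_spend_transactions[end_user_id] += response_cost


def flush_end_user_spend() -> Dict[str, float]:
    """Return and clear the pending per-end-user spend, as a periodic writer would."""
    pending = dict(end_user_spend_transactions)
    end_user_spend_transactions.clear()
    return pending
```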
@@ -324,7 +324,7 @@ def test_call_with_end_user_over_budget(prisma_client):

     setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
     setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
-    setattr(litellm, "max_user_budget", 0.00001)
+    setattr(litellm, "max_end_user_budget", 0.00001)
     try:

         async def test():

@@ -378,7 +378,9 @@ def test_call_with_end_user_over_budget(prisma_client):
                     "user_api_key_user_id": user,
                 },
                 "proxy_server_request": {
                     "body": {
                         "user": user,
                     }
                 },
             },
             "response_cost": 10,

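The test drives the new check by putting the end user id in the request body's `user` field, the OpenAI-compatible way callers identify end users to the proxy. A hedged example of what a client request subject to this budget enforcement might look like against a running proxy; the URL, key, and model name are placeholders:

```python
import openai

client = openai.OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")

client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "hi"}],
    user="end-user-1",  # the proxy tracks spend and budget per this id
)
# Once "end-user-1" has spent past litellm.max_end_user_budget, the proxy
# rejects further requests with the ExceededBudget error shown above.
```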