Enforce end-user budgets separately from internal-user budgets.

What this patch does:
  * litellm/__init__.py: adds the `max_end_user_budget` setting (default None).
  * litellm/proxy/auth/auth_checks.py: prefixes the end-user over-budget error
    message with "ExceededBudget: ".
  * litellm/proxy/proxy_server.py: looks up the end user named by the request's
    "user" field in the cache under the key "end_user_id:<user>" and passes it
    to common_checks(); end-user spend is cached under that same namespaced
    key and is no longer written by _update_user_cache().
  * litellm/proxy/utils.py, litellm/tests/test_key_generate_prisma.py: switch
    from `max_user_budget` to `max_end_user_budget`; the test now nests "user"
    under proxy_server_request["body"].

Review amendment: in _update_end_user_cache() the initializer
`max_user_budget = None` is renamed to `max_end_user_budget = None` as well.
Without it, `max_end_user_budget` is unbound whenever
litellm.max_end_user_budget is None (the default), and building the
LiteLLM_BudgetTable raises UnboundLocalError for every new end user.
The `index` line for proxy_server.py predates this amendment.

diff --git a/litellm/__init__.py b/litellm/__init__.py
index ea5844320..9e7c26186 100644
--- a/litellm/__init__.py
+++ b/litellm/__init__.py
@@ -174,6 +174,7 @@ upperbound_key_generate_params: Optional[Dict] = None
 default_user_params: Optional[Dict] = None
 default_team_settings: Optional[List] = None
 max_user_budget: Optional[float] = None
+max_end_user_budget: Optional[float] = None
 #### RELIABILITY ####
 request_timeout: Optional[float] = 6000
 num_retries: Optional[int] = None  # per model endpoint
diff --git a/litellm/proxy/auth/auth_checks.py b/litellm/proxy/auth/auth_checks.py
index e9280fd31..794d393c1 100644
--- a/litellm/proxy/auth/auth_checks.py
+++ b/litellm/proxy/auth/auth_checks.py
@@ -69,7 +69,7 @@ def common_checks(
         end_user_budget = end_user_object.litellm_budget_table.max_budget
         if end_user_budget is not None and end_user_object.spend > end_user_budget:
             raise Exception(
-                f"End User={end_user_object.user_id} over budget. Spend={end_user_object.spend}, Budget={end_user_budget}"
+                f"ExceededBudget: End User={end_user_object.user_id} over budget. Spend={end_user_object.spend}, Budget={end_user_budget}"
             )
     # 5. [OPTIONAL] If 'enforce_user_param' enabled - did developer pass in 'user' param for openai endpoints
     if (
diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py
index 156157976..e5d57c8a6 100644
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@@ -692,7 +692,6 @@ async def user_api_key_auth(
                     )
 
             # Check 2. If user_id for this token is in budget
-            ## Check 2.2 [OPTIONAL - checked only if litellm.max_user_budget is not None] If 'user' passed in /chat/completions is in budget
             if valid_token.user_id is not None:
                 user_id_list = [valid_token.user_id]
                 for id in user_id_list:
@@ -909,6 +908,13 @@ async def user_api_key_auth(
                 models=valid_token.team_models,
             )
 
+            _end_user_object = None
+            if "user" in request_data:
+                _id = "end_user_id:{}".format(request_data["user"])
+                _end_user_object = await user_api_key_cache.async_get_cache(key=_id)
+                if _end_user_object is not None:
+                    _end_user_object = LiteLLM_EndUserTable(**_end_user_object)
+
             global_proxy_spend = None
             if litellm.max_budget > 0:  # user set proxy max budget
                 # check cache
@@ -947,7 +953,7 @@ async def user_api_key_auth(
             _ = common_checks(
                 request_body=request_data,
                 team_object=_team_obj,
-                end_user_object=None,
+                end_user_object=_end_user_object,
                 general_settings=general_settings,
                 global_proxy_spend=global_proxy_spend,
                 route=route,
@@ -1617,7 +1623,7 @@ async def update_cache(
 
     async def _update_user_cache():
         ## UPDATE CACHE FOR USER ID + GLOBAL PROXY
-        user_ids = [user_id, litellm_proxy_budget_name, end_user_id]
+        user_ids = [user_id, litellm_proxy_budget_name]
         try:
             for _id in user_ids:
                 # Fetch the existing cost for the given user
@@ -1664,8 +1670,7 @@ async def update_cache(
             )
 
     async def _update_end_user_cache():
-        ## UPDATE CACHE FOR USER ID + GLOBAL PROXY
-        _id = end_user_id
+        _id = "end_user_id:{}".format(end_user_id)
         try:
             # Fetch the existing cost for the given user
             existing_spend_obj = await user_api_key_cache.async_get_cache(key=_id)
@@ -1673,14 +1678,14 @@ async def update_cache(
                 # if user does not exist in LiteLLM_UserTable, create a new user
                 existing_spend = 0
-                max_user_budget = None
-                if litellm.max_user_budget is not None:
-                    max_user_budget = litellm.max_user_budget
+                max_end_user_budget = None
+                if litellm.max_end_user_budget is not None:
+                    max_end_user_budget = litellm.max_end_user_budget
                 existing_spend_obj = LiteLLM_EndUserTable(
                     user_id=_id,
                     spend=0,
                     blocked=False,
                     litellm_budget_table=LiteLLM_BudgetTable(
-                        max_budget=max_user_budget
+                        max_budget=max_end_user_budget
                     ),
                 )
             verbose_proxy_logger.debug(
diff --git a/litellm/proxy/utils.py b/litellm/proxy/utils.py
index f313a7985..f70d67aac 100644
--- a/litellm/proxy/utils.py
+++ b/litellm/proxy/utils.py
@@ -1941,9 +1941,9 @@ async def update_spend(
                             end_user_id,
                             response_cost,
                         ) in prisma_client.end_user_list_transactons.items():
-                            max_user_budget = None
-                            if litellm.max_user_budget is not None:
-                                max_user_budget = litellm.max_user_budget
+                            max_end_user_budget = None
+                            if litellm.max_end_user_budget is not None:
+                                max_end_user_budget = litellm.max_end_user_budget
                             new_user_obj = LiteLLM_EndUserTable(
                                 user_id=end_user_id, spend=response_cost, blocked=False
                             )
diff --git a/litellm/tests/test_key_generate_prisma.py b/litellm/tests/test_key_generate_prisma.py
index 103b344f5..29468c449 100644
--- a/litellm/tests/test_key_generate_prisma.py
+++ b/litellm/tests/test_key_generate_prisma.py
@@ -324,7 +324,7 @@ def test_call_with_end_user_over_budget(prisma_client):
 
     setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
     setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
-    setattr(litellm, "max_user_budget", 0.00001)
+    setattr(litellm, "max_end_user_budget", 0.00001)
     try:
 
         async def test():
@@ -378,7 +378,9 @@ def test_call_with_end_user_over_budget(prisma_client):
                             "user_api_key_user_id": user,
                         },
                         "proxy_server_request": {
-                            "user": user,
+                            "body": {
+                                "user": user,
+                            }
                         },
                     },
                     "response_cost": 10,