mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-26 11:14:04 +00:00
Merge pull request #2757 from BerriAI/litellm_fix_budget_alerts
fix(auth_checks.py): make global spend checks more accurate
This commit is contained in:
commit
6d9887969f
6 changed files with 147 additions and 39 deletions
|
@ -174,6 +174,7 @@ upperbound_key_generate_params: Optional[Dict] = None
|
|||
default_user_params: Optional[Dict] = None
|
||||
default_team_settings: Optional[List] = None
|
||||
max_user_budget: Optional[float] = None
|
||||
max_end_user_budget: Optional[float] = None
|
||||
#### RELIABILITY ####
|
||||
request_timeout: Optional[float] = 6000
|
||||
num_retries: Optional[int] = None # per model endpoint
|
||||
|
|
|
@ -5,10 +5,15 @@ model_list:
|
|||
api_key: my-fake-key
|
||||
api_base: https://exampleopenaiendpoint-production.up.railway.app/
|
||||
|
||||
litellm_settings:
|
||||
max_budget: 600020
|
||||
budget_duration: 30d
|
||||
|
||||
general_settings:
|
||||
master_key: sk-1234
|
||||
proxy_batch_write_at: 5 # 👈 Frequency of batch writing logs to server (in seconds)
|
||||
proxy_batch_write_at: 60 # 👈 Frequency of batch writing logs to server (in seconds)
|
||||
enable_jwt_auth: True
|
||||
alerting: ["slack"]
|
||||
litellm_jwtauth:
|
||||
admin_jwt_scope: "litellm_proxy_admin"
|
||||
team_jwt_scope: "litellm_team"
|
||||
|
|
|
@ -18,6 +18,7 @@ from litellm.proxy._types import (
|
|||
from typing import Optional, Literal, Union
|
||||
from litellm.proxy.utils import PrismaClient
|
||||
from litellm.caching import DualCache
|
||||
import litellm
|
||||
|
||||
all_routes = LiteLLMRoutes.openai_routes.value + LiteLLMRoutes.management_routes.value
|
||||
|
||||
|
@ -26,6 +27,7 @@ def common_checks(
|
|||
request_body: dict,
|
||||
team_object: LiteLLM_TeamTable,
|
||||
end_user_object: Optional[LiteLLM_EndUserTable],
|
||||
global_proxy_spend: Optional[float],
|
||||
general_settings: dict,
|
||||
route: str,
|
||||
) -> bool:
|
||||
|
@ -37,6 +39,7 @@ def common_checks(
|
|||
3. If team is in budget
|
||||
4. If end_user ('user' passed to /chat/completions, /embeddings endpoint) is in budget
|
||||
5. [OPTIONAL] If 'enforce_end_user' enabled - did developer pass in 'user' param for openai endpoints
|
||||
6. [OPTIONAL] If 'litellm.max_budget' is set (>0), is proxy under budget
|
||||
"""
|
||||
_model = request_body.get("model", None)
|
||||
if team_object.blocked == True:
|
||||
|
@ -66,7 +69,7 @@ def common_checks(
|
|||
end_user_budget = end_user_object.litellm_budget_table.max_budget
|
||||
if end_user_budget is not None and end_user_object.spend > end_user_budget:
|
||||
raise Exception(
|
||||
f"End User={end_user_object.user_id} over budget. Spend={end_user_object.spend}, Budget={end_user_budget}"
|
||||
f"ExceededBudget: End User={end_user_object.user_id} over budget. Spend={end_user_object.spend}, Budget={end_user_budget}"
|
||||
)
|
||||
# 5. [OPTIONAL] If 'enforce_user_param' enabled - did developer pass in 'user' param for openai endpoints
|
||||
if (
|
||||
|
@ -77,7 +80,12 @@ def common_checks(
|
|||
raise Exception(
|
||||
f"'user' param not passed in. 'enforce_user_param'={general_settings['enforce_user_param']}"
|
||||
)
|
||||
|
||||
# 6. [OPTIONAL] If 'litellm.max_budget' is set (>0), is proxy under budget
|
||||
if litellm.max_budget > 0 and global_proxy_spend is not None:
|
||||
if global_proxy_spend > litellm.max_budget:
|
||||
raise Exception(
|
||||
f"ExceededBudget: LiteLLM Proxy has exceeded its budget. Current spend: {global_proxy_spend}; Max Budget: {litellm.max_budget}"
|
||||
)
|
||||
return True
|
||||
|
||||
|
||||
|
|
|
@ -437,12 +437,49 @@ async def user_api_key_auth(
|
|||
key=end_user_id, value=end_user_object
|
||||
)
|
||||
|
||||
global_proxy_spend = None
|
||||
|
||||
if litellm.max_budget > 0: # user set proxy max budget
|
||||
# check cache
|
||||
global_proxy_spend = await user_api_key_cache.async_get_cache(
|
||||
key="{}:spend".format(litellm_proxy_admin_name)
|
||||
)
|
||||
if global_proxy_spend is None and prisma_client is not None:
|
||||
# get from db
|
||||
sql_query = """SELECT SUM(spend) as total_spend FROM "MonthlyGlobalSpend";"""
|
||||
|
||||
response = await prisma_client.db.query_raw(query=sql_query)
|
||||
|
||||
global_proxy_spend = response[0]["total_spend"]
|
||||
|
||||
await user_api_key_cache.async_set_cache(
|
||||
key="{}:spend".format(litellm_proxy_admin_name),
|
||||
value=global_proxy_spend,
|
||||
ttl=60,
|
||||
)
|
||||
if global_proxy_spend is not None:
|
||||
user_info = {
|
||||
"user_id": litellm_proxy_admin_name,
|
||||
"max_budget": litellm.max_budget,
|
||||
"spend": global_proxy_spend,
|
||||
"user_email": "",
|
||||
}
|
||||
asyncio.create_task(
|
||||
proxy_logging_obj.budget_alerts(
|
||||
user_max_budget=litellm.max_budget,
|
||||
user_current_spend=global_proxy_spend,
|
||||
type="user_and_proxy_budget",
|
||||
user_info=user_info,
|
||||
)
|
||||
)
|
||||
|
||||
# run through common checks
|
||||
_ = common_checks(
|
||||
request_body=request_data,
|
||||
team_object=team_object,
|
||||
end_user_object=end_user_object,
|
||||
general_settings=general_settings,
|
||||
global_proxy_spend=global_proxy_spend,
|
||||
route=route,
|
||||
)
|
||||
# save user object in cache
|
||||
|
@ -656,17 +693,8 @@ async def user_api_key_auth(
|
|||
)
|
||||
|
||||
# Check 2. If user_id for this token is in budget
|
||||
## Check 2.1 If global proxy is in budget
|
||||
## Check 2.2 [OPTIONAL - checked only if litellm.max_user_budget is not None] If 'user' passed in /chat/completions is in budget
|
||||
if valid_token.user_id is not None:
|
||||
user_id_list = [valid_token.user_id, litellm_proxy_budget_name]
|
||||
if (
|
||||
litellm.max_user_budget is not None
|
||||
): # Check if 'user' passed in /chat/completions is in budget, only checked if litellm.max_user_budget is set
|
||||
user_passed_to_chat_completions = request_data.get("user", None)
|
||||
if user_passed_to_chat_completions is not None:
|
||||
user_id_list.append(user_passed_to_chat_completions)
|
||||
|
||||
user_id_list = [valid_token.user_id]
|
||||
for id in user_id_list:
|
||||
value = user_api_key_cache.get_cache(key=id)
|
||||
if value is not None:
|
||||
|
@ -675,13 +703,12 @@ async def user_api_key_auth(
|
|||
user_id_information.append(value)
|
||||
if user_id_information is None or (
|
||||
isinstance(user_id_information, list)
|
||||
and len(user_id_information) < 2
|
||||
and len(user_id_information) < 1
|
||||
):
|
||||
if prisma_client is not None:
|
||||
user_id_information = await prisma_client.get_data(
|
||||
user_id_list=[
|
||||
valid_token.user_id,
|
||||
litellm_proxy_budget_name,
|
||||
],
|
||||
table_name="user",
|
||||
query_type="find_all",
|
||||
|
@ -881,11 +908,54 @@ async def user_api_key_auth(
|
|||
blocked=valid_token.team_blocked,
|
||||
models=valid_token.team_models,
|
||||
)
|
||||
|
||||
_end_user_object = None
|
||||
if "user" in request_data:
|
||||
_id = "end_user_id:{}".format(request_data["user"])
|
||||
_end_user_object = await user_api_key_cache.async_get_cache(key=_id)
|
||||
if _end_user_object is not None:
|
||||
_end_user_object = LiteLLM_EndUserTable(**_end_user_object)
|
||||
|
||||
global_proxy_spend = None
|
||||
if litellm.max_budget > 0: # user set proxy max budget
|
||||
# check cache
|
||||
global_proxy_spend = await user_api_key_cache.async_get_cache(
|
||||
key="{}:spend".format(litellm_proxy_admin_name)
|
||||
)
|
||||
if global_proxy_spend is None:
|
||||
# get from db
|
||||
sql_query = """SELECT SUM(spend) as total_spend FROM "MonthlyGlobalSpend";"""
|
||||
|
||||
response = await prisma_client.db.query_raw(query=sql_query)
|
||||
|
||||
global_proxy_spend = response[0]["total_spend"]
|
||||
await user_api_key_cache.async_set_cache(
|
||||
key="{}:spend".format(litellm_proxy_admin_name),
|
||||
value=global_proxy_spend,
|
||||
ttl=60,
|
||||
)
|
||||
|
||||
if global_proxy_spend is not None:
|
||||
user_info = {
|
||||
"user_id": litellm_proxy_admin_name,
|
||||
"max_budget": litellm.max_budget,
|
||||
"spend": global_proxy_spend,
|
||||
"user_email": "",
|
||||
}
|
||||
asyncio.create_task(
|
||||
proxy_logging_obj.budget_alerts(
|
||||
user_max_budget=litellm.max_budget,
|
||||
user_current_spend=global_proxy_spend,
|
||||
type="user_and_proxy_budget",
|
||||
user_info=user_info,
|
||||
)
|
||||
)
|
||||
_ = common_checks(
|
||||
request_body=request_data,
|
||||
team_object=_team_obj,
|
||||
end_user_object=None,
|
||||
end_user_object=_end_user_object,
|
||||
general_settings=general_settings,
|
||||
global_proxy_spend=global_proxy_spend,
|
||||
route=route,
|
||||
)
|
||||
# Token passed all checks
|
||||
|
@ -1553,7 +1623,7 @@ async def update_cache(
|
|||
|
||||
async def _update_user_cache():
|
||||
## UPDATE CACHE FOR USER ID + GLOBAL PROXY
|
||||
user_ids = [user_id, litellm_proxy_budget_name, end_user_id]
|
||||
user_ids = [user_id]
|
||||
try:
|
||||
for _id in user_ids:
|
||||
# Fetch the existing cost for the given user
|
||||
|
@ -1594,14 +1664,26 @@ async def update_cache(
|
|||
user_api_key_cache.set_cache(
|
||||
key=_id, value=existing_spend_obj.json()
|
||||
)
|
||||
## UPDATE GLOBAL PROXY ##
|
||||
global_proxy_spend = await user_api_key_cache.async_get_cache(
|
||||
key="{}:spend".format(litellm_proxy_admin_name)
|
||||
)
|
||||
if global_proxy_spend is None:
|
||||
await user_api_key_cache.async_set_cache(
|
||||
key="{}:spend".format(litellm_proxy_admin_name), value=response_cost
|
||||
)
|
||||
elif response_cost is not None and global_proxy_spend is not None:
|
||||
increment = global_proxy_spend + response_cost
|
||||
await user_api_key_cache.async_set_cache(
|
||||
key="{}:spend".format(litellm_proxy_admin_name), value=increment
|
||||
)
|
||||
except Exception as e:
|
||||
verbose_proxy_logger.debug(
|
||||
f"An error occurred updating user cache: {str(e)}\n\n{traceback.format_exc()}"
|
||||
)
|
||||
|
||||
async def _update_end_user_cache():
|
||||
## UPDATE CACHE FOR USER ID + GLOBAL PROXY
|
||||
_id = end_user_id
|
||||
_id = "end_user_id:{}".format(end_user_id)
|
||||
try:
|
||||
# Fetch the existing cost for the given user
|
||||
existing_spend_obj = await user_api_key_cache.async_get_cache(key=_id)
|
||||
|
@ -1609,14 +1691,14 @@ async def update_cache(
|
|||
# if user does not exist in LiteLLM_UserTable, create a new user
|
||||
existing_spend = 0
|
||||
max_user_budget = None
|
||||
if litellm.max_user_budget is not None:
|
||||
max_user_budget = litellm.max_user_budget
|
||||
if litellm.max_end_user_budget is not None:
|
||||
max_end_user_budget = litellm.max_end_user_budget
|
||||
existing_spend_obj = LiteLLM_EndUserTable(
|
||||
user_id=_id,
|
||||
spend=0,
|
||||
blocked=False,
|
||||
litellm_budget_table=LiteLLM_BudgetTable(
|
||||
max_budget=max_user_budget
|
||||
max_budget=max_end_user_budget
|
||||
),
|
||||
)
|
||||
verbose_proxy_logger.debug(
|
||||
|
@ -4049,7 +4131,6 @@ async def generate_key_fn(
|
|||
)
|
||||
_budget_id = getattr(_budget, "budget_id", None)
|
||||
data_json = data.json() # type: ignore
|
||||
|
||||
# if we get max_budget passed to /key/generate, then use it as key_max_budget. Since generate_key_helper_fn is used to make new users
|
||||
if "max_budget" in data_json:
|
||||
data_json["key_max_budget"] = data_json.pop("max_budget", None)
|
||||
|
|
|
@ -1941,9 +1941,9 @@ async def update_spend(
|
|||
end_user_id,
|
||||
response_cost,
|
||||
) in prisma_client.end_user_list_transactons.items():
|
||||
max_user_budget = None
|
||||
if litellm.max_user_budget is not None:
|
||||
max_user_budget = litellm.max_user_budget
|
||||
max_end_user_budget = None
|
||||
if litellm.max_end_user_budget is not None:
|
||||
max_end_user_budget = litellm.max_end_user_budget
|
||||
new_user_obj = LiteLLM_EndUserTable(
|
||||
user_id=end_user_id, spend=response_cost, blocked=False
|
||||
)
|
||||
|
|
|
@ -324,7 +324,7 @@ def test_call_with_end_user_over_budget(prisma_client):
|
|||
|
||||
setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
|
||||
setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
|
||||
setattr(litellm, "max_user_budget", 0.00001)
|
||||
setattr(litellm, "max_end_user_budget", 0.00001)
|
||||
try:
|
||||
|
||||
async def test():
|
||||
|
@ -378,7 +378,9 @@ def test_call_with_end_user_over_budget(prisma_client):
|
|||
"user_api_key_user_id": user,
|
||||
},
|
||||
"proxy_server_request": {
|
||||
"user": user,
|
||||
"body": {
|
||||
"user": user,
|
||||
}
|
||||
},
|
||||
},
|
||||
"response_cost": 10,
|
||||
|
@ -407,18 +409,20 @@ def test_call_with_proxy_over_budget(prisma_client):
|
|||
litellm_proxy_budget_name = f"litellm-proxy-budget-{time.time()}"
|
||||
setattr(
|
||||
litellm.proxy.proxy_server,
|
||||
"litellm_proxy_budget_name",
|
||||
"litellm_proxy_admin_name",
|
||||
litellm_proxy_budget_name,
|
||||
)
|
||||
setattr(litellm, "max_budget", 0.00001)
|
||||
from litellm.proxy.proxy_server import user_api_key_cache
|
||||
|
||||
user_api_key_cache.set_cache(
|
||||
key="{}:spend".format(litellm_proxy_budget_name), value=0
|
||||
)
|
||||
setattr(litellm.proxy.proxy_server, "user_api_key_cache", user_api_key_cache)
|
||||
try:
|
||||
|
||||
async def test():
|
||||
await litellm.proxy.proxy_server.prisma_client.connect()
|
||||
## CREATE PROXY + USER BUDGET ##
|
||||
request = NewUserRequest(
|
||||
max_budget=0.00001, user_id=litellm_proxy_budget_name
|
||||
)
|
||||
await new_user(request)
|
||||
request = NewUserRequest()
|
||||
key = await new_user(request)
|
||||
print(key)
|
||||
|
@ -470,6 +474,7 @@ def test_call_with_proxy_over_budget(prisma_client):
|
|||
start_time=datetime.now(),
|
||||
end_time=datetime.now(),
|
||||
)
|
||||
|
||||
await asyncio.sleep(5)
|
||||
# use generated key to auth in
|
||||
result = await user_api_key_auth(request=request, api_key=bearer_token)
|
||||
|
@ -571,9 +576,17 @@ def test_call_with_proxy_over_budget_stream(prisma_client):
|
|||
litellm_proxy_budget_name = f"litellm-proxy-budget-{time.time()}"
|
||||
setattr(
|
||||
litellm.proxy.proxy_server,
|
||||
"litellm_proxy_budget_name",
|
||||
"litellm_proxy_admin_name",
|
||||
litellm_proxy_budget_name,
|
||||
)
|
||||
setattr(litellm, "max_budget", 0.00001)
|
||||
from litellm.proxy.proxy_server import user_api_key_cache
|
||||
|
||||
user_api_key_cache.set_cache(
|
||||
key="{}:spend".format(litellm_proxy_budget_name), value=0
|
||||
)
|
||||
setattr(litellm.proxy.proxy_server, "user_api_key_cache", user_api_key_cache)
|
||||
|
||||
from litellm._logging import verbose_proxy_logger
|
||||
import logging
|
||||
|
||||
|
@ -584,10 +597,10 @@ def test_call_with_proxy_over_budget_stream(prisma_client):
|
|||
async def test():
|
||||
await litellm.proxy.proxy_server.prisma_client.connect()
|
||||
## CREATE PROXY + USER BUDGET ##
|
||||
request = NewUserRequest(
|
||||
max_budget=0.00001, user_id=litellm_proxy_budget_name
|
||||
)
|
||||
await new_user(request)
|
||||
# request = NewUserRequest(
|
||||
# max_budget=0.00001, user_id=litellm_proxy_budget_name
|
||||
# )
|
||||
# await new_user(request)
|
||||
request = NewUserRequest()
|
||||
key = await new_user(request)
|
||||
print(key)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue