mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-27 11:43:54 +00:00
(feat) budgets per model
This commit is contained in:
parent
e8dcf8fa13
commit
4ea354ee6e
1 changed file with 24 additions and 1 deletion
|
@ -376,6 +376,11 @@ async def user_api_key_auth(
|
||||||
# 3. If 'user' passed to /chat/completions, /embeddings endpoint is in budget
|
# 3. If 'user' passed to /chat/completions, /embeddings endpoint is in budget
|
||||||
# 4. If token is expired
|
# 4. If token is expired
|
||||||
# 5. If token spend is under Budget for the token
|
# 5. If token spend is under Budget for the token
|
||||||
|
# 6. If token spend per model is under budget per model
|
||||||
|
|
||||||
|
request_data = await _read_request_body(
|
||||||
|
request=request
|
||||||
|
) # request data, used across all checks. Making this easily available
|
||||||
|
|
||||||
# Check 1. If token can call model
|
# Check 1. If token can call model
|
||||||
litellm.model_alias_map = valid_token.aliases
|
litellm.model_alias_map = valid_token.aliases
|
||||||
|
@ -450,7 +455,6 @@ async def user_api_key_auth(
|
||||||
if (
|
if (
|
||||||
litellm.max_user_budget is not None
|
litellm.max_user_budget is not None
|
||||||
): # Check if 'user' passed in /chat/completions is in budget, only checked if litellm.max_user_budget is set
|
): # Check if 'user' passed in /chat/completions is in budget, only checked if litellm.max_user_budget is set
|
||||||
request_data = await _read_request_body(request=request)
|
|
||||||
user_passed_to_chat_completions = request_data.get("user", None)
|
user_passed_to_chat_completions = request_data.get("user", None)
|
||||||
if user_passed_to_chat_completions is not None:
|
if user_passed_to_chat_completions is not None:
|
||||||
user_id_list.append(user_passed_to_chat_completions)
|
user_id_list.append(user_passed_to_chat_completions)
|
||||||
|
@ -587,6 +591,25 @@ async def user_api_key_auth(
|
||||||
f"ExceededTokenBudget: Current spend for token: {valid_token.spend}; Max Budget for Token: {valid_token.max_budget}"
|
f"ExceededTokenBudget: Current spend for token: {valid_token.spend}; Max Budget for Token: {valid_token.max_budget}"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Check 5. Token Model Spend is under Model budget
|
||||||
|
max_budget_per_model = valid_token.model_max_budget
|
||||||
|
spend_per_model = valid_token.model_spend
|
||||||
|
|
||||||
|
if max_budget_per_model is not None and spend_per_model is not None:
|
||||||
|
current_model = request_data.get("model")
|
||||||
|
if current_model is not None:
|
||||||
|
current_model_spend = spend_per_model.get(current_model, None)
|
||||||
|
current_model_budget = max_budget_per_model.get(current_model, None)
|
||||||
|
|
||||||
|
if (
|
||||||
|
current_model_spend is not None
|
||||||
|
and current_model_budget is not None
|
||||||
|
):
|
||||||
|
if current_model_spend > current_model_budget:
|
||||||
|
raise Exception(
|
||||||
|
f"ExceededModelBudget: Current spend for model: {current_model_spend}; Max Budget for Model: {current_model_budget}"
|
||||||
|
)
|
||||||
|
|
||||||
# Token passed all checks
|
# Token passed all checks
|
||||||
api_key = valid_token.token
|
api_key = valid_token.token
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue