(feat proxy) v2 - model max budgets (#7302)

* clean up unused code

* add _PROXY_VirtualKeyModelMaxBudgetLimiter

* adjust type imports

* working _PROXY_VirtualKeyModelMaxBudgetLimiter

* fix user_api_key_model_max_budget

* fix user_api_key_model_max_budget

* update naming

* update naming

* fix changes to RouterBudgetLimiting

* test_call_with_key_over_model_budget

* test_call_with_key_over_model_budget

* handle _get_request_model_budget_config

* e2e test for test_call_with_key_over_model_budget

* clean up test

* run ci/cd again

* add validate_model_max_budget

* docs fix

* update doc

* add e2e testing for _PROXY_VirtualKeyModelMaxBudgetLimiter

* test_unit_test_max_model_budget_limiter.py
This commit is contained in:
Ishaan Jaff 2024-12-18 19:42:46 -08:00 committed by GitHub
parent 5253f639cd
commit 6261ec3599
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
14 changed files with 628 additions and 261 deletions

View file

@@ -262,6 +262,7 @@ async def user_api_key_auth( # noqa: PLR0915
llm_model_list,
llm_router,
master_key,
model_max_budget_limiter,
open_telemetry_logger,
prisma_client,
proxy_logging_obj,
@@ -1053,37 +1054,10 @@ async def user_api_key_auth( # noqa: PLR0915
and valid_token.token is not None
):
## GET THE SPEND FOR THIS MODEL
twenty_eight_days_ago = datetime.now() - timedelta(days=28)
model_spend = await prisma_client.db.litellm_spendlogs.group_by(
by=["model"],
sum={"spend": True},
where={
"AND": [
{"api_key": valid_token.token},
{"startTime": {"gt": twenty_eight_days_ago}},
{"model": current_model},
]
}, # type: ignore
await model_max_budget_limiter.is_key_within_model_budget(
user_api_key_dict=valid_token,
model=current_model,
)
if (
len(model_spend) > 0
and max_budget_per_model.get(current_model, None) is not None
):
if (
"model" in model_spend[0]
and model_spend[0].get("model") == current_model
and "_sum" in model_spend[0]
and "spend" in model_spend[0]["_sum"]
and model_spend[0]["_sum"]["spend"]
>= max_budget_per_model[current_model]
):
current_model_spend = model_spend[0]["_sum"]["spend"]
current_model_budget = max_budget_per_model[current_model]
raise litellm.BudgetExceededError(
current_cost=current_model_spend,
max_budget=current_model_budget,
)
# Check 6. Team spend is under Team budget
if (
hasattr(valid_token, "team_spend")