Support budget/rate limit tiers for keys (#7429)

* feat(proxy/utils.py): get associated litellm budget from db in combined_view for key

allows user to create rate limit tiers and associate those to keys

* feat(proxy/_types.py): update the value of key-level tpm/rpm/model max budget metrics with the associated budget table values if set

allows rate limit tiers to be easily applied to keys

* docs(rate_limit_tiers.md): add doc on setting rate limit / budget tiers

make feature discoverable

* feat(key_management_endpoints.py): return litellm_budget_table value in key generate

make it easy for user to know associated budget on key creation

* fix(key_management_endpoints.py): document 'budget_id' param in `/key/generate`

* docs(key_management_endpoints.py): document budget_id usage

* refactor(budget_management_endpoints.py): refactor budget endpoints into separate file - makes it easier to run documentation testing against it

* docs(test_api_docs.py): add budget endpoints to ci/cd doc test + add missing param info to docs

* fix(customer_endpoints.py): use new pydantic obj name

* docs(user_management_heirarchy.md): add simple doc explaining teams/keys/org/users on litellm

* Litellm dev 12 26 2024 p2 (#7432)

* (Feat) Add logging for `POST v1/fine_tuning/jobs`  (#7426)

* init commit ft jobs logging

* add ft logging

* add logging for FineTuningJob

* simple FT Job create test

* (docs) - show all supported Azure OpenAI endpoints in overview  (#7428)

* azure batches

* update doc

* docs azure endpoints

* docs endpoints on azure

* docs azure batches api

* docs azure batches api

* fix(key_management_endpoints.py): fix key update to actually work

* test(test_key_management.py): add e2e test asserting ui key update call works

* fix: proxy/_types - fix linting errors

* test: update test

---------

Co-authored-by: Ishaan Jaff <ishaanjaffer0324@gmail.com>

* fix: test

* fix(parallel_request_limiter.py): enforce tpm/rpm limits on key from tiers

* fix: fix linting errors

* test: fix test

* fix: remove unused import

* test: update test

* docs(customer_endpoints.py): document new model_max_budget param

* test: specify unique key alias

* docs(budget_management_endpoints.py): document new model_max_budget param

* test: fix test

* test: fix tests

---------

Co-authored-by: Ishaan Jaff <ishaanjaffer0324@gmail.com>
This commit is contained in:
Krish Dholakia 2024-12-26 19:05:27 -08:00 committed by GitHub
parent 12c4e7e695
commit 539f166166
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
25 changed files with 764 additions and 376 deletions

View file

@ -178,6 +178,9 @@ from litellm.proxy.hooks.prompt_injection_detection import (
from litellm.proxy.hooks.proxy_failure_handler import _PROXY_failure_handler
from litellm.proxy.hooks.proxy_track_cost_callback import _PROXY_track_cost_callback
from litellm.proxy.litellm_pre_call_utils import add_litellm_data_to_request
from litellm.proxy.management_endpoints.budget_management_endpoints import (
router as budget_management_router,
)
from litellm.proxy.management_endpoints.customer_endpoints import (
router as customer_router,
)
@ -5531,238 +5534,6 @@ async def supported_openai_params(model: str):
)
#### BUDGET TABLE MANAGEMENT ####
@router.post(
    "/budget/new",
    tags=["budget management"],
    dependencies=[Depends(user_api_key_auth)],
)
async def new_budget(
    budget_obj: BudgetNew,
    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
):
    """
    Create a new budget object. Can apply this to teams, orgs, end-users, keys.

    Only the non-None fields of `budget_obj` are written. `created_by` and
    `updated_by` are set to the caller's `user_id`, falling back to
    `litellm_proxy_admin_name` when the key has no user id.

    Raises:
        HTTPException (500): no database client is connected.
    """
    global prisma_client

    if prisma_client is None:
        raise HTTPException(
            status_code=500,
            detail={"error": CommonProxyErrors.db_not_connected_error.value},
        )

    # Same actor is recorded as both creator and last updater on creation.
    actor = user_api_key_dict.user_id or litellm_proxy_admin_name

    row_data = budget_obj.model_dump(exclude_none=True)  # type: ignore
    row_data["created_by"] = actor
    row_data["updated_by"] = actor

    created_row = await prisma_client.db.litellm_budgettable.create(
        data=row_data  # type: ignore
    )
    return created_row
@router.post(
    "/budget/update",
    tags=["budget management"],
    dependencies=[Depends(user_api_key_auth)],
)
async def update_budget(
    budget_obj: BudgetNew,
    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
):
    """
    Update an existing budget object, identified by `budget_obj.budget_id`.

    Only the non-None fields of `budget_obj` are written to the matching row.
    `updated_by` is set to the caller's `user_id`, falling back to
    `litellm_proxy_admin_name` when the key has no user id.

    Raises:
        HTTPException (500): no database client is connected.
        HTTPException (400): `budget_id` was not provided.
    """
    global prisma_client
    if prisma_client is None:
        raise HTTPException(
            status_code=500,
            detail={"error": CommonProxyErrors.db_not_connected_error.value},
        )
    # The budget id is the lookup key for the update, so it cannot be omitted.
    if budget_obj.budget_id is None:
        raise HTTPException(status_code=400, detail={"error": "budget_id is required"})
    response = await prisma_client.db.litellm_budgettable.update(
        where={"budget_id": budget_obj.budget_id},
        data={
            **budget_obj.model_dump(exclude_none=True),  # type: ignore
            "updated_by": user_api_key_dict.user_id or litellm_proxy_admin_name,
        },  # type: ignore
    )
    return response
@router.post(
    "/budget/info",
    tags=["budget management"],
    dependencies=[Depends(user_api_key_auth)],
)
async def info_budget(data: BudgetRequest):
    """
    Get the budget id specific information

    Looks up every budget row whose `budget_id` is in `data.budgets`.

    Raises:
        HTTPException (500): no database client is connected.
        HTTPException (400): `data.budgets` is empty.
    """
    global prisma_client

    if prisma_client is None:
        raise HTTPException(status_code=500, detail={"error": "No db connected"})

    requested_ids = data.budgets
    if len(requested_ids) == 0:
        # Reject an empty id list instead of issuing an empty `in` query.
        error_detail = {
            "error": f"Specify list of budget id's to query. Passed in={data.budgets}"
        }
        raise HTTPException(status_code=400, detail=error_detail)

    id_filter = {"budget_id": {"in": requested_ids}}
    return await prisma_client.db.litellm_budgettable.find_many(where=id_filter)
@router.get(
    "/budget/settings",
    tags=["budget management"],
    dependencies=[Depends(user_api_key_auth)],
)
async def budget_settings(
    budget_id: str,
    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
):
    """
    Get list of configurable params + current value for a budget item + description of each field

    Used on Admin UI.

    Returns one `ConfigList` entry per configurable `BudgetNew` field. The
    current value is read from the budget row matching `budget_id`, and is
    None when no such row exists or the field is unset.

    Raises:
        HTTPException (400): no database client is connected, or the caller
            is not a proxy admin.
    """
    if prisma_client is None:
        raise HTTPException(
            status_code=400,
            detail={"error": CommonProxyErrors.db_not_connected_error.value},
        )

    if user_api_key_dict.user_role != LitellmUserRoles.PROXY_ADMIN:
        denied_message = "{}, your role={}".format(
            CommonProxyErrors.not_allowed_access.value,
            user_api_key_dict.user_role,
        )
        raise HTTPException(status_code=400, detail={"error": denied_message})

    # Fields exposed for configuration, mapped to the type label reported back.
    configurable_field_types = {
        "max_parallel_requests": "Integer",
        "tpm_limit": "Integer",
        "rpm_limit": "Integer",
        "budget_duration": "String",
        "max_budget": "Float",
        "soft_budget": "Float",
    }

    ## get budget item from db
    budget_row = await prisma_client.db.litellm_budgettable.find_first(
        where={"budget_id": budget_id}
    )
    current_values = (
        {} if budget_row is None else budget_row.model_dump(exclude_none=True)
    )

    return [
        ConfigList(
            field_name=name,
            field_type=configurable_field_types[name],
            field_description=info.description or "",
            field_value=current_values.get(name, None),
            stored_in_db=True,
            field_default_value=info.default,
        )
        for name, info in BudgetNew.model_fields.items()
        if name in configurable_field_types
    ]
@router.get(
    "/budget/list",
    tags=["budget management"],
    dependencies=[Depends(user_api_key_auth)],
)
async def list_budget(
    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
):
    """
    List all the created budgets in proxy db. Used on Admin UI.

    Raises:
        HTTPException (400): no database client is connected, or the caller
            is not a proxy admin.
    """
    if prisma_client is None:
        raise HTTPException(
            status_code=400,
            detail={"error": CommonProxyErrors.db_not_connected_error.value},
        )

    caller_role = user_api_key_dict.user_role
    if caller_role != LitellmUserRoles.PROXY_ADMIN:
        denied_message = "{}, your role={}".format(
            CommonProxyErrors.not_allowed_access.value,
            caller_role,
        )
        raise HTTPException(status_code=400, detail={"error": denied_message})

    # No filter: every row of the budget table is returned.
    all_budgets = await prisma_client.db.litellm_budgettable.find_many()
    return all_budgets
@router.post(
    "/budget/delete",
    tags=["budget management"],
    dependencies=[Depends(user_api_key_auth)],
)
async def delete_budget(
    data: BudgetDeleteRequest,
    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
):
    """
    Delete budget

    Deletes the budget row whose `budget_id` equals `data.id` and returns the
    result of the delete call.

    Raises:
        HTTPException (500): no database client is connected.
        HTTPException (400): the caller is not a proxy admin.
    """
    global prisma_client

    if prisma_client is None:
        raise HTTPException(
            status_code=500,
            detail={"error": CommonProxyErrors.db_not_connected_error.value},
        )

    caller_role = user_api_key_dict.user_role
    if caller_role != LitellmUserRoles.PROXY_ADMIN:
        denied_message = "{}, your role={}".format(
            CommonProxyErrors.not_allowed_access.value,
            caller_role,
        )
        raise HTTPException(status_code=400, detail={"error": denied_message})

    target = {"budget_id": data.id}
    return await prisma_client.db.litellm_budgettable.delete(where=target)
#### MODEL MANAGEMENT ####
@ -8856,3 +8627,4 @@ app.include_router(debugging_endpoints_router)
# Attach each endpoint router to `app`.
app.include_router(ui_crud_endpoints_router)
app.include_router(openai_files_router)
app.include_router(team_callback_router)
app.include_router(budget_management_router)