mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-26 03:04:13 +00:00
Support budget/rate limit tiers for keys (#7429)
* feat(proxy/utils.py): get associated litellm budget from db in combined_view for key allows user to create rate limit tiers and associate those to keys * feat(proxy/_types.py): update the value of key-level tpm/rpm/model max budget metrics with the associated budget table values if set allows rate limit tiers to be easily applied to keys * docs(rate_limit_tiers.md): add doc on setting rate limit / budget tiers make feature discoverable * feat(key_management_endpoints.py): return litellm_budget_table value in key generate make it easy for user to know associated budget on key creation * fix(key_management_endpoints.py): document 'budget_id' param in `/key/generate` * docs(key_management_endpoints.py): document budget_id usage * refactor(budget_management_endpoints.py): refactor budget endpoints into separate file - makes it easier to run documentation testing against it * docs(test_api_docs.py): add budget endpoints to ci/cd doc test + add missing param info to docs * fix(customer_endpoints.py): use new pydantic obj name * docs(user_management_heirarchy.md): add simple doc explaining teams/keys/org/users on litellm * Litellm dev 12 26 2024 p2 (#7432) * (Feat) Add logging for `POST v1/fine_tuning/jobs` (#7426) * init commit ft jobs logging * add ft logging * add logging for FineTuningJob * simple FT Job create test * (docs) - show all supported Azure OpenAI endpoints in overview (#7428) * azure batches * update doc * docs azure endpoints * docs endpoints on azure * docs azure batches api * docs azure batches api * fix(key_management_endpoints.py): fix key update to actually work * test(test_key_management.py): add e2e test asserting ui key update call works * fix: proxy/_types - fix linting errors * test: update test --------- Co-authored-by: Ishaan Jaff <ishaanjaffer0324@gmail.com> * fix: test * fix(parallel_request_limiter.py): enforce tpm/rpm limits on key from tiers * fix: fix linting errors * test: fix test * fix: remove unused import * test: update test * 
docs(customer_endpoints.py): document new model_max_budget param * test: specify unique key alias * docs(budget_management_endpoints.py): document new model_max_budget param * test: fix test * test: fix tests --------- Co-authored-by: Ishaan Jaff <ishaanjaffer0324@gmail.com>
This commit is contained in:
parent
12c4e7e695
commit
539f166166
25 changed files with 764 additions and 376 deletions
|
@ -317,7 +317,6 @@ class _PROXY_MaxParallelRequestsHandler(CustomLogger):
|
|||
|
||||
_tpm_limit_for_key_model = get_key_model_tpm_limit(user_api_key_dict)
|
||||
_rpm_limit_for_key_model = get_key_model_rpm_limit(user_api_key_dict)
|
||||
|
||||
if _model is not None:
|
||||
|
||||
if _tpm_limit_for_key_model:
|
||||
|
@ -325,6 +324,7 @@ class _PROXY_MaxParallelRequestsHandler(CustomLogger):
|
|||
|
||||
if _rpm_limit_for_key_model:
|
||||
rpm_limit_for_model = _rpm_limit_for_key_model.get(_model)
|
||||
|
||||
if current is None:
|
||||
new_val = {
|
||||
"current_requests": 1,
|
||||
|
@ -485,6 +485,7 @@ class _PROXY_MaxParallelRequestsHandler(CustomLogger):
|
|||
)
|
||||
try:
|
||||
self.print_verbose("INSIDE parallel request limiter ASYNC SUCCESS LOGGING")
|
||||
|
||||
global_max_parallel_requests = kwargs["litellm_params"]["metadata"].get(
|
||||
"global_max_parallel_requests", None
|
||||
)
|
||||
|
@ -495,6 +496,9 @@ class _PROXY_MaxParallelRequestsHandler(CustomLogger):
|
|||
user_api_key_team_id = kwargs["litellm_params"]["metadata"].get(
|
||||
"user_api_key_team_id", None
|
||||
)
|
||||
user_api_key_model_max_budget = kwargs["litellm_params"]["metadata"].get(
|
||||
"user_api_key_model_max_budget", None
|
||||
)
|
||||
user_api_key_end_user_id = kwargs.get("user")
|
||||
|
||||
user_api_key_metadata = (
|
||||
|
@ -568,6 +572,7 @@ class _PROXY_MaxParallelRequestsHandler(CustomLogger):
|
|||
and (
|
||||
"model_rpm_limit" in user_api_key_metadata
|
||||
or "model_tpm_limit" in user_api_key_metadata
|
||||
or user_api_key_model_max_budget is not None
|
||||
)
|
||||
):
|
||||
request_count_api_key = (
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue