Support budget/rate limit tiers for keys (#7429)

* feat(proxy/utils.py): get associated litellm budget from db in combined_view for key

allows users to create rate limit tiers and associate them with keys

* feat(proxy/_types.py): update the value of key-level tpm/rpm/model max budget metrics with the associated budget table values if set

allows rate limit tiers to be easily applied to keys

* docs(rate_limit_tiers.md): add doc on setting rate limit / budget tiers

make feature discoverable

* feat(key_management_endpoints.py): return litellm_budget_table value in key generate

make it easy for user to know associated budget on key creation

* fix(key_management_endpoints.py): document 'budget_id' param in `/key/generate`

* docs(key_management_endpoints.py): document budget_id usage

* refactor(budget_management_endpoints.py): refactor budget endpoints into separate file - makes it easier to run documentation testing against it

* docs(test_api_docs.py): add budget endpoints to ci/cd doc test + add missing param info to docs

* fix(customer_endpoints.py): use new pydantic obj name

* docs(user_management_heirarchy.md): add simple doc explaining teams/keys/org/users on litellm

* Litellm dev 12 26 2024 p2 (#7432)

* (Feat) Add logging for `POST v1/fine_tuning/jobs`  (#7426)

* init commit ft jobs logging

* add ft logging

* add logging for FineTuningJob

* simple FT Job create test

* (docs) - show all supported Azure OpenAI endpoints in overview  (#7428)

* azure batches

* update doc

* docs azure endpoints

* docs endpoints on azure

* docs azure batches api

* docs azure batches api

* fix(key_management_endpoints.py): fix key update to actually work

* test(test_key_management.py): add e2e test asserting ui key update call works

* fix: proxy/_types - fix linting errors

* test: update test

---------

Co-authored-by: Ishaan Jaff <ishaanjaffer0324@gmail.com>

* fix: test

* fix(parallel_request_limiter.py): enforce tpm/rpm limits on key from tiers

* fix: fix linting errors

* test: fix test

* fix: remove unused import

* test: update test

* docs(customer_endpoints.py): document new model_max_budget param

* test: specify unique key alias

* docs(budget_management_endpoints.py): document new model_max_budget param

* test: fix test

* test: fix tests

---------

Co-authored-by: Ishaan Jaff <ishaanjaffer0324@gmail.com>
This commit is contained in:
Krish Dholakia 2024-12-26 19:05:27 -08:00 committed by GitHub
parent 12c4e7e695
commit 539f166166
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
25 changed files with 764 additions and 376 deletions

View file

@ -1018,6 +1018,19 @@ def on_backoff(details):
print_verbose(f"Backing off... this was attempt #{details['tries']}")
def jsonify_object(data: dict) -> dict:
    """Return a deep copy of ``data`` with top-level dict values JSON-encoded.

    Every top-level value that is itself a ``dict`` is replaced by the string
    produced by ``json.dumps``. All other values are carried over unchanged.
    The input mapping is never mutated, because all work happens on a deep
    copy.

    Args:
        data: Mapping whose dict-valued entries should be serialized.

    Returns:
        A new dict with the same keys as ``data``. A dict value that cannot
        be serialized is replaced by the placeholder string
        ``"failed-to-serialize-json"``.
    """
    serialized = copy.deepcopy(data)
    for key in serialized:
        value = serialized[key]
        if not isinstance(value, dict):
            continue
        try:
            encoded = json.dumps(value)
        except Exception:
            # Fall back to a placeholder instead of raising: this avoids
            # Prisma retrying this 5 times, and making 5 clients.
            encoded = "failed-to-serialize-json"
        serialized[key] = encoded
    return serialized
class PrismaClient:
user_list_transactons: dict = {}
end_user_list_transactons: dict = {}
@ -1503,25 +1516,31 @@ class PrismaClient:
)
sql_query = f"""
SELECT
v.*,
t.spend AS team_spend,
t.max_budget AS team_max_budget,
t.tpm_limit AS team_tpm_limit,
t.rpm_limit AS team_rpm_limit,
t.models AS team_models,
t.metadata AS team_metadata,
t.blocked AS team_blocked,
t.team_alias AS team_alias,
t.metadata AS team_metadata,
t.members_with_roles AS team_members_with_roles,
tm.spend AS team_member_spend,
m.aliases as team_model_aliases
FROM "LiteLLM_VerificationToken" AS v
LEFT JOIN "LiteLLM_TeamTable" AS t ON v.team_id = t.team_id
LEFT JOIN "LiteLLM_TeamMembership" AS tm ON v.team_id = tm.team_id AND tm.user_id = v.user_id
LEFT JOIN "LiteLLM_ModelTable" m ON t.model_id = m.id
WHERE v.token = '{token}'
SELECT
v.*,
t.spend AS team_spend,
t.max_budget AS team_max_budget,
t.tpm_limit AS team_tpm_limit,
t.rpm_limit AS team_rpm_limit,
t.models AS team_models,
t.metadata AS team_metadata,
t.blocked AS team_blocked,
t.team_alias AS team_alias,
t.metadata AS team_metadata,
t.members_with_roles AS team_members_with_roles,
tm.spend AS team_member_spend,
m.aliases AS team_model_aliases,
-- Added comma to separate b.* columns
b.max_budget AS litellm_budget_table_max_budget,
b.tpm_limit AS litellm_budget_table_tpm_limit,
b.rpm_limit AS litellm_budget_table_rpm_limit,
b.model_max_budget as litellm_budget_table_model_max_budget
FROM "LiteLLM_VerificationToken" AS v
LEFT JOIN "LiteLLM_TeamTable" AS t ON v.team_id = t.team_id
LEFT JOIN "LiteLLM_TeamMembership" AS tm ON v.team_id = tm.team_id AND tm.user_id = v.user_id
LEFT JOIN "LiteLLM_ModelTable" m ON t.model_id = m.id
LEFT JOIN "LiteLLM_BudgetTable" AS b ON v.budget_id = b.budget_id
WHERE v.token = '{token}'
"""
print_verbose("sql_query being made={}".format(sql_query))
@ -1634,6 +1653,7 @@ class PrismaClient:
"create": {**db_data}, # type: ignore
"update": {}, # don't do anything if it already exists
},
include={"litellm_budget_table": True},
)
verbose_proxy_logger.info("Data Inserted into Keys Table")
return new_verification_token