Support budget/rate limit tiers for keys (#7429)

* feat(proxy/utils.py): get associated litellm budget from db in combined_view for key allows user to create rate limit tiers and associate those to keys * feat(proxy/_types.py): update the value of key-level tpm/rpm/model max budget metrics with the associated budget table values if set allows rate limit tiers to be easily applied to keys * docs(rate_limit_tiers.md): add doc on setting rate limit / budget tiers make feature discoverable * feat(key_management_endpoints.py): return litellm_budget_table value in key generate make it easy for user to know associated budget on key creation * fix(key_management_endpoints.py): document 'budget_id' param in `/key/generate` * docs(key_management_endpoints.py): document budget_id usage * refactor(budget_management_endpoints.py): refactor budget endpoints into separate file - makes it easier to run documentation testing against it * docs(test_api_docs.py): add budget endpoints to ci/cd doc test + add missing param info to docs * fix(customer_endpoints.py): use new pydantic obj name * docs(user_management_heirarchy.md): add simple doc explaining teams/keys/org/users on litellm * Litellm dev 12 26 2024 p2 (#7432) * (Feat) Add logging for `POST v1/fine_tuning/jobs` (#7426) * init commit ft jobs logging * add ft logging * add logging for FineTuningJob * simple FT Job create test * (docs) - show all supported Azure OpenAI endpoints in overview (#7428) * azure batches * update doc * docs azure endpoints * docs endpoints on azure * docs azure batches api * docs azure batches api * fix(key_management_endpoints.py): fix key update to actually work * test(test_key_management.py): add e2e test asserting ui key update call works * fix: proxy/_types - fix linting errors * test: update test --------- Co-authored-by: Ishaan Jaff <ishaanjaffer0324@gmail.com> * fix: test * fix(parallel_request_limiter.py): enforce tpm/rpm limits on key from tiers * fix: fix linting errors * test: fix test * fix: remove unused import * test: update test * 
docs(customer_endpoints.py): document new model_max_budget param * test: specify unique key alias * docs(budget_management_endpoints.py): document new model_max_budget param * test: fix test * test: fix tests --------- Co-authored-by: Ishaan Jaff <ishaanjaffer0324@gmail.com>
2025-04-25 18:54:30 +00:00 · 2024-12-26 19:05:27 -08:00 · 2024-12-26 19:05:27 -08:00 · 539f166166
commit 539f166166
parent 12c4e7e695
25 changed files with 764 additions and 376 deletions
--- a/litellm/proxy/utils.py
+++ b/litellm/proxy/utils.py
@ -1018,6 +1018,19 @@ def on_backoff(details):
    print_verbose(f"Backing off... this was attempt #{details['tries']}")


+def jsonify_object(data: dict) -> dict:  # returns a deep copy of `data` whose top-level dict values are replaced by JSON strings
+    db_data = copy.deepcopy(data)  # operate on a copy so the caller's dict is left unmodified
+
+    for k, v in db_data.items():
+        if isinstance(v, dict):  # only values that are dicts are converted; all other values pass through as-is
+            try:
+                db_data[k] = json.dumps(v)
+            except Exception:
+                # This avoids Prisma retrying this 5 times, and making 5 clients
+                db_data[k] = "failed-to-serialize-json"  # sentinel string stored in place of a value json.dumps could not encode
+    return db_data
+
+
 class PrismaClient:
    user_list_transactons: dict = {}
    end_user_list_transactons: dict = {}
@ -1503,25 +1516,31 @@ class PrismaClient:
                        )

                    sql_query = f"""
-                    SELECT 
-                    v.*,
-                    t.spend AS team_spend, 
-                    t.max_budget AS team_max_budget, 
-                    t.tpm_limit AS team_tpm_limit,
-                    t.rpm_limit AS team_rpm_limit,
-                    t.models AS team_models,
-                    t.metadata AS team_metadata,
-                    t.blocked AS team_blocked,
-                    t.team_alias AS team_alias,
-                    t.metadata AS team_metadata,
-                    t.members_with_roles AS team_members_with_roles,
-                    tm.spend AS team_member_spend,
-                    m.aliases as team_model_aliases
-                    FROM "LiteLLM_VerificationToken" AS v
-                    LEFT JOIN "LiteLLM_TeamTable" AS t ON v.team_id = t.team_id
-                    LEFT JOIN "LiteLLM_TeamMembership" AS tm ON v.team_id = tm.team_id AND tm.user_id = v.user_id
-                    LEFT JOIN "LiteLLM_ModelTable" m ON t.model_id = m.id
-                    WHERE v.token = '{token}'
+                        SELECT 
+                            v.*,
+                            t.spend AS team_spend, 
+                            t.max_budget AS team_max_budget, 
+                            t.tpm_limit AS team_tpm_limit,
+                            t.rpm_limit AS team_rpm_limit,
+                            t.models AS team_models,
+                            t.metadata AS team_metadata,
+                            t.blocked AS team_blocked,
+                            t.team_alias AS team_alias,
+                            t.metadata AS team_metadata,
+                            t.members_with_roles AS team_members_with_roles,
+                            tm.spend AS team_member_spend,
+                            m.aliases AS team_model_aliases,
+                            -- Added comma to separate b.* columns
+                            b.max_budget AS litellm_budget_table_max_budget,
+                            b.tpm_limit AS litellm_budget_table_tpm_limit,
+                            b.rpm_limit AS litellm_budget_table_rpm_limit,
+                            b.model_max_budget as litellm_budget_table_model_max_budget
+                        FROM "LiteLLM_VerificationToken" AS v
+                        LEFT JOIN "LiteLLM_TeamTable" AS t ON v.team_id = t.team_id
+                        LEFT JOIN "LiteLLM_TeamMembership" AS tm ON v.team_id = tm.team_id AND tm.user_id = v.user_id
+                        LEFT JOIN "LiteLLM_ModelTable" m ON t.model_id = m.id
+                        LEFT JOIN "LiteLLM_BudgetTable" AS b ON v.budget_id = b.budget_id
+                        WHERE v.token = '{token}'
                    """

                    print_verbose("sql_query being made={}".format(sql_query))
@ -1634,6 +1653,7 @@ class PrismaClient:
                        "create": {**db_data},  # type: ignore
                        "update": {},  # don't do anything if it already exists
                    },
+                    include={"litellm_budget_table": True},
                )
                verbose_proxy_logger.info("Data Inserted into Keys Table")
                return new_verification_token