Support temporary budget increases on keys (#7754)

* fix(gpt_transformation.py): fix response_format translation check for 4o models

Fixes https://github.com/BerriAI/litellm/issues/7616

* feat(key_management_endpoints.py): support 'temp_budget_increase' and 'temp_budget_expiry' fields

Allow proxy admin to grant temporary budget increases to keys

* fix(proxy/_types.py): enforce temp_budget_increase and temp_budget_expiry are always passed together

* feat(user_api_key_auth.py): initial working temp budget increase logic

ensures key budget exceeded error checks for temp budget in key metadata

* feat(proxy_server.py): return the key max budget and key spend in the response headers

Allows clientside user to know their remaining limits

* test: add unit testing for new proxy utils

Ensures new key budget is correctly handled

* docs(temporary_budget_increase.md): add doc on temporary budget increase

* fix(utils.py): remove 3.5 from response_format check for now

not all azure  3.5 models support response_format

* fix(user_api_key_auth.py): return valid user api key auth object on all paths
This commit is contained in:
Krish Dholakia 2025-01-14 17:03:11 -08:00 committed by GitHub
parent 29663c2db5
commit 7b27cfb0ae
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
11 changed files with 259 additions and 52 deletions

View file

@ -811,7 +811,10 @@ async def user_api_key_auth( # noqa: PLR0915
valid_token.allowed_model_region = end_user_params.get(
"allowed_model_region"
)
# update key budget with temp budget increase
valid_token = _update_key_budget_with_temp_budget_increase(
valid_token
) # updating it here, allows all downstream reporting / checks to use the updated budget
except Exception:
verbose_logger.info(
"litellm.proxy.auth.user_api_key_auth.py::user_api_key_auth() - Unable to find token={} in cache or `LiteLLM_VerificationTokenTable`. Defaulting 'valid_token' to None'".format(
@ -1016,6 +1019,7 @@ async def user_api_key_auth( # noqa: PLR0915
current_cost=valid_token.spend,
max_budget=valid_token.max_budget,
)
if valid_token.soft_budget and valid_token.spend >= valid_token.soft_budget:
verbose_proxy_logger.debug(
"Crossed Soft Budget for token %s, spend %s, soft_budget %s",
@ -1383,3 +1387,25 @@ def get_api_key_from_custom_header(
f"No LiteLLM Virtual Key pass. Please set header={custom_litellm_key_header_name}: Bearer <api_key>"
)
return api_key
def _get_temp_budget_increase(valid_token: UserAPIKeyAuth):
valid_token_metadata = valid_token.metadata
if (
"temp_budget_increase" in valid_token_metadata
and "temp_budget_expiry" in valid_token_metadata
):
expiry = datetime.fromisoformat(valid_token_metadata["temp_budget_expiry"])
if expiry > datetime.now():
return valid_token_metadata["temp_budget_increase"]
return None
def _update_key_budget_with_temp_budget_increase(
valid_token: UserAPIKeyAuth,
) -> UserAPIKeyAuth:
if valid_token.max_budget is None:
return valid_token
temp_budget_increase = _get_temp_budget_increase(valid_token) or 0.0
valid_token.max_budget = valid_token.max_budget + temp_budget_increase
return valid_token