diff --git a/litellm/proxy/_types.py b/litellm/proxy/_types.py index 72b7273e51..1615662012 100644 --- a/litellm/proxy/_types.py +++ b/litellm/proxy/_types.py @@ -128,6 +128,7 @@ class GenerateKeyRequest(LiteLLMBase): aliases: Optional[dict] = {} config: Optional[dict] = {} spend: Optional[float] = 0 + max_budget: Optional[float] = None user_id: Optional[str] = None team_id: Optional[str] = None max_parallel_requests: Optional[int] = None @@ -145,6 +146,7 @@ class UpdateKeyRequest(LiteLLMBase): aliases: Optional[dict] = None config: Optional[dict] = None spend: Optional[float] = None + max_budget: Optional[float] = None user_id: Optional[str] = None max_parallel_requests: Optional[int] = None metadata: Optional[dict] = None @@ -162,6 +164,7 @@ class UserAPIKeyAuth(LiteLLMBase): # the expected response object for user api aliases: dict = {} config: dict = {} spend: Optional[float] = 0 + max_budget: Optional[float] = None user_id: Optional[str] = None max_parallel_requests: Optional[int] = None duration: str = "1h" diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py index 30493f60ec..a26b32b1fa 100644 --- a/litellm/proxy/proxy_server.py +++ b/litellm/proxy/proxy_server.py @@ -1142,7 +1142,8 @@ async def generate_key_helper_fn( aliases: dict, config: dict, spend: float, - max_budget: Optional[float] = None, + key_max_budget: Optional[float] = None, # key_max_budget sets the budget per key + max_budget: Optional[float] = None, # max_budget sets the budget per user token: Optional[str] = None, user_id: Optional[str] = None, team_id: Optional[str] = None, @@ -1215,6 +1216,7 @@ async def generate_key_helper_fn( "aliases": aliases_json, "config": config_json, "spend": spend, + "max_budget": key_max_budget, "user_id": user_id, "team_id": team_id, "max_parallel_requests": max_parallel_requests, @@ -2177,6 +2179,7 @@ async def generate_key_fn( - aliases: Optional[dict] - Any alias mappings, on top of anything in the config.yaml model list. 
- https://docs.litellm.ai/docs/proxy/virtual_keys#managing-auth---upgradedowngrade-models - config: Optional[dict] - any key-specific configs, overrides config in config.yaml - spend: Optional[int] - Amount spent by key. Default is 0. Will be updated by proxy whenever key is used. https://docs.litellm.ai/docs/proxy/virtual_keys#managing-auth---tracking-spend + - max_budget: Optional[float] - Specify max budget for a given key. - max_parallel_requests: Optional[int] - Rate limit a user based on the number of parallel requests. Raises 429 error, if user's parallel requests > x. - metadata: Optional[dict] - Metadata for key, store information for key. Example metadata = {"team": "core-infra", "app": "app2", "email": "ishaan@berri.ai" } @@ -2201,6 +2204,11 @@ async def generate_key_fn( ) data_json = data.json() # type: ignore + + # If max_budget is passed to /key/generate, forward it as key_max_budget, because generate_key_helper_fn's own max_budget parameter sets the per-user budget. + if "max_budget" in data_json: + data_json["key_max_budget"] = data_json.pop("max_budget", None) + response = await generate_key_helper_fn(**data_json) return GenerateKeyResponse( key=response["token"], expires=response["expires"], user_id=response["user_id"]