diff --git a/docs/my-website/docs/proxy/virtual_keys.md b/docs/my-website/docs/proxy/virtual_keys.md index e350ce9d5..70fd6e6a8 100644 --- a/docs/my-website/docs/proxy/virtual_keys.md +++ b/docs/my-website/docs/proxy/virtual_keys.md @@ -79,6 +79,7 @@ curl 'http://0.0.0.0:8000/key/generate' \ "metadata": {"user": "ishaan@berri.ai"}, "team_id": "core-infra", "max_budget": 10, + "soft_budget": 5 }' ``` @@ -93,6 +94,7 @@ Request Params: - `config`: *Optional[dict]* - any key-specific configs, overrides config in config.yaml - `spend`: *Optional[int]* - Amount spent by key. Default is 0. Will be updated by proxy whenever key is used. https://docs.litellm.ai/docs/proxy/virtual_keys#managing-auth---tracking-spend - `max_budget`: *Optional[float]* - Specify max budget for a given key. +- `soft_budget`: *Optional[float]* - Specify a soft budget limit for a given key. Alerts are sent when the key hits its soft budget. - `model_max_budget`: *Optional[dict[str, float]]* - Specify max budget for each model, `model_max_budget={"gpt4": 0.5, "gpt-5": 0.01}` - `max_parallel_requests`: *Optional[int]* - Rate limit a user based on the number of parallel requests. Raises 429 error, if user's parallel requests > x. - `metadata`: *Optional[dict]* - Metadata for key, store information for key. Example metadata = {"team": "core-infra", "app": "app2", "email": "ishaan@berri.ai" } diff --git a/litellm/__init__.py b/litellm/__init__.py index cd639ddb9..f218fe036 100644 --- a/litellm/__init__.py +++ b/litellm/__init__.py @@ -79,6 +79,9 @@ max_budget: float = 0.0 # set the max budget across all providers budget_duration: Optional[str] = ( None # proxy only - resets budget after fixed duration. You can set duration as seconds ("30s"), minutes ("30m"), hours ("30h"), days ("30d"). 
) +default_soft_budget: float = ( + 50.0 # by default all litellm proxy keys have a soft budget of 50.0 +) _openai_finish_reasons = ["stop", "length", "function_call", "content_filter", "null"] _openai_completion_params = [ "functions", diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py index 37b28baea..dcd4283ba 100644 --- a/litellm/proxy/proxy_server.py +++ b/litellm/proxy/proxy_server.py @@ -1810,6 +1810,9 @@ async def generate_key_helper_fn( spend: float, key_max_budget: Optional[float] = None, # key_max_budget is used to Budget Per key key_budget_duration: Optional[str] = None, + key_soft_budget: Optional[ + float + ] = None, # key_soft_budget is the soft budget (alert threshold) per key max_budget: Optional[float] = None, # max_budget is used to Budget Per user budget_duration: Optional[str] = None, # max_budget is used to Budget Per user token: Optional[str] = None, @@ -1873,7 +1876,7 @@ async def generate_key_helper_fn( if prisma_client is not None: # create the Budget Row for the LiteLLM Verification Token budget_row = LiteLLM_BudgetTable( - soft_budget=50, + soft_budget=key_soft_budget or litellm.default_soft_budget, model_max_budget=model_max_budget or {}, created_by=user_id, updated_by=user_id, @@ -3347,6 +3350,8 @@ async def generate_key_fn( # if we get max_budget passed to /key/generate, then use it as key_max_budget. Since generate_key_helper_fn is used to make new users if "max_budget" in data_json: data_json["key_max_budget"] = data_json.pop("max_budget", None) + if "soft_budget" in data_json: + data_json["key_soft_budget"] = data_json.pop("soft_budget", None) if "budget_duration" in data_json: data_json["key_budget_duration"] = data_json.pop("budget_duration", None)