(docs) setting soft budgets

This commit is contained in:
ishaan-jaff 2024-03-02 13:05:00 -08:00
parent eb4f90115d
commit fd9f8b7010
3 changed files with 11 additions and 1 deletions

View file

@ -79,6 +79,7 @@ curl 'http://0.0.0.0:8000/key/generate' \
"metadata": {"user": "ishaan@berri.ai"},
"team_id": "core-infra",
"max_budget": 10,
"soft_budget": 5
}'
```
@ -93,6 +94,7 @@ Request Params:
- `config`: *Optional[dict]* - any key-specific configs, overrides config in config.yaml
- `spend`: *Optional[int]* - Amount spent by key. Default is 0. Will be updated by proxy whenever key is used. https://docs.litellm.ai/docs/proxy/virtual_keys#managing-auth---tracking-spend
- `max_budget`: *Optional[float]* - Specify max budget for a given key.
- `soft_budget`: *Optional[float]* - Specify a soft budget limit for a given key. Alerts are sent when the key reaches its soft budget.
- `model_max_budget`: *Optional[dict[str, float]]* - Specify max budget for each model, `model_max_budget={"gpt4": 0.5, "gpt-5": 0.01}`
- `max_parallel_requests`: *Optional[int]* - Rate limit a user based on the number of parallel requests. Raises 429 error, if user's parallel requests > x.
- `metadata`: *Optional[dict]* - Metadata for key, store information for key. Example metadata = {"team": "core-infra", "app": "app2", "email": "ishaan@berri.ai" }

View file

@ -79,6 +79,9 @@ max_budget: float = 0.0 # set the max budget across all providers
budget_duration: Optional[str] = (
None # proxy only - resets budget after fixed duration. You can set duration as seconds ("30s"), minutes ("30m"), hours ("30h"), days ("30d").
)
default_soft_budget: float = (
50.0 # by default all litellm proxy keys have a soft budget of 50.0
)
_openai_finish_reasons = ["stop", "length", "function_call", "content_filter", "null"]
_openai_completion_params = [
"functions",

View file

@ -1810,6 +1810,9 @@ async def generate_key_helper_fn(
spend: float,
key_max_budget: Optional[float] = None, # key_max_budget is used to Budget Per key
key_budget_duration: Optional[str] = None,
key_soft_budget: Optional[
float
] = None, # key_soft_budget is the soft budget per key; alerts are sent when the key reaches it
max_budget: Optional[float] = None, # max_budget is used to Budget Per user
budget_duration: Optional[str] = None, # budget_duration is used to Budget Per user
token: Optional[str] = None,
@ -1873,7 +1876,7 @@ async def generate_key_helper_fn(
if prisma_client is not None:
# create the Budget Row for the LiteLLM Verification Token
budget_row = LiteLLM_BudgetTable(
soft_budget=50,
soft_budget=key_soft_budget or litellm.default_soft_budget,
model_max_budget=model_max_budget or {},
created_by=user_id,
updated_by=user_id,
@ -3347,6 +3350,8 @@ async def generate_key_fn(
# if we get max_budget passed to /key/generate, then use it as key_max_budget. Since generate_key_helper_fn is used to make new users
if "max_budget" in data_json:
data_json["key_max_budget"] = data_json.pop("max_budget", None)
if "soft_budget" in data_json:
data_json["key_soft_budget"] = data_json.pop("soft_budget", None)
if "budget_duration" in data_json:
data_json["key_budget_duration"] = data_json.pop("budget_duration", None)