forked from phoenix/litellm-mirror
(docs) setting soft budgets
This commit is contained in:
parent
eb4f90115d
commit
fd9f8b7010
3 changed files with 11 additions and 1 deletion
|
@ -79,6 +79,7 @@ curl 'http://0.0.0.0:8000/key/generate' \
|
|||
"metadata": {"user": "ishaan@berri.ai"},
|
||||
"team_id": "core-infra",
|
||||
"max_budget": 10,
|
||||
"soft_budget": 5,
|
||||
}'
|
||||
```
|
||||
|
||||
|
@ -93,6 +94,7 @@ Request Params:
|
|||
- `config`: *Optional[dict]* - any key-specific configs, overrides config in config.yaml
|
||||
- `spend`: *Optional[int]* - Amount spent by key. Default is 0. Will be updated by proxy whenever key is used. https://docs.litellm.ai/docs/proxy/virtual_keys#managing-auth---tracking-spend
|
||||
- `max_budget`: *Optional[float]* - Specify max budget for a given key.
|
||||
- `soft_budget`: *Optional[float]* - Specify a soft budget limit for a given key. Alerts are sent when the key reaches its soft budget.
|
||||
- `model_max_budget`: *Optional[dict[str, float]]* - Specify max budget for each model, `model_max_budget={"gpt4": 0.5, "gpt-5": 0.01}`
|
||||
- `max_parallel_requests`: *Optional[int]* - Rate limit a user based on the number of parallel requests. Raises 429 error, if user's parallel requests > x.
|
||||
- `metadata`: *Optional[dict]* - Metadata for key, store information for key. Example metadata = {"team": "core-infra", "app": "app2", "email": "ishaan@berri.ai" }
|
||||
|
|
|
@ -79,6 +79,9 @@ max_budget: float = 0.0 # set the max budget across all providers
|
|||
budget_duration: Optional[str] = (
|
||||
None # proxy only - resets budget after fixed duration. You can set duration as seconds ("30s"), minutes ("30m"), hours ("30h"), days ("30d").
|
||||
)
|
||||
default_soft_budget: float = (
|
||||
50.0 # by default all litellm proxy keys have a soft budget of 50.0
|
||||
)
|
||||
_openai_finish_reasons = ["stop", "length", "function_call", "content_filter", "null"]
|
||||
_openai_completion_params = [
|
||||
"functions",
|
||||
|
|
|
@ -1810,6 +1810,9 @@ async def generate_key_helper_fn(
|
|||
spend: float,
|
||||
key_max_budget: Optional[float] = None, # key_max_budget is used to Budget Per key
|
||||
key_budget_duration: Optional[str] = None,
|
||||
key_soft_budget: Optional[
|
||||
float
|
||||
] = None, # key_soft_budget is the soft budget per key (alerting threshold)
|
||||
max_budget: Optional[float] = None, # max_budget is used to Budget Per user
|
||||
budget_duration: Optional[str] = None, # budget_duration applies to the Budget Per user
|
||||
token: Optional[str] = None,
|
||||
|
@ -1873,7 +1876,7 @@ async def generate_key_helper_fn(
|
|||
if prisma_client is not None:
|
||||
# create the Budget Row for the LiteLLM Verification Token
|
||||
budget_row = LiteLLM_BudgetTable(
|
||||
soft_budget=50,
|
||||
soft_budget=key_soft_budget or litellm.default_soft_budget,
|
||||
model_max_budget=model_max_budget or {},
|
||||
created_by=user_id,
|
||||
updated_by=user_id,
|
||||
|
@ -3347,6 +3350,8 @@ async def generate_key_fn(
|
|||
# if max_budget is passed to /key/generate, use it as key_max_budget, because generate_key_helper_fn is also used to create new users (where max_budget is the per-user budget)
|
||||
if "max_budget" in data_json:
|
||||
data_json["key_max_budget"] = data_json.pop("max_budget", None)
|
||||
if "soft_budget" in data_json:
|
||||
data_json["key_soft_budget"] = data_json.pop("soft_budget", None)
|
||||
|
||||
if "budget_duration" in data_json:
|
||||
data_json["key_budget_duration"] = data_json.pop("budget_duration", None)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue