forked from phoenix/litellm-mirror
(docs) setting soft budgets
This commit is contained in:
parent
eb4f90115d
commit
fd9f8b7010
3 changed files with 11 additions and 1 deletions
|
@ -79,6 +79,7 @@ curl 'http://0.0.0.0:8000/key/generate' \
|
||||||
"metadata": {"user": "ishaan@berri.ai"},
|
"metadata": {"user": "ishaan@berri.ai"},
|
||||||
"team_id": "core-infra",
|
"team_id": "core-infra",
|
||||||
"max_budget": 10,
|
"max_budget": 10,
|
||||||
|
"soft_budget": 5,
|
||||||
}'
|
}'
|
||||||
```
|
```
|
||||||
|
|
||||||
|
@ -93,6 +94,7 @@ Request Params:
|
||||||
- `config`: *Optional[dict]* - any key-specific configs, overrides config in config.yaml
|
- `config`: *Optional[dict]* - any key-specific configs, overrides config in config.yaml
|
||||||
- `spend`: *Optional[int]* - Amount spent by key. Default is 0. Will be updated by proxy whenever key is used. https://docs.litellm.ai/docs/proxy/virtual_keys#managing-auth---tracking-spend
|
- `spend`: *Optional[int]* - Amount spent by key. Default is 0. Will be updated by proxy whenever key is used. https://docs.litellm.ai/docs/proxy/virtual_keys#managing-auth---tracking-spend
|
||||||
- `max_budget`: *Optional[float]* - Specify max budget for a given key.
|
- `max_budget`: *Optional[float]* - Specify max budget for a given key.
|
||||||
|
- `soft_budget`: *Optional[float]* - Specify a soft budget limit for a given key. Alerts are sent when the key hits its soft budget.
|
||||||
- `model_max_budget`: *Optional[dict[str, float]]* - Specify max budget for each model, `model_max_budget={"gpt4": 0.5, "gpt-5": 0.01}`
|
- `model_max_budget`: *Optional[dict[str, float]]* - Specify max budget for each model, `model_max_budget={"gpt4": 0.5, "gpt-5": 0.01}`
|
||||||
- `max_parallel_requests`: *Optional[int]* - Rate limit a user based on the number of parallel requests. Raises 429 error, if user's parallel requests > x.
|
- `max_parallel_requests`: *Optional[int]* - Rate limit a user based on the number of parallel requests. Raises 429 error, if user's parallel requests > x.
|
||||||
- `metadata`: *Optional[dict]* - Metadata for key, store information for key. Example metadata = {"team": "core-infra", "app": "app2", "email": "ishaan@berri.ai" }
|
- `metadata`: *Optional[dict]* - Metadata for key, store information for key. Example metadata = {"team": "core-infra", "app": "app2", "email": "ishaan@berri.ai" }
|
||||||
|
|
|
@ -79,6 +79,9 @@ max_budget: float = 0.0 # set the max budget across all providers
|
||||||
budget_duration: Optional[str] = (
|
budget_duration: Optional[str] = (
|
||||||
None # proxy only - resets budget after fixed duration. You can set duration as seconds ("30s"), minutes ("30m"), hours ("30h"), days ("30d").
|
None # proxy only - resets budget after fixed duration. You can set duration as seconds ("30s"), minutes ("30m"), hours ("30h"), days ("30d").
|
||||||
)
|
)
|
||||||
|
default_soft_budget: float = (
|
||||||
|
50.0 # by default all litellm proxy keys have a soft budget of 50.0
|
||||||
|
)
|
||||||
_openai_finish_reasons = ["stop", "length", "function_call", "content_filter", "null"]
|
_openai_finish_reasons = ["stop", "length", "function_call", "content_filter", "null"]
|
||||||
_openai_completion_params = [
|
_openai_completion_params = [
|
||||||
"functions",
|
"functions",
|
||||||
|
|
|
@ -1810,6 +1810,9 @@ async def generate_key_helper_fn(
|
||||||
spend: float,
|
spend: float,
|
||||||
key_max_budget: Optional[float] = None, # key_max_budget is used to Budget Per key
|
key_max_budget: Optional[float] = None, # key_max_budget is used to Budget Per key
|
||||||
key_budget_duration: Optional[str] = None,
|
key_budget_duration: Optional[str] = None,
|
||||||
|
key_soft_budget: Optional[
|
||||||
|
float
|
||||||
|
] = None, # key_soft_budget is the soft budget per key; falls back to litellm.default_soft_budget when not set
|
||||||
max_budget: Optional[float] = None, # max_budget is used to Budget Per user
|
max_budget: Optional[float] = None, # max_budget is used to Budget Per user
|
||||||
budget_duration: Optional[str] = None, # budget_duration is used to reset the Budget Per user
|
budget_duration: Optional[str] = None, # budget_duration is used to reset the Budget Per user
|
||||||
token: Optional[str] = None,
|
token: Optional[str] = None,
|
||||||
|
@ -1873,7 +1876,7 @@ async def generate_key_helper_fn(
|
||||||
if prisma_client is not None:
|
if prisma_client is not None:
|
||||||
# create the Budget Row for the LiteLLM Verification Token
|
# create the Budget Row for the LiteLLM Verification Token
|
||||||
budget_row = LiteLLM_BudgetTable(
|
budget_row = LiteLLM_BudgetTable(
|
||||||
soft_budget=50,
|
soft_budget=key_soft_budget or litellm.default_soft_budget,
|
||||||
model_max_budget=model_max_budget or {},
|
model_max_budget=model_max_budget or {},
|
||||||
created_by=user_id,
|
created_by=user_id,
|
||||||
updated_by=user_id,
|
updated_by=user_id,
|
||||||
|
@ -3347,6 +3350,8 @@ async def generate_key_fn(
|
||||||
# if we get max_budget passed to /key/generate, then use it as key_max_budget. Since generate_key_helper_fn is used to make new users
|
# if we get max_budget passed to /key/generate, then use it as key_max_budget. Since generate_key_helper_fn is used to make new users
|
||||||
if "max_budget" in data_json:
|
if "max_budget" in data_json:
|
||||||
data_json["key_max_budget"] = data_json.pop("max_budget", None)
|
data_json["key_max_budget"] = data_json.pop("max_budget", None)
|
||||||
|
if "soft_budget" in data_json:
|
||||||
|
data_json["key_soft_budget"] = data_json.pop("soft_budget", None)
|
||||||
|
|
||||||
if "budget_duration" in data_json:
|
if "budget_duration" in data_json:
|
||||||
data_json["key_budget_duration"] = data_json.pop("budget_duration", None)
|
data_json["key_budget_duration"] = data_json.pop("budget_duration", None)
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue