fix(proxy_server.py): allow 'upperbound_key_generate' params to be set via 'os.environ/'

This commit is contained in:
Krrish Dholakia 2024-04-09 07:48:09 -07:00
parent fd424a387c
commit df62f931e7
4 changed files with 41 additions and 6 deletions

View file

@ -3,7 +3,11 @@ import threading, requests, os
from typing import Callable, List, Optional, Dict, Union, Any, Literal from typing import Callable, List, Optional, Dict, Union, Any, Literal
from litellm.caching import Cache from litellm.caching import Cache
from litellm._logging import set_verbose, _turn_on_debug, verbose_logger from litellm._logging import set_verbose, _turn_on_debug, verbose_logger
from litellm.proxy._types import KeyManagementSystem, KeyManagementSettings from litellm.proxy._types import (
KeyManagementSystem,
KeyManagementSettings,
LiteLLM_UpperboundKeyGenerateParams,
)
import httpx import httpx
import dotenv import dotenv
@ -172,7 +176,7 @@ dynamodb_table_name: Optional[str] = None
s3_callback_params: Optional[Dict] = None s3_callback_params: Optional[Dict] = None
generic_logger_headers: Optional[Dict] = None generic_logger_headers: Optional[Dict] = None
default_key_generate_params: Optional[Dict] = None default_key_generate_params: Optional[Dict] = None
upperbound_key_generate_params: Optional[Dict] = None upperbound_key_generate_params: Optional[LiteLLM_UpperboundKeyGenerateParams] = None
default_user_params: Optional[Dict] = None default_user_params: Optional[Dict] = None
default_team_settings: Optional[List] = None default_team_settings: Optional[List] = None
max_user_budget: Optional[float] = None max_user_budget: Optional[float] = None

View file

@ -24,6 +24,8 @@ model_list:
litellm_settings: litellm_settings:
success_callback: ["prometheus"] success_callback: ["prometheus"]
upperbound_key_generate_params:
max_budget: os.environ/LITELLM_UPPERBOUND_KEYS_MAX_BUDGET
# litellm_settings: # litellm_settings:
# drop_params: True # drop_params: True

View file

@ -38,6 +38,17 @@ class LiteLLMBase(BaseModel):
protected_namespaces = () protected_namespaces = ()
class LiteLLM_UpperboundKeyGenerateParams(LiteLLMBase):
    """
    Upper bounds for the parameters a key created via `/key/generate` may request.

    Every field is optional; a field left as `None` imposes no upper bound on
    the corresponding `/key/generate` parameter.
    """

    # Numeric ceilings for the matching `/key/generate` parameters.
    max_budget: Optional[float] = None
    max_parallel_requests: Optional[int] = None
    tpm_limit: Optional[int] = None
    rpm_limit: Optional[int] = None
    # Longest budget duration a key may request. `generate_key_fn` reads this
    # attribute for its `budget_duration` check, so it must be declared here
    # for a configured value to reach that check.
    # NOTE(review): presumably a duration string such as "30s"/"30m"/"30h"/"30d"
    # as accepted by `_duration_in_seconds` - confirm the exact format.
    budget_duration: Optional[str] = None
class LiteLLMRoutes(enum.Enum): class LiteLLMRoutes(enum.Enum):
openai_routes: List = [ # chat completions openai_routes: List = [ # chat completions
"/openai/deployments/{model}/chat/completions", "/openai/deployments/{model}/chat/completions",

View file

@ -2181,6 +2181,18 @@ class ProxyConfig:
f"{blue_color_code} setting litellm.{key}={value}{reset_color_code}" f"{blue_color_code} setting litellm.{key}={value}{reset_color_code}"
) )
setattr(litellm, key, value) setattr(litellm, key, value)
elif key == "upperbound_key_generate_params":
if value is not None and isinstance(value, dict):
for _k, _v in value.items():
if isinstance(_v, str) and _v.startswith("os.environ/"):
value[_k] = litellm.get_secret(_v)
litellm.upperbound_key_generate_params = (
LiteLLM_UpperboundKeyGenerateParams(**value)
)
else:
raise Exception(
f"Invalid value set for upperbound_key_generate_params - value={value}"
)
else: else:
verbose_proxy_logger.debug( verbose_proxy_logger.debug(
f"{blue_color_code} setting litellm.{key}={value}{reset_color_code}" f"{blue_color_code} setting litellm.{key}={value}{reset_color_code}"
@ -4302,7 +4314,11 @@ async def generate_key_fn(
for elem in data: for elem in data:
# if key in litellm.upperbound_key_generate_params, use the min of value and litellm.upperbound_key_generate_params[key] # if key in litellm.upperbound_key_generate_params, use the min of value and litellm.upperbound_key_generate_params[key]
key, value = elem key, value = elem
if value is not None and key in litellm.upperbound_key_generate_params: if (
value is not None
and getattr(litellm.upperbound_key_generate_params, key, None)
is not None
):
# if value is float/int # if value is float/int
if key in [ if key in [
"max_budget", "max_budget",
@ -4310,18 +4326,20 @@ async def generate_key_fn(
"tpm_limit", "tpm_limit",
"rpm_limit", "rpm_limit",
]: ]:
if value > litellm.upperbound_key_generate_params[key]: if value > getattr(litellm.upperbound_key_generate_params, key):
raise HTTPException( raise HTTPException(
status_code=400, status_code=400,
detail={ detail={
"error": f"{key} is over max limit set in config - user_value={value}; max_value={litellm.upperbound_key_generate_params[key]}" "error": f"{key} is over max limit set in config - user_value={value}; max_value={getattr(litellm.upperbound_key_generate_params, key)}"
}, },
) )
elif key == "budget_duration": elif key == "budget_duration":
# budgets are in 1s, 1m, 1h, 1d, 1m (30s, 30m, 30h, 30d, 30m) # budgets are in 1s, 1m, 1h, 1d, 1m (30s, 30m, 30h, 30d, 30m)
# compare the duration in seconds and max duration in seconds # compare the duration in seconds and max duration in seconds
upperbound_budget_duration = _duration_in_seconds( upperbound_budget_duration = _duration_in_seconds(
duration=litellm.upperbound_key_generate_params[key] duration=getattr(
litellm.upperbound_key_generate_params, key
)
) )
user_set_budget_duration = _duration_in_seconds(duration=value) user_set_budget_duration = _duration_in_seconds(duration=value)
if user_set_budget_duration > upperbound_budget_duration: if user_set_budget_duration > upperbound_budget_duration: