fix(proxy_server.py): allow 'upperbound_key_generate' params to be set via 'os.environ/'

This commit is contained in:
Krrish Dholakia 2024-04-09 07:48:09 -07:00
parent fd424a387c
commit df62f931e7
4 changed files with 41 additions and 6 deletions

View file

@ -3,7 +3,11 @@ import threading, requests, os
from typing import Callable, List, Optional, Dict, Union, Any, Literal
from litellm.caching import Cache
from litellm._logging import set_verbose, _turn_on_debug, verbose_logger
from litellm.proxy._types import KeyManagementSystem, KeyManagementSettings
from litellm.proxy._types import (
KeyManagementSystem,
KeyManagementSettings,
LiteLLM_UpperboundKeyGenerateParams,
)
import httpx
import dotenv
@ -172,7 +176,7 @@ dynamodb_table_name: Optional[str] = None
s3_callback_params: Optional[Dict] = None
generic_logger_headers: Optional[Dict] = None
default_key_generate_params: Optional[Dict] = None
upperbound_key_generate_params: Optional[Dict] = None
upperbound_key_generate_params: Optional[LiteLLM_UpperboundKeyGenerateParams] = None
default_user_params: Optional[Dict] = None
default_team_settings: Optional[List] = None
max_user_budget: Optional[float] = None

View file

@ -24,6 +24,8 @@ model_list:
litellm_settings:
success_callback: ["prometheus"]
upperbound_key_generate_params:
max_budget: os.environ/LITELLM_UPPERBOUND_KEYS_MAX_BUDGET
# litellm_settings:
# drop_params: True

View file

@ -38,6 +38,17 @@ class LiteLLMBase(BaseModel):
protected_namespaces = ()
class LiteLLM_UpperboundKeyGenerateParams(LiteLLMBase):
    """
    Upper bounds for the params a key created via `/key/generate` may request.

    Every field is optional. A field left as None places no upper bound on
    the matching `/key/generate` param.
    """

    # Largest max_budget a generated key may request.
    max_budget: Optional[float] = None
    # Longest budget_duration a generated key may request, as a duration
    # string (e.g. "30s", "30m", "30h", "30d"). Declared explicitly because
    # the upper-bound check looks this attribute up by name; without the
    # field the configured duration bound would never be found.
    budget_duration: Optional[str] = None
    # Largest max_parallel_requests a generated key may request.
    max_parallel_requests: Optional[int] = None
    # Largest tokens-per-minute limit a generated key may request.
    tpm_limit: Optional[int] = None
    # Largest requests-per-minute limit a generated key may request.
    rpm_limit: Optional[int] = None
class LiteLLMRoutes(enum.Enum):
openai_routes: List = [ # chat completions
"/openai/deployments/{model}/chat/completions",

View file

@ -2181,6 +2181,18 @@ class ProxyConfig:
f"{blue_color_code} setting litellm.{key}={value}{reset_color_code}"
)
setattr(litellm, key, value)
elif key == "upperbound_key_generate_params":
if value is not None and isinstance(value, dict):
for _k, _v in value.items():
if isinstance(_v, str) and _v.startswith("os.environ/"):
value[_k] = litellm.get_secret(_v)
litellm.upperbound_key_generate_params = (
LiteLLM_UpperboundKeyGenerateParams(**value)
)
else:
raise Exception(
f"Invalid value set for upperbound_key_generate_params - value={value}"
)
else:
verbose_proxy_logger.debug(
f"{blue_color_code} setting litellm.{key}={value}{reset_color_code}"
@ -4302,7 +4314,11 @@ async def generate_key_fn(
for elem in data:
# if an upper bound is set for this key in litellm.upperbound_key_generate_params, reject values that exceed it
key, value = elem
if value is not None and key in litellm.upperbound_key_generate_params:
if (
value is not None
and getattr(litellm.upperbound_key_generate_params, key, None)
is not None
):
# if value is float/int
if key in [
"max_budget",
@ -4310,18 +4326,20 @@ async def generate_key_fn(
"tpm_limit",
"rpm_limit",
]:
if value > litellm.upperbound_key_generate_params[key]:
if value > getattr(litellm.upperbound_key_generate_params, key):
raise HTTPException(
status_code=400,
detail={
"error": f"{key} is over max limit set in config - user_value={value}; max_value={litellm.upperbound_key_generate_params[key]}"
"error": f"{key} is over max limit set in config - user_value={value}; max_value={getattr(litellm.upperbound_key_generate_params, key)}"
},
)
elif key == "budget_duration":
# budgets are in 1s, 1m, 1h, 1d, 1m (30s, 30m, 30h, 30d, 30m)
# compare the duration in seconds and max duration in seconds
upperbound_budget_duration = _duration_in_seconds(
duration=litellm.upperbound_key_generate_params[key]
duration=getattr(
litellm.upperbound_key_generate_params, key
)
)
user_set_budget_duration = _duration_in_seconds(duration=value)
if user_set_budget_duration > upperbound_budget_duration: