forked from phoenix/litellm-mirror
fix(proxy_server.py): allow 'upperbound_key_generate' params to be set via 'os.environ/'
This commit is contained in:
parent
fd424a387c
commit
df62f931e7
4 changed files with 41 additions and 6 deletions
|
@ -3,7 +3,11 @@ import threading, requests, os
|
||||||
from typing import Callable, List, Optional, Dict, Union, Any, Literal
|
from typing import Callable, List, Optional, Dict, Union, Any, Literal
|
||||||
from litellm.caching import Cache
|
from litellm.caching import Cache
|
||||||
from litellm._logging import set_verbose, _turn_on_debug, verbose_logger
|
from litellm._logging import set_verbose, _turn_on_debug, verbose_logger
|
||||||
from litellm.proxy._types import KeyManagementSystem, KeyManagementSettings
|
from litellm.proxy._types import (
|
||||||
|
KeyManagementSystem,
|
||||||
|
KeyManagementSettings,
|
||||||
|
LiteLLM_UpperboundKeyGenerateParams,
|
||||||
|
)
|
||||||
import httpx
|
import httpx
|
||||||
import dotenv
|
import dotenv
|
||||||
|
|
||||||
|
@ -172,7 +176,7 @@ dynamodb_table_name: Optional[str] = None
|
||||||
s3_callback_params: Optional[Dict] = None
|
s3_callback_params: Optional[Dict] = None
|
||||||
generic_logger_headers: Optional[Dict] = None
|
generic_logger_headers: Optional[Dict] = None
|
||||||
default_key_generate_params: Optional[Dict] = None
|
default_key_generate_params: Optional[Dict] = None
|
||||||
upperbound_key_generate_params: Optional[Dict] = None
|
upperbound_key_generate_params: Optional[LiteLLM_UpperboundKeyGenerateParams] = None
|
||||||
default_user_params: Optional[Dict] = None
|
default_user_params: Optional[Dict] = None
|
||||||
default_team_settings: Optional[List] = None
|
default_team_settings: Optional[List] = None
|
||||||
max_user_budget: Optional[float] = None
|
max_user_budget: Optional[float] = None
|
||||||
|
|
|
@ -24,6 +24,8 @@ model_list:
|
||||||
|
|
||||||
litellm_settings:
|
litellm_settings:
|
||||||
success_callback: ["prometheus"]
|
success_callback: ["prometheus"]
|
||||||
|
upperbound_key_generate_params:
|
||||||
|
max_budget: os.environ/LITELLM_UPPERBOUND_KEYS_MAX_BUDGET
|
||||||
|
|
||||||
# litellm_settings:
|
# litellm_settings:
|
||||||
# drop_params: True
|
# drop_params: True
|
||||||
|
|
|
@ -38,6 +38,17 @@ class LiteLLMBase(BaseModel):
|
||||||
protected_namespaces = ()
|
protected_namespaces = ()
|
||||||
|
|
||||||
|
|
||||||
|
class LiteLLM_UpperboundKeyGenerateParams(LiteLLMBase):
    """
    Upper bounds for the parameters of keys created via `/key/generate`.

    Every field is optional. A field left as `None` means no upper bound is
    configured for that parameter, so the key-generation check skips it.
    """

    max_budget: Optional[float] = None
    # Duration string; the key-generation check converts it to seconds before
    # comparing it with the requested value (format presumably like "30d" --
    # confirm against `_duration_in_seconds`). Declared here so that the
    # `budget_duration` branch of that check can actually read a configured
    # bound via `getattr`.
    budget_duration: Optional[str] = None
    max_parallel_requests: Optional[int] = None
    tpm_limit: Optional[int] = None
    rpm_limit: Optional[int] = None
|
||||||
|
|
||||||
|
|
||||||
class LiteLLMRoutes(enum.Enum):
|
class LiteLLMRoutes(enum.Enum):
|
||||||
openai_routes: List = [ # chat completions
|
openai_routes: List = [ # chat completions
|
||||||
"/openai/deployments/{model}/chat/completions",
|
"/openai/deployments/{model}/chat/completions",
|
||||||
|
|
|
@ -2181,6 +2181,18 @@ class ProxyConfig:
|
||||||
f"{blue_color_code} setting litellm.{key}={value}{reset_color_code}"
|
f"{blue_color_code} setting litellm.{key}={value}{reset_color_code}"
|
||||||
)
|
)
|
||||||
setattr(litellm, key, value)
|
setattr(litellm, key, value)
|
||||||
|
elif key == "upperbound_key_generate_params":
|
||||||
|
if value is not None and isinstance(value, dict):
|
||||||
|
for _k, _v in value.items():
|
||||||
|
if isinstance(_v, str) and _v.startswith("os.environ/"):
|
||||||
|
value[_k] = litellm.get_secret(_v)
|
||||||
|
litellm.upperbound_key_generate_params = (
|
||||||
|
LiteLLM_UpperboundKeyGenerateParams(**value)
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
raise Exception(
|
||||||
|
f"Invalid value set for upperbound_key_generate_params - value={value}"
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
verbose_proxy_logger.debug(
|
verbose_proxy_logger.debug(
|
||||||
f"{blue_color_code} setting litellm.{key}={value}{reset_color_code}"
|
f"{blue_color_code} setting litellm.{key}={value}{reset_color_code}"
|
||||||
|
@ -4302,7 +4314,11 @@ async def generate_key_fn(
|
||||||
for elem in data:
|
for elem in data:
|
||||||
# if key in litellm.upperbound_key_generate_params, use the min of value and litellm.upperbound_key_generate_params[key]
|
# if key in litellm.upperbound_key_generate_params, use the min of value and litellm.upperbound_key_generate_params[key]
|
||||||
key, value = elem
|
key, value = elem
|
||||||
if value is not None and key in litellm.upperbound_key_generate_params:
|
if (
|
||||||
|
value is not None
|
||||||
|
and getattr(litellm.upperbound_key_generate_params, key, None)
|
||||||
|
is not None
|
||||||
|
):
|
||||||
# if value is float/int
|
# if value is float/int
|
||||||
if key in [
|
if key in [
|
||||||
"max_budget",
|
"max_budget",
|
||||||
|
@ -4310,18 +4326,20 @@ async def generate_key_fn(
|
||||||
"tpm_limit",
|
"tpm_limit",
|
||||||
"rpm_limit",
|
"rpm_limit",
|
||||||
]:
|
]:
|
||||||
if value > litellm.upperbound_key_generate_params[key]:
|
if value > getattr(litellm.upperbound_key_generate_params, key):
|
||||||
raise HTTPException(
|
raise HTTPException(
|
||||||
status_code=400,
|
status_code=400,
|
||||||
detail={
|
detail={
|
||||||
"error": f"{key} is over max limit set in config - user_value={value}; max_value={litellm.upperbound_key_generate_params[key]}"
|
"error": f"{key} is over max limit set in config - user_value={value}; max_value={getattr(litellm.upperbound_key_generate_params, key)}"
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
elif key == "budget_duration":
|
elif key == "budget_duration":
|
||||||
# budgets are in 1s, 1m, 1h, 1d, 1m (30s, 30m, 30h, 30d, 30m)
|
# budgets are in 1s, 1m, 1h, 1d, 1m (30s, 30m, 30h, 30d, 30m)
|
||||||
# compare the duration in seconds and max duration in seconds
|
# compare the duration in seconds and max duration in seconds
|
||||||
upperbound_budget_duration = _duration_in_seconds(
|
upperbound_budget_duration = _duration_in_seconds(
|
||||||
duration=litellm.upperbound_key_generate_params[key]
|
duration=getattr(
|
||||||
|
litellm.upperbound_key_generate_params, key
|
||||||
|
)
|
||||||
)
|
)
|
||||||
user_set_budget_duration = _duration_in_seconds(duration=value)
|
user_set_budget_duration = _duration_in_seconds(duration=value)
|
||||||
if user_set_budget_duration > upperbound_budget_duration:
|
if user_set_budget_duration > upperbound_budget_duration:
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue