Merge branch 'main' into litellm_edit_teams

This commit is contained in:
Ishaan Jaff 2024-03-30 12:15:53 -07:00 committed by GitHub
commit 58cc11a312
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
52 changed files with 1343 additions and 608 deletions

File diff suppressed because one or more lines are too long

View file

@ -0,0 +1 @@
self.__BUILD_MANIFEST={__rewrites:{afterFiles:[],beforeFiles:[],fallback:[]},"/_error":["static/chunks/pages/_error-d6107f1aac0c574c.js"],sortedPages:["/_app","/_error"]},self.__BUILD_MANIFEST_CB&&self.__BUILD_MANIFEST_CB();

View file

@ -0,0 +1 @@
self.__SSG_MANIFEST=new Set([]);self.__SSG_MANIFEST_CB&&self.__SSG_MANIFEST_CB()

File diff suppressed because one or more lines are too long

View file

@ -1 +0,0 @@
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-d1ad37b1875df240.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-a507ee9e75a3be72.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-589b47e7a69d316f.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>🚅 LiteLLM</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-d1ad37b1875df240.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/f8da5a6a5b29d249.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[90177,[\"798\",\"static/chunks/798-4baed68da0c5497d.js\",\"931\",\"static/chunks/app/page-37392d6753f8a3d0.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/f8da5a6a5b29d249.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"L9N6TOWJaqSp22Vj96YE4\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"
children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_c23dc8\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be 
found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"🚅 LiteLLM\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>

View file

@ -1,7 +0,0 @@
2:I[77831,[],""]
3:I[90177,["798","static/chunks/798-4baed68da0c5497d.js","931","static/chunks/app/page-37392d6753f8a3d0.js"],""]
4:I[5613,[],""]
5:I[31778,[],""]
0:["L9N6TOWJaqSp22Vj96YE4",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/f8da5a6a5b29d249.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"🚅 LiteLLM"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null

View file

@ -5,10 +5,15 @@ model_list:
api_key: my-fake-key
api_base: https://exampleopenaiendpoint-production.up.railway.app/
litellm_settings:
max_budget: 600020
budget_duration: 30d
general_settings:
master_key: sk-1234
proxy_batch_write_at: 5 # 👈 Frequency of batch writing logs to server (in seconds)
proxy_batch_write_at: 60 # 👈 Frequency of batch writing logs to server (in seconds)
enable_jwt_auth: True
alerting: ["slack"]
litellm_jwtauth:
admin_jwt_scope: "litellm_proxy_admin"
team_jwt_scope: "litellm_team"

View file

@ -18,6 +18,7 @@ from litellm.proxy._types import (
from typing import Optional, Literal, Union
from litellm.proxy.utils import PrismaClient
from litellm.caching import DualCache
import litellm
all_routes = LiteLLMRoutes.openai_routes.value + LiteLLMRoutes.management_routes.value
@ -26,6 +27,7 @@ def common_checks(
request_body: dict,
team_object: LiteLLM_TeamTable,
end_user_object: Optional[LiteLLM_EndUserTable],
global_proxy_spend: Optional[float],
general_settings: dict,
route: str,
) -> bool:
@ -37,6 +39,7 @@ def common_checks(
3. If team is in budget
4. If end_user ('user' passed to /chat/completions, /embeddings endpoint) is in budget
5. [OPTIONAL] If 'enforce_user_param' enabled - did developer pass in 'user' param for openai endpoints
6. [OPTIONAL] If 'litellm.max_budget' is set (>0), is proxy under budget
"""
_model = request_body.get("model", None)
if team_object.blocked == True:
@ -66,7 +69,7 @@ def common_checks(
end_user_budget = end_user_object.litellm_budget_table.max_budget
if end_user_budget is not None and end_user_object.spend > end_user_budget:
raise Exception(
f"End User={end_user_object.user_id} over budget. Spend={end_user_object.spend}, Budget={end_user_budget}"
f"ExceededBudget: End User={end_user_object.user_id} over budget. Spend={end_user_object.spend}, Budget={end_user_budget}"
)
# 5. [OPTIONAL] If 'enforce_user_param' enabled - did developer pass in 'user' param for openai endpoints
if (
@ -77,7 +80,12 @@ def common_checks(
raise Exception(
f"'user' param not passed in. 'enforce_user_param'={general_settings['enforce_user_param']}"
)
# 6. [OPTIONAL] If 'litellm.max_budget' is set (>0), is proxy under budget
if litellm.max_budget > 0 and global_proxy_spend is not None:
if global_proxy_spend > litellm.max_budget:
raise Exception(
f"ExceededBudget: LiteLLM Proxy has exceeded its budget. Current spend: {global_proxy_spend}; Max Budget: {litellm.max_budget}"
)
return True

View file

@ -114,7 +114,8 @@ class JWTHandler:
public_key: Optional[dict] = None
if len(keys) == 1:
public_key = keys[0]
if kid is None or keys["kid"] == kid:
public_key = keys[0]
elif len(keys) > 1:
for key in keys:
if kid is not None and key["kid"] == kid:

View file

@ -437,12 +437,49 @@ async def user_api_key_auth(
key=end_user_id, value=end_user_object
)
global_proxy_spend = None
if litellm.max_budget > 0: # user set proxy max budget
# check cache
global_proxy_spend = await user_api_key_cache.async_get_cache(
key="{}:spend".format(litellm_proxy_admin_name)
)
if global_proxy_spend is None and prisma_client is not None:
# get from db
sql_query = """SELECT SUM(spend) as total_spend FROM "MonthlyGlobalSpend";"""
response = await prisma_client.db.query_raw(query=sql_query)
global_proxy_spend = response[0]["total_spend"]
await user_api_key_cache.async_set_cache(
key="{}:spend".format(litellm_proxy_admin_name),
value=global_proxy_spend,
ttl=60,
)
if global_proxy_spend is not None:
user_info = {
"user_id": litellm_proxy_admin_name,
"max_budget": litellm.max_budget,
"spend": global_proxy_spend,
"user_email": "",
}
asyncio.create_task(
proxy_logging_obj.budget_alerts(
user_max_budget=litellm.max_budget,
user_current_spend=global_proxy_spend,
type="user_and_proxy_budget",
user_info=user_info,
)
)
# run through common checks
_ = common_checks(
request_body=request_data,
team_object=team_object,
end_user_object=end_user_object,
general_settings=general_settings,
global_proxy_spend=global_proxy_spend,
route=route,
)
# save user object in cache
@ -656,17 +693,8 @@ async def user_api_key_auth(
)
# Check 2. If user_id for this token is in budget
## Check 2.1 If global proxy is in budget
## Check 2.2 [OPTIONAL - checked only if litellm.max_user_budget is not None] If 'user' passed in /chat/completions is in budget
if valid_token.user_id is not None:
user_id_list = [valid_token.user_id, litellm_proxy_budget_name]
if (
litellm.max_user_budget is not None
): # Check if 'user' passed in /chat/completions is in budget, only checked if litellm.max_user_budget is set
user_passed_to_chat_completions = request_data.get("user", None)
if user_passed_to_chat_completions is not None:
user_id_list.append(user_passed_to_chat_completions)
user_id_list = [valid_token.user_id]
for id in user_id_list:
value = user_api_key_cache.get_cache(key=id)
if value is not None:
@ -675,13 +703,12 @@ async def user_api_key_auth(
user_id_information.append(value)
if user_id_information is None or (
isinstance(user_id_information, list)
and len(user_id_information) < 2
and len(user_id_information) < 1
):
if prisma_client is not None:
user_id_information = await prisma_client.get_data(
user_id_list=[
valid_token.user_id,
litellm_proxy_budget_name,
],
table_name="user",
query_type="find_all",
@ -881,11 +908,54 @@ async def user_api_key_auth(
blocked=valid_token.team_blocked,
models=valid_token.team_models,
)
_end_user_object = None
if "user" in request_data:
_id = "end_user_id:{}".format(request_data["user"])
_end_user_object = await user_api_key_cache.async_get_cache(key=_id)
if _end_user_object is not None:
_end_user_object = LiteLLM_EndUserTable(**_end_user_object)
global_proxy_spend = None
if litellm.max_budget > 0: # user set proxy max budget
# check cache
global_proxy_spend = await user_api_key_cache.async_get_cache(
key="{}:spend".format(litellm_proxy_admin_name)
)
if global_proxy_spend is None:
# get from db
sql_query = """SELECT SUM(spend) as total_spend FROM "MonthlyGlobalSpend";"""
response = await prisma_client.db.query_raw(query=sql_query)
global_proxy_spend = response[0]["total_spend"]
await user_api_key_cache.async_set_cache(
key="{}:spend".format(litellm_proxy_admin_name),
value=global_proxy_spend,
ttl=60,
)
if global_proxy_spend is not None:
user_info = {
"user_id": litellm_proxy_admin_name,
"max_budget": litellm.max_budget,
"spend": global_proxy_spend,
"user_email": "",
}
asyncio.create_task(
proxy_logging_obj.budget_alerts(
user_max_budget=litellm.max_budget,
user_current_spend=global_proxy_spend,
type="user_and_proxy_budget",
user_info=user_info,
)
)
_ = common_checks(
request_body=request_data,
team_object=_team_obj,
end_user_object=None,
end_user_object=_end_user_object,
general_settings=general_settings,
global_proxy_spend=global_proxy_spend,
route=route,
)
# Token passed all checks
@ -1553,7 +1623,7 @@ async def update_cache(
async def _update_user_cache():
## UPDATE CACHE FOR USER ID + GLOBAL PROXY
user_ids = [user_id, litellm_proxy_budget_name, end_user_id]
user_ids = [user_id]
try:
for _id in user_ids:
# Fetch the existing cost for the given user
@ -1594,14 +1664,26 @@ async def update_cache(
user_api_key_cache.set_cache(
key=_id, value=existing_spend_obj.json()
)
## UPDATE GLOBAL PROXY ##
global_proxy_spend = await user_api_key_cache.async_get_cache(
key="{}:spend".format(litellm_proxy_admin_name)
)
if global_proxy_spend is None:
await user_api_key_cache.async_set_cache(
key="{}:spend".format(litellm_proxy_admin_name), value=response_cost
)
elif response_cost is not None and global_proxy_spend is not None:
increment = global_proxy_spend + response_cost
await user_api_key_cache.async_set_cache(
key="{}:spend".format(litellm_proxy_admin_name), value=increment
)
except Exception as e:
verbose_proxy_logger.debug(
f"An error occurred updating user cache: {str(e)}\n\n{traceback.format_exc()}"
)
async def _update_end_user_cache():
## UPDATE CACHE FOR END USER ID
_id = end_user_id
_id = "end_user_id:{}".format(end_user_id)
try:
# Fetch the existing cost for the given user
existing_spend_obj = await user_api_key_cache.async_get_cache(key=_id)
@ -1609,14 +1691,14 @@ async def update_cache(
# if user does not exist in LiteLLM_UserTable, create a new user
existing_spend = 0
max_user_budget = None
if litellm.max_user_budget is not None:
max_user_budget = litellm.max_user_budget
if litellm.max_end_user_budget is not None:
max_end_user_budget = litellm.max_end_user_budget
existing_spend_obj = LiteLLM_EndUserTable(
user_id=_id,
spend=0,
blocked=False,
litellm_budget_table=LiteLLM_BudgetTable(
max_budget=max_user_budget
max_budget=max_end_user_budget
),
)
verbose_proxy_logger.debug(
@ -2909,6 +2991,11 @@ def model_list(
dependencies=[Depends(user_api_key_auth)],
tags=["completions"],
)
@router.post(
"/openai/deployments/{model:path}/completions",
dependencies=[Depends(user_api_key_auth)],
tags=["completions"],
)
async def completion(
request: Request,
fastapi_response: Response,
@ -4049,7 +4136,6 @@ async def generate_key_fn(
)
_budget_id = getattr(_budget, "budget_id", None)
data_json = data.json() # type: ignore
# if we get max_budget passed to /key/generate, then use it as key_max_budget. Since generate_key_helper_fn is used to make new users
if "max_budget" in data_json:
data_json["key_max_budget"] = data_json.pop("max_budget", None)
@ -4108,6 +4194,13 @@ async def update_key_fn(request: Request, data: UpdateKeyRequest):
0,
): # models default to [], spend defaults to 0, we should not reset these values
non_default_values[k] = v
if "duration" in non_default_values:
duration = non_default_values.pop("duration")
duration_s = _duration_in_seconds(duration=duration)
expires = datetime.utcnow() + timedelta(seconds=duration_s)
non_default_values["expires"] = expires
response = await prisma_client.update_data(
token=key, data={**non_default_values, "token": key}
)
@ -6051,7 +6144,7 @@ async def team_member_delete(
-D '{
"team_id": "45e3e396-ee08-4a61-a88e-16b3ce7e0849",
"member": {"role": "user", "user_id": "krrish247652@berri.ai"}
"user_id": "krrish247652@berri.ai"
}'
```
"""

View file

@ -1941,9 +1941,9 @@ async def update_spend(
end_user_id,
response_cost,
) in prisma_client.end_user_list_transactons.items():
max_user_budget = None
if litellm.max_user_budget is not None:
max_user_budget = litellm.max_user_budget
max_end_user_budget = None
if litellm.max_end_user_budget is not None:
max_end_user_budget = litellm.max_end_user_budget
new_user_obj = LiteLLM_EndUserTable(
user_id=end_user_id, spend=response_cost, blocked=False
)