Merge branch 'BerriAI:main' into main

greenscale-nandesh 2024-04-17 12:24:29 -07:00 committed by GitHub
commit 907e3973fd
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
44 changed files with 1001 additions and 156 deletions

View file

@ -163,6 +163,7 @@ os.environ["OPENAI_API_BASE"] = "openai-api-base" # OPTIONAL
| Model Name | Function Call |
|-----------------------|-----------------------------------------------------------------|
| gpt-4-turbo | `response = completion(model="gpt-4-turbo", messages=messages)` |
| gpt-4-turbo-preview | `response = completion(model="gpt-4-turbo-preview", messages=messages)` |
| gpt-4-0125-preview | `response = completion(model="gpt-4-0125-preview", messages=messages)` |
| gpt-4-1106-preview | `response = completion(model="gpt-4-1106-preview", messages=messages)` |
@ -185,6 +186,7 @@ These also support the `OPENAI_API_BASE` environment variable, which can be used
## OpenAI Vision Models
| Model Name | Function Call |
|-----------------------|-----------------------------------------------------------------|
| gpt-4-turbo | `response = completion(model="gpt-4-turbo", messages=messages)` |
| gpt-4-vision-preview | `response = completion(model="gpt-4-vision-preview", messages=messages)` |
#### Usage
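A minimal usage sketch for the vision models above; the API key and image URL are placeholders:

```python
# Placeholder key and image URL; any vision model from the table works.
import os
from litellm import completion

os.environ["OPENAI_API_KEY"] = "your-api-key"

response = completion(
    model="gpt-4-vision-preview",
    messages=[
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "What's in this image?"},
                {
                    "type": "image_url",
                    "image_url": {"url": "https://example.com/image.png"},
                },
            ],
        }
    ],
)
print(response.choices[0].message.content)
```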

View file

@ -25,21 +25,21 @@ class PrometheusLogger:
self.litellm_requests_metric = Counter(
name="litellm_requests_metric",
documentation="Total number of LLM calls to litellm",
labelnames=["end_user", "key", "model", "team"],
labelnames=["end_user", "hashed_api_key", "model", "team"],
)
# Counter for spend
self.litellm_spend_metric = Counter(
"litellm_spend_metric",
"Total spend on LLM requests",
labelnames=["end_user", "key", "model", "team"],
labelnames=["end_user", "hashed_api_key", "model", "team"],
)
# Counter for total_output_tokens
self.litellm_tokens_metric = Counter(
"litellm_total_tokens",
"Total number of input + output tokens from LLM requests",
labelnames=["end_user", "key", "model", "team"],
labelnames=["end_user", "hashed_api_key", "model", "team"],
)
except Exception as e:
print_verbose(f"Got exception on init prometheus client {str(e)}")
@ -75,6 +75,15 @@ class PrometheusLogger:
f"inside track_prometheus_metrics, model {model}, response_cost {response_cost}, tokens_used {tokens_used}, end_user_id {end_user_id}, user_api_key {user_api_key}"
)
if (
user_api_key is not None
and isinstance(user_api_key, str)
and user_api_key.startswith("sk-")
):
from litellm.proxy.utils import hash_token
user_api_key = hash_token(user_api_key)
self.litellm_requests_metric.labels(
end_user_id, user_api_key, model, user_api_team
).inc()
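A condensed sketch of the behavior above: raw `sk-` virtual keys are hashed via `hash_token` before being attached under the new `hashed_api_key` label, so plaintext keys never reach `/metrics`. Only `hash_token` comes from the proxy; the rest is illustrative:

```python
# Illustrative standalone version; hash_token ships with the proxy
# (litellm.proxy.utils), everything else here is a sketch.
from prometheus_client import Counter

from litellm.proxy.utils import hash_token

requests_metric = Counter(
    name="litellm_requests_metric",
    documentation="Total number of LLM calls to litellm",
    labelnames=["end_user", "hashed_api_key", "model", "team"],
)

def record_request(end_user_id, user_api_key, model, team):
    # never emit a raw virtual key as a metric label
    if isinstance(user_api_key, str) and user_api_key.startswith("sk-"):
        user_api_key = hash_token(user_api_key)
    requests_metric.labels(end_user_id, user_api_key, model, team).inc()
```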

View file

@ -349,8 +349,17 @@ def completion(
print_verbose(
f"VERTEX AI: vertex_project={vertex_project}; vertex_location={vertex_location}"
)
if vertex_credentials is not None and isinstance(vertex_credentials, str):
import google.oauth2.service_account
creds, _ = google.auth.default(quota_project_id=vertex_project)
json_obj = json.loads(vertex_credentials)
creds = google.oauth2.service_account.Credentials.from_service_account_info(
json_obj,
scopes=["https://www.googleapis.com/auth/cloud-platform"],
)
else:
creds, _ = google.auth.default(quota_project_id=vertex_project)
print_verbose(
f"VERTEX AI: creds={creds}; google application credentials: {os.getenv('GOOGLE_APPLICATION_CREDENTIALS')}"
)
@ -1171,6 +1180,7 @@ def embedding(
encoding=None,
vertex_project=None,
vertex_location=None,
vertex_credentials=None,
aembedding=False,
print_verbose=None,
):
@ -1191,7 +1201,17 @@ def embedding(
print_verbose(
f"VERTEX AI: vertex_project={vertex_project}; vertex_location={vertex_location}"
)
creds, _ = google.auth.default(quota_project_id=vertex_project)
if vertex_credentials is not None and isinstance(vertex_credentials, str):
import google.oauth2.service_account
json_obj = json.loads(vertex_credentials)
creds = google.oauth2.service_account.Credentials.from_service_account_info(
json_obj,
scopes=["https://www.googleapis.com/auth/cloud-platform"],
)
else:
creds, _ = google.auth.default(quota_project_id=vertex_project)
print_verbose(
f"VERTEX AI: creds={creds}; google application credentials: {os.getenv('GOOGLE_APPLICATION_CREDENTIALS')}"
)

View file

@ -1710,6 +1710,7 @@ def completion(
encoding=encoding,
vertex_location=vertex_ai_location,
vertex_project=vertex_ai_project,
vertex_credentials=vertex_credentials,
logging_obj=logging,
acompletion=acompletion,
)
@ -2807,6 +2808,11 @@ def embedding(
or litellm.vertex_location
or get_secret("VERTEXAI_LOCATION")
)
vertex_credentials = (
optional_params.pop("vertex_credentials", None)
or optional_params.pop("vertex_ai_credentials", None)
or get_secret("VERTEXAI_CREDENTIALS")
)
response = vertex_ai.embedding(
model=model,
@ -2817,6 +2823,7 @@ def embedding(
model_response=EmbeddingResponse(),
vertex_project=vertex_ai_project,
vertex_location=vertex_ai_location,
vertex_credentials=vertex_credentials,
aembedding=aembedding,
print_verbose=print_verbose,
)
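With the plumbing above, service-account JSON can be passed per request instead of relying on `GOOGLE_APPLICATION_CREDENTIALS`. A hedged sketch; the file path, project, and location are placeholders:

```python
# vertex_credentials is the raw service-account JSON string, which the
# code above parses with json.loads. All values are placeholders.
import litellm

with open("service_account.json") as f:
    vertex_credentials = f.read()

response = litellm.embedding(
    model="vertex_ai/textembedding-gecko",
    input=["good morning from litellm"],
    vertex_project="my-project",
    vertex_location="us-central1",
    vertex_credentials=vertex_credentials,
)
print(len(response.data))
```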

View file

@ -75,7 +75,8 @@
"litellm_provider": "openai",
"mode": "chat",
"supports_function_calling": true,
"supports_parallel_function_calling": true
"supports_parallel_function_calling": true,
"supports_vision": true
},
"gpt-4-turbo-2024-04-09": {
"max_tokens": 4096,
@ -86,7 +87,8 @@
"litellm_provider": "openai",
"mode": "chat",
"supports_function_calling": true,
"supports_parallel_function_calling": true
"supports_parallel_function_calling": true,
"supports_vision": true
},
"gpt-4-1106-preview": {
"max_tokens": 4096,
@ -1268,8 +1270,21 @@
"litellm_provider": "gemini",
"mode": "chat",
"supports_function_calling": true,
"supports_vision": true,
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
},
"gemini/gemini-1.5-pro-latest": {
"max_tokens": 8192,
"max_input_tokens": 1048576,
"max_output_tokens": 8192,
"input_cost_per_token": 0,
"output_cost_per_token": 0,
"litellm_provider": "gemini",
"mode": "chat",
"supports_function_calling": true,
"supports_vision": true,
"source": "https://ai.google.dev/models/gemini"
},
"gemini/gemini-pro-vision": {
"max_tokens": 2048,
"max_input_tokens": 30720,

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View file

@ -1,7 +1,7 @@
2:I[77831,[],""]
3:I[8251,["289","static/chunks/289-04be6cb9636840d2.js","931","static/chunks/app/page-15d0c6c10d700825.js"],""]
3:I[88740,["289","static/chunks/289-04be6cb9636840d2.js","931","static/chunks/app/page-2c0827b33aed42d7.js"],""]
4:I[5613,[],""]
5:I[31778,[],""]
0:["fcTpSzljtxsSagYnqnMB2",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/11cfce8bfdf6e8f1.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
0:["TqpzjpOA_s5IXVzgrYZ-F",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/11cfce8bfdf6e8f1.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null

View file

@ -51,7 +51,8 @@ class LiteLLM_UpperboundKeyGenerateParams(LiteLLMBase):
class LiteLLMRoutes(enum.Enum):
openai_routes: List = [ # chat completions
openai_routes: List = [
# chat completions
"/openai/deployments/{model}/chat/completions",
"/chat/completions",
"/v1/chat/completions",
@ -77,7 +78,14 @@ class LiteLLMRoutes(enum.Enum):
"/v1/models",
]
info_routes: List = ["/key/info", "/team/info", "/user/info", "/model/info"]
info_routes: List = [
"/key/info",
"/team/info",
"/user/info",
"/model/info",
"/v2/model/info",
"/v2/key/info",
]
management_routes: List = [ # key
"/key/generate",
@ -719,6 +727,10 @@ class ConfigYAML(LiteLLMBase):
description="litellm Module settings. See __init__.py for all, example litellm.drop_params=True, litellm.set_verbose=True, litellm.api_base, litellm.cache",
)
general_settings: Optional[ConfigGeneralSettings] = None
router_settings: Optional[dict] = Field(
None,
description="litellm router object settings. See router.py __init__ for all, example router.num_retries=5, router.timeout=5, router.max_retries=5, router.retry_after=5",
)
class Config:
protected_namespaces = ()
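A sketch of how the new `router_settings` field might be populated; it is a plain dict, and only keys on the router's allow-list (see `update_settings` further down) take effect. Values here are illustrative:

```python
# Illustrative values; router_settings is validated against the router's
# allow-list when applied.
from litellm.proxy._types import ConfigYAML

config = ConfigYAML(
    router_settings={
        "routing_strategy": "least-busy",
        "num_retries": 5,
        "timeout": 30,
        "retry_after": 5,
    }
)
print(config.router_settings)
```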

View file

@ -1010,48 +1010,52 @@ async def user_api_key_auth(
db=custom_db_client,
)
)
if route in LiteLLMRoutes.info_routes.value and (
not _is_user_proxy_admin(user_id_information)
): # check if user allowed to call an info route
if route == "/key/info":
# check if user can access this route
query_params = request.query_params
key = query_params.get("key")
if (
key is not None
and prisma_client.hash_token(token=key) != api_key
):
raise HTTPException(
status_code=status.HTTP_403_FORBIDDEN,
detail="user not allowed to access this key's info",
)
elif route == "/user/info":
# check if user can access this route
query_params = request.query_params
user_id = query_params.get("user_id")
verbose_proxy_logger.debug(
f"user_id: {user_id} & valid_token.user_id: {valid_token.user_id}"
)
if user_id != valid_token.user_id:
raise HTTPException(
status_code=status.HTTP_403_FORBIDDEN,
detail="key not allowed to access this user's info",
)
elif route == "/model/info":
# /model/info just shows models user has access to
if not _is_user_proxy_admin(user_id_information): # if non-admin
if route in LiteLLMRoutes.openai_routes.value:
pass
elif route == "/team/info":
# check if key can access this team's info
query_params = request.query_params
team_id = query_params.get("team_id")
if team_id != valid_token.team_id:
raise HTTPException(
status_code=status.HTTP_403_FORBIDDEN,
detail="key not allowed to access this team's info",
elif (
route in LiteLLMRoutes.info_routes.value
): # check if user allowed to call an info route
if route == "/key/info":
# check if user can access this route
query_params = request.query_params
key = query_params.get("key")
if (
key is not None
and prisma_client.hash_token(token=key) != api_key
):
raise HTTPException(
status_code=status.HTTP_403_FORBIDDEN,
detail="user not allowed to access this key's info",
)
elif route == "/user/info":
# check if user can access this route
query_params = request.query_params
user_id = query_params.get("user_id")
verbose_proxy_logger.debug(
f"user_id: {user_id} & valid_token.user_id: {valid_token.user_id}"
)
if user_id != valid_token.user_id:
raise HTTPException(
status_code=status.HTTP_403_FORBIDDEN,
detail="key not allowed to access this user's info",
)
elif route == "/model/info":
# /model/info just shows models user has access to
pass
elif route == "/team/info":
# check if key can access this team's info
query_params = request.query_params
team_id = query_params.get("team_id")
if team_id != valid_token.team_id:
raise HTTPException(
status_code=status.HTTP_403_FORBIDDEN,
detail="key not allowed to access this team's info",
)
else:
raise Exception(
f"Only master key can be used to generate, delete, update info for new keys/users."
f"Only master key can be used to generate, delete, update info for new keys/users/teams. Route={route}"
)
# check if token is from litellm-ui, litellm ui makes keys to allow users to login with sso. These keys can only be used for LiteLLM UI functions
@ -2406,27 +2410,44 @@ class ProxyConfig:
router = litellm.Router(**router_params, semaphore=semaphore) # type:ignore
return router, model_list, general_settings
async def _delete_deployment(self, db_models: list):
def get_model_info_with_id(self, model) -> RouterModelInfo:
"""
Common logic across add + delete router models
Parameters:
- model
Return model info w/ id
"""
if model.model_info is not None and isinstance(model.model_info, dict):
if "id" not in model.model_info:
model.model_info["id"] = model.model_id
_model_info = RouterModelInfo(**model.model_info)
else:
_model_info = RouterModelInfo(id=model.model_id)
return _model_info
async def _delete_deployment(self, db_models: list) -> int:
"""
(Helper function of add deployment) -> combined to reduce prisma db calls
- Create all up list of model id's (db + config)
- Compare all up list to router model id's
- Remove any that are missing
Return:
- int - returns number of deleted deployments
"""
global user_config_file_path, llm_router
combined_id_list = []
if llm_router is None:
return
return 0
## DB MODELS ##
for m in db_models:
if m.model_info is not None and isinstance(m.model_info, dict):
if "id" not in m.model_info:
m.model_info["id"] = m.model_id
combined_id_list.append(m.model_id)
else:
combined_id_list.append(m.model_id)
model_info = self.get_model_info_with_id(model=m)
if model_info.id is not None:
combined_id_list.append(model_info.id)
## CONFIG MODELS ##
config = await self.get_config(config_file_path=user_config_file_path)
model_list = config.get("model_list", None)
@ -2436,21 +2457,73 @@ class ProxyConfig:
for k, v in model["litellm_params"].items():
if isinstance(v, str) and v.startswith("os.environ/"):
model["litellm_params"][k] = litellm.get_secret(v)
litellm_model_name = model["litellm_params"]["model"]
litellm_model_api_base = model["litellm_params"].get("api_base", None)
model_id = litellm.Router()._generate_model_id(
model_id = llm_router._generate_model_id(
model_group=model["model_name"],
litellm_params=model["litellm_params"],
)
combined_id_list.append(model_id) # ADD CONFIG MODEL TO COMBINED LIST
router_model_ids = llm_router.get_model_ids()
# Check for model IDs in llm_router not present in combined_id_list and delete them
deleted_deployments = 0
for model_id in router_model_ids:
if model_id not in combined_id_list:
llm_router.delete_deployment(id=model_id)
is_deleted = llm_router.delete_deployment(id=model_id)
if is_deleted is not None:
deleted_deployments += 1
return deleted_deployments
def _add_deployment(self, db_models: list) -> int:
"""
Iterate through db models
for any not in router - add them.
Return - number of deployments added
"""
import base64
if master_key is None or not isinstance(master_key, str):
raise Exception(
f"Master key is not initialized or formatted. master_key={master_key}"
)
if llm_router is None:
return 0
added_models = 0
## ADD MODEL LOGIC
for m in db_models:
_litellm_params = m.litellm_params
if isinstance(_litellm_params, dict):
# decrypt values
for k, v in _litellm_params.items():
if isinstance(v, str):
# decode base64
decoded_b64 = base64.b64decode(v)
# decrypt value
_litellm_params[k] = decrypt_value(
value=decoded_b64, master_key=master_key
)
_litellm_params = LiteLLM_Params(**_litellm_params)
else:
verbose_proxy_logger.error(
f"Invalid model added to proxy db. Invalid litellm params. litellm_params={_litellm_params}"
)
continue # skip to next model
_model_info = self.get_model_info_with_id(model=m)
added = llm_router.add_deployment(
deployment=Deployment(
model_name=m.model_name,
litellm_params=_litellm_params,
model_info=_model_info,
)
)
if added is not None:
added_models += 1
return added_models
async def add_deployment(
self,
@ -2498,13 +2571,7 @@ class ProxyConfig:
)
continue # skip to next model
if m.model_info is not None and isinstance(m.model_info, dict):
if "id" not in m.model_info:
m.model_info["id"] = m.model_id
_model_info = RouterModelInfo(**m.model_info)
else:
_model_info = RouterModelInfo(id=m.model_id)
_model_info = self.get_model_info_with_id(model=m)
_model_list.append(
Deployment(
model_name=m.model_name,
@ -2522,39 +2589,7 @@ class ProxyConfig:
await self._delete_deployment(db_models=new_models)
## ADD MODEL LOGIC
for m in new_models:
_litellm_params = m.litellm_params
if isinstance(_litellm_params, dict):
# decrypt values
for k, v in _litellm_params.items():
if isinstance(v, str):
# decode base64
decoded_b64 = base64.b64decode(v)
# decrypt value
_litellm_params[k] = decrypt_value(
value=decoded_b64, master_key=master_key
)
_litellm_params = LiteLLM_Params(**_litellm_params)
else:
verbose_proxy_logger.error(
f"Invalid model added to proxy db. Invalid litellm params. litellm_params={_litellm_params}"
)
continue # skip to next model
if m.model_info is not None and isinstance(m.model_info, dict):
if "id" not in m.model_info:
m.model_info["id"] = m.model_id
_model_info = RouterModelInfo(**m.model_info)
else:
_model_info = RouterModelInfo(id=m.model_id)
llm_router.add_deployment(
deployment=Deployment(
model_name=m.model_name,
litellm_params=_litellm_params,
model_info=_model_info,
)
)
self._add_deployment(db_models=new_models)
llm_model_list = llm_router.get_model_list()
@ -2585,6 +2620,9 @@ class ProxyConfig:
general_settings["alerting"] = _general_settings["alerting"]
proxy_logging_obj.alerting = general_settings["alerting"]
# router settings
_router_settings = config_data.get("router_settings", {})
llm_router.update_settings(**_router_settings)
except Exception as e:
verbose_proxy_logger.error(
"{}\nTraceback:{}".format(str(e), traceback.format_exc())
@ -2727,10 +2765,12 @@ async def generate_key_helper_fn(
"model_max_budget": model_max_budget_json,
"budget_id": budget_id,
}
if (
general_settings.get("allow_user_auth", False) == True
or _has_user_setup_sso() == True
):
litellm.get_secret("DISABLE_KEY_NAME", False) == True
): # allow user to disable storing abbreviated key name (shown in UI, to help figure out which key spent how much)
pass
else:
key_data["key_name"] = f"sk-...{token[-4:]}"
saved_token = copy.deepcopy(key_data)
if isinstance(saved_token["aliases"], str):
@ -3216,7 +3256,7 @@ async def startup_event():
scheduler.add_job(
proxy_config.add_deployment,
"interval",
seconds=30,
seconds=10,
args=[prisma_client, proxy_logging_obj],
)
@ -8188,6 +8228,16 @@ async def update_config(config_info: ConfigYAML):
"success_callback"
] = combined_success_callback
# router settings
if config_info.router_settings is not None:
config.setdefault("router_settings", {})
_updated_router_settings = config_info.router_settings
config["router_settings"] = {
**config["router_settings"],
**_updated_router_settings,
}
# Save the updated config
await proxy_config.save_config(new_config=config)
@ -8305,7 +8355,12 @@ async def get_config():
_data_to_return.append({"name": "slack", "variables": _slack_env_vars})
return {"status": "success", "data": _data_to_return}
_router_settings = llm_router.get_settings()
return {
"status": "success",
"data": _data_to_return,
"router_settings": _router_settings,
}
except Exception as e:
traceback.print_exc()
if isinstance(e, HTTPException):

View file

@ -193,8 +193,8 @@ class ProxyLogging:
# Convert the timedelta to float (in seconds)
time_difference_float = time_difference.total_seconds()
litellm_params = kwargs.get("litellm_params", {})
api_base = litellm_params.get("api_base", "")
model = kwargs.get("model", "")
api_base = litellm.get_api_base(model=model, optional_params=litellm_params)
messages = kwargs.get("messages", "")
return time_difference_float, model, api_base, messages
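A small sketch of what `litellm.get_api_base` does with these arguments: it builds a `LiteLLM_Params` object from the call's `litellm_params` and resolves the api_base from it. The endpoint value is a placeholder:

```python
# Placeholder Azure endpoint.
import litellm

api_base = litellm.get_api_base(
    model="azure/chatgpt-v-2",
    optional_params={"api_base": "https://my-endpoint.openai.azure.com"},
)
print(api_base)  # expected: the configured Azure endpoint
```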

View file

@ -299,6 +299,7 @@ class Router:
verbose_router_logger.info(
f"Intialized router with Routing strategy: {self.routing_strategy}\n\nRouting fallbacks: {self.fallbacks}\n\nRouting context window fallbacks: {self.context_window_fallbacks}"
)
self.routing_strategy_args = routing_strategy_args
def print_deployment(self, deployment: dict):
"""
@ -2271,11 +2272,19 @@ class Router:
return deployment
def add_deployment(self, deployment: Deployment):
def add_deployment(self, deployment: Deployment) -> Optional[Deployment]:
"""
Parameters:
- deployment: Deployment - the deployment to be added to the Router
Returns:
- The added deployment
- OR None (if deployment already exists)
"""
# check if deployment already exists
if deployment.model_info.id in self.get_model_ids():
return
return None
# add to model list
_deployment = deployment.to_json(exclude_none=True)
@ -2286,7 +2295,7 @@ class Router:
# add to model names
self.model_names.append(deployment.model_name)
return
return deployment
def delete_deployment(self, id: str) -> Optional[Deployment]:
"""
@ -2334,6 +2343,48 @@ class Router:
return self.model_list
return None
def get_settings(self):
"""
Get router settings method; returns a dictionary of the settings and their values.
For example, the set values for routing_strategy_args, routing_strategy, allowed_fails, cooldown_time, num_retries, timeout, max_retries, retry_after
"""
_all_vars = vars(self)
_settings_to_return = {}
vars_to_include = [
"routing_strategy_args",
"routing_strategy",
"allowed_fails",
"cooldown_time",
"num_retries",
"timeout",
"max_retries",
"retry_after",
]
for var in vars_to_include:
if var in _all_vars:
_settings_to_return[var] = _all_vars[var]
return _settings_to_return
def update_settings(self, **kwargs):
# only the following settings are allowed to be configured
_allowed_settings = [
"routing_strategy_args",
"routing_strategy",
"allowed_fails",
"cooldown_time",
"num_retries",
"timeout",
"max_retries",
"retry_after",
]
for var in kwargs:
if var in _allowed_settings:
setattr(self, var, kwargs[var])
else:
verbose_router_logger.debug("Setting {} is not allowed".format(var))
def _get_client(self, deployment, kwargs, client_type=None):
"""
Returns the appropriate client based on the given deployment, kwargs, and client_type.
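A round-trip sketch of the `get_settings` / `update_settings` pair added above; the model list and key are placeholders:

```python
# Placeholder model list and key.
from litellm import Router

router = Router(
    model_list=[
        {
            "model_name": "gpt-3.5-turbo",
            "litellm_params": {"model": "gpt-3.5-turbo", "api_key": "sk-placeholder"},
        }
    ],
    num_retries=3,
)

print(router.get_settings()["num_retries"])  # 3
router.update_settings(num_retries=5, timeout=30)
print(router.get_settings()["num_retries"])  # 5
```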

View file

@ -553,7 +553,12 @@ def test_gemini_pro_function_calling():
}
]
messages = [{"role": "user", "content": "What's the weather like in Boston today?"}]
messages = [
{
"role": "user",
"content": "What's the weather like in Boston today in fahrenheit?",
}
]
completion = litellm.completion(
model="gemini-pro", messages=messages, tools=tools, tool_choice="auto"
)
@ -586,7 +591,10 @@ def test_gemini_pro_function_calling():
}
]
messages = [
{"role": "user", "content": "What's the weather like in Boston today?"}
{
"role": "user",
"content": "What's the weather like in Boston today in fahrenheit?",
}
]
completion = litellm.completion(
model="gemini-pro", messages=messages, tools=tools, tool_choice="auto"
@ -594,6 +602,8 @@ def test_gemini_pro_function_calling():
print(f"completion: {completion}")
assert completion.choices[0].message.content is None
assert len(completion.choices[0].message.tool_calls) == 1
except litellm.APIError as e:
pass
except litellm.RateLimitError as e:
pass
except Exception as e:
@ -629,7 +639,12 @@ def test_gemini_pro_function_calling_streaming():
},
}
]
messages = [{"role": "user", "content": "What's the weather like in Boston today?"}]
messages = [
{
"role": "user",
"content": "What's the weather like in Boston today in fahrenheit?",
}
]
try:
completion = litellm.completion(
model="gemini-pro",
@ -643,6 +658,8 @@ def test_gemini_pro_function_calling_streaming():
# assert len(completion.choices[0].message.tool_calls) == 1
for chunk in completion:
print(f"chunk: {chunk}")
except litellm.APIError as e:
pass
except litellm.RateLimitError as e:
pass
@ -675,7 +692,10 @@ async def test_gemini_pro_async_function_calling():
}
]
messages = [
{"role": "user", "content": "What's the weather like in Boston today?"}
{
"role": "user",
"content": "What's the weather like in Boston today in fahrenheit?",
}
]
completion = await litellm.acompletion(
model="gemini-pro", messages=messages, tools=tools, tool_choice="auto"
@ -683,6 +703,8 @@ async def test_gemini_pro_async_function_calling():
print(f"completion: {completion}")
assert completion.choices[0].message.content is None
assert len(completion.choices[0].message.tool_calls) == 1
except litellm.APIError as e:
pass
except litellm.RateLimitError as e:
pass
except Exception as e:

View file

@ -252,7 +252,10 @@ def test_bedrock_claude_3_tool_calling():
}
]
messages = [
{"role": "user", "content": "What's the weather like in Boston today?"}
{
"role": "user",
"content": "What's the weather like in Boston today in fahrenheit?",
}
]
response: ModelResponse = completion(
model="bedrock/anthropic.claude-3-sonnet-20240229-v1:0",

View file

@ -167,7 +167,12 @@ def test_completion_claude_3_function_call():
},
}
]
messages = [{"role": "user", "content": "What's the weather like in Boston today?"}]
messages = [
{
"role": "user",
"content": "What's the weather like in Boston today in Fahrenheit?",
}
]
try:
# test without max tokens
response = completion(
@ -376,7 +381,12 @@ def test_completion_claude_3_function_plus_image():
]
tool_choice = {"type": "function", "function": {"name": "get_current_weather"}}
messages = [{"role": "user", "content": "What's the weather like in Boston today?"}]
messages = [
{
"role": "user",
"content": "What's the weather like in Boston today in Fahrenheit?",
}
]
response = completion(
model="claude-3-sonnet-20240229",

View file

@ -0,0 +1,168 @@
# What is this?
## Unit tests for ProxyConfig class
import sys, os
import traceback
from dotenv import load_dotenv
load_dotenv()
import os, io
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import pytest, litellm
from pydantic import BaseModel
from litellm.proxy.proxy_server import ProxyConfig
from litellm.proxy.utils import encrypt_value
from litellm.types.router import Deployment, LiteLLM_Params, ModelInfo
class DBModel(BaseModel):
model_id: str
model_name: str
model_info: dict
litellm_params: dict
@pytest.mark.asyncio
async def test_delete_deployment():
"""
- Ensure the global llm router is not being reset
- Ensure invalid model is deleted
- Check that when model_id != model_info["id"], the model_info["id"] is used
"""
import base64
litellm_params = LiteLLM_Params(
model="azure/chatgpt-v-2",
api_key=os.getenv("AZURE_API_KEY"),
api_base=os.getenv("AZURE_API_BASE"),
api_version=os.getenv("AZURE_API_VERSION"),
)
encrypted_litellm_params = litellm_params.dict(exclude_none=True)
master_key = "sk-1234"
setattr(litellm.proxy.proxy_server, "master_key", master_key)
for k, v in encrypted_litellm_params.items():
if isinstance(v, str):
encrypted_value = encrypt_value(v, master_key)
encrypted_litellm_params[k] = base64.b64encode(encrypted_value).decode(
"utf-8"
)
deployment = Deployment(model_name="gpt-3.5-turbo", litellm_params=litellm_params)
deployment_2 = Deployment(
model_name="gpt-3.5-turbo-2", litellm_params=litellm_params
)
llm_router = litellm.Router(
model_list=[
deployment.to_json(exclude_none=True),
deployment_2.to_json(exclude_none=True),
]
)
setattr(litellm.proxy.proxy_server, "llm_router", llm_router)
print(f"llm_router: {llm_router}")
pc = ProxyConfig()
db_model = DBModel(
model_id=deployment.model_info.id,
model_name="gpt-3.5-turbo",
litellm_params=encrypted_litellm_params,
model_info={"id": deployment.model_info.id},
)
db_models = [db_model]
deleted_deployments = await pc._delete_deployment(db_models=db_models)
assert deleted_deployments == 1
assert len(llm_router.model_list) == 1
"""
Scenario 2 - if model id != model_info["id"]
"""
llm_router = litellm.Router(
model_list=[
deployment.to_json(exclude_none=True),
deployment_2.to_json(exclude_none=True),
]
)
print(f"llm_router: {llm_router}")
setattr(litellm.proxy.proxy_server, "llm_router", llm_router)
pc = ProxyConfig()
db_model = DBModel(
model_id="12340523",
model_name="gpt-3.5-turbo",
litellm_params=encrypted_litellm_params,
model_info={"id": deployment.model_info.id},
)
db_models = [db_model]
deleted_deployments = await pc._delete_deployment(db_models=db_models)
assert deleted_deployments == 1
assert len(llm_router.model_list) == 1
@pytest.mark.asyncio
async def test_add_existing_deployment():
"""
- Only add new models
- don't re-add existing models
"""
import base64
litellm_params = LiteLLM_Params(
model="gpt-3.5-turbo",
api_key=os.getenv("AZURE_API_KEY"),
api_base=os.getenv("AZURE_API_BASE"),
api_version=os.getenv("AZURE_API_VERSION"),
)
deployment = Deployment(model_name="gpt-3.5-turbo", litellm_params=litellm_params)
deployment_2 = Deployment(
model_name="gpt-3.5-turbo-2", litellm_params=litellm_params
)
llm_router = litellm.Router(
model_list=[
deployment.to_json(exclude_none=True),
deployment_2.to_json(exclude_none=True),
]
)
print(f"llm_router: {llm_router}")
master_key = "sk-1234"
setattr(litellm.proxy.proxy_server, "llm_router", llm_router)
setattr(litellm.proxy.proxy_server, "master_key", master_key)
pc = ProxyConfig()
encrypted_litellm_params = litellm_params.dict(exclude_none=True)
for k, v in encrypted_litellm_params.items():
if isinstance(v, str):
encrypted_value = encrypt_value(v, master_key)
encrypted_litellm_params[k] = base64.b64encode(encrypted_value).decode(
"utf-8"
)
db_model = DBModel(
model_id=deployment.model_info.id,
model_name="gpt-3.5-turbo",
litellm_params=encrypted_litellm_params,
model_info={"id": deployment.model_info.id},
)
db_models = [db_model]
num_added = pc._add_deployment(db_models=db_models)
assert num_added == 0
@pytest.mark.asyncio
async def test_add_and_delete_deployments():
pass

View file

@ -536,6 +536,55 @@ def test_completion_openai_api_key_exception():
# tesy_async_acompletion()
def test_router_completion_vertex_exception():
try:
import litellm
litellm.set_verbose = True
router = litellm.Router(
model_list=[
{
"model_name": "vertex-gemini-pro",
"litellm_params": {
"model": "vertex_ai/gemini-pro",
"api_key": "good-morning",
},
},
]
)
response = router.completion(
model="vertex-gemini-pro",
messages=[{"role": "user", "content": "hello"}],
vertex_project="bad-project",
)
pytest.fail("Request should have failed - bad api key")
except Exception as e:
print("exception: ", e)
assert "model: vertex_ai/gemini-pro" in str(e)
assert "model_group: vertex-gemini-pro" in str(e)
assert "deployment: vertex_ai/gemini-pro" in str(e)
def test_litellm_completion_vertex_exception():
try:
import litellm
litellm.set_verbose = True
response = completion(
model="vertex_ai/gemini-pro",
api_key="good-morning",
messages=[{"role": "user", "content": "hello"}],
vertex_project="bad-project",
)
pytest.fail("Request should have failed - bad api key")
except Exception as e:
print("exception: ", e)
assert "model: vertex_ai/gemini-pro" in str(e)
assert "model_group" not in str(e)
assert "deployment" not in str(e)
# # test_invalid_request_error(model="command-nightly")
# # Test 3: Rate Limit Errors
# def test_model_call(model):

View file

@ -1587,11 +1587,12 @@ async def test_key_name_null(prisma_client):
"""
setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
setattr(litellm.proxy.proxy_server, "general_settings", {"allow_user_auth": False})
os.environ["DISABLE_KEY_NAME"] = "True"
await litellm.proxy.proxy_server.prisma_client.connect()
try:
request = GenerateKeyRequest()
key = await generate_key_fn(request)
print("generated key=", key)
generated_key = key.key
result = await info_key_fn(key=generated_key)
print("result from info_key_fn", result)
@ -1599,6 +1600,8 @@ async def test_key_name_null(prisma_client):
except Exception as e:
print("Got Exception", e)
pytest.fail(f"Got exception {e}")
finally:
os.environ["DISABLE_KEY_NAME"] = "False"
@pytest.mark.asyncio()

View file

@ -0,0 +1,85 @@
#### What this tests ####
# This tests utils used by llm router -> like llm_router.get_settings()
import sys, os, time
import traceback, asyncio
import pytest
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import litellm
from litellm import Router
from litellm.router import Deployment, LiteLLM_Params, ModelInfo
from concurrent.futures import ThreadPoolExecutor
from collections import defaultdict
from dotenv import load_dotenv
load_dotenv()
def test_returned_settings():
# this tests if the router correctly returns the settings it was initialized with
litellm.set_verbose = True
import openai
try:
print("testing if router raises an exception")
model_list = [
{
"model_name": "gpt-3.5-turbo", # openai model name
"litellm_params": { # params for litellm completion/embedding call
"model": "azure/chatgpt-v-2",
"api_key": "bad-key",
"api_version": os.getenv("AZURE_API_VERSION"),
"api_base": os.getenv("AZURE_API_BASE"),
},
"tpm": 240000,
"rpm": 1800,
},
{
"model_name": "gpt-3.5-turbo", # openai model name
"litellm_params": { #
"model": "gpt-3.5-turbo",
"api_key": "bad-key",
},
"tpm": 240000,
"rpm": 1800,
},
]
router = Router(
model_list=model_list,
redis_host=os.getenv("REDIS_HOST"),
redis_password=os.getenv("REDIS_PASSWORD"),
redis_port=int(os.getenv("REDIS_PORT")),
routing_strategy="latency-based-routing",
routing_strategy_args={"ttl": 10},
set_verbose=False,
num_retries=3,
retry_after=5,
allowed_fails=1,
cooldown_time=30,
) # type: ignore
settings = router.get_settings()
print(settings)
"""
routing_strategy: "simple-shuffle"
routing_strategy_args: {"ttl": 10} # Average the last 10 calls to compute avg latency per model
allowed_fails: 1
num_retries: 3
retry_after: 5 # seconds to wait before retrying a failed request
cooldown_time: 30 # seconds to cooldown a deployment after failure
"""
assert settings["routing_strategy"] == "latency-based-routing"
assert settings["routing_strategy_args"]["ttl"] == 10
assert settings["allowed_fails"] == 1
assert settings["num_retries"] == 3
assert settings["retry_after"] == 5
assert settings["cooldown_time"] == 30
except:
print(traceback.format_exc())
pytest.fail("An error occurred - " + traceback.format_exc())

View file

@ -2252,7 +2252,12 @@ def test_completion_claude_3_function_call_with_streaming():
},
}
]
messages = [{"role": "user", "content": "What's the weather like in Boston today?"}]
messages = [
{
"role": "user",
"content": "What's the weather like in Boston today in fahrenheit?",
}
]
try:
# test without max tokens
response = completion(
@ -2306,7 +2311,12 @@ async def test_acompletion_claude_3_function_call_with_streaming():
},
}
]
messages = [{"role": "user", "content": "What's the weather like in Boston today?"}]
messages = [
{
"role": "user",
"content": "What's the weather like in Boston today in fahrenheit?",
}
]
try:
# test without max tokens
response = await acompletion(

View file

@ -173,6 +173,22 @@ def test_trimming_should_not_change_original_messages():
assert messages == messages_copy
@pytest.mark.parametrize("model", ["gpt-4-0125-preview", "claude-3-opus-20240229"])
def test_trimming_with_model_cost_max_input_tokens(model):
messages = [
{"role": "system", "content": "This is a normal system message"},
{
"role": "user",
"content": "This is a sentence" * 100000,
},
]
trimmed_messages = trim_messages(messages, model=model)
assert (
get_token_count(trimmed_messages, model=model)
< litellm.model_cost[model]["max_input_tokens"]
)
def test_get_valid_models():
old_environ = os.environ
os.environ = {"OPENAI_API_KEY": "temp"} # mock set only openai key in environ

View file

@ -101,12 +101,39 @@ class LiteLLM_Params(BaseModel):
aws_secret_access_key: Optional[str] = None
aws_region_name: Optional[str] = None
def __init__(self, max_retries: Optional[Union[int, str]] = None, **params):
def __init__(
self,
model: str,
max_retries: Optional[Union[int, str]] = None,
tpm: Optional[int] = None,
rpm: Optional[int] = None,
api_key: Optional[str] = None,
api_base: Optional[str] = None,
api_version: Optional[str] = None,
timeout: Optional[Union[float, str]] = None, # if str, pass in as os.environ/
stream_timeout: Optional[Union[float, str]] = (
None # timeout when making stream=True calls, if str, pass in as os.environ/
),
organization: Optional[str] = None, # for openai orgs
## VERTEX AI ##
vertex_project: Optional[str] = None,
vertex_location: Optional[str] = None,
## AWS BEDROCK / SAGEMAKER ##
aws_access_key_id: Optional[str] = None,
aws_secret_access_key: Optional[str] = None,
aws_region_name: Optional[str] = None,
**params
):
args = locals()
args.pop("max_retries", None)
args.pop("self", None)
args.pop("params", None)
args.pop("__class__", None)
if max_retries is None:
max_retries = 2
elif isinstance(max_retries, str):
max_retries = int(max_retries) # cast to int
super().__init__(max_retries=max_retries, **params)
super().__init__(max_retries=max_retries, **args, **params)
class Config:
extra = "allow"
@ -133,12 +160,23 @@ class Deployment(BaseModel):
litellm_params: LiteLLM_Params
model_info: ModelInfo
def __init__(self, model_info: Optional[Union[ModelInfo, dict]] = None, **params):
def __init__(
self,
model_name: str,
litellm_params: LiteLLM_Params,
model_info: Optional[Union[ModelInfo, dict]] = None,
**params
):
if model_info is None:
model_info = ModelInfo()
elif isinstance(model_info, dict):
model_info = ModelInfo(**model_info)
super().__init__(model_info=model_info, **params)
super().__init__(
model_info=model_info,
model_name=model_name,
litellm_params=litellm_params,
**params
)
def to_json(self, **kwargs):
try:
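With the explicit signatures above, deployments can be built with named arguments end to end. A sketch with placeholder credentials:

```python
# Placeholder credentials; model_info is auto-generated when omitted.
from litellm.types.router import Deployment, LiteLLM_Params

deployment = Deployment(
    model_name="gpt-3.5-turbo",
    litellm_params=LiteLLM_Params(
        model="azure/chatgpt-v-2",
        api_key="my-azure-key",
        api_base="https://my-endpoint.openai.azure.com",
        api_version="2023-07-01-preview",
    ),
)
print(deployment.model_info.id)
```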

View file

@ -5436,7 +5436,9 @@ def get_api_base(model: str, optional_params: dict) -> Optional[str]:
get_api_base(model="gemini/gemini-pro")
```
"""
_optional_params = LiteLLM_Params(**optional_params) # convert to pydantic object
_optional_params = LiteLLM_Params(
model=model, **optional_params
) # convert to pydantic object
# get llm provider
try:
model, custom_llm_provider, dynamic_api_key, api_base = get_llm_provider(
@ -7842,6 +7844,19 @@ def exception_type(
response=original_exception.response,
)
elif custom_llm_provider == "vertex_ai":
if completion_kwargs is not None:
# add model, deployment and model_group to the exception message
_model = completion_kwargs.get("model")
_kwargs = completion_kwargs.get("kwargs", {}) or {}
_metadata = _kwargs.get("metadata", {}) or {}
_model_group = _metadata.get("model_group")
_deployment = _metadata.get("deployment")
error_str += f"\nmodel: {_model}\n"
if _model_group is not None:
error_str += f"model_group: {_model_group}\n"
if _deployment is not None:
error_str += f"deployment: {_deployment}\n"
if (
"Vertex AI API has not been used in project" in error_str
or "Unable to find your project" in error_str
@ -10609,16 +10624,16 @@ def trim_messages(
messages = copy.deepcopy(messages)
try:
print_verbose(f"trimming messages")
if max_tokens == None:
if max_tokens is None:
# Check if model is valid
if model in litellm.model_cost:
max_tokens_for_model = litellm.model_cost[model]["max_tokens"]
max_tokens_for_model = litellm.model_cost[model].get("max_input_tokens", litellm.model_cost[model]["max_tokens"])
max_tokens = int(max_tokens_for_model * trim_ratio)
else:
# if user did not specify max tokens
# if user did not specify max (input) tokens
# or passed an llm litellm does not know
# do nothing, just return messages
return
return messages
system_message = ""
for message in messages:
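A sketch of the corrected behavior, assuming `trim_messages` is imported from `litellm.utils` as in the tests: known models are now budgeted against `max_input_tokens`, and unknown models get their messages back instead of `None`:

```python
# Assumes litellm.utils.trim_messages, as exercised by the tests above.
from litellm.utils import trim_messages

messages = [
    {"role": "system", "content": "This is a normal system message"},
    {"role": "user", "content": "This is a sentence " * 10000},
]

trimmed = trim_messages(messages, model="gpt-4-0125-preview")
assert trimmed is not None  # budgeted against max_input_tokens now

unknown = trim_messages(messages, model="some-unknown-model")
assert unknown == messages  # previously this path returned None
```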

View file

@ -75,7 +75,8 @@
"litellm_provider": "openai",
"mode": "chat",
"supports_function_calling": true,
"supports_parallel_function_calling": true
"supports_parallel_function_calling": true,
"supports_vision": true
},
"gpt-4-turbo-2024-04-09": {
"max_tokens": 4096,
@ -86,7 +87,8 @@
"litellm_provider": "openai",
"mode": "chat",
"supports_function_calling": true,
"supports_parallel_function_calling": true
"supports_parallel_function_calling": true,
"supports_vision": true
},
"gpt-4-1106-preview": {
"max_tokens": 4096,
@ -1268,8 +1270,21 @@
"litellm_provider": "gemini",
"mode": "chat",
"supports_function_calling": true,
"supports_vision": true,
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
},
"gemini/gemini-1.5-pro-latest": {
"max_tokens": 8192,
"max_input_tokens": 1048576,
"max_output_tokens": 8192,
"input_cost_per_token": 0,
"output_cost_per_token": 0,
"litellm_provider": "gemini",
"mode": "chat",
"supports_function_calling": true,
"supports_vision": true,
"source": "https://ai.google.dev/models/gemini"
},
"gemini/gemini-pro-vision": {
"max_tokens": 2048,
"max_input_tokens": 30720,

View file

@ -1,6 +1,6 @@
[tool.poetry]
name = "litellm"
version = "1.35.9"
version = "1.35.10"
description = "Library to easily interface with LLM API providers"
authors = ["BerriAI"]
license = "MIT"
@ -80,7 +80,7 @@ requires = ["poetry-core", "wheel"]
build-backend = "poetry.core.masonry.api"
[tool.commitizen]
version = "1.35.9"
version = "1.35.10"
version_files = [
"pyproject.toml:^version"
]

View file

@ -44,9 +44,13 @@ async def generate_key(
models=["azure-models", "gpt-4", "dall-e-3"],
max_parallel_requests: Optional[int] = None,
user_id: Optional[str] = None,
calling_key="sk-1234",
):
url = "http://0.0.0.0:4000/key/generate"
headers = {"Authorization": "Bearer sk-1234", "Content-Type": "application/json"}
headers = {
"Authorization": f"Bearer {calling_key}",
"Content-Type": "application/json",
}
data = {
"models": models,
"aliases": {"mistral-7b": "gpt-3.5-turbo"},
@ -80,6 +84,35 @@ async def test_key_gen():
await asyncio.gather(*tasks)
@pytest.mark.asyncio
async def test_key_gen_bad_key():
"""
Test that a non-admin key cannot be used to create keys, even with UI/SSO login set up
"""
async with aiohttp.ClientSession() as session:
## LOGIN TO UI
form_data = {"username": "admin", "password": "sk-1234"}
async with session.post(
"http://0.0.0.0:4000/login", data=form_data
) as response:
assert (
response.status == 200
) # expect the admin login to succeed
text = await response.text()
print(text)
## create user key with admin key -> expect to work
key_data = await generate_key(session=session, i=0, user_id="user-1234")
key = key_data["key"]
## create new key with user key -> expect to fail
try:
await generate_key(
session=session, i=0, user_id="user-1234", calling_key=key
)
pytest.fail("Expected to fail")
except Exception as e:
pass
async def update_key(session, get_key):
"""
Make sure only models user has access to are returned

View file

@ -280,6 +280,7 @@ async def test_add_model_run_health():
async with aiohttp.ClientSession() as session:
key_gen = await generate_key(session=session)
key = key_gen["key"]
master_key = "sk-1234"
model_id = str(uuid.uuid4())
model_name = f"azure-model-health-check-{model_id}"
print("adding model", model_name)
@ -295,7 +296,7 @@ async def test_add_model_run_health():
print("calling /health?model=", model_name)
_health_info = await get_model_health(
session=session, key=key, model_name=model_name
session=session, key=master_key, model_name=model_name
)
_healthy_endpoint = _health_info["healthy_endpoints"][0]

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View file

@ -0,0 +1 @@
!function(){"use strict";var e,t,n,r,o,u,i,c,f,a={},l={};function d(e){var t=l[e];if(void 0!==t)return t.exports;var n=l[e]={id:e,loaded:!1,exports:{}},r=!0;try{a[e](n,n.exports,d),r=!1}finally{r&&delete l[e]}return n.loaded=!0,n.exports}d.m=a,e=[],d.O=function(t,n,r,o){if(n){o=o||0;for(var u=e.length;u>0&&e[u-1][2]>o;u--)e[u]=e[u-1];e[u]=[n,r,o];return}for(var i=1/0,u=0;u<e.length;u++){for(var n=e[u][0],r=e[u][1],o=e[u][2],c=!0,f=0;f<n.length;f++)i>=o&&Object.keys(d.O).every(function(e){return d.O[e](n[f])})?n.splice(f--,1):(c=!1,o<i&&(i=o));if(c){e.splice(u--,1);var a=r();void 0!==a&&(t=a)}}return t},d.n=function(e){var t=e&&e.__esModule?function(){return e.default}:function(){return e};return d.d(t,{a:t}),t},n=Object.getPrototypeOf?function(e){return Object.getPrototypeOf(e)}:function(e){return e.__proto__},d.t=function(e,r){if(1&r&&(e=this(e)),8&r||"object"==typeof e&&e&&(4&r&&e.__esModule||16&r&&"function"==typeof e.then))return e;var o=Object.create(null);d.r(o);var u={};t=t||[null,n({}),n([]),n(n)];for(var i=2&r&&e;"object"==typeof i&&!~t.indexOf(i);i=n(i))Object.getOwnPropertyNames(i).forEach(function(t){u[t]=function(){return e[t]}});return u.default=function(){return e},d.d(o,u),o},d.d=function(e,t){for(var n in t)d.o(t,n)&&!d.o(e,n)&&Object.defineProperty(e,n,{enumerable:!0,get:t[n]})},d.f={},d.e=function(e){return Promise.all(Object.keys(d.f).reduce(function(t,n){return d.f[n](e,t),t},[]))},d.u=function(e){},d.miniCssF=function(e){return"static/css/11cfce8bfdf6e8f1.css"},d.g=function(){if("object"==typeof globalThis)return globalThis;try{return this||Function("return this")()}catch(e){if("object"==typeof window)return window}}(),d.o=function(e,t){return Object.prototype.hasOwnProperty.call(e,t)},r={},o="_N_E:",d.l=function(e,t,n,u){if(r[e]){r[e].push(t);return}if(void 0!==n)for(var i,c,f=document.getElementsByTagName("script"),a=0;a<f.length;a++){var l=f[a];if(l.getAttribute("src")==e||l.getAttribute("data-webpack")==o+n){i=l;break}}i||(c=!0,(i=document.createElement("script")).charset="utf-8",i.timeout=120,d.nc&&i.setAttribute("nonce",d.nc),i.setAttribute("data-webpack",o+n),i.src=d.tu(e)),r[e]=[t];var s=function(t,n){i.onerror=i.onload=null,clearTimeout(p);var o=r[e];if(delete r[e],i.parentNode&&i.parentNode.removeChild(i),o&&o.forEach(function(e){return e(n)}),t)return t(n)},p=setTimeout(s.bind(null,void 0,{type:"timeout",target:i}),12e4);i.onerror=s.bind(null,i.onerror),i.onload=s.bind(null,i.onload),c&&document.head.appendChild(i)},d.r=function(e){"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})},d.nmd=function(e){return e.paths=[],e.children||(e.children=[]),e},d.tt=function(){return void 0===u&&(u={createScriptURL:function(e){return e}},"undefined"!=typeof trustedTypes&&trustedTypes.createPolicy&&(u=trustedTypes.createPolicy("nextjs#bundler",u))),u},d.tu=function(e){return d.tt().createScriptURL(e)},d.p="/ui/_next/",i={272:0},d.f.j=function(e,t){var n=d.o(i,e)?i[e]:void 0;if(0!==n){if(n)t.push(n[2]);else if(272!=e){var r=new Promise(function(t,r){n=i[e]=[t,r]});t.push(n[2]=r);var o=d.p+d.u(e),u=Error();d.l(o,function(t){if(d.o(i,e)&&(0!==(n=i[e])&&(i[e]=void 0),n)){var r=t&&("load"===t.type?"missing":t.type),o=t&&t.target&&t.target.src;u.message="Loading chunk "+e+" failed.\n("+r+": "+o+")",u.name="ChunkLoadError",u.type=r,u.request=o,n[1](u)}},"chunk-"+e,e)}else i[e]=0}},d.O.j=function(e){return 0===i[e]},c=function(e,t){var 
n,r,o=t[0],u=t[1],c=t[2],f=0;if(o.some(function(e){return 0!==i[e]})){for(n in u)d.o(u,n)&&(d.m[n]=u[n]);if(c)var a=c(d)}for(e&&e(t);f<o.length;f++)r=o[f],d.o(i,r)&&i[r]&&i[r][0](),i[r]=0;return d.O(a)},(f=self.webpackChunk_N_E=self.webpackChunk_N_E||[]).forEach(c.bind(null,0)),f.push=c.bind(null,f.push.bind(f))}();

File diff suppressed because one or more lines are too long

View file

@ -1 +1 @@
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-59f93936973f5f5a.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-bcf69420342937de.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-442a9c01c3fd20f9.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-096338c8e1915716.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-59f93936973f5f5a.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/11cfce8bfdf6e8f1.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[8251,[\"289\",\"static/chunks/289-04be6cb9636840d2.js\",\"931\",\"static/chunks/app/page-15d0c6c10d700825.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/11cfce8bfdf6e8f1.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"fcTpSzljtxsSagYnqnMB2\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_c23dc8\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 
0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-59f93936973f5f5a.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-bcf69420342937de.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-442a9c01c3fd20f9.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-096338c8e1915716.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-59f93936973f5f5a.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/11cfce8bfdf6e8f1.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[88740,[\"289\",\"static/chunks/289-04be6cb9636840d2.js\",\"931\",\"static/chunks/app/page-2c0827b33aed42d7.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/11cfce8bfdf6e8f1.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"TqpzjpOA_s5IXVzgrYZ-F\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_c23dc8\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 
0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>

View file

@ -1,7 +1,7 @@
2:I[77831,[],""]
3:I[8251,["289","static/chunks/289-04be6cb9636840d2.js","931","static/chunks/app/page-15d0c6c10d700825.js"],""]
3:I[88740,["289","static/chunks/289-04be6cb9636840d2.js","931","static/chunks/app/page-2c0827b33aed42d7.js"],""]
4:I[5613,[],""]
5:I[31778,[],""]
0:["fcTpSzljtxsSagYnqnMB2",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/11cfce8bfdf6e8f1.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
0:["TqpzjpOA_s5IXVzgrYZ-F",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/11cfce8bfdf6e8f1.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null

View file

@ -8,6 +8,7 @@ import ViewUserDashboard from "@/components/view_users";
import Teams from "@/components/teams";
import AdminPanel from "@/components/admins";
import Settings from "@/components/settings";
import GeneralSettings from "@/components/general_settings";
import ChatUI from "@/components/chat_ui";
import Sidebar from "../components/leftnav";
import Usage from "../components/usage";
@ -169,6 +170,12 @@ const CreateKeyPage = () => {
userRole={userRole}
accessToken={accessToken}
/>
) : page == "general-settings" ? (
<GeneralSettings
userID={userID}
userRole={userRole}
accessToken={accessToken}
/>
) : (
<Usage
userID={userID}

View file

@ -0,0 +1,136 @@
import React, { useState, useEffect } from "react";
import {
Card,
Title,
Subtitle,
Table,
TableHead,
TableRow,
Badge,
TableHeaderCell,
TableCell,
TableBody,
Metric,
Text,
Grid,
Button,
TextInput,
Col,
} from "@tremor/react";
import { getCallbacksCall, setCallbacksCall, serviceHealthCheck } from "./networking";
import { Modal, Form, Input, Select, Button as Button2, message } from "antd";
interface GeneralSettingsPageProps {
accessToken: string | null;
userRole: string | null;
userID: string | null;
}
const GeneralSettings: React.FC<GeneralSettingsPageProps> = ({
accessToken,
userRole,
userID,
}) => {
const [routerSettings, setRouterSettings] = useState<{ [key: string]: any }>({});
const [isModalVisible, setIsModalVisible] = useState(false);
const [form] = Form.useForm();
const [selectedCallback, setSelectedCallback] = useState<string | null>(null);
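  // Load the current router settings once credentials are available;
  // re-runs if accessToken / userRole / userID change.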
useEffect(() => {
if (!accessToken || !userRole || !userID) {
return;
}
getCallbacksCall(accessToken, userID, userRole).then((data) => {
console.log("callbacks", data);
let router_settings = data.router_settings;
setRouterSettings(router_settings);
});
}, [accessToken, userRole, userID]);
const handleAddCallback = () => {
console.log("Add callback clicked");
setIsModalVisible(true);
};
const handleCancel = () => {
setIsModalVisible(false);
form.resetFields();
setSelectedCallback(null);
};
const handleSaveChanges = (router_settings: any) => {
if (!accessToken) {
return;
}
console.log("router_settings", router_settings);
const updatedVariables = Object.fromEntries(
Object.entries(router_settings).map(([key, value]) => [key, (document.querySelector(`input[name="${key}"]`) as HTMLInputElement)?.value || value])
);
console.log("updatedVariables", updatedVariables);
const payload = {
router_settings: updatedVariables
};
    try {
      setCallbacksCall(accessToken, payload);
      message.success("router settings updated successfully");
    } catch (error) {
      message.error("Failed to update router settings: " + error, 20);
    }
};
if (!accessToken) {
return null;
}
return (
<div className="w-full mx-4">
<Grid numItems={1} className="gap-2 p-8 w-full mt-2">
<Title>Router Settings</Title>
        <Card>
<Table>
<TableHead>
<TableRow>
<TableHeaderCell>Setting</TableHeaderCell>
<TableHeaderCell>Value</TableHeaderCell>
</TableRow>
</TableHead>
<TableBody>
{Object.entries(routerSettings).map(([param, value]) => (
<TableRow key={param}>
<TableCell>
<Text>{param}</Text>
</TableCell>
<TableCell>
<TextInput
name={param}
defaultValue={
typeof value === 'object' ? JSON.stringify(value, null, 2) : value.toString()
}
/>
</TableCell>
</TableRow>
))}
</TableBody>
</Table>
</Card>
<Col>
<Button className="mt-2" onClick={() => handleSaveChanges(routerSettings)}>
Save Changes
</Button>
</Col>
</Grid>
</div>
);
};
export default GeneralSettings;
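
For context, here is a hedged sketch of the two helpers the component imports from ./networking. The endpoint paths, query parameters, and response shapes below are assumptions for illustration only; the real implementations may differ.

// Hedged sketch of the ./networking helpers used above. Paths and response
// shapes are assumptions, not the proxy's confirmed API.
export const getCallbacksCall = async (
  accessToken: string,
  userID: string,
  userRole: string
): Promise<any> => {
  const url = `/get/config/callbacks?userID=${userID}&userRole=${userRole}`; // assumed path
  const response = await fetch(url, {
    method: "GET",
    headers: { Authorization: `Bearer ${accessToken}` },
  });
  if (!response.ok) {
    throw new Error(await response.text());
  }
  return response.json(); // expected to include { router_settings: {...} }
};

export const setCallbacksCall = async (
  accessToken: string,
  payload: { router_settings: Record<string, any> }
): Promise<any> => {
  const response = await fetch("/config/update", { // assumed path
    method: "POST",
    headers: {
      Authorization: `Bearer ${accessToken}`,
      "Content-Type": "application/json",
    },
    body: JSON.stringify(payload),
  });
  if (!response.ok) {
    throw new Error(await response.text());
  }
  return response.json();
};

One design note: handleSaveChanges reads the edited values back out of the DOM with querySelector; making each TextInput a controlled input (value plus onChange) would be the more idiomatic React approach and would not depend on input name attributes.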

View file

@ -92,6 +92,11 @@ const Sidebar: React.FC<SidebarProps> = ({
Integrations
</Text>
</Menu.Item>
<Menu.Item key="9" onClick={() => setPage("general-settings")}>
<Text>
Settings
</Text>
</Menu.Item>
{userRole == "Admin" ? (
<Menu.Item key="7" onClick={() => setPage("admin-panel")}>
<Text>

View file

@ -94,6 +94,9 @@ const ViewKeyTable: React.FC<ViewKeyTableProps> = ({
const [editModalVisible, setEditModalVisible] = useState(false);
const [selectedToken, setSelectedToken] = useState<ItemData | null>(null);
const [userModels, setUserModels] = useState([]);
const initialKnownTeamIDs: Set<string> = new Set();
const [knownTeamIDs, setKnownTeamIDs] = useState(initialKnownTeamIDs);
useEffect(() => {
const fetchUserModels = async () => {
@ -118,6 +121,16 @@ const ViewKeyTable: React.FC<ViewKeyTableProps> = ({
fetchUserModels();
}, [accessToken, userID, userRole]);
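  // Track the set of team ids the UI knows about, so keys pointing at teams
  // missing from the DB can be grouped with the "no team" view below.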
useEffect(() => {
if (teams) {
const teamIDSet: Set<string> = new Set();
teams.forEach((team: any, index: number) => {
const team_obj: string = team.team_id
teamIDSet.add(team_obj);
});
setKnownTeamIDs(teamIDSet)
}
}, [teams])
const EditKeyModal: React.FC<EditKeyModalProps> = ({ visible, onCancel, token, onSubmit }) => {
const [form] = Form.useForm();
const [keyTeam, setKeyTeam] = useState(selectedTeam);
@ -277,12 +290,12 @@ const ViewKeyTable: React.FC<ViewKeyTableProps> = ({
setEditModalVisible(true);
};
const handleEditCancel = () => {
setEditModalVisible(false);
setSelectedToken(null);
};
const handleEditCancel = () => {
setEditModalVisible(false);
setSelectedToken(null);
};
const handleEditSubmit = async (formValues: Record<string, any>) => {
const handleEditSubmit = async (formValues: Record<string, any>) => {
/**
* Call API to update team with teamId and values
*
@ -311,7 +324,7 @@ const handleEditSubmit = async (formValues: Record<string, any>) => {
setEditModalVisible(false);
setSelectedToken(null);
};
};
@ -419,12 +432,8 @@ const handleEditSubmit = async (formValues: Record<string, any>) => {
<TableHeaderCell>Secret Key</TableHeaderCell>
<TableHeaderCell>Spend (USD)</TableHeaderCell>
<TableHeaderCell>Budget (USD)</TableHeaderCell>
{/* <TableHeaderCell>Spend Report</TableHeaderCell> */}
{/* <TableHeaderCell>Team</TableHeaderCell> */}
{/* <TableHeaderCell>Metadata</TableHeaderCell> */}
<TableHeaderCell>Models</TableHeaderCell>
<TableHeaderCell>TPM / RPM Limits</TableHeaderCell>
{/* <TableHeaderCell>Expires</TableHeaderCell> */}
</TableRow>
</TableHead>
<TableBody>
@ -435,9 +444,17 @@ const handleEditSubmit = async (formValues: Record<string, any>) => {
return null;
}
if (selectedTeam) {
if (item.team_id != selectedTeam.team_id) {
            /**
             * If the selected team id is null -> show keys with no team id, or with team ids that don't exist in the DB
             */
console.log(`item team id: ${item.team_id}, knownTeamIDs.has(item.team_id): ${knownTeamIDs.has(item.team_id)}, selectedTeam id: ${selectedTeam.team_id}`)
if (selectedTeam.team_id == null && item.team_id !== null && !knownTeamIDs.has(item.team_id)) {
// do nothing -> returns a row with this key
}
else if (item.team_id != selectedTeam.team_id) {
return null;
}
console.log(`item team id: ${item.team_id}, is returned`)
}
return (
<TableRow key={item.token}>
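
The branching above decides whether a key row is rendered for the currently selected team. Extracted as a pure predicate (a readability sketch, not the component's actual code), the rule is:

// Sketch: the row-visibility rule implied by the filter above.
// A key is shown when (a) no team filter is active, (b) the "no team" entry is
// selected and the key's team_id is null or unknown to the UI, or (c) the key
// belongs to the selected team.
const isKeyVisible = (
  itemTeamId: string | null,
  selectedTeamId: string | null | undefined,
  knownTeamIDs: Set<string>
): boolean => {
  if (selectedTeamId === undefined) return true; // no team selected at all
  if (selectedTeamId === null) {
    return itemTeamId === null || !knownTeamIDs.has(itemTeamId);
  }
  return itemTeamId === selectedTeamId;
};

This makes the special case explicit: selecting the "no team" entry shows keys whose team_id is null or points at a team the DB no longer knows about.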